Class | WWW::Mechanize::Util |
In: | lib/www/mechanize/util.rb |
Parent: | Object |
CODE_DIC | = | { :JIS => "ISO-2022-JP", :EUC => "EUC-JP", :SJIS => "SHIFT_JIS", :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"} |
# File lib/www/mechanize/util.rb, line 13
#
# Builds a URL query string ("k1=v1&k2=v2") from +parameters+.
#
# +parameters+ is any enumerable yielding key/value pairs (a Hash or an
# Array of two-element Arrays). Keys and values are converted with +to_s+
# and escaped with CGI.escape. Pairs whose key is nil or false are skipped.
#
# +enc+ is accepted for interface compatibility; this method does not use it.
#
# Returns the joined query string ("" when there are no usable pairs).
def build_query_string(parameters, enc=nil)
  parameters.map { |k, v|
    if k
      # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
      # The pair must be the last expression of this branch: it is the
      # value collected by +map+.
      [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=")
    end
  }.compact.join('&')
end
# File lib/www/mechanize/util.rb, line 60
#
# Guesses the character set of +src+ using NKF.guess and returns its name.
# When +src+ is nil or false, the guess is made on "<html></html>" instead.
#
# On Ruby 1.9.0 and later the guess is converted with +to_s+ and upcased.
# On older Rubies the NKF constant whose value equals the guess is found
# and its name is translated through CODE_DIC.
#
# Returns "ISO-8859-1" when neither path yields a value.
def detect_charset(src)
  guessed = NKF.guess(src || "<html></html>")
  charset =
    if RUBY_VERSION >= "1.9.0"
      guessed.to_s.upcase
    else
      const_name = NKF.constants.find { |c| NKF.const_get(c) == guessed }
      CODE_DIC[const_name.intern]
    end
  charset || "ISO-8859-1"
end
# File lib/www/mechanize/util.rb, line 37
#
# Converts +s+ with Iconv, but only when Mechanize.html_parser is
# Nokogiri::HTML; with any other parser +s+ is returned untouched.
#
# The Iconv call passes +code+ (or "" when +code+ is nil) as its first
# argument and "UTF-8" as its second, and the resulting pieces are joined
# into one string.
#
# Returns nil when the parser is Nokogiri::HTML and +s+ is nil or false.
def from_native_charset(s, code)
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return unless s

  converted = Iconv.iconv(code || "", "UTF-8", s)
  converted.join("")
end
# File lib/www/mechanize/util.rb, line 46
#
# Replaces HTML character references in +s+ with the characters they name.
#
# Three forms are recognised:
# * named references (&name;), looked up in the current parser's
#   NamedCharacters table via Mechanize.html_parser
# * decimal numeric references (&#65;)
# * hexadecimal numeric references (&#x41; or &#X41;)
#
# A reference is left in the output unchanged when its name is not in the
# table or when its number cannot be packed as a UTF-8 character.
#
# Returns +s+ itself when it is nil or false, otherwise a new string.
def html_unescape(s)
  return s unless s
  s.gsub(/&(\w+|#[0-9]+|#[xX][0-9a-fA-F]+);/) { |match|
    ref = $1
    number = case ref
             when /\A#[xX]([0-9a-fA-F]+)\z/
               $1.hex
             when /\A#([0-9]+)\z/
               $1.to_i
             else
               Mechanize.html_parser::NamedCharacters[ref]
             end

    if number
      begin
        [number].pack('U')
      rescue RangeError
        # pack('U') rejects values it cannot encode; keep the original text.
        match
      end
    else
      match
    end
  }
end