| 1 | #-- vim:sw=2:et |
|---|
| 2 | #++ |
|---|
| 3 | # |
|---|
| 4 | # :title: rbot HTTP provider |
|---|
| 5 | # |
|---|
| 6 | # Author:: Tom Gilbert <tom@linuxbrit.co.uk> |
|---|
| 7 | # Author:: Giuseppe "Oblomov" Bilotta <giuseppe.bilotta@gmail.com> |
|---|
| 8 | # Author:: Dmitry "jsn" Kim <dmitry point kim at gmail point com> |
|---|
| 9 | |
|---|
| 10 | require 'resolv' |
|---|
| 11 | require 'net/http' |
|---|
| 12 | require 'cgi' |
|---|
| 13 | require 'iconv' |
|---|
| 14 | begin |
|---|
| 15 | require 'net/https' |
|---|
| 16 | rescue LoadError => e |
|---|
| 17 | error "Couldn't load 'net/https': #{e.pretty_inspect}" |
|---|
| 18 | error "Secured HTTP connections will fail" |
|---|
| 19 | end |
|---|
| 20 | |
|---|
| 21 | # To handle Gzipped pages |
|---|
| 22 | require 'stringio' |
|---|
| 23 | require 'zlib' |
|---|
| 24 | |
|---|
| 25 | module ::Net |
|---|
| 26 | class HTTPResponse |
|---|
# Per-response flag: partial_body sets it to true when it streams the body
# in chunks, and CachedObject.maybe_new declines to cache flagged responses.
attr_accessor :no_cache

# Keep the stock #body reachable under the name raw_body. The guard leaves
# an already existing raw_body untouched.
alias_method :raw_body, :body unless method_defined?(:raw_body)
|---|
| 31 | |
|---|
# Collect the candidate character sets for the response body.
#
# _str_:: body text scanned for in-document declarations (defaults to
#         raw_body)
#
# Returns +nil+ when the content type (assumed to be text/html if the header
# is missing) is neither text/* nor an XML/XHTML type. Otherwise returns an
# Array of charset names without duplicates: the 'latin1' fallback first,
# then the charset given in the Content-Type header (if any), then the one
# declared in the document itself, either in an XML processing instruction
# or in a <meta http-equiv="Content-Type"> tag.
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['latin1'] # should be in config

  # a following parameter ("...; charset=x; foo=bar") must not leak its ';'
  # into the charset name
  if ctype.match(/charset=["']?([^\s"';]+)["']?/i)
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  case str
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    meta = $1
    # the tag itself is matched case-insensitively, so the charset
    # parameter inside it has to be as well (CHARSET=utf-8)
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  end
  return charsets.uniq
end
|---|
| 56 | |
|---|
# Convert _str_ to UTF-8 using the charsets reported by body_charset.
#
# The candidates are tried last to first. For each one the conversion is
# attempted on the whole string and then with 1 to 5 trailing bytes dropped;
# the first conversion that does not raise is returned. When body_charset
# returns nil, or every attempt fails, _str_ is returned unchanged.
def body_to_utf(str)
  candidates = self.body_charset(str)
  return str unless candidates

  candidates.reverse.each do |charset|
    # XXX: this one is really ugly, but i don't know how to make it better
    #  -jsn
    (0..5).each do |trim|
      begin
        debug "trying #{charset} / offset #{trim}"
        chunk = str[0..(-1 - trim)]
        return Iconv.iconv('utf-8//ignore', charset, chunk).first
      rescue
        debug "conversion failed for #{charset} / offset #{trim}"
      end
    end
  end

  str
end
|---|
| 77 | |
|---|
# Undo the Content-Encoding of _str_ and return the decoded string.
#
# * no header, or +identity+: _str_ is returned as is;
# * anything containing +gzip+ (gzip, x-gzip, and the non-rfc-compliant
#   gzip;q=\d sent by some servers): gunzipped. If the stream cannot be
#   unpacked completely (e.g. because only part of it was read), as many
#   bytes as can be recovered are returned;
# * +deflate+ / +x-deflate+: inflated as a raw deflate stream first and, if
#   that fails, as a zlib-wrapped stream. Raises the Zlib::Error of the
#   first attempt if both fail;
# * a charset name (iso-8859-N, windows-N, utf-8, utf8): appended to the
#   content-type as "; charset=..." and _str_ is returned untouched;
# * anything else raises a RuntimeError.
#
# Coding names are matched case-insensitively.
def decompress_body(str)
  method = self['content-encoding']
  case method
  when nil, /\A\s*identity\s*\z/i
    return str
  when /gzip/i
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # If we can't unpack the whole stream (e.g. because we're doing a
      # partial read), collect what can be read before the error
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      ret = String.new
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
          # each_byte yields Integers; String#<< would treat them as
          # codepoints on Ruby >= 1.9 and mangle every byte >= 0x80
          ret << byte.chr
        }
      rescue
        # best effort: keep whatever was collected before the failure
      end
      return ret
    end
  when /\A\s*(?:x-)?deflate\s*\z/i
    debug "inflating body"
    begin
      # -MAX_WBITS stops zlib from looking for a zlib header
      return Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(str)
    rescue Zlib::Error => e
      debug "raw inflation failed (#{e}), retrying with a zlib header"
      begin
        return Zlib::Inflate.new.inflate(str)
      rescue Zlib::Error
        # report the original failure, as before
        raise e
      end
    end
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding
    old_hsh = self.to_hash
    # same default as body_charset when the content type is missing
    self['content-type'] = (self['content-type'] || 'text/html') + "; charset=" + method.downcase
    warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{method}"
  end
end
|---|
| 125 | |
|---|
# The body as plugins should see it: raw_body run through decompress_body
# and then through body_to_utf.
def cooked_body
  unpacked = self.decompress_body(self.raw_body)
  self.body_to_utf(unpacked)
end
|---|
| 129 | |
|---|
# Read chunks from the body until we have at least _size_ bytes, yielding
# the partial text (decompressed and converted with body_to_utf) at each
# chunk. Return the partial body, processed the same way.
#
# _size_:: number of raw bytes after which reading stops; 0 or nil reads
#          everything
#
# If the body was already read (@read is set), the complete raw body is
# used and yielded once. Otherwise no_cache is set on the response, since
# what is read here may be incomplete.
def partial_body(size=0, &block)

  partial = String.new

  if @read
    debug "using body() as partial"
    # Must be the raw body: HttpUtil#handle_response aliases #body to
    # cooked_body, and cooked text would be decompressed and converted a
    # second time below.
    partial = self.raw_body
    yield self.body_to_utf(self.decompress_body(partial)) if block_given?
  else
    debug "disabling cache"
    self.no_cache = true
    self.read_body { |chunk|
      partial << chunk
      yield self.body_to_utf(self.decompress_body(partial)) if block_given?
      break if size and size > 0 and partial.length >= size
    }
  end

  return self.body_to_utf(self.decompress_body(partial))
end
|---|
| 152 | end |
|---|
| 153 | end |
|---|
| 154 | |
|---|
# Ask net/http for its "1.2" API flavour before any request is made.
# NOTE(review): presumably only relevant on old interpreters; confirm against
# the net/http documentation of the Ruby versions the bot still supports.
Net::HTTP.version_1_2
|---|
| 156 | |
|---|
| 157 | module ::Irc |
|---|
| 158 | module Utils |
|---|
| 159 | |
|---|
| 160 | # class for making http requests easier (mainly for plugins to use) |
|---|
| 161 | # this class can check the bot proxy configuration to determine if a proxy |
|---|
| 162 | # needs to be used, which includes support for per-url proxy configuration. |
|---|
| 163 | class HttpUtil |
|---|
# Configuration keys used by HttpUtil, registered with the bot
# configuration together with their defaults and descriptions.

# -- timeouts (passed to Net::HTTP by get_proxy)
Bot::Config.register(
  Bot::Config::IntegerValue.new('http.read_timeout',
    :default => 10,
    :desc => "Default read timeout for HTTP connections"))
Bot::Config.register(
  Bot::Config::IntegerValue.new('http.open_timeout',
    :default => 20,
    :desc => "Default open timeout for HTTP connections"))

# -- proxy selection (see get_proxy and proxy_required)
Bot::Config.register(
  Bot::Config::BooleanValue.new('http.use_proxy',
    :default => false,
    :desc => "should a proxy be used for HTTP requests?"))
Bot::Config.register(
  Bot::Config::StringValue.new('http.proxy_uri',
    :default => false,
    :desc => "Proxy server to use for HTTP requests (URI, e.g http://proxy.host:port)"))
Bot::Config.register(
  Bot::Config::StringValue.new('http.proxy_user',
    :default => nil,
    :desc => "User for authenticating with the http proxy (if required)"))
Bot::Config.register(
  Bot::Config::StringValue.new('http.proxy_pass',
    :default => nil,
    :desc => "Password for authenticating with the http proxy (if required)"))
Bot::Config.register(
  Bot::Config::ArrayValue.new('http.proxy_include',
    :default => [],
    :desc => "List of regexps to check against a URI's hostname/ip to see if we should use the proxy to access this URI. All URIs are proxied by default if the proxy is set, so this is only required to re-include URIs that might have been excluded by the exclude list. e.g. exclude /.*\.foo\.com/, include bar\.foo\.com"))
Bot::Config.register(
  Bot::Config::ArrayValue.new('http.proxy_exclude',
    :default => [],
    :desc => "List of regexps to check against a URI's hostname/ip to see if we should use avoid the proxy to access this URI and access it directly"))

# -- redirects (see get_response)
Bot::Config.register(
  Bot::Config::IntegerValue.new('http.max_redir',
    :default => 5,
    :desc => "Maximum number of redirections to be used when getting a document"))

# -- cache expiration (see remove_stale_cache)
Bot::Config.register(
  Bot::Config::IntegerValue.new('http.expire_time',
    :default => 60,
    :desc => "After how many minutes since last use a cached document is considered to be expired"))
Bot::Config.register(
  Bot::Config::IntegerValue.new('http.max_cache_time',
    :default => 60*24,
    :desc => "After how many minutes since first use a cached document is considered to be expired"))
Bot::Config.register(
  Bot::Config::BooleanValue.new('http.no_expire_cache',
    :default => false,
    :desc => "Set this to true if you want the bot to never expire the cached pages"))

# -- partial downloads (default size for get_partial)
Bot::Config.register(
  Bot::Config::IntegerValue.new('http.info_bytes',
    :default => 8192,
    :desc => "How many bytes to download from a web page to find some information. Set to 0 to let the bot download the whole page."))
|---|
| 199 | |
|---|
# A cached HTTP response together with the bookkeeping needed to decide
# when it has to be fetched again.
#
# response::   the cached response object
# date::       value of the response's Date header (or the time of the last
#              revalidation when the header is missing)
# expires::    time after which the entry must be revalidated
# first_used:: time of the first use since the last revalidation
# last_used::  time of the most recent use
# count::      number of uses since the last revalidation
class CachedObject
  attr_accessor :response, :last_used, :first_used, :count, :expires, :date

  # Build a cache entry for _resp_, or return +nil+ when it should not be
  # cached: the response is flagged no_cache, it is not a 200/301/302/206,
  # its Cache-Control contains no-cache, or its Expires header is earlier
  # than its Date header (or than now, if there is no Date header).
  #
  # Time.httpdate raises ArgumentError on malformed Date/Expires headers;
  # that is left to the caller.
  def self.maybe_new(resp)
    debug "maybe new #{resp}"
    return nil if resp.no_cache
    return nil unless Net::HTTPOK === resp ||
      Net::HTTPMovedPermanently === resp ||
      Net::HTTPFound === resp ||
      Net::HTTPPartialContent === resp

    cc = resp['cache-control']
    return nil if cc && (cc =~ /no-cache/i)

    date = Time.now
    if d = resp['date']
      date = Time.httpdate(d)
    end

    return nil if resp['expires'] && (Time.httpdate(resp['expires']) < date)

    debug "creating cache obj"

    self.new(resp)
  end

  # Record one use of the entry.
  def use
    now = Time.now
    @first_used = now if @count == 0
    @last_used = now
    @count += 1
  end

  # Is a new (conditional) request needed before the entry can be served?
  # Always true for must-revalidate responses, otherwise true once the
  # expiration time has passed.
  def expired?
    debug "checking expired?"
    # Two statements on purpose: "cc = header && cc =~ ..." assigns the
    # result of the whole && expression, with cc still nil on the right
    # hand side, so the directive would never be seen.
    cc = self.response['cache-control']
    return true if cc && cc =~ /must-revalidate/i
    return self.expires < Time.now
  end

  # Add the conditional request headers for this entry to the Hash _hdr_:
  # If-Modified-Since (always) and If-None-Match (when the cached response
  # carries an ETag).
  def setup_headers(hdr)
    # If-Modified-Since takes an HTTP-date (always GMT), not the RFC 2822
    # form with a numeric zone offset
    hdr['if-modified-since'] = self.date.httpdate

    debug "ims == #{hdr['if-modified-since']}"

    if etag = self.response['etag']
      hdr['if-none-match'] = etag
      debug "etag: #{etag}"
    end
  end

  # Restart the usage counters and recompute date and expires from the
  # headers of _resp_ (the cached response itself by default, or e.g. the
  # 304 response that confirmed it). Expiration is taken, in order of
  # preference, from Cache-Control max-age, from Expires, from a fifth of
  # the document's age according to Last-Modified (2 seconds when that age
  # is not positive), or else set to 5 minutes after date.
  #
  # Always returns true.
  def revalidate(resp = self.response)
    @count = 0
    self.use
    self.date = resp.key?('date') ? Time.httpdate(resp['date']) : Time.now

    cc = resp['cache-control']
    if cc && (cc =~ /max-age=(\d+)/)
      self.expires = self.date + $1.to_i
    elsif resp.key?('expires')
      self.expires = Time.httpdate(resp['expires'])
    elsif lm = resp['last-modified']
      delta = self.date - Time.httpdate(lm)
      delta = 10 if delta <= 0
      delta /= 5
      self.expires = self.date + delta
    else
      self.expires = self.date + 300
    end
    # self.expires = Time.now + 10 # DEBUG
    debug "expires on #{self.expires}"

    return true
  end

  private

  # Wrap _resp_, compute its expiration and read its whole (raw) body.
  # Any failure is logged and re-raised.
  def initialize(resp)
    @response = resp
    begin
      self.revalidate
      self.response.raw_body
    rescue Exception => e
      error e
      raise e
    end
  end
end
|---|
| 288 | |
|---|
# Create the HttpUtil instance, associating it with Bot _bot_
#
# Sets up an empty response cache and the headers sent with every request,
# and registers a bot timer with an argument of 300 whose block calls
# remove_stale_cache, unless the +http.no_expire_cache+ config value is set.
#
def initialize(bot)
  @bot = bot
  @cache = {}

  agent = "rbot http util #{$version} (#{Irc::Bot::SOURCE_URL})"
  @headers = {
    'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8',
    'Accept-Encoding' => 'gzip;q=1, deflate;q=1, identity;q=0.8, *;q=0.2',
    'User-Agent' => agent
  }

  debug "starting http cache cleanup timer"
  @timer = @bot.timer.add(300) do
    self.remove_stale_cache unless @bot.config['http.no_expire_cache']
  end
end
|---|
| 305 | |
|---|
# Clean up on HttpUtil unloading: unregister the cache cleanup timer that
# was set up at creation time.
def cleanup
  debug 'stopping http cache cleanup timer'
  timers = @bot.timer
  timers.remove(@timer)
end
|---|
| 311 | |
|---|
# This method checks if a proxy is required to access _uri_, by looking at
# the values of config values +http.proxy_include+ and +http.proxy_exclude+.
#
# Each of these config values, if set, should be a list of regexp sources
# that the server name and its IP addresses are checked against.
#
# Returns true when both lists are empty. Otherwise the proxy is used
# unless an exclude pattern matches, and it is used again whenever an
# include pattern matches (include wins over exclude). A failed address
# lookup is only logged: matching then happens on the host name alone.
#
def proxy_required(uri)
  excludes = @bot.config["http.proxy_exclude"]
  includes = @bot.config["http.proxy_include"]
  return true if excludes.empty? && includes.empty?

  candidates = [uri.host]
  begin
    candidates.concat Resolv.getaddresses(uri.host)
  rescue StandardError => err
    warning "couldn't resolve host #{uri.host} (#{err})"
  end

  # every pattern is compiled before matching starts, so a malformed
  # entry raises no matter which entries match
  hit = lambda do |patterns|
    patterns.collect { |src| Regexp.new(src) }.any? do |re|
      candidates.any? { |item| re.match(item) }
    end
  end

  use_proxy = true
  use_proxy = false if !excludes.empty? && hit.call(excludes)
  use_proxy = true if !includes.empty? && hit.call(includes)

  debug "using proxy for uri #{uri}?: #{use_proxy}"
  return use_proxy
end
|---|
| 356 | |
|---|
# _uri_::     URI to create a connection object for
# _options_:: may override :read_timeout and :open_timeout, which default
#             to the +http.read_timeout+ / +http.open_timeout+ config values
#
# Return a (not yet started) Net::HTTP object for the host and port of
# _uri_, with SSL enabled for https URIs. When +http.use_proxy+ is set, the
# proxy is taken from the http_proxy environment variable or, with
# precedence, from +http.proxy_uri+; it is only applied if proxy_required
# agrees for this _uri_. See proxy_required for more details on this.
#
def get_proxy(uri, options = {})
  defaults = {
    :read_timeout => @bot.config["http.read_timeout"],
    :open_timeout => @bot.config["http.open_timeout"]
  }
  opts = defaults.merge(options)

  proxy_host = proxy_port = proxy_user = proxy_pass = nil

  if @bot.config["http.use_proxy"]
    proxy = nil

    env_proxy = ENV['http_proxy']
    proxy = (URI.parse(env_proxy) rescue nil) if env_proxy

    # a configured proxy replaces the one from the environment, even when
    # it cannot be parsed (leaving no proxy at all)
    conf_proxy = @bot.config["http.proxy_uri"]
    proxy = (URI.parse(conf_proxy) rescue nil) if conf_proxy

    if proxy
      debug "proxy is set to #{proxy.host} port #{proxy.port}"
      if proxy_required(uri)
        proxy_host, proxy_port = proxy.host, proxy.port
        proxy_user = @bot.config["http.proxy_user"]
        proxy_pass = @bot.config["http.proxy_pass"]
      end
    end
  end

  conn = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_pass)
  conn.use_ssl = true if uri.scheme == "https"
  conn.read_timeout = opts[:read_timeout]
  conn.open_timeout = opts[:open_timeout]
  conn
end
|---|
| 399 | |
|---|
# Internal method used to handle response _resp_ received when making a
# request for URI _uri_ with the (merged) options _opts_.
#
# It follows redirects as long as opts[:max_redir] allows, optionally
# yielding them if option :yield is :all. A redirected POST is repeated as
# a GET, and a Set-Cookie header of the redirect is sent along as Cookie.
# Raises 'Too many redirections' when a redirect arrives with
# opts[:max_redir] at 0; with a negative value redirects are not followed.
#
# On the final response, #body is replaced by #cooked_body and a missing
# content type is guessed from the extension of the request URI.
#
# Also yields and returns the final _resp_. Without a block, the whole
# (raw) body is read before returning.
#
def handle_response(uri, resp, opts, &block) # :yields: resp
  if Net::HTTPRedirection === resp && opts[:max_redir] >= 0
    if resp.key?('location')
      raise 'Too many redirections' if opts[:max_redir] <= 0
      yield resp if opts[:yield] == :all && block_given?
      loc = resp['location']
      # relative locations are resolved against the current URI
      new_loc = URI.join(uri.to_s, loc) rescue URI.parse(loc)
      new_opts = opts.dup
      new_opts[:max_redir] -= 1
      case opts[:method].to_s.downcase.intern
      when :post, :"net::http::post"
        new_opts[:method] = :get
      end
      if resp['set-cookie']
        debug "setting cookie #{resp['set-cookie']}"
        # opts.dup is shallow: build a new headers Hash instead of
        # writing the cookie into the one supplied by the caller
        new_opts[:headers] = (new_opts[:headers] || {}).merge('Cookie' => resp['set-cookie'])
      end
      debug "following the redirect to #{new_loc}"
      return get_response(new_loc, new_opts, &block)
    else
      warning ":| redirect w/o location?"
    end
  end
  # from here on resp.body is the decompressed, UTF-8 version; the
  # original stays available as resp.raw_body
  class << resp
    undef_method :body
    alias :body :cooked_body
  end
  unless resp['content-type']
    debug "No content type, guessing"
    # The dots are escaped: a bare "." matches any character, which made
    # "page.xhtml" count as .html (and "foxml" as .xml).
    resp['content-type'] =
      case resp['x-rbot-location']
      when /\.html?$/i
        'text/html'
      when /\.xml$/i
        'application/xml'
      when /\.xhtml$/i
        'application/xhtml+xml'
      when /\.(gif|png|jpe?g|jp2|tiff?)$/i
        ext = $1.downcase
        "image/#{ext.sub(/^jpg$/,'jpeg').sub(/^tif$/,'tiff')}"
      else
        # NOTE(review): the registered type is application/octet-stream;
        # left as is in case other code compares against this value
        'application/octetstream'
      end
  end
  if block_given?
    yield(resp)
  else
    # Net::HTTP wants us to read the whole body here
    resp.raw_body
  end
  return resp
end
|---|
| 459 | |
|---|
# _uri_or_s_:: uri to query (URI object or String)
#
# Generic http transaction method. It will return a Net::HTTPResponse
# object or raise an exception. Only http and https URIs are accepted.
# Whatever goes wrong while talking to the server is logged and raised
# again as a RuntimeError carrying the original message.
#
# If a block is given, it will yield the response (see :yield option)
#
# Currently supported _options_:
#
# method::       request method [:get (default), :post or :head]; anything
#                else is requested as GET, with a warning
# open_timeout:: open timeout for the proxy
# read_timeout:: read timeout for the proxy
# cache::        should we cache results? (default true, forced off for POST)
# yield::        if :final [default], calls the block for the response object;
#                if :all, call the block for all intermediate redirects, too
# max_redir::    how many redirects to follow before raising the exception
#                if -1, don't follow redirects, just return them
#                (always -1 for HEAD requests)
# range::        make a ranged request (GET only). accepts a string
#                for HTTP/1.1 "Range:" header (i.e. "bytes=0-1000")
# body::         request body (mandatory for POST requests)
# headers::      additional headers to be set for the request. Its value must
#                be a Hash in the form { 'Header' => 'value' }
# auth_head::    value for the Authorization header; set here from the
#                user:password part of the URI, so that it is carried along
#                on redirects
#
def get_response(uri_or_s, options = {}, &block) # :yields: resp
  uri = uri_or_s.kind_of?(URI) ? uri_or_s : URI.parse(uri_or_s.to_s)
  unless URI::HTTP === uri
    if uri.scheme
      raise "#{uri.scheme.inspect} URI scheme is not supported"
    else
      raise "don't know what to do with #{uri.to_s.inspect}"
    end
  end

  opts = {
    :max_redir => @bot.config['http.max_redir'],
    :yield => :final,
    :cache => true,
    :method => :GET
  }.merge(options)

  resp = nil

  req_class = case opts[:method].to_s.downcase.intern
              when :head, :"net::http::head"
                opts[:max_redir] = -1
                Net::HTTP::Head
              when :get, :"net::http::get"
                Net::HTTP::Get
              when :post, :"net::http::post"
                opts[:cache] = false
                opts[:body] or raise 'post request w/o a body?'
                warning "refusing to cache POST request" if options[:cache]
                Net::HTTP::Post
              else
                warning "unsupported method #{opts[:method]}, doing GET"
                Net::HTTP::Get
              end

  if req_class != Net::HTTP::Get && opts[:range]
    warning "can't request ranges for #{req_class}"
    opts.delete(:range)
  end

  # one cache slot per (range, request class, URI) combination
  cache_key = "#{opts[:range]}|#{req_class}|#{uri.to_s}"

  if req_class != Net::HTTP::Get && req_class != Net::HTTP::Head
    if opts[:cache]
      warning "can't cache #{req_class.inspect} requests, working w/o cache"
      opts[:cache] = false
    end
  end

  debug "get_response(#{uri}, #{opts.inspect})"

  cached = @cache[cache_key]

  # a cached copy that is still fresh is served without any network access
  if opts[:cache] && cached
    debug "got cached"
    if !cached.expired?
      debug "using cached"
      cached.use
      return handle_response(uri, cached.response, opts, &block)
    end
  end

  headers = @headers.dup.merge(opts[:headers] || {})
  headers['Range'] = opts[:range] if opts[:range]
  headers['Authorization'] = opts[:auth_head] if opts[:auth_head]

  # an expired copy turns the GET into a conditional request
  if opts[:cache] && cached && (req_class == Net::HTTP::Get)
    cached.setup_headers headers
  end

  req = req_class.new(uri.request_uri, headers)
  if uri.user && uri.password
    req.basic_auth(uri.user, uri.password)
    opts[:auth_head] = req['Authorization']
  end
  req.body = opts[:body] if req_class == Net::HTTP::Post
  debug "prepared request: #{req.to_hash.inspect}"

  begin
    get_proxy(uri, opts).start do |http|
      http.request(req) do |resp|
        # remembered for handle_response, which guesses a missing content
        # type from it
        resp['x-rbot-location'] = uri.to_s
        # A 304 can also arrive without a cached copy (e.g. when the
        # caller supplied its own conditional headers); it is then handed
        # on like any other response instead of dereferencing nil.
        if Net::HTTPNotModified === resp && cached
          debug "not modified"
          begin
            cached.revalidate(resp)
          rescue Exception => e
            error e
          end
          debug "reusing cached"
          resp = cached.response
        elsif Net::HTTPServerError === resp || Net::HTTPClientError === resp
          debug "http error, deleting cached obj" if cached
          @cache.delete(cache_key)
        end

        begin
          return handle_response(uri, resp, opts, &block)
        ensure
          # store whatever is cacheable once the response has been
          # handled; failures while building the entry are ignored
          if cached = CachedObject.maybe_new(resp) rescue nil
            debug "storing to cache"
            @cache[cache_key] = cached
          end
        end
      end
    end
  rescue Exception => e
    error e
    raise e.message
  end
end
|---|
| 594 | |
|---|
# _uri_:: uri to query (URI object or String)
#
# Simple GET request. Returns the response body when the final response is
# a 200 or a 206, following redirs and caching if requested, and yielding
# the actual response(s) to the optional block. Any other outcome,
# including exceptions raised by get_response, is logged and +nil+ is
# returned. See get_response for details on the supported _options_
#
def get(uri, options = {}, &block) # :yields: resp
  resp = get_response(uri, options, &block)
  acceptable = Net::HTTPOK === resp || Net::HTTPPartialContent === resp
  raise "http error: #{resp}" unless acceptable
  return resp.body
rescue Exception => e
  error e
  nil
end
|---|
| 612 | |
|---|
# _uri_:: uri to query (URI object or String)
#
# Simple HEAD request. Returns the response object whatever its status,
# yielding it to the optional block; exceptions raised by get_response are
# logged and +nil+ is returned instead. The request method defaults to
# :head but can be overridden through _options_. See get_response for
# details on the supported _options_
#
def head(uri, options = {}, &block) # :yields: resp
  opts = {:method => :head}.merge(options)
  # raise "http error #{resp}" if Net::HTTPClientError === resp ||
  #   Net::HTTPServerError == resp
  return get_response(uri, opts, &block)
rescue Exception => e
  error e
  nil
end
|---|
| 631 | |
|---|
# _uri_::  uri to query (URI object or String)
# _data_:: body of the POST
#
# Simple POST request (uncached unless _options_ say otherwise). Returns
# the response object when the final response is a 200 or a 201, following
# redirs and yielding the response(s) to the optional block. Any other
# outcome, including exceptions raised by get_response, is logged and
# +nil+ is returned. See get_response for details on the supported
# _options_
#
def post(uri, data, options = {}, &block) # :yields: resp
  defaults = {:method => :post, :body => data, :cache => false}
  resp = get_response(uri, defaults.merge(options), &block)
  success = (Net::HTTPOK === resp) || (Net::HTTPCreated === resp)
  raise 'http error' unless success
  return resp
rescue Exception => e
  error e
  nil
end
|---|
| 650 | |
|---|
# _uri_::    uri to query (URI object or String)
# _nbytes_:: number of bytes to get (defaults to the +http.info_bytes+
#            config value); 0 or nil means the whole document
#
# Partial GET request, returns (if possible) the first _nbytes_ bytes of the
# response body, following redirs and caching if requested, yielding the
# actual response(s) to the optional block. A :range given in _options_
# takes precedence over _nbytes_. See get and get_response for details on
# the return value and on the supported _options_
#
def get_partial(uri, nbytes = @bot.config['http.info_bytes'], options = {}, &block) # :yields: resp
  opts = {}
  # Both ends of a byte range are inclusive, so the first nbytes bytes are
  # 0 .. nbytes-1. No Range at all is sent for "the whole page".
  opts[:range] = "bytes=0-#{nbytes - 1}" if nbytes && nbytes > 0
  return get(uri, opts.merge(options), &block)
end
|---|
| 663 | |
|---|
# Drop from the cache every entry that was last used more than
# +http.expire_time+ minutes ago, or first used more than
# +http.max_cache_time+ minutes ago. Errors raised while scanning the
# cache are logged and otherwise ignored.
def remove_stale_cache
  debug "Removing stale cache"
  now = Time.new
  idle_limit = @bot.config['http.expire_time'] * 60
  age_limit = @bot.config['http.max_cache_time'] * 60
  debug "#{@cache.size} pages before"
  begin
    @cache.delete_if do |_key, entry|
      idle_for = now - entry.last_used
      idle_for > idle_limit || (now - entry.first_used) > age_limit
    end
  rescue => e
    error "Failed to remove stale cache: #{e.pretty_inspect}"
  end
  debug "#{@cache.size} pages after"
end
|---|
| 679 | |
|---|
| 680 | end |
|---|
| 681 | end |
|---|
| 682 | end |
|---|
| 683 | |
|---|
# Core module that ties the lifetime of the bot's HttpUtil instance
# (@bot.httputil) to the loading and unloading of this file.
class HttpUtilPlugin < CoreBotModule
  # Create the HttpUtil instance and hand it to the bot.
  def initialize(*a)
    super
    debug 'initializing httputil'
    util = Irc::Utils::HttpUtil.new(@bot)
    @bot.httputil = util
  end

  # Shut the HttpUtil instance down (see HttpUtil#cleanup) and detach it
  # from the bot before running the standard module cleanup.
  def cleanup
    debug 'shutting down httputil'
    util = @bot.httputil
    util.cleanup
    @bot.httputil = nil
    super
  end
end

# Instantiate the module right away, when this file is loaded.
HttpUtilPlugin.new
|---|