Monday, May 23, 2011

Handling gzip responses in the Ruby Net::HTTP library


require 'net/http'
require 'stringio'
require 'zlib'

# Example script: request a page over plain HTTP while advertising gzip
# support, decompress the body when the server answers with a gzip
# Content-Encoding, and print the resulting page to STDOUT.
# Progress and error messages go to STDERR so they never mix with the page.

# Timestamped diagnostic logger (writes to STDERR).
debug = ->(msg) { $stderr.puts "[#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}] #{msg}" }

# Content-Encoding tokens that mean "gzip"; "x-gzip" is the legacy alias.
gzip_encodings = %w[gzip x-gzip].freeze

page = nil
http = Net::HTTP.new('www.google.com', 80)

# Supplying our own Accept-Encoding header opts out of the transparent
# decompression that newer Net::HTTP versions perform, so the body is decoded
# by hand below.
# NOTE(review): the "gzip" User-Agent is kept from the original; presumably
# the server only compresses for agents that advertise gzip -- confirm.
req = Net::HTTP::Get.new(
  '/search?num=20&hl=en&noj=1&q=test&btnG=Search',
  { 'Accept-Encoding' => 'gzip', 'User-Agent' => 'gzip' }
)

debug.call("Performing HTTP GET request for (#{req.path}).")
res = http.request(req)
debug.call("Received HTTP Response Code (#{res.code})")

case res
when Net::HTTPSuccess
  begin
    # Header values are compared case-insensitively; a missing header becomes "".
    encoding = res['Content-Encoding'].to_s.strip.downcase
    if gzip_encodings.include?(encoding)
      debug.call('Performing gzip decompression for response body.')
      # The block form closes the reader (and the underlying StringIO) even
      # if decompression raises.
      page = Zlib::GzipReader.wrap(StringIO.new(res.body)) { |gz| gz.read }
      debug.call('Finished decompressing gzipped response body.')
    else
      debug.call('Page is not compressed. Using text response body. ')
      page = res.body
    end
  rescue StandardError => e
    # Log, then re-raise the same exception so its class and backtrace survive.
    debug.call("Error occurred (#{e.message})")
    raise
  end
else
  debug.call("Request was not successful (#{res.code}); no page to display.")
end

# Only print when a body was actually obtained.
puts page unless page.nil?