Initial work

This commit is contained in:
erwanlr
2013-02-20 17:34:17 +01:00
parent a01e1ab29f
commit 8b9aec468a
11 changed files with 26 additions and 78 deletions

View File

@@ -1,11 +1,11 @@
source "https://rubygems.org" source "https://rubygems.org"
gem "typhoeus", "0.4.2" gem "typhoeus", "~>0.6.1"
gem "nokogiri" gem "nokogiri"
gem "json" gem "json"
group :development, :test do group :development, :test do
gem "webmock", "1.8.11" gem "webmock", "~>1.9.3"
gem "simplecov" gem "simplecov"
gem "rspec", :require => "spec" gem "rspec", :require => "spec"
end end

View File

@@ -48,7 +48,7 @@ class Browser
@hydra = Typhoeus::Hydra.new( @hydra = Typhoeus::Hydra.new(
max_concurrency: @max_threads, max_concurrency: @max_threads,
timeout: @request_timeout #connecttimeout: @request_timeout
) )
# TODO : add an option for the cache dir instead of using a constant # TODO : add an option for the cache dir instead of using a constant
@@ -56,8 +56,7 @@ class Browser
@cache.clean @cache.clean
# might be in CacheFileStore #Typhoeus::Config.cache = @cache
setup_cache_handlers
end end
private_class_method :new private_class_method :new
@@ -146,24 +145,6 @@ class Browser
end end
end end
# Registers the cache callbacks on @hydra so that responses are written to,
# and looked up from, @cache under the key produced by
# Browser.generate_cache_key_from_request.
def setup_cache_handlers
  # Setter: store the request's response together with its cache timeout.
  @hydra.cache_setter do |req|
    entry_key = Browser.generate_cache_key_from_request(req)
    @cache.write_entry(entry_key, req.response, req.cache_timeout)
  end

  # Getter: any StandardError raised while building the key or reading the
  # entry results in nil instead of propagating.
  @hydra.cache_getter do |req|
    begin
      @cache.read_entry(Browser.generate_cache_key_from_request(req))
    rescue StandardError
      nil
    end
  end
end
private :setup_cache_handlers
def get(url, params = {}) def get(url, params = {})
run_request( run_request(
forge_request(url, params.merge(method: :get)) forge_request(url, params.merge(method: :get))
@@ -177,10 +158,10 @@ class Browser
end end
def get_and_follow_location(url, params = {}) def get_and_follow_location(url, params = {})
params[:max_redirects] ||= 2 params[:maxredirs] ||= 2
run_request( run_request(
forge_request(url, params.merge(method: :get, follow_location: true)) forge_request(url, params.merge(method: :get, followlocation: true))
) )
end end
@@ -208,12 +189,13 @@ class Browser
end end
end end
unless params.has_key?(:disable_ssl_host_verification) # TODO : check if it's the default value in ethon. If so, remove the lines from here
params = params.merge(:disable_ssl_host_verification => true) unless params.has_key?(:ssl_verifyhost)
params = params.merge(ssl_verifyhost: 0)
end end
unless params.has_key?(:disable_ssl_peer_verification) unless params.has_key?(:ssl_verifypeer)
params = params.merge(:disable_ssl_peer_verification => true) params = params.merge(ssl_verifypeer: false)
end end
if !params.has_key?(:headers) if !params.has_key?(:headers)
@@ -223,9 +205,9 @@ class Browser
end end
# Used to enable the cache system if :cache_timeout > 0 # Used to enable the cache system if :cache_timeout > 0
unless params.has_key?(:cache_timeout) #unless params.has_key?(:cache_ttl)
params = params.merge(:cache_timeout => @cache_timeout) # params = params.merge(cache_ttl: @cache_timeout)
end #end
params params
end end
@@ -247,17 +229,4 @@ class Browser
end end
end end
end end
# Builds the cache key for a request.
#
# Per the original note, Typhoeus::Request.cache_key only hashes the url, so
# the params are mixed into the key here. They are digested through a
# canonical, key-sorted textual form rather than Object#hash, because the
# value of Object#hash is not guaranteed to be identical across Ruby
# invocations and would make the generated keys non-reproducible.
#
# TODO : include also the method (:get, :post, :any)
#
# @param request [#cache_key, #params] the request to derive the key from
# @return [String] request.cache_key unchanged when params is nil/false,
#   otherwise the SHA1 hex digest of the cache_key combined with the params
def self.generate_cache_key_from_request(request)
  cache_key = request.cache_key
  params    = request.params
  return cache_key unless params

  # Sort by key so two hashes with equal content but different insertion
  # order still produce the same key (as they did with Hash#hash).
  canonical = params.is_a?(Hash) ? params.sort_by { |name, _| name.to_s }.inspect : params.inspect
  Digest::SHA1.hexdigest("#{cache_key}-#{canonical}")
end
end end

View File

@@ -51,7 +51,7 @@ class CacheFileStore
end end
end end
def read_entry(key) def get(key)
entry_file_path = get_entry_file_path(key) entry_file_path = get_entry_file_path(key)
if File.exists?(entry_file_path) if File.exists?(entry_file_path)
@@ -59,7 +59,7 @@ class CacheFileStore
end end
end end
def write_entry(key, data_to_store, cache_timeout) def set(key, data_to_store, cache_timeout)
if cache_timeout > 0 if cache_timeout > 0
File.open(get_entry_file_path(key), 'w') do |f| File.open(get_entry_file_path(key), 'w') do |f|
f.write(@serializer.dump(data_to_store)) f.write(@serializer.dump(data_to_store))

View File

@@ -33,7 +33,6 @@ begin
require 'rbconfig' require 'rbconfig'
require 'pp' require 'pp'
# Third party libs # Third party libs
gem 'typhoeus', '=0.4.2'
require 'typhoeus' require 'typhoeus'
require 'json' require 'json'
require 'nokogiri' require 'nokogiri'

View File

@@ -60,7 +60,7 @@ module WpUsernames
end end
def get_nickname_from_url(url) def get_nickname_from_url(url)
resp = Browser.instance.get(url, { follow_location: true, max_redirects: 2 }) resp = Browser.instance.get_and_follow_location(url)
nickname = nil nickname = nil
if resp.code == 200 if resp.code == 200
nickname = extract_nickname_from_body(resp.body) nickname = extract_nickname_from_body(resp.body)

View File

@@ -55,7 +55,7 @@ class WpEnumerator
targets.each do |target| targets.each do |target|
url = target.get_full_url url = target.get_full_url
request = enum_browser.forge_request(url, { cache_timeout: 0, follow_location: true }) request = enum_browser.forge_request(url, { cache_ttl: 0, followlocation: true })
request_count += 1 request_count += 1
request.on_complete do |response| request.on_complete do |response|

View File

@@ -74,7 +74,7 @@ class WpTheme < WpItem
# Discover the wordpress theme name by parsing the css link rel # Discover the wordpress theme name by parsing the css link rel
def self.find_from_css_link(target_uri) def self.find_from_css_link(target_uri)
response = Browser.instance.get(target_uri.to_s, { follow_location: true, max_redirects: 2 }) response = Browser.instance.get_and_follow_location(target_uri.to_s)
# https + domain is optional because of relative links # https + domain is optional because of relative links
matches = %r{(?:https?://[^"']+)?/([^/]+)/themes/([^"']+)/style.css}i.match(response.body) matches = %r{(?:https?://[^"']+)?/([^/]+)/themes/([^"']+)/style.css}i.match(response.body)

View File

@@ -63,7 +63,7 @@ class CheckerPlugin < Plugin
number_of_urls = urls.size number_of_urls = urls.size
urls.each do |url| urls.each do |url|
request = browser.forge_request(url, { cache_timeout: 0, follow_location: true }) request = browser.forge_request(url, { cache_ttl: 0, followlocation: true })
request_count += 1 request_count += 1
request.on_complete do |response| request.on_complete do |response|

View File

@@ -354,26 +354,6 @@ describe Browser do
#end #end
end end
# Specs for the cache key derivation: identical requests must share a key,
# and adding params to a request must change it.
describe '#Browser.generate_cache_key_from_request' do
  it '2 requests with the same url, without params must have the same cache_key' do
    target = 'http://example.com'
    first_key  = Browser.generate_cache_key_from_request(@browser.forge_request(target))
    second_key = Browser.generate_cache_key_from_request(@browser.forge_request(target))

    first_key.should === second_key
  end

  it '2 requests with the same url, but with different params should have a different cache_key' do
    target = 'http://example.com'
    credentials = { login: 'master', password: 'it\'s me !' }
    key_with_params    = Browser.generate_cache_key_from_request(@browser.forge_request(target, params: credentials))
    key_without_params = Browser.generate_cache_key_from_request(@browser.forge_request(target))

    key_with_params.should_not == key_without_params
  end
end
describe 'testing caching' do describe 'testing caching' do
it 'should only do 1 request, and retrieve the other one from the cache' do it 'should only do 1 request, and retrieve the other one from the cache' do

View File

@@ -61,17 +61,17 @@ describe CacheFileStore do
end end
end end
describe '#read_entry (nonexistent entry)' do describe '#get (nonexistent entry)' do
it 'should return nil' do it 'should return nil' do
@cache.read_entry(Digest::SHA1.hexdigest('hello world')).should be_nil @cache.get(Digest::SHA1.hexdigest('hello world')).should be_nil
end end
end end
describe '#write_entry, #read_entry' do describe '#set, #get' do
after :each do after :each do
@cache.write_entry(@key, @data, @timeout) @cache.set(@key, @data, @timeout)
@cache.read_entry(@key).should === @expected @cache.get(@key).should === @expected
end end
it 'should get the correct entry (string)' do it 'should get the correct entry (string)' do

View File

@@ -27,7 +27,7 @@ end
require File.expand_path(File.dirname(__FILE__) + '/../lib/common/common_helper') require File.expand_path(File.dirname(__FILE__) + '/../lib/common/common_helper')
gem 'webmock', '=1.8.11' #gem 'webmock', '=1.8.11'
require 'webmock/rspec' require 'webmock/rspec'
SPEC_DIR = ROOT_DIR + '/spec' SPEC_DIR = ROOT_DIR + '/spec'