diff --git a/lib/common/browser.rb b/lib/common/browser.rb index cb6d0ab2..033dd5ce 100644 --- a/lib/common/browser.rb +++ b/lib/common/browser.rb @@ -8,25 +8,36 @@ class Browser extend Browser::Actions include Browser::Options + OPTIONS = [ + :available_user_agents, + :basic_auth, + :cache_ttl, + :max_threads, + :user_agent, + :user_agent_mode, + :proxy, + :proxy_auth + ] + @@instance = nil - attr_reader :hydra, :config_file + attr_reader :hydra, :config_file, :cache_dir # @param [ Hash ] options + # @options def initialize(options = {}) @config_file = options[:config_file] || CONF_DIR + '/browser.conf.json' - @cache_dir = CACHE_DIR + '/browser' + @cache_dir = options[:cache_dir] || CACHE_DIR + '/browser' - options.delete(:config_file) + #options.delete(:config_file) load_config() - if options.length > 0 - override_config_with_options(options) - end + #if options.length > 0 + override_config(options) + #end @hydra = Typhoeus::Hydra.new(max_concurrency: self.max_threads) - # TODO : add an argument for the cache dir instead of using a constant @cache = TyphoeusCache.new(@cache_dir) @cache.clean @@ -47,7 +58,7 @@ class Browser end # TODO reload hydra (if the .load_config is called on a browser object, - # hydra will not have the new @max_threads and @request_timeout) + # hydra will not have the new @max_threads) def load_config(config_file = nil) @config_file = config_file || @config_file @@ -57,20 +68,17 @@ class Browser data = JSON.parse(File.read(@config_file)) end - Options::OPTIONS.each do |option| + OPTIONS.each do |option| option_name = option.to_s - if data[option_name] + unless data[option_name].nil? self.send(:"#{option_name}=", data[option_name]) end end end def forge_request(url, params = {}) - Typhoeus::Request.new( - url.to_s, - merge_request_params(params) - ) + Typhoeus::Request.new(url, merge_request_params(params)) end def merge_request_params(params = {}) diff --git a/lib/common/browser/options.rb b/lib/common/browser/options.rb index 23ec2949..364ca346 100644 --- a/lib/common/browser/options.rb +++ b/lib/common/browser/options.rb @@ -3,18 +3,6 @@ class Browser module Options - OPTIONS = [ - :available_user_agents, - :basic_auth, - :cache_ttl, - :max_threads, - :user_agent, - :user_agent_mode, - :proxy, - :proxy_auth, - #:request_timeout, - ] - USER_AGENT_MODES = %w{ static semi-static random } attr_reader :basic_auth, :user_agent_mode, :proxy, :proxy_auth @@ -31,9 +19,9 @@ class Browser # @return [ void ] def basic_auth=(auth) if auth.index(':') - @basic_auth = "Basic #{Base64.encode64(auth.chomp)}" - elsif auth =~ /\ABasic .*\z/ - @basic_auth = auth.chomp + @basic_auth = "Basic #{Base64.encode64(auth).chomp}" + elsif auth =~ /\ABasic [a-zA-Z0-9=]+\z/ + @basic_auth = auth else raise 'Invalid basic authentication format, "login:password" or "Basic base_64_encoded" expected' end @@ -80,7 +68,10 @@ class Browser # Sets the proxy # Accepted format: - # host:post + # [protocol://]host:post + # + # Supported protocols: + # Depends on the curl protocols, See curl --version # # @param [ String ] proxy # @@ -89,7 +80,7 @@ class Browser if proxy.index(':') @proxy = proxy else - raise 'Invalid proxy format. Should be host:port.' + raise 'Invalid proxy format. Should be [protocol://]host:port.' end end @@ -123,7 +114,7 @@ class Browser # @param [ Hash ] options # # @return [ void ] - def override_config_with_options(options = {}) + def override_config(options = {}) options.each do |option, value| if value != nil and OPTIONS.include?(option) self.send(:"#{option}=", value) diff --git a/spec/lib/common/browser_spec.rb b/spec/lib/common/browser_spec.rb index e3c4b0b2..7fe078b5 100644 --- a/spec/lib/common/browser_spec.rb +++ b/spec/lib/common/browser_spec.rb @@ -8,12 +8,16 @@ describe Browser do CONFIG_FILE_WITHOUT_PROXY = SPEC_FIXTURES_CONF_DIR + '/browser/browser.conf.json' CONFIG_FILE_WITH_PROXY = SPEC_FIXTURES_CONF_DIR + '/browser/browser.conf_proxy.json' - CONFIG_FILE_WITH_PROXY_AND_AUTH = SPEC_FIXTURES_CONF_DIR + '/browser/browser.conf_proxy_auth.json' - INSTANCE_VARS_TO_CHECK = ['user_agent', 'user_agent_mode', 'available_user_agents', 'proxy', 'max_threads', 'cache_ttl'] + #CONFIG_FILE_WITH_PROXY_AND_AUTH = SPEC_FIXTURES_CONF_DIR + '/browser/browser.conf_proxy_auth.json' subject(:browser) { - Browser::reset - Browser.instance + Browser.reset + Browser.instance(options) + } + let(:options) { {} } + let(:instance_vars_to_check) { + ['user_agent', 'user_agent_mode', 'available_user_agents', 'proxy', + 'max_threads', 'cache_ttl'] } before :all do @@ -21,146 +25,11 @@ describe Browser do @json_config_with_proxy = JSON.parse(File.read(CONFIG_FILE_WITH_PROXY)) end - before :each do - Browser::reset - @browser = Browser.instance(config_file: CONFIG_FILE_WITHOUT_PROXY) - end - def check_instance_variables(browser, json_expected_vars) json_expected_vars['max_threads'] ||= 1 # max_thread can not be nil - INSTANCE_VARS_TO_CHECK.each do |instance_variable_name| - browser.send(:"#{instance_variable_name}").should === json_expected_vars[instance_variable_name] - end - end - - describe '#user_agent_mode setter / getter' do - # Testing all valid modes - Browser::USER_AGENT_MODES.each do |user_agent_mode| - it "should set / return #{user_agent_mode}" do - @browser.user_agent_mode = user_agent_mode - @browser.user_agent_mode.should === user_agent_mode - end - end - - it "shoud set the mode to 'static' if nil is given" do - @browser.user_agent_mode = nil - @browser.user_agent_mode.should === 'static' - end - - it 'should raise an error if the mode in not valid' do - expect { @browser.user_agent_mode = 'invalid-mode' }.to raise_error - end - end - - #describe '#max_threads=' do - # it 'should set max_threads to 1 if nil is given' do - # @browser.max_threads = nil - # @browser.max_threads.should === 1 - # end -# - # it 'should set max_threads to 1 if 0 is given' do - # @browser.max_threads = 0 - # @browser.max_threads.should === 1 - # end - #end - - describe '#proxy_auth=' do - after :each do - if @raise_error - expect { @browser.proxy_auth = @proxy_auth }.to raise_error - else - @browser.proxy_auth = @proxy_auth - @browser.proxy_auth.should === @expected - end - end - - context 'when the auth supplied is' do - - context 'not a String or a Hash' do - it 'raises an error' do - @proxy_auth = 10 - @raise_error = true - end - end - - context 'a String with' do - context 'invalid format' do - it 'raises an error' do - @proxy_auth = 'invaludauthformat' - @raise_error = true - end - end - - context 'valid format' do - it 'sets the auth' do - @proxy_auth = 'username:passwd' - @expected = @proxy_auth - end - end - end - - context 'a Hash with' do - context 'only :proxy_username' do - it 'raises an error' do - @proxy_auth = { proxy_username: 'username' } - @raise_error = true - end - end - - context 'only :proxy_password' do - it 'raises an error' do - @proxy_auth = { proxy_password: 'hello' } - @raise_error = true - end - end - - context ':proxy_username and :proxy_password' do - it 'sets the auth' do - @proxy_auth = { proxy_username: 'user', proxy_password: 'pass' } - @expected = 'user:pass' - end - end - end - - end - end - - describe '#user_agent' do - available_user_agents = %w{ ua-1 ua-2 ua-3 ua-4 ua-6 ua-7 ua-8 ua-9 ua-10 ua-11 ua-12 ua-13 ua-14 ua-15 ua-16 ua-17 } - - it 'should always return the same user agent in static mode' do - @browser.user_agent = 'fake UA' - @browser.user_agent_mode = 'static' - - (1..3).each do - @browser.user_agent.should === 'fake UA' - end - end - - it 'should choose a random user_agent in the available_user_agents array an always return it' do - @browser.available_user_agents = available_user_agents - @browser.user_agent = 'Firefox 11.0' - @browser.user_agent_mode = 'semi-static' - - user_agent = @browser.user_agent - user_agent.should_not === 'Firefox 11.0' - available_user_agents.include?(user_agent).should be_true - - (1..3).each do - @browser.user_agent.should === user_agent - end - end - - it 'should return a random user agent each time' do - @browser.available_user_agents = available_user_agents - @browser.user_agent_mode = 'random' - - ua_1 = @browser.user_agent - ua_2 = @browser.user_agent - ua_3 = @browser.user_agent - - fail if ua_1 === ua_2 and ua_2 === ua_3 + instance_vars_to_check.each do |variable_name| + browser.send(:"#{variable_name}").should === json_expected_vars[variable_name] end end @@ -170,48 +39,32 @@ describe Browser do end end - describe "#instance with :config_file = #{CONFIG_FILE_WITHOUT_PROXY}" do - it 'will check the instance vars' do - Browser.reset - check_instance_variables( - Browser.instance(config_file: CONFIG_FILE_WITHOUT_PROXY), - @json_config_without_proxy - ) - end - end + describe '::instance' do + after { check_instance_variables(browser, @json_expected_vars) } - describe "#instance with :config_file = #{CONFIG_FILE_WITH_PROXY}" do - it 'will check the instance vars' do - Browser.reset - check_instance_variables( - Browser.instance(config_file: CONFIG_FILE_WITH_PROXY), - @json_config_with_proxy - ) - end - end - - # TODO Write something to test all possible overriding - describe 'override option : user_agent & threads' do - it 'will check the instance vars, with an overriden one' do - Browser.reset - check_instance_variables( - Browser.instance( - config_file: CONFIG_FILE_WITHOUT_PROXY, - user_agent: 'fake IE' - ), - @json_config_without_proxy.merge('user_agent' => 'fake IE') - ) + context "when default config_file = #{CONFIG_FILE_WITHOUT_PROXY}" do + it 'will check the instance vars' do + @json_expected_vars = @json_config_without_proxy + end end - it 'should not override the max_threads if max_threads = nil' do - Browser.reset - check_instance_variables( - Browser.instance( - config_file: CONFIG_FILE_WITHOUT_PROXY, - max_threads: nil - ), - @json_config_without_proxy - ) + context "when :config_file = #{CONFIG_FILE_WITH_PROXY}" do + let(:options) { { config_file: CONFIG_FILE_WITH_PROXY } } + + it 'will check the instance vars' do + @json_expected_vars = @json_config_with_proxy + end + end + + context 'when options[:cache_dir]' do + let(:cache_dir) { CACHE_DIR + '/somewhere' } + let(:options) { { cache_dir: cache_dir } } + + after { subject.cache_dir.should == cache_dir } + + it 'sets @cache_dir' do + @json_expected_vars = @json_config_without_proxy + end end end @@ -219,7 +72,6 @@ describe Browser do describe '#load_config' do it 'should raise an error if file is a symlink' do symlink = './rspec_symlink' - browser = Browser.instance File.symlink('./testfile', symlink) expect { browser.load_config(symlink) }.to raise_error("[ERROR] Config file is a symlink.") @@ -227,7 +79,7 @@ describe Browser do end end - describe '#append_params_header_field' do + describe '::append_params_header_field' do after :each do Browser.append_params_header_field( @params, @@ -264,7 +116,6 @@ describe Browser do end end end - end describe '#merge_request_params' do @@ -280,10 +131,10 @@ describe Browser do } after :each do - @browser.stub(user_agent: 'SomeUA') - @browser.cache_ttl = 250 + browser.stub(user_agent: 'SomeUA') + browser.cache_ttl = 250 - @browser.merge_request_params(params).should == @expected + browser.merge_request_params(params).should == @expected end it 'sets the User-Agent header field and cache_ttl' do @@ -296,27 +147,26 @@ describe Browser do let(:proxy_expectation) { default_expectation.merge(proxy: proxy) } it 'merges the proxy' do - @browser.proxy = proxy - @expected = proxy_expectation + browser.proxy = proxy + @expected = proxy_expectation end context 'when @proxy_auth' do it 'sets the proxy_auth' do - @browser.proxy = proxy - @browser.proxy_auth = 'user:pass' - @expected = proxy_expectation.merge(proxyauth: 'user:pass') + browser.proxy = proxy + browser.proxy_auth = 'user:pass' + @expected = proxy_expectation.merge(proxyauth: 'user:pass') end end end context 'when @basic_auth' do it 'appends the basic_auth' do - @browser.basic_auth = 'user:pass' + browser.basic_auth = 'user:pass' @expected = default_expectation.merge( - headers: default_expectation[:headers].merge('Authorization' => 'Basic '+Base64.encode64('user:pass')) + headers: default_expectation[:headers].merge('Authorization' => 'Basic '+Base64.encode64('user:pass').chomp) ) end - end context 'when the cache_ttl is alreday set' do @@ -326,11 +176,19 @@ describe Browser do @expected = default_expectation.merge(params) end end - end - # TODO describe '#forge_request' do + let(:url) { 'http://example.localhost' } + + it 'returns the correct Typhoeus::Request' do + subject.stub(merge_request_params: { cache_ttl: 10 }) + + request = subject.forge_request(url) + request.should be_a Typhoeus::Request + request.url.should == url + request.cache_ttl.should == 10 + end end @@ -359,4 +217,3 @@ describe Browser do end end end - diff --git a/spec/shared_examples/browser/options.rb b/spec/shared_examples/browser/options.rb index a6a6378b..01011c35 100644 --- a/spec/shared_examples/browser/options.rb +++ b/spec/shared_examples/browser/options.rb @@ -2,32 +2,250 @@ shared_examples 'Browser::Options' do - describe 'basic_auth=' do + describe '#basic_auth=' do + let(:exception) { 'Invalid basic authentication format, "login:password" or "Basic base_64_encoded" expected' } + after do + if @expected + subject.basic_auth = @auth + subject.basic_auth.should == @expected + else + expect { subject.basic_auth = @auth }.to raise_error(exception) + end + end + + context 'when invalid format' do + it 'raises an error' do + @auth = 'invalid' + end + end + + context 'when login:password' do + it 'sets the basic auth' do + @auth = 'admin:weakpass' + @expected = 'Basic YWRtaW46d2Vha3Bhc3M=' + end + end + + context 'when Basic base_64_encoded' do + context 'when invalid base_64_encoded' do + it 'raises an error' do + @auth = 'Basic ' + end + end + + it 'sets the basic auth' do + @auth = 'Basic dXNlcm5hbWU6dGhlYmlncGFzc3dvcmRzb3dlYWs=' + @expected = @auth + end + end end - describe 'max_threads' do + describe '#max_threads' do + after do + subject.max_threads = @max_threads + subject.max_threads.should == @expected + end + context 'when no @max_threads' do + @max_threads = nil + @expected = 1 + end + + context 'when @max_threads' do + it 'returns the @max_threads' do + @max_threads = 10 + @expected = 10 + end + end end - describe 'user_agent=' do + describe '#user_agent_mode= & #user_agent_mode' do + # Testing all valid modes + Browser::USER_AGENT_MODES.each do |user_agent_mode| + it "sets & returns #{user_agent_mode}" do + subject.user_agent_mode = user_agent_mode + subject.user_agent_mode.should === user_agent_mode + end + end + it 'sets the mode to "static" if nil is given' do + subject.user_agent_mode = nil + subject.user_agent_mode.should === 'static' + end + + it 'raises an error if the mode is not valid' do + expect { subject.user_agent_mode = 'invalid-mode' }.to raise_error + end end - describe 'user_agent' do + describe '#user_agent= & #user_agent' do + let(:available_user_agents) { %w{ ua-1 ua-2 ua-3 ua-4 ua-6 ua-7 ua-8 ua-9 ua-10 ua-11 ua-12 ua-13 ua-14 ua-15 ua-16 ua-17 } } + context 'when static mode' do + it 'returns the same user agent' do + subject.user_agent = 'fake UA' + subject.user_agent_mode = 'static' + + (1..3).each do + subject.user_agent.should === 'fake UA' + end + end + end + + context 'when semi-static mode' do + it 'chooses a random user_agent in the available_user_agents array and always return it' do + subject.available_user_agents = available_user_agents + subject.user_agent = 'Firefox 11.0' + subject.user_agent_mode = 'semi-static' + + user_agent = subject.user_agent + user_agent.should_not === 'Firefox 11.0' + available_user_agents.include?(user_agent).should be_true + + (1..3).each do + subject.user_agent.should === user_agent + end + end + end + + context 'when random' do + it 'returns a random user agent each time' do + subject.available_user_agents = available_user_agents + subject.user_agent_mode = 'random' + + ua_1 = subject.user_agent + ua_2 = subject.user_agent + ua_3 = subject.user_agent + + fail if ua_1 === ua_2 and ua_2 === ua_3 + end + end end describe 'proxy=' do + let(:exception) { 'Invalid proxy format. Should be [protocol://]host:port.' } + after do + if @expected + subject.proxy = @proxy + subject.proxy.should == @expected + else + expect { subject.proxy = @proxy }.to raise_error(exception) + end + end + + context 'when invalid format' do + it 'raises an error' do + @proxy = 'yolo' + end + end + + context 'when valid format' do + @proxy = '127.0.0.1:9050' + @expected = @proxy + end end describe 'proxy_auth=' do + let(:exception) { 'Invalid proxy auth format, expected username:password or {proxy_username: username, proxy_password: password}' } + after :each do + if @expected + subject.proxy_auth = @proxy_auth + subject.proxy_auth.should === @expected + else + expect { subject.proxy_auth = @proxy_auth }.to raise_error + end + end + + context 'when the auth supplied is' do + context 'not a String or a Hash' do + it 'raises an error' do + @proxy_auth = 10 + end + end + + context 'a String with' do + context 'invalid format' do + it 'raises an error' do + @proxy_auth = 'invaludauthformat' + end + end + + context 'valid format' do + it 'sets the auth' do + @proxy_auth = 'username:passwd' + @expected = @proxy_auth + end + end + end + + context 'a Hash with' do + context 'only :proxy_username' do + it 'raises an error' do + @proxy_auth = { proxy_username: 'username' } + end + end + + context 'only :proxy_password' do + it 'raises an error' do + @proxy_auth = { proxy_password: 'hello' } + end + end + + context ':proxy_username and :proxy_password' do + it 'sets the auth' do + @proxy_auth = { proxy_username: 'user', proxy_password: 'pass' } + @expected = 'user:pass' + end + end + end + end end - describe 'override_config_with_options' do + describe '#override_config' do + after do + subject.send(:override_config, override_options) + end + let(:config) { JSON.parse(File.read(subject.config_file)) } + + context 'when an option value is nil' do + let(:override_options) { { max_threads: nil } } + + it 'does not set it' do + subject.should_not_receive(:max_threads=) + end + end + + context 'when an option is no allowed' do + let(:override_options) { { not_allowed: 'owned' } } + + it 'does not set it' do + subject.should_not_receive(:not_allowed=) + end + end + + context 'when valid option' do + let(:override_options) { { max_threads: 30 } } + + it 'sets it' do + subject.should_receive(:max_threads=).with(30) + end + end + + context 'when multiple options' do + let(:override_options) { + { max_threads: 10, not_allowed: 'owned', proxy: 'host:port' } + } + + it 'sets @max_threads, @proxy' do + subject.should_not_receive(:not_allowed=) + subject.should_receive(:max_threads=).with(10) + subject.should_receive(:proxy=).with('host:port') + end + end end end