Browser modules final work
This commit is contained in:
@@ -24,20 +24,19 @@ class Browser
|
|||||||
attr_reader :hydra, :config_file, :cache_dir
|
attr_reader :hydra, :config_file, :cache_dir
|
||||||
|
|
||||||
# @param [ Hash ] options
|
# @param [ Hash ] options
|
||||||
# @options
|
#
|
||||||
|
# @return [ Browser ]
|
||||||
def initialize(options = {})
|
def initialize(options = {})
|
||||||
@config_file = options[:config_file] || CONF_DIR + '/browser.conf.json'
|
@config_file = options[:config_file] || CONF_DIR + '/browser.conf.json'
|
||||||
@cache_dir = options[:cache_dir] || CACHE_DIR + '/browser'
|
@cache_dir = options[:cache_dir] || CACHE_DIR + '/browser'
|
||||||
|
|
||||||
#options.delete(:config_file)
|
|
||||||
|
|
||||||
load_config()
|
load_config()
|
||||||
|
override_config(options)
|
||||||
|
|
||||||
#if options.length > 0
|
unless @hydra
|
||||||
override_config(options)
|
@hydra = Typhoeus::Hydra.new(max_concurrency: self.max_threads)
|
||||||
#end
|
end
|
||||||
|
|
||||||
@hydra = Typhoeus::Hydra.new(max_concurrency: self.max_threads)
|
|
||||||
@cache = TyphoeusCache.new(@cache_dir)
|
@cache = TyphoeusCache.new(@cache_dir)
|
||||||
@cache.clean
|
@cache.clean
|
||||||
|
|
||||||
@@ -46,6 +45,9 @@ class Browser
|
|||||||
|
|
||||||
private_class_method :new
|
private_class_method :new
|
||||||
|
|
||||||
|
# @param [ Hash ] options
|
||||||
|
#
|
||||||
|
# @return [ Browser ]
|
||||||
def self.instance(options = {})
|
def self.instance(options = {})
|
||||||
unless @@instance
|
unless @@instance
|
||||||
@@instance = new(options)
|
@@instance = new(options)
|
||||||
@@ -57,8 +59,13 @@ class Browser
|
|||||||
@@instance = nil
|
@@instance = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO reload hydra (if the .load_config is called on a browser object,
|
#
|
||||||
# hydra will not have the new @max_threads)
|
# If an option was set but is not in the new config_file
|
||||||
|
# its value is kept
|
||||||
|
#
|
||||||
|
# @param [ String ] config_file
|
||||||
|
#
|
||||||
|
# @return [ void ]
|
||||||
def load_config(config_file = nil)
|
def load_config(config_file = nil)
|
||||||
@config_file = config_file || @config_file
|
@config_file = config_file || @config_file
|
||||||
|
|
||||||
@@ -77,10 +84,17 @@ class Browser
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# @param [ String ] url
|
||||||
|
# @param [ Hash ] params
|
||||||
|
#
|
||||||
|
# @return [ Typhoeus::Request ]
|
||||||
def forge_request(url, params = {})
|
def forge_request(url, params = {})
|
||||||
Typhoeus::Request.new(url, merge_request_params(params))
|
Typhoeus::Request.new(url, merge_request_params(params))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# @param [ Hash ] params
|
||||||
|
#
|
||||||
|
# @return [ Hash ]
|
||||||
def merge_request_params(params = {})
|
def merge_request_params(params = {})
|
||||||
params = Browser.append_params_header_field(
|
params = Browser.append_params_header_field(
|
||||||
params,
|
params,
|
||||||
@@ -121,7 +135,11 @@ class Browser
|
|||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
# return Array
|
# @param [ Hash ] params
|
||||||
|
# @param [ String ] field
|
||||||
|
# @param [ Mixed ] field_value
|
||||||
|
#
|
||||||
|
# @return [ Array ]
|
||||||
def self.append_params_header_field(params = {}, field, field_value)
|
def self.append_params_header_field(params = {}, field, field_value)
|
||||||
if !params.has_key?(:headers)
|
if !params.has_key?(:headers)
|
||||||
params = params.merge(:headers => { field => field_value })
|
params = params.merge(:headers => { field => field_value })
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ class Browser
|
|||||||
|
|
||||||
USER_AGENT_MODES = %w{ static semi-static random }
|
USER_AGENT_MODES = %w{ static semi-static random }
|
||||||
|
|
||||||
attr_reader :basic_auth, :user_agent_mode, :proxy, :proxy_auth
|
|
||||||
attr_accessor :available_user_agents, :cache_ttl
|
attr_accessor :available_user_agents, :cache_ttl
|
||||||
attr_writer :max_threads, :user_agent
|
attr_reader :basic_auth, :user_agent_mode, :proxy, :proxy_auth
|
||||||
|
attr_writer :user_agent
|
||||||
|
|
||||||
# Sets the Basic Authentication credentials
|
# Sets the Basic Authentication credentials
|
||||||
# Accepted format:
|
# Accepted format:
|
||||||
@@ -32,11 +32,22 @@ class Browser
|
|||||||
@max_threads || 1
|
@max_threads || 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def max_threads=(threads)
|
||||||
|
if threads.is_a?(Integer) && threads > 0
|
||||||
|
@max_threads = threads
|
||||||
|
@hydra = Typhoeus::Hydra.new(max_concurrency: threads)
|
||||||
|
else
|
||||||
|
raise 'max_threads must be an Integer > 0'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Sets the user_agent_mode, which can be one of the following:
|
# Sets the user_agent_mode, which can be one of the following:
|
||||||
# static: The UA is defined by the user, and will be the same in each request
|
# static: The UA is defined by the user, and will be the same in each request
|
||||||
# semi-static: The UA is randomly chosen at the first request, and will not change
|
# semi-static: The UA is randomly chosen at the first request, and will not change
|
||||||
# random: UA randomly chosen each request
|
# random: UA randomly chosen each request
|
||||||
#
|
#
|
||||||
|
# UA are from @available_user_agents
|
||||||
|
#
|
||||||
# @param [ String ] ua_mode
|
# @param [ String ] ua_mode
|
||||||
#
|
#
|
||||||
# @return [ void ]
|
# @return [ void ]
|
||||||
|
|||||||
@@ -19,11 +19,8 @@ describe Browser do
|
|||||||
['user_agent', 'user_agent_mode', 'available_user_agents', 'proxy',
|
['user_agent', 'user_agent_mode', 'available_user_agents', 'proxy',
|
||||||
'max_threads', 'cache_ttl']
|
'max_threads', 'cache_ttl']
|
||||||
}
|
}
|
||||||
|
let(:json_config_without_proxy) { JSON.parse(File.read(CONFIG_FILE_WITHOUT_PROXY)) }
|
||||||
before :all do
|
let(:json_config_with_proxy) { JSON.parse(File.read(CONFIG_FILE_WITH_PROXY)) }
|
||||||
@json_config_without_proxy = JSON.parse(File.read(CONFIG_FILE_WITHOUT_PROXY))
|
|
||||||
@json_config_with_proxy = JSON.parse(File.read(CONFIG_FILE_WITH_PROXY))
|
|
||||||
end
|
|
||||||
|
|
||||||
def check_instance_variables(browser, json_expected_vars)
|
def check_instance_variables(browser, json_expected_vars)
|
||||||
json_expected_vars['max_threads'] ||= 1 # max_thread can not be nil
|
json_expected_vars['max_threads'] ||= 1 # max_thread can not be nil
|
||||||
@@ -44,7 +41,7 @@ describe Browser do
|
|||||||
|
|
||||||
context "when default config_file = #{CONFIG_FILE_WITHOUT_PROXY}" do
|
context "when default config_file = #{CONFIG_FILE_WITHOUT_PROXY}" do
|
||||||
it 'will check the instance vars' do
|
it 'will check the instance vars' do
|
||||||
@json_expected_vars = @json_config_without_proxy
|
@json_expected_vars = json_config_without_proxy
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -52,7 +49,7 @@ describe Browser do
|
|||||||
let(:options) { { config_file: CONFIG_FILE_WITH_PROXY } }
|
let(:options) { { config_file: CONFIG_FILE_WITH_PROXY } }
|
||||||
|
|
||||||
it 'will check the instance vars' do
|
it 'will check the instance vars' do
|
||||||
@json_expected_vars = @json_config_with_proxy
|
@json_expected_vars = json_config_with_proxy
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -63,19 +60,32 @@ describe Browser do
|
|||||||
after { subject.cache_dir.should == cache_dir }
|
after { subject.cache_dir.should == cache_dir }
|
||||||
|
|
||||||
it 'sets @cache_dir' do
|
it 'sets @cache_dir' do
|
||||||
@json_expected_vars = @json_config_without_proxy
|
@json_expected_vars = json_config_without_proxy
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO
|
|
||||||
describe '#load_config' do
|
describe '#load_config' do
|
||||||
it 'should raise an error if file is a symlink' do
|
context 'when config_file is a symlink' do
|
||||||
symlink = './rspec_symlink'
|
let(:config_file) { './rspec_symlink' }
|
||||||
|
|
||||||
File.symlink('./testfile', symlink)
|
it 'raises an error' do
|
||||||
expect { browser.load_config(symlink) }.to raise_error("[ERROR] Config file is a symlink.")
|
File.symlink('./testfile', config_file)
|
||||||
File.unlink(symlink)
|
expect { browser.load_config(config_file) }.to raise_error("[ERROR] Config file is a symlink.")
|
||||||
|
File.unlink(config_file)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'otherwise' do
|
||||||
|
after do
|
||||||
|
browser.load_config(@config_file)
|
||||||
|
check_instance_variables(browser, @expected)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'sets the correct variables' do
|
||||||
|
@config_file = CONFIG_FILE_WITH_PROXY
|
||||||
|
@expected = json_config_without_proxy.merge(json_config_with_proxy)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -197,8 +207,7 @@ describe Browser do
|
|||||||
|
|
||||||
url = 'http://example.localhost'
|
url = 'http://example.localhost'
|
||||||
|
|
||||||
stub_request(:get, url).
|
stub_request(:get, url).to_return(status: 200, body: 'Hello World !')
|
||||||
to_return(status: 200, body: 'Hello World !')
|
|
||||||
|
|
||||||
response1 = Browser.get(url)
|
response1 = Browser.get(url)
|
||||||
response2 = Browser.get(url)
|
response2 = Browser.get(url)
|
||||||
@@ -212,8 +221,9 @@ describe Browser do
|
|||||||
it 'should not throw an encoding exception' do
|
it 'should not throw an encoding exception' do
|
||||||
url = SPEC_FIXTURES_DIR + '/utf8.html'
|
url = SPEC_FIXTURES_DIR + '/utf8.html'
|
||||||
stub_request(:get, url).to_return(status: 200, body: File.read(url))
|
stub_request(:get, url).to_return(status: 200, body: File.read(url))
|
||||||
response1 = Browser.get(url)
|
|
||||||
expect { response1.body }.to_not raise_error
|
response = Browser.get(url)
|
||||||
|
expect { response.body }.to_not raise_error
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -7,10 +7,10 @@ shared_examples 'Browser::Options' do
|
|||||||
|
|
||||||
after do
|
after do
|
||||||
if @expected
|
if @expected
|
||||||
subject.basic_auth = @auth
|
browser.basic_auth = @auth
|
||||||
subject.basic_auth.should == @expected
|
browser.basic_auth.should == @expected
|
||||||
else
|
else
|
||||||
expect { subject.basic_auth = @auth }.to raise_error(exception)
|
expect { browser.basic_auth = @auth }.to raise_error(exception)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -41,18 +41,29 @@ shared_examples 'Browser::Options' do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe '#max_threads' do
|
describe '#max_threads= & #max_threads' do
|
||||||
|
let(:exception) { 'max_threads must be an Integer > 0' }
|
||||||
|
|
||||||
after do
|
after do
|
||||||
subject.max_threads = @max_threads
|
if @expected
|
||||||
subject.max_threads.should == @expected
|
browser.max_threads = @max_threads
|
||||||
|
browser.max_threads.should == @expected
|
||||||
|
else
|
||||||
|
expect { browser.max_threads = @max_threads }.to raise_error(exception)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'when no @max_threads' do
|
context 'when the argument is not an Integer > 0' do
|
||||||
@max_threads = nil
|
it 'raises an error' do
|
||||||
@expected = 1
|
@max_thrads = nil
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'raises an error' do
|
||||||
|
@max_threads = -3
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'when @max_threads' do
|
context 'when the argument is an Integer' do
|
||||||
it 'returns the @max_threads' do
|
it 'returns the @max_threads' do
|
||||||
@max_threads = 10
|
@max_threads = 10
|
||||||
@expected = 10
|
@expected = 10
|
||||||
@@ -64,18 +75,18 @@ shared_examples 'Browser::Options' do
|
|||||||
# Testing all valid modes
|
# Testing all valid modes
|
||||||
Browser::USER_AGENT_MODES.each do |user_agent_mode|
|
Browser::USER_AGENT_MODES.each do |user_agent_mode|
|
||||||
it "sets & returns #{user_agent_mode}" do
|
it "sets & returns #{user_agent_mode}" do
|
||||||
subject.user_agent_mode = user_agent_mode
|
browser.user_agent_mode = user_agent_mode
|
||||||
subject.user_agent_mode.should === user_agent_mode
|
browser.user_agent_mode.should === user_agent_mode
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'sets the mode to "static" if nil is given' do
|
it 'sets the mode to "static" if nil is given' do
|
||||||
subject.user_agent_mode = nil
|
browser.user_agent_mode = nil
|
||||||
subject.user_agent_mode.should === 'static'
|
browser.user_agent_mode.should === 'static'
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'raises an error if the mode is not valid' do
|
it 'raises an error if the mode is not valid' do
|
||||||
expect { subject.user_agent_mode = 'invalid-mode' }.to raise_error
|
expect { browser.user_agent_mode = 'invalid-mode' }.to raise_error
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -84,39 +95,39 @@ shared_examples 'Browser::Options' do
|
|||||||
|
|
||||||
context 'when static mode' do
|
context 'when static mode' do
|
||||||
it 'returns the same user agent' do
|
it 'returns the same user agent' do
|
||||||
subject.user_agent = 'fake UA'
|
browser.user_agent = 'fake UA'
|
||||||
subject.user_agent_mode = 'static'
|
browser.user_agent_mode = 'static'
|
||||||
|
|
||||||
(1..3).each do
|
(1..3).each do
|
||||||
subject.user_agent.should === 'fake UA'
|
browser.user_agent.should === 'fake UA'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'when semi-static mode' do
|
context 'when semi-static mode' do
|
||||||
it 'chooses a random user_agent in the available_user_agents array and always return it' do
|
it 'chooses a random user_agent in the available_user_agents array and always return it' do
|
||||||
subject.available_user_agents = available_user_agents
|
browser.available_user_agents = available_user_agents
|
||||||
subject.user_agent = 'Firefox 11.0'
|
browser.user_agent = 'Firefox 11.0'
|
||||||
subject.user_agent_mode = 'semi-static'
|
browser.user_agent_mode = 'semi-static'
|
||||||
|
|
||||||
user_agent = subject.user_agent
|
user_agent = browser.user_agent
|
||||||
user_agent.should_not === 'Firefox 11.0'
|
user_agent.should_not === 'Firefox 11.0'
|
||||||
available_user_agents.include?(user_agent).should be_true
|
available_user_agents.include?(user_agent).should be_true
|
||||||
|
|
||||||
(1..3).each do
|
(1..3).each do
|
||||||
subject.user_agent.should === user_agent
|
browser.user_agent.should === user_agent
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'when random' do
|
context 'when random' do
|
||||||
it 'returns a random user agent each time' do
|
it 'returns a random user agent each time' do
|
||||||
subject.available_user_agents = available_user_agents
|
browser.available_user_agents = available_user_agents
|
||||||
subject.user_agent_mode = 'random'
|
browser.user_agent_mode = 'random'
|
||||||
|
|
||||||
ua_1 = subject.user_agent
|
ua_1 = browser.user_agent
|
||||||
ua_2 = subject.user_agent
|
ua_2 = browser.user_agent
|
||||||
ua_3 = subject.user_agent
|
ua_3 = browser.user_agent
|
||||||
|
|
||||||
fail if ua_1 === ua_2 and ua_2 === ua_3
|
fail if ua_1 === ua_2 and ua_2 === ua_3
|
||||||
end
|
end
|
||||||
@@ -128,10 +139,10 @@ shared_examples 'Browser::Options' do
|
|||||||
|
|
||||||
after do
|
after do
|
||||||
if @expected
|
if @expected
|
||||||
subject.proxy = @proxy
|
browser.proxy = @proxy
|
||||||
subject.proxy.should == @expected
|
browser.proxy.should == @expected
|
||||||
else
|
else
|
||||||
expect { subject.proxy = @proxy }.to raise_error(exception)
|
expect { browser.proxy = @proxy }.to raise_error(exception)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -152,10 +163,10 @@ shared_examples 'Browser::Options' do
|
|||||||
|
|
||||||
after :each do
|
after :each do
|
||||||
if @expected
|
if @expected
|
||||||
subject.proxy_auth = @proxy_auth
|
browser.proxy_auth = @proxy_auth
|
||||||
subject.proxy_auth.should === @expected
|
browser.proxy_auth.should === @expected
|
||||||
else
|
else
|
||||||
expect { subject.proxy_auth = @proxy_auth }.to raise_error
|
expect { browser.proxy_auth = @proxy_auth }.to raise_error
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -206,16 +217,16 @@ shared_examples 'Browser::Options' do
|
|||||||
|
|
||||||
describe '#override_config' do
|
describe '#override_config' do
|
||||||
after do
|
after do
|
||||||
subject.send(:override_config, override_options)
|
browser.send(:override_config, override_options)
|
||||||
end
|
end
|
||||||
|
|
||||||
let(:config) { JSON.parse(File.read(subject.config_file)) }
|
let(:config) { JSON.parse(File.read(browser.config_file)) }
|
||||||
|
|
||||||
context 'when an option value is nil' do
|
context 'when an option value is nil' do
|
||||||
let(:override_options) { { max_threads: nil } }
|
let(:override_options) { { max_threads: nil } }
|
||||||
|
|
||||||
it 'does not set it' do
|
it 'does not set it' do
|
||||||
subject.should_not_receive(:max_threads=)
|
browser.should_not_receive(:max_threads=)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -223,7 +234,7 @@ shared_examples 'Browser::Options' do
|
|||||||
let(:override_options) { { not_allowed: 'owned' } }
|
let(:override_options) { { not_allowed: 'owned' } }
|
||||||
|
|
||||||
it 'does not set it' do
|
it 'does not set it' do
|
||||||
subject.should_not_receive(:not_allowed=)
|
browser.should_not_receive(:not_allowed=)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -231,7 +242,7 @@ shared_examples 'Browser::Options' do
|
|||||||
let(:override_options) { { max_threads: 30 } }
|
let(:override_options) { { max_threads: 30 } }
|
||||||
|
|
||||||
it 'sets it' do
|
it 'sets it' do
|
||||||
subject.should_receive(:max_threads=).with(30)
|
browser.should_receive(:max_threads=).with(30)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -241,9 +252,9 @@ shared_examples 'Browser::Options' do
|
|||||||
}
|
}
|
||||||
|
|
||||||
it 'sets @max_threads, @proxy' do
|
it 'sets @max_threads, @proxy' do
|
||||||
subject.should_not_receive(:not_allowed=)
|
browser.should_not_receive(:not_allowed=)
|
||||||
subject.should_receive(:max_threads=).with(10)
|
browser.should_receive(:max_threads=).with(10)
|
||||||
subject.should_receive(:proxy=).with('host:port')
|
browser.should_receive(:proxy=).with('host:port')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user