Adds more improvements to passive scanning when there are a lot of URLs

This commit is contained in:
erwanlr
2020-02-13 15:36:42 +00:00
parent 7d2b8a2a8b
commit 72d699b39a
18 changed files with 214 additions and 38 deletions

View File

@@ -2,12 +2,14 @@
shared_examples 'App::Finders::WpItems::UrlsInPage' do
before do
stub_request(:get, page_url).to_return(body: File.read(fixtures.join(file)))
allow(finder.target).to receive(:content_dir).and_return('wp-content')
stub_request(:get, page_url).to_return(body: defined?(body) ? body : File.read(fixtures.join(fixture)))
end
describe '#items_from_links' do
context 'when none found' do
let(:file) { 'none.html' }
let(:fixture) { 'none.html' }
it 'returns an empty array' do
expect(finder.items_from_links(type)).to eql([])
@@ -15,21 +17,31 @@ shared_examples 'App::Finders::WpItems::UrlsInPage' do
end
context 'when found' do
let(:file) { 'found.html' }
let(:fixture) { 'found.html' }
it 'returns the expected array' do
expect(finder.target).to receive(:content_dir).at_least(1).and_return('wp-content')
expect(finder.items_from_links(type, uniq_links)).to eql expected_from_links
end
end
# Guards against slow link extraction on pages that contain many links
# unrelated to the items being searched for.
context 'when a lot of unrelated links' do
  # Synthetic page: 250 anchor/image pairs built from `url`. The shared
  # `before` hook serves `body` instead of a fixture file when it is defined.
  let(:body) do
    Array.new(250) { |i| "<a href='#{url}#{i}.html'>Link</a><img src='#{url}img-#{i}.gif'/>" }.join("\n")
  end

  it 'should not take a while to process the page' do
    # Use the monotonic clock: wall-clock time (Time.now) can jump because of
    # NTP/DST adjustments and make this timing assertion flaky.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    expect(finder.items_from_links(type)).to eql []

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    # Budget of one second for the whole page.
    expect(elapsed).to be < 1
  end
end
end
describe '#items_from_codes' do
before { expect(finder.target).to receive(:content_dir).at_least(1).and_return('wp-content') }
context 'when none found' do
let(:file) { 'none.html' }
let(:fixture) { 'none.html' }
it 'returns an empty array' do
expect(finder.items_from_codes(type)).to eql([])
@@ -37,7 +49,7 @@ shared_examples 'App::Finders::WpItems::UrlsInPage' do
end
context 'when found' do
let(:file) { 'found.html' }
let(:fixture) { 'found.html' }
it 'returns the expected array' do
expect(finder.items_from_codes(type, uniq_codes)).to eql expected_from_codes