parse robots.txt

Christian Mehlmauer
2013-08-10 11:35:17 +02:00
parent 6c8e76060a
commit 6c008015e9
10 changed files with 171 additions and 33 deletions

View File

@@ -6,6 +6,8 @@ describe 'WebSite' do
  let(:fixtures_dir) { SPEC_FIXTURES_WPSCAN_WEB_SITE_DIR }
  subject(:web_site) { WebSite.new('http://example.localhost/') }

  it_behaves_like 'WebSite::RobotsTxt'

  before :all do
    Browser::reset
    Browser.instance(
@@ -147,24 +149,6 @@ describe 'WebSite' do
    end
  end

  describe '#robots_url' do
    it 'returns the correct url' do
      web_site.robots_url.should === 'http://example.localhost/robots.txt'
    end
  end

  describe '#has_robots?' do
    it 'returns true' do
      stub_request(:get, web_site.robots_url).to_return(status: 200)
      web_site.has_robots?.should be_true
    end

    it 'returns false' do
      stub_request(:get, web_site.robots_url).to_return(status: 404)
      web_site.has_robots?.should be_false
    end
  end

  describe '::has_log?' do
    let(:log_url) { web_site.uri.merge('log.txt').to_s }
    let(:pattern) { %r{PHP Fatal error} }

View File

@@ -0,0 +1,6 @@
ölhkdfys
opihufgcasfa
dsds
fsdf
s
dtf

View File

@@ -0,0 +1,10 @@
User-agent: *
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wordpress/admin/
Disallow: /wordpress/wp-admin/
Disallow: /wordpress/secret/
Disallow: /Wordpress/wp-admin/
Allow: /asdf/
Sitemap: http://10.0.0.0/sitemap.xml.gz
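
For orientation: served from the root of http://example.localhost/, this fixture is expected (per the parse_robots_txt specs below) to reduce to the non-default entries, resolved to absolute URLs. A rough session, assuming the robots.txt request is stubbed to this fixture:

    web_site = WebSite.new('http://example.localhost/')
    web_site.parse_robots_txt
    # => ["http://example.localhost/wordpress/admin/",
    #     "http://example.localhost/wordpress/wp-admin/",
    #     "http://example.localhost/wordpress/secret/",
    #     "http://example.localhost/Wordpress/wp-admin/",
    #     "http://example.localhost/asdf/"]

Note that /wp-admin/ and /wp-includes/ are dropped as known WordPress defaults, and the Sitemap line is ignored.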

View File

@@ -0,0 +1,78 @@
# encoding: UTF-8

shared_examples 'WebSite::RobotsTxt' do
  let(:known_dirs) { WebSite::RobotsTxt.known_dirs }

  describe '#robots_url' do
    it 'returns the correct url' do
      web_site.robots_url.should === 'http://example.localhost/robots.txt'
    end
  end

  describe '#has_robots?' do
    it 'returns true' do
      stub_request(:get, web_site.robots_url).to_return(status: 200)
      web_site.has_robots?.should be_true
    end

    it 'returns false' do
      stub_request(:get, web_site.robots_url).to_return(status: 404)
      web_site.has_robots?.should be_false
    end
  end

  describe '#parse_robots_txt' do
    context 'installed in root' do
      after :each do
        stub_request_to_fixture(url: web_site.robots_url, fixture: @fixture)
        robots = web_site.parse_robots_txt
        robots.should =~ @expected
      end

      it 'returns an empty Array (empty robots.txt)' do
        @fixture  = fixtures_dir + '/robots_txt/empty_robots.txt'
        @expected = []
      end

      it 'returns an empty Array (invalid robots.txt)' do
        @fixture  = fixtures_dir + '/robots_txt/invalid_robots.txt'
        @expected = []
      end

      it 'returns an Array of urls (valid robots.txt)' do
        @fixture  = fixtures_dir + '/robots_txt/robots.txt'
        @expected = %w(
          http://example.localhost/wordpress/admin/
          http://example.localhost/wordpress/wp-admin/
          http://example.localhost/wordpress/secret/
          http://example.localhost/Wordpress/wp-admin/
          http://example.localhost/asdf/
        )
      end
    end

    context 'installed in sub directory' do
      it 'returns an Array of urls (valid robots.txt, WP installed in subdir)' do
        web_site_sub = WebSite.new('http://example.localhost/wordpress/')
        fixture      = fixtures_dir + '/robots_txt/robots.txt'
        expected     = %w(
          http://example.localhost/wordpress/admin/
          http://example.localhost/wordpress/secret/
          http://example.localhost/Wordpress/wp-admin/
          http://example.localhost/asdf/
        )

        stub_request_to_fixture(url: web_site_sub.robots_url, fixture: fixture)
        robots = web_site_sub.parse_robots_txt
        robots.should =~ expected
      end
    end
  end

  describe '#known_dirs' do
    it 'does not contain duplicates' do
      known_dirs.flatten.uniq.length.should == known_dirs.length
    end
  end
end
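
These shared examples pin down the behaviour of the WebSite::RobotsTxt mixin this commit introduces. As a rough orientation, a minimal implementation that would satisfy them could look like the sketch below. This is not the code from the commit; Browser.get returning a response with code and body, and a uri (URI) reader on WebSite, are assumptions taken from the surrounding specs.

    # encoding: UTF-8
    # Minimal sketch, not the actual WPScan implementation.
    class WebSite
      module RobotsTxt
        # Default WordPress directories; entries matching these (directly or
        # under the install path) carry no extra information and are dropped.
        def self.known_dirs
          %w(/wp-admin/ /wp-includes/)
        end

        # robots.txt always lives at the root of the host
        def robots_url
          uri.merge('/robots.txt').to_s
        end

        def has_robots?
          Browser.get(robots_url).code == 200
        end

        # @return [ Array ] Absolute URLs built from the Allow/Disallow
        #   entries, minus the known default directories
        def parse_robots_txt
          response = Browser.get(robots_url)
          return [] unless response.code == 200

          # Known defaults both at the host root and under the install path
          # (e.g. /wordpress/wp-admin/ for a sub directory install)
          ignored = RobotsTxt.known_dirs +
                    RobotsTxt.known_dirs.map { |dir| uri.path.chomp('/') + dir }

          entries = response.body.scan(/^(?:dis)?allow:\s*(\S+)/i).flatten.uniq

          entries.reject { |path| ignored.include?(path) }
                 .map    { |path| uri.merge(path).to_s }
        end
      end
    end

Building ignored from the install path is what makes the sub directory case above come out right: for http://example.localhost/wordpress/, the entry /wordpress/wp-admin/ is filtered as a default, while the case-differing /Wordpress/wp-admin/ survives.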