parse robots.txt

Christian Mehlmauer
2013-08-10 11:35:17 +02:00
parent 6c8e76060a
commit 6c008015e9
10 changed files with 171 additions and 33 deletions

View File

@@ -6,6 +6,8 @@ describe 'WebSite' do
  let(:fixtures_dir) { SPEC_FIXTURES_WPSCAN_WEB_SITE_DIR }
  subject(:web_site) { WebSite.new('http://example.localhost/') }

  it_behaves_like 'WebSite::RobotsTxt'

  before :all do
    Browser::reset
    Browser.instance(
@@ -147,24 +149,6 @@ describe 'WebSite' do
    end
  end

  describe '#robots_url' do
    it 'returns the correct url' do
      web_site.robots_url.should === 'http://example.localhost/robots.txt'
    end
  end

  describe '#has_robots?' do
    it 'returns true' do
      stub_request(:get, web_site.robots_url).to_return(status: 200)
      web_site.has_robots?.should be_true
    end

    it 'returns false' do
      stub_request(:get, web_site.robots_url).to_return(status: 404)
      web_site.has_robots?.should be_false
    end
  end

  describe '::has_log?' do
    let(:log_url) { web_site.uri.merge('log.txt').to_s }
    let(:pattern) { %r{PHP Fatal error} }

View File

@@ -0,0 +1,6 @@
ölhkdfys
opihufgcasfa
dsds
fsdf
s
dtf

View File

@@ -0,0 +1,10 @@
User-agent: *
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wordpress/admin/
Disallow: /wordpress/wp-admin/
Disallow: /wordpress/secret/
Disallow: /Wordpress/wp-admin/
Allow: /asdf/
Sitemap: http://10.0.0.0/sitemap.xml.gz
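
For orientation: served from the root of http://example.localhost/, this fixture is expected (per the parse_robots_txt specs below) to reduce to the non-default entries, resolved to absolute URLs. A rough session, assuming the robots.txt request is stubbed to this fixture:

    web_site = WebSite.new('http://example.localhost/')
    web_site.parse_robots_txt
    # => ["http://example.localhost/wordpress/admin/",
    #     "http://example.localhost/wordpress/wp-admin/",
    #     "http://example.localhost/wordpress/secret/",
    #     "http://example.localhost/Wordpress/wp-admin/",
    #     "http://example.localhost/asdf/"]

Note that /wp-admin/ and /wp-includes/ are dropped as known WordPress defaults, and the Sitemap line is ignored.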

View File

@@ -0,0 +1,78 @@
# encoding: UTF-8

shared_examples 'WebSite::RobotsTxt' do
  let(:known_dirs) { WebSite::RobotsTxt.known_dirs }

  describe '#robots_url' do
    it 'returns the correct url' do
      web_site.robots_url.should === 'http://example.localhost/robots.txt'
    end
  end

  describe '#has_robots?' do
    it 'returns true' do
      stub_request(:get, web_site.robots_url).to_return(status: 200)
      web_site.has_robots?.should be_true
    end

    it 'returns false' do
      stub_request(:get, web_site.robots_url).to_return(status: 404)
      web_site.has_robots?.should be_false
    end
  end

  describe '#parse_robots_txt' do
    context 'installed in root' do
      after :each do
        stub_request_to_fixture(url: web_site.robots_url, fixture: @fixture)
        robots = web_site.parse_robots_txt
        robots.should =~ @expected
      end

      it 'returns an empty Array (empty robots.txt)' do
        @fixture  = fixtures_dir + '/robots_txt/empty_robots.txt'
        @expected = []
      end

      it 'returns an empty Array (invalid robots.txt)' do
        @fixture  = fixtures_dir + '/robots_txt/invalid_robots.txt'
        @expected = []
      end

      it 'returns an Array of urls (valid robots.txt)' do
        @fixture  = fixtures_dir + '/robots_txt/robots.txt'
        @expected = %w(
          http://example.localhost/wordpress/admin/
          http://example.localhost/wordpress/wp-admin/
          http://example.localhost/wordpress/secret/
          http://example.localhost/Wordpress/wp-admin/
          http://example.localhost/asdf/
        )
      end
    end

    context 'installed in sub directory' do
      it 'returns an Array of urls (valid robots.txt, WP installed in subdir)' do
        web_site_sub = WebSite.new('http://example.localhost/wordpress/')
        fixture      = fixtures_dir + '/robots_txt/robots.txt'
        expected     = %w(
          http://example.localhost/wordpress/admin/
          http://example.localhost/wordpress/secret/
          http://example.localhost/Wordpress/wp-admin/
          http://example.localhost/asdf/
        )

        stub_request_to_fixture(url: web_site_sub.robots_url, fixture: fixture)
        robots = web_site_sub.parse_robots_txt
        robots.should =~ expected
      end
    end
  end

  describe '#known_dirs' do
    it 'does not contain duplicates' do
      known_dirs.flatten.uniq.length.should == known_dirs.length
    end
  end
end
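
These shared examples pin down the behaviour of the WebSite::RobotsTxt mixin this commit introduces. As a rough orientation, a minimal implementation that would satisfy them could look like the sketch below. This is not the code from the commit; Browser.get returning a response with code and body, and a uri (URI) reader on WebSite, are assumptions taken from the surrounding specs.

    # encoding: UTF-8
    # Minimal sketch, not the actual WPScan implementation.
    class WebSite
      module RobotsTxt
        # Default WordPress directories; entries matching these (directly or
        # under the install path) carry no extra information and are dropped.
        def self.known_dirs
          %w(/wp-admin/ /wp-includes/)
        end

        # robots.txt always lives at the root of the host
        def robots_url
          uri.merge('/robots.txt').to_s
        end

        def has_robots?
          Browser.get(robots_url).code == 200
        end

        # @return [ Array ] Absolute URLs built from the Allow/Disallow
        #   entries, minus the known default directories
        def parse_robots_txt
          response = Browser.get(robots_url)
          return [] unless response.code == 200

          # Known defaults both at the host root and under the install path
          # (e.g. /wordpress/wp-admin/ for a sub directory install)
          ignored = RobotsTxt.known_dirs +
                    RobotsTxt.known_dirs.map { |dir| uri.path.chomp('/') + dir }

          entries = response.body.scan(/^(?:dis)?allow:\s*(\S+)/i).flatten.uniq

          entries.reject { |path| ignored.include?(path) }
                 .map    { |path| uri.merge(path).to_s }
        end
      end
    end

Building ignored from the install path is what makes the sub directory case above come out right: for http://example.localhost/wordpress/, the entry /wordpress/wp-admin/ is filtered as a default, while the case-differing /Wordpress/wp-admin/ survives.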