diff --git a/lib/wpscan/web_site.rb b/lib/wpscan/web_site.rb
index 13e815ce..3448f494 100644
--- a/lib/wpscan/web_site.rb
+++ b/lib/wpscan/web_site.rb
@@ -1,6 +1,9 @@
 # encoding: UTF-8
 
+require 'web_site/robots_txt'
+
 class WebSite
+  include WebSite::RobotsTxt
 
   attr_reader :uri
 
@@ -94,18 +97,6 @@ class WebSite
     homepage_body[%r{}, 1]
   end
 
-  # Checks if a robots.txt file exists
-  def has_robots?
-    Browser.get(robots_url).code == 200
-  end
-
-  # Gets a robots.txt URL
-  #
-  # @return [ String ]
-  def robots_url
-    @uri.merge('robots.txt').to_s
-  end
-
   # Only the first 700 bytes are checked to avoid the download
   # of the whole file which can be very huge (like 2 Go)
   #
diff --git a/lib/wpscan/web_site/robots_txt.rb b/lib/wpscan/web_site/robots_txt.rb
new file mode 100644
index 00000000..00d54618
--- /dev/null
+++ b/lib/wpscan/web_site/robots_txt.rb
@@ -0,0 +1,64 @@
+# encoding: UTF-8
+
+class WebSite
+  module RobotsTxt
+
+    # Checks if a robots.txt file exists
+    # @return [ Boolean ]
+    def has_robots?
+      Browser.get(robots_url).code == 200
+    end
+
+    # Gets a robots.txt URL
+    # @return [ String ]
+    def robots_url
+      temp = @uri.clone
+      temp.path = '/robots.txt'
+      temp.to_s
+    end
+
+
+    # Parse robots.txt
+    # @return [ Array ] URLs generated from robots.txt
+    def parse_robots_txt
+      return unless has_robots?
+
+      return_object = []
+      response = Browser.get(robots_url.to_s)
+      body = response.body
+      # Get all allow and disallow urls
+      entries = body.scan(/^(?:dis)?allow:\s*(.*)$/i)
+      if entries
+        entries.flatten!
+        entries.compact.sort!
+        wordpress_path = @uri.path
+        RobotsTxt.known_dirs.each do |d|
+          entries.delete(d)
+          # also delete when wordpress is installed in subdir
+          dir_with_subdir = "#{wordpress_path}/#{d}".gsub(/\/+/, '/')
+          entries.delete(dir_with_subdir)
+        end
+
+        entries.each do |d|
+          temp = @uri.clone
+          temp.path = d
+          return_object << temp.to_s
+        end
+      end
+      return_object
+    end
+
+    protected
+
+    # @return [ Array ]
+    def self.known_dirs
+      %w{
+        /
+        /wp-admin/
+        /wp-includes/
+        /wp-content/
+      }
+    end
+
+  end
+end
diff --git a/lib/wpscan/wp_target/interesting_headers.rb b/lib/wpscan/wp_target/interesting_headers.rb
index fdb835bc..9f1ba90d 100644
--- a/lib/wpscan/wp_target/interesting_headers.rb
+++ b/lib/wpscan/wp_target/interesting_headers.rb
@@ -4,6 +4,7 @@ class WpTarget < WebSite
   module InterestingHeaders
 
     # Checks for interesting headers
+    # @return [ Array ] Interesting Headers
     def interesting_headers
      response = Browser.head(@uri.to_s)
      headers  = response.headers
@@ -15,7 +16,7 @@
 
     protected
 
-    # @return Array
+    # @return [ Array ]
     def self.known_headers
       %w{
         Location
diff --git a/lib/wpscan/wp_target/wp_config_backup.rb b/lib/wpscan/wp_target/wp_config_backup.rb
index e39afa22..5824d295 100644
--- a/lib/wpscan/wp_target/wp_config_backup.rb
+++ b/lib/wpscan/wp_target/wp_config_backup.rb
@@ -5,7 +5,7 @@ class WpTarget < WebSite
 
     # Checks to see if wp-config.php has a backup
     # See http://www.feross.org/cmsploit/
-    # return an array of backup config files url
+    # @return [ Array ] Backup config files
     def config_backup
       found   = []
       backups = WpConfigBackup.config_backup_files
@@ -37,7 +37,7 @@ class WpTarget < WebSite
       found
     end
 
-    # @return Array
+    # @return [ Array ]
     def self.config_backup_files
       %w{
         wp-config.php~ #wp-config.php# wp-config.php.save wp-config.php.swp wp-config.php.swo wp-config.php_bak
diff --git a/spec/lib/wpscan/web_site_spec.rb b/spec/lib/wpscan/web_site_spec.rb
index a3585b36..26181a2a 100644
--- a/spec/lib/wpscan/web_site_spec.rb
+++ b/spec/lib/wpscan/web_site_spec.rb
@@ -6,6 +6,8 @@ describe 'WebSite' do
   let(:fixtures_dir) { SPEC_FIXTURES_WPSCAN_WEB_SITE_DIR }
   subject(:web_site) { WebSite.new('http://example.localhost/') }
 
+  it_behaves_like 'WebSite::RobotsTxt'
+
   before :all do
     Browser::reset
     Browser.instance(
@@ -147,24 +149,6 @@ describe 'WebSite' do
     end
   end
 
-  describe '#robots_url' do
-    it 'returns the correct url' do
-      web_site.robots_url.should === 'http://example.localhost/robots.txt'
-    end
-  end
-
-  describe '#has_robots?' do
-    it 'returns true' do
-      stub_request(:get, web_site.robots_url).to_return(status: 200)
-      web_site.has_robots?.should be_true
-    end
-
-    it 'returns false' do
-      stub_request(:get, web_site.robots_url).to_return(status: 404)
-      web_site.has_robots?.should be_false
-    end
-  end
-
   describe '::has_log?' do
     let(:log_url) { web_site.uri.merge('log.txt').to_s }
     let(:pattern) { %r{PHP Fatal error} }
diff --git a/spec/samples/wpscan/web_site/robots_txt/empty_robots.txt b/spec/samples/wpscan/web_site/robots_txt/empty_robots.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/spec/samples/wpscan/web_site/robots_txt/invalid_robots.txt b/spec/samples/wpscan/web_site/robots_txt/invalid_robots.txt
new file mode 100644
index 00000000..f7bc0aa4
--- /dev/null
+++ b/spec/samples/wpscan/web_site/robots_txt/invalid_robots.txt
@@ -0,0 +1,6 @@
+ölhkdfys
+opihufgcasfa
+dsds
+fsdf
+s
+dtf
\ No newline at end of file
diff --git a/spec/samples/wpscan/web_site/robots_txt/robots.txt b/spec/samples/wpscan/web_site/robots_txt/robots.txt
new file mode 100644
index 00000000..39c62f8b
--- /dev/null
+++ b/spec/samples/wpscan/web_site/robots_txt/robots.txt
@@ -0,0 +1,10 @@
+User-agent: *
+Disallow: /wp-admin/
+Disallow: /wp-includes/
+Disallow: /wordpress/admin/
+Disallow: /wordpress/wp-admin/
+Disallow: /wordpress/secret/
+Disallow: /Wordpress/wp-admin/
+Allow: /asdf/
+
+Sitemap: http://10.0.0.0/sitemap.xml.gz
diff --git a/spec/shared_examples/web_site/robots_txt.rb b/spec/shared_examples/web_site/robots_txt.rb
new file mode 100644
index 00000000..2264196f
--- /dev/null
+++ b/spec/shared_examples/web_site/robots_txt.rb
@@ -0,0 +1,78 @@
+# encoding: UTF-8
+
+shared_examples 'WebSite::RobotsTxt' do
+  let(:known_dirs) { WebSite::RobotsTxt.known_dirs }
+
+  describe '#robots_url' do
+    it 'returns the correct url' do
+      web_site.robots_url.should === 'http://example.localhost/robots.txt'
+    end
+  end
+
+  describe '#has_robots?' do
+    it 'returns true' do
+      stub_request(:get, web_site.robots_url).to_return(status: 200)
+      web_site.has_robots?.should be_true
+    end
+
+    it 'returns false' do
+      stub_request(:get, web_site.robots_url).to_return(status: 404)
+      web_site.has_robots?.should be_false
+    end
+  end
+
+  describe '#parse_robots_txt' do
+
+    context 'installed in root' do
+      after :each do
+        stub_request_to_fixture(url: web_site.robots_url, fixture: @fixture)
+        robots = web_site.parse_robots_txt
+        robots.should =~ @expected
+      end
+
+      it 'returns an empty Array (empty robots.txt)' do
+        @fixture  = fixtures_dir + '/robots_txt/empty_robots.txt'
+        @expected = []
+      end
+
+      it 'returns an empty Array (invalid robots.txt)' do
+        @fixture  = fixtures_dir + '/robots_txt/invalid_robots.txt'
+        @expected = []
+      end
+
+      it 'returns an Array of urls (valid robots.txt)' do
+        @fixture  = fixtures_dir + '/robots_txt/robots.txt'
+        @expected = %w(
+          http://example.localhost/wordpress/admin/
+          http://example.localhost/wordpress/wp-admin/
+          http://example.localhost/wordpress/secret/
+          http://example.localhost/Wordpress/wp-admin/
+          http://example.localhost/asdf/
+        )
+      end
+    end
+
+    context 'installed in sub directory' do
+      it 'returns an Array of urls (valid robots.txt, WP installed in subdir)' do
+        web_site_sub = WebSite.new('http://example.localhost/wordpress/')
+        fixture  = fixtures_dir + '/robots_txt/robots.txt'
+        expected = %w(
+          http://example.localhost/wordpress/admin/
+          http://example.localhost/wordpress/secret/
+          http://example.localhost/Wordpress/wp-admin/
+          http://example.localhost/asdf/
+        )
+        stub_request_to_fixture(url: web_site_sub.robots_url, fixture: fixture)
+        robots = web_site_sub.parse_robots_txt
+        robots.should =~ expected
+      end
+    end
+  end
+
+  describe '#known_dirs' do
+    it 'does not contain duplicates' do
+      known_dirs.flatten.uniq.length.should == known_dirs.length
+    end
+  end
+
+end
diff --git a/wpscan.rb b/wpscan.rb
index baefc5dd..3943eef2 100755
--- a/wpscan.rb
+++ b/wpscan.rb
@@ -105,6 +105,10 @@ def main
 
   if wp_target.has_robots?
     puts green('[+]') + " robots.txt available under '#{wp_target.robots_url}'"
+
+    wp_target.parse_robots_txt.each do |dir|
+      puts "#{green('[+]')} Interesting entry from robots.txt: #{dir}"
+    end
   end
 
   if wp_target.has_readme?
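
The core of the patch is the Allow/Disallow extraction in parse_robots_txt: scan the body for rule paths, drop the directories every WordPress install exposes anyway (also in their sub-directory form), and turn the remainder into absolute URLs. A minimal standalone sketch of that logic follows. It is not WPScan code: Net::HTTP, the site argument and interesting_robots_entries are stand-ins for WPScan's Browser singleton, @uri and the module method, used here only for illustration.

# encoding: UTF-8
# Hypothetical standalone sketch of the robots.txt parsing added above.
require 'net/http'
require 'uri'

# Directories present on every WordPress install; entries matching them
# carry no information and are filtered out.
KNOWN_DIRS = %w{ / /wp-admin/ /wp-includes/ /wp-content/ }

# Returns the interesting Allow/Disallow entries of a site's robots.txt
# as absolute URLs, or an empty Array if the file is missing or useless.
def interesting_robots_entries(site)
  uri      = URI.parse(site)
  response = Net::HTTP.get_response(URI.join(site, '/robots.txt'))
  return [] unless response.code == '200'

  # Capture the path of every Allow/Disallow rule, case-insensitively
  entries = response.body.scan(/^(?:dis)?allow:\s*(.*)$/i).flatten.compact

  KNOWN_DIRS.each do |d|
    entries.delete(d)
    # Also drop the known dirs when WordPress lives in a sub directory
    entries.delete("#{uri.path}/#{d}".gsub(%r{/+}, '/'))
  end

  entries.map { |path| URI.join(site, path).to_s }.uniq
end

# Example usage:
# puts interesting_robots_entries('http://example.localhost/wordpress/')

Run against the robots.txt fixture above, this would report entries such as /wordpress/secret/ and /asdf/ while skipping /wp-admin/ and /wp-includes/, which is the behaviour the shared examples assert.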