parse robots.txt
@@ -1,6 +1,9 @@
 # encoding: UTF-8
 
+require 'web_site/robots_txt'
+
 class WebSite
+  include WebSite::RobotsTxt
 
   attr_reader :uri
 
@@ -94,18 +97,6 @@ class WebSite
     homepage_body[%r{<link .* type="application/rss\+xml" .* href="([^"]+)" />}, 1]
   end
 
-  # Checks if a robots.txt file exists
-  def has_robots?
-    Browser.get(robots_url).code == 200
-  end
-
-  # Gets a robots.txt URL
-  #
-  # @return [ String ]
-  def robots_url
-    @uri.merge('robots.txt').to_s
-  end
-
   # Only the first 700 bytes are checked to avoid the download
   # of the whole file which can be very huge (like 2 Go)
   #
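For illustration only (not part of the diff): the removed robots_url resolves the file relative to the target path, while its replacement in web_site/robots_txt.rb below always points at the site root. A minimal sketch, assuming Ruby's standard URI and a hypothetical sub-directory install:

require 'uri'

uri = URI.parse('http://example.localhost/wordpress/')

# Removed approach: resolve 'robots.txt' relative to the target path.
uri.merge('robots.txt').to_s   # => "http://example.localhost/wordpress/robots.txt"

# Replacement approach (see web_site/robots_txt.rb below): force the site root.
temp = uri.clone
temp.path = '/robots.txt'
temp.to_s                      # => "http://example.localhost/robots.txt"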
lib/wpscan/web_site/robots_txt.rb (new file, 64 lines)
@@ -0,0 +1,64 @@
+# encoding: UTF-8
+
+class WebSite
+  module RobotsTxt
+
+    # Checks if a robots.txt file exists
+    # @return [ Boolean ]
+    def has_robots?
+      Browser.get(robots_url).code == 200
+    end
+
+    # Gets a robots.txt URL
+    # @return [ String ]
+    def robots_url
+      temp = @uri.clone
+      temp.path = '/robots.txt'
+      temp.to_s
+    end
+
+    # Parse robots.txt
+    # @return [ Array ] URLs generated from robots.txt
+    def parse_robots_txt
+      return unless has_robots?
+
+      return_object = []
+      response = Browser.get(robots_url.to_s)
+      body = response.body
+      # Get all allow and disallow urls
+      entries = body.scan(/^(?:dis)?allow:\s*(.*)$/i)
+      if entries
+        entries.flatten!
+        entries.compact.sort!
+        wordpress_path = @uri.path
+        RobotsTxt.known_dirs.each do |d|
+          entries.delete(d)
+          # also delete when wordpress is installed in subdir
+          dir_with_subdir = "#{wordpress_path}/#{d}".gsub(/\/+/, '/')
+          entries.delete(dir_with_subdir)
+        end
+
+        entries.each do |d|
+          temp = @uri.clone
+          temp.path = d
+          return_object << temp.to_s
+        end
+      end
+      return_object
+    end
+
+    protected
+
+    # @return [ Array ]
+    def self.known_dirs
+      %w{
+        /
+        /wp-admin/
+        /wp-includes/
+        /wp-content/
+      }
+    end
+
+  end
+end
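For illustration only (not part of the commit): a minimal standalone sketch of the scan-and-filter logic above, using only Ruby's standard library, with a made-up robots.txt body and base URL, to show which entries survive the known_dirs filter:

require 'uri'

body       = "User-agent: *\nDisallow: /wp-admin/\nDisallow: /secret/\nAllow: /asdf/\n"
base       = URI.parse('http://example.localhost/')
known_dirs = %w{ / /wp-admin/ /wp-includes/ /wp-content/ }

# Same regex as parse_robots_txt: both Allow and Disallow entries are collected.
entries = body.scan(/^(?:dis)?allow:\s*(.*)$/i).flatten.compact.sort
entries -= known_dirs          # drop the default WordPress directories

urls = entries.map do |d|
  temp = base.clone
  temp.path = d
  temp.to_s
end

puts urls
# http://example.localhost/asdf/
# http://example.localhost/secret/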
@@ -4,6 +4,7 @@ class WpTarget < WebSite
   module InterestingHeaders
 
     # Checks for interesting headers
+    # @return [ Array ] Interesting Headers
     def interesting_headers
       response = Browser.head(@uri.to_s)
       headers = response.headers
@@ -15,7 +16,7 @@ class WpTarget < WebSite
 
     protected
 
-    # @return Array
+    # @return [ Array ]
     def self.known_headers
       %w{
         Location
@@ -5,7 +5,7 @@ class WpTarget < WebSite
 
     # Checks to see if wp-config.php has a backup
     # See http://www.feross.org/cmsploit/
-    # return an array of backup config files url
+    # @return [ Array ] Backup config files
     def config_backup
       found = []
       backups = WpConfigBackup.config_backup_files
@@ -37,7 +37,7 @@ class WpTarget < WebSite
       found
     end
 
-    # @return Array
+    # @return [ Array ]
     def self.config_backup_files
       %w{
        wp-config.php~ #wp-config.php# wp-config.php.save wp-config.php.swp wp-config.php.swo wp-config.php_bak
@@ -6,6 +6,8 @@ describe 'WebSite' do
   let(:fixtures_dir) { SPEC_FIXTURES_WPSCAN_WEB_SITE_DIR }
   subject(:web_site) { WebSite.new('http://example.localhost/') }
 
+  it_behaves_like 'WebSite::RobotsTxt'
+
   before :all do
     Browser::reset
     Browser.instance(
@@ -147,24 +149,6 @@ describe 'WebSite' do
     end
   end
 
-  describe '#robots_url' do
-    it 'returns the correct url' do
-      web_site.robots_url.should === 'http://example.localhost/robots.txt'
-    end
-  end
-
-  describe '#has_robots?' do
-    it 'returns true' do
-      stub_request(:get, web_site.robots_url).to_return(status: 200)
-      web_site.has_robots?.should be_true
-    end
-
-    it 'returns false' do
-      stub_request(:get, web_site.robots_url).to_return(status: 404)
-      web_site.has_robots?.should be_false
-    end
-  end
-
   describe '::has_log?' do
     let(:log_url) { web_site.uri.merge('log.txt').to_s }
     let(:pattern) { %r{PHP Fatal error} }
spec/samples/wpscan/web_site/robots_txt/invalid_robots.txt (new file, 6 lines)
@@ -0,0 +1,6 @@
+ölhkdfys
+opihufgcasfa
+dsds
+fsdf
+s
+dtf
spec/samples/wpscan/web_site/robots_txt/robots.txt (new file, 10 lines)
@@ -0,0 +1,10 @@
+User-agent: *
+Disallow: /wp-admin/
+Disallow: /wp-includes/
+Disallow: /wordpress/admin/
+Disallow: /wordpress/wp-admin/
+Disallow: /wordpress/secret/
+Disallow: /Wordpress/wp-admin/
+Allow: /asdf/
+
+Sitemap: http://10.0.0.0/sitemap.xml.gz
spec/shared_examples/web_site/robots_txt.rb (new file, 78 lines)
@@ -0,0 +1,78 @@
+# encoding: UTF-8
+
+shared_examples 'WebSite::RobotsTxt' do
+  let(:known_dirs) { WebSite::RobotsTxt.known_dirs }
+
+  describe '#robots_url' do
+    it 'returns the correct url' do
+      web_site.robots_url.should === 'http://example.localhost/robots.txt'
+    end
+  end
+
+  describe '#has_robots?' do
+    it 'returns true' do
+      stub_request(:get, web_site.robots_url).to_return(status: 200)
+      web_site.has_robots?.should be_true
+    end
+
+    it 'returns false' do
+      stub_request(:get, web_site.robots_url).to_return(status: 404)
+      web_site.has_robots?.should be_false
+    end
+  end
+
+  describe '#parse_robots_txt' do
+
+    context 'installed in root' do
+      after :each do
+        stub_request_to_fixture(url: web_site.robots_url, fixture: @fixture)
+        robots = web_site.parse_robots_txt
+        robots.should =~ @expected
+      end
+
+      it 'returns an empty Array (empty robots.txt)' do
+        @fixture = fixtures_dir + '/robots_txt/empty_robots.txt'
+        @expected = []
+      end
+
+      it 'returns an empty Array (invalid robots.txt)' do
+        @fixture = fixtures_dir + '/robots_txt/invalid_robots.txt'
+        @expected = []
+      end
+
+      it 'returns an Array of urls (valid robots.txt)' do
+        @fixture = fixtures_dir + '/robots_txt/robots.txt'
+        @expected = %w(
+          http://example.localhost/wordpress/admin/
+          http://example.localhost/wordpress/wp-admin/
+          http://example.localhost/wordpress/secret/
+          http://example.localhost/Wordpress/wp-admin/
+          http://example.localhost/asdf/
+        )
+      end
+    end
+
+    context 'installed in sub directory' do
+      it 'returns an Array of urls (valid robots.txt, WP installed in subdir)' do
+        web_site_sub = WebSite.new('http://example.localhost/wordpress/')
+        fixture = fixtures_dir + '/robots_txt/robots.txt'
+        expected = %w(
+          http://example.localhost/wordpress/admin/
+          http://example.localhost/wordpress/secret/
+          http://example.localhost/Wordpress/wp-admin/
+          http://example.localhost/asdf/
+        )
+
+        stub_request_to_fixture(url: web_site_sub.robots_url, fixture: fixture)
+        robots = web_site_sub.parse_robots_txt
+        robots.should =~ expected
+      end
+    end
+  end
+
+  describe '#known_dirs' do
+    it 'does not contain duplicates' do
+      known_dirs.flatten.uniq.length.should == known_dirs.length
+    end
+  end
+
+end
@@ -105,6 +105,10 @@ def main
 
   if wp_target.has_robots?
     puts green('[+]') + " robots.txt available under '#{wp_target.robots_url}'"
+
+    wp_target.parse_robots_txt.each do |dir|
+      puts "#{green('[+]')} Interesting entry from robots.txt: #{dir}"
+    end
   end
 
   if wp_target.has_readme?
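For a hypothetical target whose robots.txt disallows only /secret/, the added block would print output along these lines (colour codes from green() omitted):

[+] robots.txt available under 'http://example.localhost/robots.txt'
[+] Interesting entry from robots.txt: http://example.localhost/secret/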