Files
wpscan/lib/wpscan/web_site/robots_txt.rb
2018-05-14 17:56:49 +01:00

71 lines
1.6 KiB
Ruby
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# encoding: UTF-8
class WebSite
  # Mixin providing robots.txt detection and parsing for a WebSite.
  #
  # Expects the host object to expose @uri (a URI instance) and relies on
  # Browser, #clean_uri and #full_uri which are defined elsewhere in the
  # project (not visible in this file).
  module RobotsTxt
    # Checks if a robots.txt file exists
    #
    # @return [ Boolean ] true when the server answers 200 for robots.txt
    def has_robots?
      Browser.get(robots_url).code == 200
    end

    # Gets the robots.txt URL for the site
    #
    # @return [ String ] e.g. 'http://example.com/robots.txt'
    def robots_url
      # clone first so the merge does not mutate the site's own @uri
      @uri.clone.merge('robots.txt').to_s
    end

    # Parse robots.txt
    #
    # @return [ Array ] URLs generated from the Allow/Disallow entries,
    #   minus the stock WordPress paths listed in .known_dirs
    def parse_robots_txt
      # robots_url already returns a String, no to_s needed
      body = Browser.get(robots_url).body

      # Capture the value of every Allow/Disallow directive (case-insensitive)
      entries = body.scan(/^(?:dis)?allow:\s*(.*)$/i)

      # String#scan always returns an Array, so truthiness is never false:
      # test emptiness instead of `if entries`
      return [] if entries.empty?

      # Remove any rubbish (clean_uri is a project helper — presumably it
      # flattens/normalises the scan tuples; not visible here)
      entries = clean_uri(entries)
      entries.sort!

      # Drop the stock WordPress directories, both at the site root and
      # below the WordPress install path (when WP lives in a sub-directory)
      wordpress_path = @uri.path
      RobotsTxt.known_dirs.each do |dir|
        entries.delete(dir)
        # collapse duplicate slashes when joining the install path and dir
        entries.delete("#{wordpress_path}/#{dir}".gsub(/\/+/, '/'))
      end

      # Convert the remaining relative entries to full URIs (project helper)
      full_uri(entries)
    end

    protected

    # Stock paths emitted by WordPress' own "function do_robots()" ->
    # https://github.com/WordPress/WordPress/blob/master/wp-includes/functions.php
    #
    # NOTE(review): `protected` does not apply to singleton (module-level)
    # methods, so this remains publicly callable — kept as-is for
    # compatibility with RobotsTxt.known_dirs callers.
    #
    # @return [ Array ]
    def self.known_dirs
      %w{
        /
        /wp-admin/
        /wp-admin/admin-ajax.php
        /wp-includes/
        /wp-content/
      }
    end
  end
end