Check for sitemaps (using /robots.txt)
This commit is contained in:
@@ -4,12 +4,14 @@ require 'web_site/robots_txt'
|
||||
require 'web_site/humans_txt'
|
||||
require 'web_site/security_txt'
|
||||
require 'web_site/interesting_headers'
|
||||
require 'web_site/sitemap'
|
||||
|
||||
class WebSite
|
||||
include WebSite::RobotsTxt
|
||||
include WebSite::HumansTxt
|
||||
include WebSite::SecurityTxt
|
||||
include WebSite::InterestingHeaders
|
||||
include WebSite::Sitemap
|
||||
|
||||
attr_reader :uri
|
||||
|
||||
|
||||
68
lib/wpscan/web_site/sitemap.rb
Normal file
68
lib/wpscan/web_site/sitemap.rb
Normal file
@@ -0,0 +1,68 @@
|
||||
# encoding: UTF-8
|
||||
|
||||
class WebSite
  # Discovers sitemaps advertised through "Sitemap:" directives in the
  # target's robots.txt.
  #
  # Relies on the including class keeping the target as a URI in @uri, and on
  # Browser.get returning an object that responds to #code and #body.
  module Sitemap
    # One "Sitemap: <value>" directive per line, case-insensitive. Only spaces
    # and tabs are skipped around the colon: \s would also swallow the line
    # break and capture the *next* line when the directive has no value.
    SITEMAP_DIRECTIVE = /^sitemap[ \t]*:[ \t]*(.*)$/i

    # Checks if robots.txt advertises at least one sitemap
    #
    # @return [ Boolean ] true when robots.txt answers HTTP 200 and holds a
    #   Sitemap directive with a non-blank value
    def has_sitemap?
      response = Browser.get(sitemap_url)

      # Anything but a plain 200 means there is no usable robots.txt
      return false unless response.code == 200

      !sitemap_entries(response.body).empty?
    end

    # Gets the URL of the robots.txt that is searched for Sitemap directives
    #
    # @return [ String ]
    def sitemap_url
      @uri.merge('robots.txt').to_s
    end

    # Parses the Sitemap directives found in robots.txt
    #
    # @return [ Array<String> ] sorted, unique sitemap URLs; values that can
    #   not be resolved against the target URI are returned as written
    def parse_sitemap
      response = Browser.get(sitemap_url.to_s)

      resolved = sitemap_entries(response.body).map do |entry|
        resolve_sitemap_entry(entry)
      end

      # De-duplicate after resolution: "/a.xml" and its absolute form are
      # the same sitemap
      resolved.uniq.sort
    end

    private

    # Extracts the raw values of every Sitemap directive in a robots.txt body
    #
    # @param [ String, nil ] body
    #
    # @return [ Array<String> ] stripped, non-empty, unique directive values
    def sitemap_entries(body)
      values = body.to_s.scan(SITEMAP_DIRECTIVE).flatten
      # strip also removes the "\r" left behind by CRLF line endings
      values.map(&:strip).reject(&:empty?).uniq
    end

    # Turns a directive value into a full URL relative to the target URI
    #
    # @param [ String ] entry absolute URL or relative reference
    #
    # @return [ String ]
    def resolve_sitemap_entry(entry)
      @uri.merge(entry).to_s
    rescue URI::Error
      # Keep malformed values visible to the caller instead of dropping them
      entry
    end
  end
end
|
||||
Reference in New Issue
Block a user