Check for sitemaps (using /robots.txt)
@@ -4,12 +4,14 @@ require 'web_site/robots_txt'
 require 'web_site/humans_txt'
 require 'web_site/security_txt'
 require 'web_site/interesting_headers'
+require 'web_site/sitemap'
 
 class WebSite
   include WebSite::RobotsTxt
   include WebSite::HumansTxt
   include WebSite::SecurityTxt
   include WebSite::InterestingHeaders
+  include WebSite::Sitemap
 
   attr_reader :uri
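The wiring follows the same pattern as the existing robots.txt, humans.txt and security.txt checks: a small module mixed into WebSite. A minimal usage sketch, assuming WebSite.new takes the target URL as the existing mixins imply (the URL is illustrative):

    site = WebSite.new('http://example.com/')
    puts 'Sitemap declared in robots.txt' if site.has_sitemap?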
lib/wpscan/web_site/sitemap.rb (new file, 68 lines)
@@ -0,0 +1,68 @@
# encoding: UTF-8

class WebSite
  module Sitemap

    # Checks whether robots.txt declares a Sitemap entry
    # @return [ Boolean ]
    def has_sitemap?
      # Make the request
      response = Browser.get(sitemap_url)

      # Make sure it's HTTP 200
      return false unless response.code == 200

      # Is there a sitemap value?
      !response.body.scan(/^sitemap\s*:\s*(.*)$/i).empty?
    end
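
    # For reference, a robots.txt declaring a sitemap looks like this
    # (illustrative sample, not from a real scan):
    #
    #   User-agent: *
    #   Disallow: /wp-admin/
    #   Sitemap: http://example.com/sitemap.xml
    #
    # scan returns one single-element array per matching line, so the check
    # above reduces to "was there at least one match?":
    #
    #   body.scan(/^sitemap\s*:\s*(.*)$/i) # => [["http://example.com/sitemap.xml"]]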

    # Gets the robots.txt URL (sitemap locations are declared in robots.txt)
    # @return [ String ]
    def sitemap_url
      @uri.clone.merge('robots.txt').to_s
    end
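
    # Note: URI#merge resolves the argument against the base URI, so a site
    # scanned at its root resolves to /robots.txt (URL illustrative); since
    # merge already returns a new URI, the clone is defensive rather than
    # required:
    #
    #   URI.parse('http://example.com/').merge('robots.txt').to_s
    #   # => "http://example.com/robots.txt"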

    # Parses the Sitemap entries out of robots.txt
    # @return [ Array ] URLs generated from robots.txt
    def parse_sitemap
      return_object = []

      # Make the request
      response = Browser.get(sitemap_url)
      body     = response.body

      # Get all the Sitemap entries
      entries = body.scan(/^sitemap\s*:\s*(.*)$/i)

      # Did we get something? (scan returns [] when nothing matches)
      unless entries.empty?
        # Extract the capture groups into a flat list
        entries.flatten!
        # Remove any leading/trailing spaces and End Of Line characters
        entries.map! { |x| x.strip }
        # Sort and keep unique values only
        entries = entries.sort.uniq

        # Each value now, try to make it a full URL
        entries.each do |d|
          begin
            temp      = @uri.clone
            temp.path = d
          rescue URI::Error
            # Absolute URLs are not valid paths, keep them verbatim
            temp = d
          end

          return_object << temp.to_s
        end
      end

      return_object
    end

  end
end
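Putting the pieces together, a sketch of what a run might return, assuming WebSite.new takes the target URL and robots.txt contains the two illustrative entries below. Absolute Sitemap values fail the path assignment and are kept verbatim by the rescue, while path-only values are grafted onto the scanned site's URI:

    # robots.txt body (illustrative):
    #   Sitemap: http://example.com/sitemap.xml
    #   Sitemap: /sitemap_index.xml
    site = WebSite.new('http://example.com/')
    site.parse_sitemap
    # => ["http://example.com/sitemap_index.xml", "http://example.com/sitemap.xml"]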