Check HTTP status of each value in /robots.txt

This commit is contained in:
g0tmi1k
2018-05-14 15:57:33 +01:00
parent 3b94fc49a7
commit 4b4b968710
2 changed files with 7 additions and 1 deletions

View File

@@ -15,6 +15,12 @@ class WebSite
@uri.clone.merge('robots.txt').to_s
end
# Request a single robots.txt entry and print the HTTP status code it answers with.
#
# @param url The URL to request; handed to Browser.get unchanged
#
# @return [ nil ] The result of puts; the status is printed, not returned
def header_robots_txt(url)
  response = Browser.get(url)
  status   = response.code
  message  = "Interesting entry from robots.txt: #{url} [HTTP #{status}]"

  puts info(message)
end
# Parse robots.txt
# @return [ Array ] URLs generated from robots.txt
def parse_robots_txt