Fix EOL issue when checking /robots.txt
This commit is contained in:
@@ -23,20 +23,32 @@ class WebSite
|
|||||||
return_object = []
|
return_object = []
|
||||||
response = Browser.get(robots_url.to_s)
|
response = Browser.get(robots_url.to_s)
|
||||||
body = response.body
|
body = response.body
|
||||||
|
|
||||||
# Get all allow and disallow urls
|
# Get all allow and disallow urls
|
||||||
entries = body.scan(/^(?:dis)?allow:\s*(.*)$/i)
|
entries = body.scan(/^(?:dis)?allow:\s*(.*)$/i)
|
||||||
if entries
|
if entries
|
||||||
|
#extract elements
|
||||||
entries.flatten!
|
entries.flatten!
|
||||||
|
# Remove any leading/trailing spaces
|
||||||
|
entries.collect{|x| x.strip || x }
|
||||||
|
# End Of Line issues
|
||||||
|
entries.collect{|x| x.chomp! || x }
|
||||||
|
# Remove nil's and sort
|
||||||
entries.compact.sort!
|
entries.compact.sort!
|
||||||
|
# Unique values only
|
||||||
entries.uniq!
|
entries.uniq!
|
||||||
|
# Wordpress URL
|
||||||
wordpress_path = @uri.path
|
wordpress_path = @uri.path
|
||||||
|
|
||||||
|
# Each "boring" value as defined below, remove
|
||||||
RobotsTxt.known_dirs.each do |d|
|
RobotsTxt.known_dirs.each do |d|
|
||||||
entries.delete(d)
|
entries.delete(d)
|
||||||
# also delete when wordpress is installed in subdir
|
# Also delete when wordpress is installed in subdir
|
||||||
dir_with_subdir = "#{wordpress_path}/#{d}".gsub(/\/+/, '/')
|
dir_with_subdir = "#{wordpress_path}/#{d}".gsub(/\/+/, '/')
|
||||||
entries.delete(dir_with_subdir)
|
entries.delete(dir_with_subdir)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Each value now, try and make it a full URL
|
||||||
entries.each do |d|
|
entries.each do |d|
|
||||||
begin
|
begin
|
||||||
temp = @uri.clone
|
temp = @uri.clone
|
||||||
@@ -46,17 +58,21 @@ class WebSite
|
|||||||
end
|
end
|
||||||
return_object << temp.to_s
|
return_object << temp.to_s
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
return_object
|
return_object
|
||||||
end
|
end
|
||||||
|
|
||||||
protected
|
protected
|
||||||
|
|
||||||
|
# Useful ~ "function do_robots()" -> https://github.com/WordPress/WordPress/blob/master/wp-includes/functions.php
|
||||||
|
#
|
||||||
# @return [ Array ]
|
# @return [ Array ]
|
||||||
def self.known_dirs
|
def self.known_dirs
|
||||||
%w{
|
%w{
|
||||||
/
|
/
|
||||||
/wp-admin/
|
/wp-admin/
|
||||||
|
/wp-admin/admin-ajax.php
|
||||||
/wp-includes/
|
/wp-includes/
|
||||||
/wp-content/
|
/wp-content/
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user