Module: WebSite::RobotsTxt

Included in: WebSite
Defined in: lib/wpscan/web_site/robots_txt.rb

Class Method Summary (collapse)

+ (Array) known_dirs protected

Instance Method Summary (collapse)

- (Boolean) has_robots?

Checks if a robots.txt file exists.
- (Array) parse_robots_txt

Parse robots.txt.
- (String) robots_url

Gets a robots.txt URL.

Class Method Details

+ (`Array`) known_dirs (protected)

Returns:

(Array)

# File 'lib/wpscan/web_site/robots_txt.rb', line 54

# Paths that parse_robots_txt removes from the entries it reports.
#
# @return [Array<String>] a new array on every call
def self.known_dirs
  ['/', '/wp-admin/', '/wp-includes/', '/wp-content/']
end

Instance Method Details

- (`Boolean`) has_robots?

Checks if a robots.txt file exists

Returns:

(Boolean)



8
9
10

# File 'lib/wpscan/web_site/robots_txt.rb', line 8

# Checks if a robots.txt file exists
#
# @return [Boolean] true when the request to robots_url answers with
#   status code 200
def has_robots?
  response = Browser.get(robots_url)
  response.code == 200
end

- (`Array`) parse_robots_txt

Parse robots.txt

Returns:

(Array, nil) —

URLs generated from the Allow/Disallow entries of robots.txt, or nil when has_robots? is false

# File 'lib/wpscan/web_site/robots_txt.rb', line 23

# Parse robots.txt
#
# Collects the value of every Allow/Disallow directive, drops blank values,
# duplicates and the directories listed in RobotsTxt.known_dirs (also when
# prefixed with the path of @uri), and turns the remaining values into URLs
# based on @uri.
#
# @return [Array<String>, nil] entries sorted by path; nil when has_robots?
#   is false. A value that cannot be used as a URI path is returned as the
#   raw (stripped) robots.txt value instead of a URL.
def parse_robots_txt
  return unless has_robots?

  body = Browser.get(robots_url.to_s).body
  # Get all allow and disallow urls. Only spaces/tabs are skipped after the
  # colon: \s would also match the line break, so an empty directive would
  # swallow the following line as its value.
  entries = body.scan(/^(?:dis)?allow:[ \t]*(.*)$/i).flatten
  # `.` matches "\r", so values read from CRLF files need stripping.
  entries = entries.map(&:strip).reject(&:empty?).uniq.sort

  wordpress_path = @uri.path
  known = RobotsTxt.known_dirs.flat_map do |dir|
    # also ignore the directory when wordpress is installed in a subdir
    [dir, "#{wordpress_path}/#{dir}".gsub(/\/+/, '/')]
  end

  (entries - known).map do |entry|
    begin
      url = @uri.clone
      url.path = entry
      url.to_s
    rescue URI::Error
      # e.g. wildcard/query patterns such as "/*?s=" are not valid URI paths;
      # keep the raw entry rather than aborting the whole parse.
      entry
    end
  end
end

- (`String`) robots_url

Gets a robots.txt URL

Returns:

(String)

# File 'lib/wpscan/web_site/robots_txt.rb', line 14

# Gets a robots.txt URL
#
# @return [String] string form of a copy of @uri whose path has been
#   replaced by /robots.txt
def robots_url
  @uri.clone.tap { |copy| copy.path = '/robots.txt' }.to_s
end

Module: WebSite::RobotsTxt

Class Method Summary (collapse)

Instance Method Summary (collapse)

Class Method Details

+ (Array) known_dirs (protected)

Instance Method Details

- (Boolean) has_robots?

- (Array) parse_robots_txt

- (String) robots_url

+ (`Array`) known_dirs (protected)

- (`Boolean`) has_robots?

- (`Array`) parse_robots_txt

- (`String`) robots_url