Merge pull request #1019 from wpscanteam/hash

remove scripts before calculating hashes
This commit is contained in:
Christian Mehlmauer
2016-10-26 11:48:13 +02:00
committed by GitHub
2 changed files with 17 additions and 3 deletions

View File

@@ -91,15 +91,18 @@ class WebSite
end
# Compute the MD5 of the page
# Comments and scripts are removed from the page first, so that cache generation
# details embedded in them do not change the hash
#
# @param [ String, Typhoeus::Response ] page The url or the response of the page
#
# @return [ String ] The MD5 hash of the page
def self.page_hash(page)
  # Anything that is not already a Typhoeus::Response is handed to Browser.get
  # (with followlocation enabled and cache_ttl set to 0) to obtain one.
  page = Browser.get(page, { followlocation: true, cache_ttl: 0 }) unless page.is_a?(Typhoeus::Response)

  # Strip HTML comments first, then <script> elements; only the remaining
  # markup is hashed. The digest is computed once, on the fully stripped text.
  html = page.body.gsub(/<!--.*?-->/m, '')
  html = html.gsub(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/m, '')

  Digest::MD5.hexdigest(html)
end
def homepage_hash