diff --git a/lib/common/collections/wp_items/detectable.rb b/lib/common/collections/wp_items/detectable.rb index f67b143b..c2b89896 100755 --- a/lib/common/collections/wp_items/detectable.rb +++ b/lib/common/collections/wp_items/detectable.rb @@ -80,13 +80,31 @@ class WpItems < Array # # @return [ WpItems ] def passive_detection(wp_target, options = {}) - results = new(wp_target) - body = Browser.get(wp_target.url).body + results = new(wp_target) # improves speed - body = remove_base64_images_from_html(body) - names = body.scan(passive_detection_pattern(wp_target)) + body = remove_base64_images_from_html(Browser.get(wp_target.url).body) + page = Nokogiri::HTML(remove_conditional_comments(body)) + names = [] - names.flatten.uniq.each { |name| results.add(name) } + page.css('link,script,style').each do |tag| + %w(href src).each do |attribute| + attr_value = tag.attribute(attribute).to_s + next unless attr_value + + names << Regexp.last_match[1] if attr_value.match(attribute_pattern(wp_target)) + end + + next unless tag.name == 'script' || tag.name == 'style' + + code = tag.text.to_s + next if code.empty? + + code.scan(code_pattern(wp_target)).flatten.uniq.each do |item_name| + names << item_name + end + end + + names.uniq.each { |name| results.add(name) } results.sort! results @@ -97,13 +115,29 @@ class WpItems < Array # @param [ WpTarget ] wp_target # # @return [ Regex ] - def passive_detection_pattern(wp_target) - type = self.to_s.gsub(/Wp/, '').downcase - regex1 = %r{(?:[^=:\(]+)\s?(?:=|:|\()\s?(?:"|')[^"']+\\?/} - regex2 = %r{\\?/} - regex3 = %r{\\?/([^/\\"']+)\\?(?:/|"|')} + def item_pattern(wp_target) + type = to_s.gsub(/Wp/, '').downcase + wp_content_dir = wp_target.wp_content_dir + wp_content_url = wp_target.uri.merge(wp_content_dir).to_s - /#{regex1}#{Regexp.escape(wp_target.wp_content_dir)}#{regex2}#{Regexp.escape(type)}#{regex3}/i + url = /#{wp_content_url.gsub(%r{\A(?:http|https)}, 'https?').gsub('/', '\\\\\?\/')}/i + content_dir = %r{(?:#{url}|\\?\/\\?\/?#{wp_content_dir})}i + + %r{#{content_dir}\\?/#{type}\\?/} + end + + # @param [ WpTarget ] wp_target + # + # @return [ Regex ] + def attribute_pattern(wp_target) + /\A#{item_pattern(wp_target)}([^\/]+)/i + end + + # @param [ WpTarget ] wp_target + # + # @return [ Regex ] + def code_pattern(wp_target) + /["'\(]#{item_pattern(wp_target)}([^\\\/\)"']+)/i end # The default request parameters diff --git a/lib/common/common_helper.rb b/lib/common/common_helper.rb index 38b4e0d7..22fad14c 100644 --- a/lib/common/common_helper.rb +++ b/lib/common/common_helper.rb @@ -73,6 +73,10 @@ def add_trailing_slash(url) url =~ /\/$/ ? url : "#{url}/" end +def remove_conditional_comments(text) + text.gsub(/ + - - - Example.com - - - - + + + example.com + + - + + + - + - + + + - - - -
-
-

- Blablabla the following plugin should not match : /wp-content/items/this-plugin-should-not-match/sub.css -

-
-
- + - - + - - - + + + + http://example.com/wp-content/items/this-should-not-match/sub.css + href="http://example.com/wp-content/items/this-should-not-match/sub.css" + /wp-content/items/this-should-not-match/sub.css + //wp-content/items/this-should-not-match/sub.css + src='/wp-content/items/this-should-not-match/sub.css' + + diff --git a/spec/samples/common/collections/wp_themes/detectable/passive_detection.html b/spec/samples/common/collections/wp_themes/detectable/passive_detection.html index 9f484178..9b741167 100644 --- a/spec/samples/common/collections/wp_themes/detectable/passive_detection.html +++ b/spec/samples/common/collections/wp_themes/detectable/passive_detection.html @@ -8,9 +8,9 @@ - - - + + +