# Empties inline base64 image data URIs found in an HTML string.
#
# Introduced for issue #265: the fetched page body is run through this helper
# before passive detection scans it, so the detection pattern does not have to
# be matched against embedded image payloads (speed improvement).
#
# A value is replaced only when all of the following hold:
# * it is enclosed in a pair of identical quotes (both " or both ');
# * it starts with a data:image/<subtype>;base64, prefix (optional whitespace
#   is tolerated around the separators);
# * the payload is well-formed base64: groups of four characters, optionally
#   followed by one padded group ending in "=" or "==".
# Anything else, including payloads with invalid base64, is left untouched.
#
# The quote character of the original value is preserved, so
#   src="data:image/png;base64,QUJD"   becomes  src=""
#   url('data:image/png;base64,QUJD')  becomes  url('')
# which keeps a single-quoted URI nested in a double-quoted attribute from
# closing that attribute early.
#
# @param html [String] HTML source to clean
# @return [String] a new string with every matching data URI emptied
def remove_base64_images_from_html(html)
  # Well-formed base64: any number of 4-char groups, then at most one padded group.
  base64regex = %r{(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?}
  # Data URI prefix up to and including the comma, e.g. data:image/png;base64,
  imageregex = %r{data\s*:\s*image/[^\s;]+\s*;\s*base64\s*,\s*}
  # Group 1 captures the opening quote; the trailing \1 requires the same
  # character as the closing quote, and the replacement re-emits that pair.
  html.gsub(/(["'])\s*#{imageregex}#{base64regex}\s*\1/, '\1\1')
end