# encoding: UTF-8

class WpTarget < WebSite
  # Detection of known malware URLs referenced from the target's index page.
  #
  # The instance methods read @uri, so this module is presumably mixed into
  # an object that provides it (NOTE(review): confirm against the includer).
  module Malwares
    # Cache convention used by #malwares:
    #   nil => malwares not checked yet,
    #   []  => checked, none found,
    #   otherwise the array of malware URLs found.
    #
    # NOTE(review): this assignment runs in the module body, so it sets the
    # instance variable on the Malwares module object itself, not on the
    # objects whose @malwares the instance methods below read. It is kept
    # only to leave load-time behavior unchanged.
    @malwares = nil

    # @param malwares_file_path [String, nil] optional path to the patterns
    #   file; when nil the default from Malwares.malwares_file is used
    # @return [Boolean] true when at least one malware URL was found
    def has_malwares?(malwares_file_path = nil)
      !malwares(malwares_file_path).empty?
    end

    # Fetches the page at @uri and scans its body against every pattern
    # listed (one per line) in the malwares file. The result is memoized in
    # @malwares, so later calls return the cached array and ignore
    # +malwares_file_path+.
    #
    # @param malwares_file_path [String, nil] optional path to the patterns file
    # @return [Array<String>] unique URLs of the malwares found (may be empty)
    def malwares(malwares_file_path = nil)
      # An empty array is truthy, so a "nothing found" result is cached too.
      unless @malwares
        malwares_file   = Malwares.malwares_file(malwares_file_path)
        index_page_body = Browser.get(@uri.to_s).body
        found           = []

        # Stream the file line by line instead of loading it whole.
        File.foreach(malwares_file) do |line|
          url_regex = line.chomp
          next if url_regex.empty? # skip blank lines

          # String#scan with a single capture group yields one-element
          # arrays; they are flattened once after the loop.
          found.concat(index_page_body.scan(Malwares.malware_pattern(url_regex)))
        end

        @malwares = found.flatten.uniq
      end

      @malwares
    end

    # @param malwares_file_path [String, nil] explicit path, or nil
    # @return [String] the given path, or the malwares.txt file under DATA_DIR
    def self.malwares_file(malwares_file_path)
      malwares_file_path || DATA_DIR + '/malwares.txt'
    end

    # Builds the case-insensitive pattern matching a <script> or <iframe> tag
    # whose src attribute starts with +url_regex+. The single capture group
    # holds the src value up to (not including) the closing quote.
    #
    # @param url_regex [String] regex source, interpolated as-is
    # @return [Regexp]
    def self.malware_pattern(url_regex)
      # no need to escape regex here, because malware.txt contains regex
      %r{<(?:script|iframe).* src=(?:"|')(#{url_regex}[^"']*)(?:"|')[^>]*>}i
    end
  end
end