#!/usr/bin/env ruby

# Builds and queries a SQLite database of WordPress file fingerprints.
#
# For every stable WordPress release the script can download the official zip,
# hash every non-PHP file it contains (MD5) and store the result. The database
# can then be searched by hash or by file path, or used to fingerprint a remote
# blog by comparing the hashes of the files it serves.
#
# The database defaults to wp-versions.db in the current working directory
# (see --db).

require 'rubygems'
require 'uri'
require 'digest'
require 'fileutils'
require 'dm-core'
require 'dm-migrations'
require 'dm-constraints'
require 'optparse'
require 'nokogiri'
require 'typhoeus'

@db = "#{Dir.pwd}/wp-versions.db"

# Scrapes the WordPress release archive page.
#
# @return [Array<String>] the version numbers found in the first '.widefat'
#   table of the page, in the reverse of the order the page lists them
# @raise [RuntimeError] if the table cannot be found in the page
def get_remote_wp_versions
  page  = Nokogiri::HTML(Typhoeus.get('http://wordpress.org/download/release-archive/').body)
  table = page.css('.widefat').first
  raise 'Unable to find the releases table in the release archive page' unless table

  table.css('tbody tr td:first').map { |node| node.text.strip }.reverse
end

# Recursively deletes a directory; a missing directory is not an error.
#
# @param dir [String] path of the directory to delete
def remove_dir(dir)
  FileUtils.rm_rf(dir)
end

# Downloads a file with wget.
#
# The command is run without a shell (argument vector), so neither the URL nor
# the destination can inject shell syntax.
#
# @param file_url [String] URL to fetch
# @param dest [String] path the file is written to
# @return [Boolean, nil] true when wget exits with status 0, false otherwise,
#   nil when wget could not be executed
def download(file_url, dest)
  system('wget', '-q', '-np', '-O', dest, file_url, out: File::NULL)
end

# @param version [String] a WordPress version number
# @return [String] the URL of the zip archive for that version
def wp_version_zip_url(version)
  "http://wordpress.org/wordpress-#{version}.zip"
end

# @param version [String] a WordPress version number
# @return [String] the body served at the archive's ".md5" URL, with
#   surrounding whitespace removed so it can be compared to a hex digest
def wp_version_zip_md5(version)
  Typhoeus.get("#{wp_version_zip_url(version)}.md5").body.strip
end

# @param file_path [String] path of a local file
# @return [String] hexadecimal MD5 digest of the file content
def file_md5(file_path)
  Digest::MD5.file(file_path).hexdigest
end

# @param url [String] URL to fetch
# @return [String] hexadecimal MD5 digest of the response body
def web_page_md5(url)
  Digest::MD5.hexdigest(Typhoeus.get(url).body)
end

# Downloads the zip of the given version to /tmp, verifies it against the
# published MD5 and extracts it into +dest+, replacing any previously
# extracted "wordpress" directory there.
#
# @param version [String] a WordPress version number
# @param dest [String] directory the archive is extracted into
# @return [true]
# @raise [RuntimeError] 'Download error' when wget fails or the file is missing,
#   'Invalid md5' when the checksum does not match
def download_and_unzip_version(version, dest)
  dest_zip = "/tmp/wp-#{version}.zip"

  downloaded = download(wp_version_zip_url(version), dest_zip)
  raise 'Download error' unless downloaded && File.exist?(dest_zip)
  raise 'Invalid md5' unless file_md5(dest_zip) == wp_version_zip_md5(version)

  remove_dir(File.join(dest, 'wordpress'))
  unzip(dest_zip, dest)

  true
end

# Extracts a zip archive with the unzip command, overwriting existing files.
# The command is run without a shell and its standard output is discarded.
#
# @param zip_path [String] path of the archive
# @param dest [String] directory to extract into
# @return [Boolean, nil] true when unzip exits with status 0, false otherwise,
#   nil when unzip could not be executed
def unzip(zip_path, dest)
  system('unzip', '-o', '-d', dest, zip_path, out: File::NULL)
end

parser = OptionParser.new("Usage: ruby #{$0} [options]", 50) do |opts|
  opts.on('--db PATH-TO-DB', '-d', 'Path to the db, default: wp-versions.db') do |db|
    @db = db
  end

  opts.on('--update', '-u', 'Update the db') do
    @update = true
  end

  opts.on('--verbose', '-v', 'Verbose Mode') do
    @verbose = true
  end

  opts.on('--show-unique-fingerprints WP-VERSION', '--suf', 'Output the unique file hashes for the given version of WordPress') do |version|
    @version = version
  end

  opts.on('--search-hash HASH', '--sh', 'Search the hash and output the WP versions & file') do |hash|
    @hash = hash
  end

  opts.on('--search-file RELATIVE-FILE-PATH', '--sf', 'Search the file and output the Wp versions & hashes') do |file|
    @file = file
  end

  opts.on('--fingerprint URL', 'Fingerprint a remote wordpress blog') do |url|
    # A trailing slash is required so that relative paths are merged below the
    # blog directory instead of replacing its last segment.
    @target_url = url.end_with?('/') ? url : "#{url}/"
  end
end
parser.parse!

DataMapper::Logger.new($stdout, @verbose ? :debug : :fatal)
DataMapper.setup(:default, "sqlite://#{@db}")

# A WordPress release, identified by its version number.
class Version
  include DataMapper::Resource

  has n, :fingerprints, constraint: :destroy

  property :id, Serial
  property :number, String, required: true, unique: true
end

# A file path relative to the root of the WordPress archive.
class Path
  include DataMapper::Resource

  has n, :fingerprints, constraint: :destroy

  property :id, Serial
  property :value, String, required: true, unique: true
end

# The MD5 hash of one file (Path) in one release (Version).
class Fingerprint
  include DataMapper::Resource

  belongs_to :version, key: true
  belongs_to :path, key: true

  property :md5_hash, String, required: true, length: 32

  # DataMapper does not seem to support ordering by a column in a joining model
  # Solution found on StackOverflow ("DataMapper: Sorting results though association")
  #
  # @param direction [Symbol] :asc or :desc
  # @return [DataMapper::Collection] the fingerprints ordered by version number
  def self.order_by_version(direction = :asc)
    order = DataMapper::Query::Direction.new(version.number, direction)
    query = all.query
    query.instance_variable_set('@order', [order])
    query.instance_variable_set('@links', [relationships['version'].inverse])
    all(query)
  end
end

DataMapper.auto_upgrade!

# Update: fingerprint every remote version which is not in the database yet
if @update
  remote_versions = get_remote_wp_versions
  puts "#{remote_versions.size} remote versions number retrieved"

  version_dir = '/tmp/wordpress/'

  remote_versions.each do |version|
    if Version.first(number: version)
      puts "Version #{version} already in DB, skipping"
      next
    end

    puts "Downloading and unziping v#{version} to #{version_dir}"
    download_and_unzip_version(version, '/tmp/')

    # The version is only recorded once its archive has been downloaded and
    # verified: creating it earlier would leave an empty version behind on
    # failure, which the next update would then skip as "already in DB".
    db_version = Version.create(number: version)

    puts 'Processing Fingerprints'
    files = Dir[File.join(version_dir, '**', '*')].reject do |f|
      f.end_with?('.php') || File.directory?(f)
    end

    files.each do |filename|
      hash        = file_md5(filename)
      # Only the leading extraction directory is stripped, never a later occurrence
      file_path   = filename.sub(/\A#{Regexp.escape(version_dir)}/, '')
      db_path     = Path.first_or_create(value: file_path)
      fingerprint = Fingerprint.create(path_id: db_path.id, md5_hash: hash)

      db_version.fingerprints << fingerprint
    end
    db_version.save
  end
end

# Show the fingerprints whose hash is found in the given version only
if @version
  if (version = Version.first(number: @version))
    # A plain "GROUP BY md5_hash" would return an arbitrary row for each hash,
    # so hashes shared with other versions are excluded explicitly instead.
    sql = <<-SQL
      SELECT md5_hash, paths.value AS path
      FROM fingerprints
      LEFT JOIN paths ON path_id = id
      WHERE version_id = ?
        AND md5_hash NOT IN (SELECT md5_hash FROM fingerprints WHERE version_id != ?)
      ORDER BY path ASC
    SQL

    repository(:default).adapter.select(sql, version.id, version.id).each do |f|
      puts "#{f.md5_hash} #{f.path}"
    end
  else
    puts "The version supplied: '#{@version}' is not in the database"
  end
end

# Search by hash
if @hash
  puts "Results for #{@hash}:"
  Fingerprint.order_by_version(:desc).all(md5_hash: @hash).each do |f|
    puts "  #{f.version.number} #{f.path.value}"
  end
end

# Search by relative file path
if @file
  puts "Results for #{@file}:"

  if (path = Path.first(value: @file))
    Fingerprint.order_by_version(:desc).all(path_id: path.id).each do |f|
      puts "  #{f.md5_hash} #{f.version.number}"
    end
  else
    puts 'File not found (the argument must be a relative file path. e.g: wp-admin/css/widgets.css)'
  end
end

# Fingerprint a remote blog: the first version whose files all match wins
if @target_url
  uri = URI.parse(@target_url)

  Version.all(order: [:number.desc]).each do |version|
    total_urls = version.fingerprints.count
    matches    = 0

    version.fingerprints.each do |f|
      url = uri.merge(f.path.value).to_s

      if web_page_md5(url) == f.md5_hash
        matches += 1
        puts "#{url} matches v#{version.number}" if @verbose
      end

      percent = ((matches / total_urls.to_f) * 100).round(2)

      # "\r" keeps the progress on a single, continuously updated line
      print("Version #{version.number} [#{matches}/#{total_urls} #{percent}% matches]\r")
    end

    puts

    # Integer comparison rather than testing the rounded percentage for equality
    if total_urls > 0 && matches == total_urls
      puts "The remote version is #{version.number}"
      exit
    end
  end
end