wordpress.org is really unstable at the moment

This commit is contained in:
Christian Mehlmauer
2013-07-23 18:16:48 +02:00
parent 2fb6f7169a
commit 0f7bc49992

View File

@@ -21,8 +21,7 @@ class GenerateList
raise "Type #{type} not defined"
end
@verbose = verbose
@browser = Browser.instance(request_timeout: 20000, connect_timeout: 20000, max_threads: 1)
@hydra = @browser.hydra
@browser = Browser.instance(request_timeout: 20000, connect_timeout: 20000, max_threads: 1, cache_ttl: 0)
end
def set_file_name(type)
@@ -67,43 +66,28 @@ class GenerateList
def get_popular_items(pages)
found_items = []
page_count = 1
queue_count = 0
(1...(pages.to_i + 1)).each do |page|
# First page has another URL
url = (page == 1) ? @popular_url : @popular_url + 'page/' + page.to_s + '/'
request = @browser.forge_request(url)
queue_count += 1
request.on_complete do |response|
if response.code != 200
puts red("Got HTTP Status #{response.code} for page #{page}. Retrying request...")
# Retry
@hydra.queue(request)
next
end
puts "[+] Parsing page #{page_count}" if @verbose
page_count += 1
found = 0
response.body.scan(@popular_regex).each do |item|
found_items << item[0]
found = found + 1
end
puts "[+] Found #{found} items on page #{page}" if @verbose
puts "[+] Parsing page #{page_count}" if @verbose
code = 0
while code != 200
puts red("[!] Retrying request for page #{page} (Code: #{code})") unless code == 0
request = @browser.forge_request(url)
response = request.run
code = response.code
sleep(5) unless code == 200
end
@hydra.queue(request)
if queue_count == @browser.max_threads
@hydra.run
queue_count = 0
page_count += 1
found = 0
response.body.scan(@popular_regex).each do |item|
found_items << item[0]
found = found + 1
end
puts "[+] Found #{found} items on page #{page}" if @verbose
end
@hydra.run
found_items.sort!
found_items.uniq
end