wordpress.org is really unstable at the moment
This commit is contained in:
@@ -21,8 +21,7 @@ class GenerateList
|
|||||||
raise "Type #{type} not defined"
|
raise "Type #{type} not defined"
|
||||||
end
|
end
|
||||||
@verbose = verbose
|
@verbose = verbose
|
||||||
@browser = Browser.instance(request_timeout: 20000, connect_timeout: 20000, max_threads: 1)
|
@browser = Browser.instance(request_timeout: 20000, connect_timeout: 20000, max_threads: 1, cache_ttl: 0)
|
||||||
@hydra = @browser.hydra
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def set_file_name(type)
|
def set_file_name(type)
|
||||||
@@ -67,23 +66,19 @@ class GenerateList
|
|||||||
def get_popular_items(pages)
|
def get_popular_items(pages)
|
||||||
found_items = []
|
found_items = []
|
||||||
page_count = 1
|
page_count = 1
|
||||||
queue_count = 0
|
|
||||||
|
|
||||||
(1...(pages.to_i + 1)).each do |page|
|
(1...(pages.to_i + 1)).each do |page|
|
||||||
# First page has another URL
|
# First page has another URL
|
||||||
url = (page == 1) ? @popular_url : @popular_url + 'page/' + page.to_s + '/'
|
url = (page == 1) ? @popular_url : @popular_url + 'page/' + page.to_s + '/'
|
||||||
request = @browser.forge_request(url)
|
|
||||||
|
|
||||||
queue_count += 1
|
|
||||||
|
|
||||||
request.on_complete do |response|
|
|
||||||
if response.code != 200
|
|
||||||
puts red("Got HTTP Status #{response.code} for page #{page}. Retrying request...")
|
|
||||||
# Retry
|
|
||||||
@hydra.queue(request)
|
|
||||||
next
|
|
||||||
end
|
|
||||||
puts "[+] Parsing page #{page_count}" if @verbose
|
puts "[+] Parsing page #{page_count}" if @verbose
|
||||||
|
code = 0
|
||||||
|
while code != 200
|
||||||
|
puts red("[!] Retrying request for page #{page} (Code: #{code})") unless code == 0
|
||||||
|
request = @browser.forge_request(url)
|
||||||
|
response = request.run
|
||||||
|
code = response.code
|
||||||
|
sleep(5) unless code == 200
|
||||||
|
end
|
||||||
page_count += 1
|
page_count += 1
|
||||||
found = 0
|
found = 0
|
||||||
response.body.scan(@popular_regex).each do |item|
|
response.body.scan(@popular_regex).each do |item|
|
||||||
@@ -93,17 +88,6 @@ class GenerateList
|
|||||||
puts "[+] Found #{found} items on page #{page}" if @verbose
|
puts "[+] Found #{found} items on page #{page}" if @verbose
|
||||||
end
|
end
|
||||||
|
|
||||||
@hydra.queue(request)
|
|
||||||
|
|
||||||
if queue_count == @browser.max_threads
|
|
||||||
@hydra.run
|
|
||||||
queue_count = 0
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
@hydra.run
|
|
||||||
|
|
||||||
found_items.sort!
|
found_items.sort!
|
||||||
found_items.uniq
|
found_items.uniq
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user