diff --git a/lib/wpstools/plugins/list_generator/generate_list.rb b/lib/wpstools/plugins/list_generator/generate_list.rb index 2e77d724..a0ff0ded 100644 --- a/lib/wpstools/plugins/list_generator/generate_list.rb +++ b/lib/wpstools/plugins/list_generator/generate_list.rb @@ -21,8 +21,7 @@ class GenerateList raise "Type #{type} not defined" end @verbose = verbose - @browser = Browser.instance(request_timeout: 20000, connect_timeout: 20000, max_threads: 1) - @hydra = @browser.hydra + @browser = Browser.instance(request_timeout: 20000, connect_timeout: 20000, max_threads: 1, cache_ttl: 0) end def set_file_name(type) @@ -67,43 +66,28 @@ class GenerateList def get_popular_items(pages) found_items = [] page_count = 1 - queue_count = 0 (1...(pages.to_i + 1)).each do |page| # First page has another URL url = (page == 1) ? @popular_url : @popular_url + 'page/' + page.to_s + '/' - request = @browser.forge_request(url) - - queue_count += 1 - - request.on_complete do |response| - if response.code != 200 - puts red("Got HTTP Status #{response.code} for page #{page}. Retrying request...") - # Retry - @hydra.queue(request) - next - end - puts "[+] Parsing page #{page_count}" if @verbose - page_count += 1 - found = 0 - response.body.scan(@popular_regex).each do |item| - found_items << item[0] - found = found + 1 - end - puts "[+] Found #{found} items on page #{page}" if @verbose + puts "[+] Parsing page #{page_count}" if @verbose + code = 0 + while code != 200 + puts red("[!] Retrying request for page #{page} (Code: #{code})") unless code == 0 + request = @browser.forge_request(url) + response = request.run + code = response.code + sleep(5) unless code == 200 end - - @hydra.queue(request) - - if queue_count == @browser.max_threads - @hydra.run - queue_count = 0 + page_count += 1 + found = 0 + response.body.scan(@popular_regex).each do |item| + found_items << item[0] + found = found + 1 end - + puts "[+] Found #{found} items on page #{page}" if @verbose end - @hydra.run - found_items.sort! found_items.uniq end