-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScraper.rb
More file actions
34 lines (30 loc) · 886 Bytes
/
Scraper.rb
File metadata and controls
34 lines (30 loc) · 886 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
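# Proxy Scraper v0.1
# Author: JoeWHoward on GitHub
#
# Reads a list of page URLs (one per line), downloads each page, and appends
# every "ip:port" string found in the page body to an output file.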
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'net/https'
# Matches IPv4-looking "address:port" pairs such as "10.0.0.1:8080".
# Octet and port ranges are not validated; only the digit counts are checked.
PROXY_PATTERN = /\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}:[0-9]{1,5}\b/.freeze

# Returns every "ip:port" substring found in +text+, in order of appearance.
#
# @param text [#to_s] arbitrary text (nil is treated as an empty string)
# @return [Array<String>] the matches; empty when nothing matches
def extract_proxies(text)
  text.to_s.scan(PROXY_PATTERN)
end

# Downloads +url+ and returns the text content of its <body> element.
#
# URI#open (provided by open-uri) is used instead of Kernel#open: Kernel#open
# no longer handles URLs on Ruby 3.0+, and on older Rubies it would run a
# subprocess for a line starting with "|". The block form closes the
# downloaded IO once the document has been parsed.
#
# @param url [String] an http/https/ftp URL
# @return [String] the body text of the parsed page
# @raise [OpenURI::HTTPError, SocketError, SystemCallError, StandardError]
#   whatever the download or the URL parsing raises
def fetch_body_text(url)
  URI.parse(url).open { |io| Nokogiri::HTML(io).css('body').text }
end

# Appends each entry of +proxies+ to the file at +path+, one per line.
# The file is opened once per call and is not created when +proxies+ is empty.
#
# @param path [String] output file path
# @param proxies [Array<String>] entries to append
# @return [void]
def append_proxies(path, proxies)
  return if proxies.empty?

  File.open(path, 'a') do |file|
    proxies.each { |proxy| file.write("#{proxy}\n") }
  end
end

input_file = 'sources.txt'
output_file = 'out.txt'
counter = 0 # number of sources that were fetched and scanned successfully

if File.exist?(input_file)
  File.foreach(input_file).with_index(1) do |line, line_number|
    url = line.strip
    next if url.empty? # tolerate blank lines in the source list

    begin
      append_proxies(output_file, extract_proxies(fetch_body_text(url)))
      counter += 1
    rescue OpenURI::HTTPError => e
      # Check the numeric status rather than the server-controlled reason phrase.
      if e.io.status.first == '404'
        puts "#{url} returned a 404 Error"
      else
        puts "#{url} returned an HTTP error: #{e.message}"
      end
    rescue SocketError
      puts "Line #{line_number} returned a SocketError"
    rescue Errno::ECONNRESET
      puts "Line #{line_number} returned a reset by peer error"
    rescue StandardError
      # StandardError (not Exception) so Ctrl-C and exit still stop the script.
      puts "Line #{line_number} returned an error that we don't care to parse"
    end
  end
else
  warn "#{input_file} not found; no sources to scrape"
end

puts "Pulled proxies from #{counter} sources."