File tree Expand file tree Collapse file tree 1 file changed +31
-0
lines changed Expand file tree Collapse file tree 1 file changed +31
-0
lines changed Original file line number Diff line number Diff line change
1
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
from bs4 import BeautifulSoup
4
+
5
# Root of the site being crawled (a locally hosted instance).
base_url = "http://localhost:8080"

# URL of the listing page to fetch next; the crawl loop advances it page by
# page. No whitespace may separate the host from the path, otherwise the
# resulting URL is invalid.
full_path = f"{base_url}/r/politics"

# Running total of HTTP requests issued so far (listing pages + comment pages).
ctr = 0
10
+
11
# Guards ``ctr``: fetch_url runs on ThreadPoolExecutor worker threads, and
# ``ctr += 1`` is a read-modify-write that can lose updates without a lock.
_ctr_lock = threading.Lock()


def fetch_url(url, timeout=30):
    """Fetch *url* with a GET request and bump the global request counter.

    Args:
        url: Absolute URL to request.
        timeout: Seconds ``requests`` waits on the server (connect / read)
            before raising a timeout error. Without a timeout a stalled
            server would block the calling thread indefinitely.

    Returns:
        The ``requests.Response`` object for the request.
    """
    global ctr
    response = requests.get(url, timeout=timeout)
    # Increment and read under the lock so concurrent callers never lose an
    # update and each call reports its own distinct count.
    with _ctr_lock:
        ctr += 1
        count = ctr
    print(f"Request count: {count} ")
    return response
17
+
18
# Crawl the listing page by page: fetch the page, request every comment
# thread it links to in parallel, then follow the next-page link until the
# page no longer has one.
while full_path:
    # Reuse fetch_url so the request and its counter/print bookkeeping live
    # in exactly one place instead of being duplicated here.
    response = fetch_url(full_path)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Comment links are site-relative hrefs, so prefix them with the base URL.
    comment_links = soup.find_all('a', class_='post_comments')
    comment_urls = [base_url + link['href'] for link in comment_links]

    # Leaving the ``with`` block waits for all submitted fetches, so every
    # comment request for this page finishes before the next page is loaded.
    with ThreadPoolExecutor(max_workers=10) as executor:
        # NOTE: the map results are never consumed, so an exception raised
        # inside fetch_url is dropped silently (best-effort fetching).
        executor.map(fetch_url, comment_urls)

    # The anchor with accesskey 'N' is treated as the next-page link; when it
    # is absent this was the last page.
    next_link = soup.find('a', accesskey='N')
    if not next_link:
        break
    full_path = base_url + next_link['href']
You can’t perform that action at this time.
0 commit comments