-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwebscraper.py
More file actions
50 lines (42 loc) · 1.56 KB
/
webscraper.py
File metadata and controls
50 lines (42 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# -*- coding: utf-8 -*-
import os
from argparse import ArgumentParser
import template
import url
import content
from hparser import MyHTMLParser
class WebScraper:
    """Entry point of the program.

    Instantiating the class runs the whole pipeline: the command line is
    parsed for ``-u/--url``; when a URL is supplied the page is loaded
    through ``url.Url``, fed to ``MyHTMLParser``, formatted by
    ``content.Content`` and written to disk with :meth:`save_data`.
    """

    def __init__(self):
        """Parse the command line and, when a URL was given, scrape and save it.

        When no URL is supplied a reminder is printed and nothing else
        happens; ``self.content`` stays ``None`` in that case.
        """
        # NOTE(review): the ``template`` *module object* is passed to the
        # constructor here -- confirm this is what ``Template`` expects.
        self.template = template.Template(template)
        # Always defined, so the attribute exists even without a URL.
        self.content = None

        arg_parser = ArgumentParser()
        arg_parser.add_argument("-u", "--url", dest="url")
        args = arg_parser.parse_args()
        if args.url is None:
            print("Вы забыли ввести URL")
            return

        page = url.Url(args.url)
        data = page.get_content()
        path = page.parse_url()
        current_template = self.template.get_template()
        html_parser = MyHTMLParser(current_template)
        html_parser.feed(data)
        self.content = content.Content(current_template, html_parser.content)
        write_data = self.content.format_data()
        self.save_data(path, write_data)

    @staticmethod
    def save_data(path, data):
        """Write *data* as UTF-8 text to a ``.txt`` file under the current directory.

        *path* is a relative, URL-like path whose components may be
        separated by ``/`` or ``\\``. Its directory components are created
        below ``os.getcwd()`` as needed.

        * ``"host/news/article.html"`` -> ``<cwd>/host/news/article.txt``
          (only the last extension is replaced);
        * ``"host/news/"`` (trailing separator) -> ``<cwd>/host/news.txt``;
        * an empty path -> ``<cwd>/.txt``.

        Args:
            path: Relative target path derived from the URL.
            data: Text to write.

        Returns:
            The full path of the file that was written.
        """
        # Accept both separator styles so the result is the same on every OS.
        normalized = path.replace('\\', '/')
        # Drop empty, "." and ".." components: the path originates from a URL
        # and must not be able to climb out of the working directory.
        parts = [part for part in normalized.split('/')
                 if part not in ('', '.', '..')]

        if not parts:
            dir_parts, file_name = [], '.txt'
        elif normalized.endswith('/'):
            # No file name in the URL: the last directory names the file.
            dir_parts, file_name = parts[:-1], parts[-1] + '.txt'
        else:
            dir_parts = parts[:-1]
            file_name = os.path.splitext(parts[-1])[0] + '.txt'

        dir_name = os.path.join(os.getcwd(), *dir_parts)
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(dir_name, exist_ok=True)

        file_path = os.path.join(dir_name, file_name)
        with open(file_path, 'w', encoding="utf-8") as f:
            f.write(data)
        return file_path
if __name__ == "__main__":
    # Constructing the scraper is what runs the program: all the work
    # happens inside WebScraper.__init__.
    WebScraper()