-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
48 lines (37 loc) · 1.26 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import sys
from utils.db import create_tables
from crawl import get_links, crawl_page
from models import Article, Category
def crawl_and_get_links():
    """Store every link returned by ``get_links()`` as an article in 'Sport'.

    The id of each stored article is printed, one per line.

    ``get_or_create`` is used instead of ``create`` so that running this
    command again reuses the existing 'Sport' category and the existing
    article rows rather than inserting duplicates.

    NOTE(review): assumes ``Category`` and ``Article`` are peewee models,
    whose ``get_or_create`` returns an ``(instance, created)`` pair - confirm.
    """
    links = get_links()
    cat, _ = Category.get_or_create(name='Sport')
    for link in links:
        # Look the article up by URL only; the category is applied just when
        # a new row has to be inserted.
        article, _ = Article.get_or_create(url=link, defaults={'category': cat})
        print(article.id)
def scrape_pages():
    """Crawl every article not yet marked completed and store its content.

    For each pending article ``crawl_page(article.url)`` is called. On
    success the ``'title'`` and ``'body'`` entries of the result are copied
    onto the article. On failure the error is reported on stderr and the
    article is left without content. In both cases the article is marked
    completed and saved, so a failing page is not retried on the next run.
    """
    # `== False` is intentional: presumably this builds an ORM query
    # expression on the field rather than evaluating a Python boolean.
    pending = Article.select().where(Article.is_completed == False)  # noqa: E712
    for article in pending:
        try:
            data = crawl_page(article.url)
        except Exception as exc:
            # Best-effort crawl: skip the broken page but make the failure
            # visible. `Exception` (not a bare except) lets Ctrl-C and
            # SystemExit still stop the run.
            print(f'failed to crawl {article.url}: {exc}', file=sys.stderr)
        else:
            article.title = data['title']
            article.body = data['body']
        article.is_completed = True
        article.save()
def show_stats():
    """Print the total article count, category count and completed-article count."""
    # `== True` is kept as written: it is passed to `where()` as a query
    # condition on the model field.
    completed_query = Article.select().where(Article.is_completed == True)  # noqa: E712

    articles = Article.select().count()
    categories = Category.select().count()
    completed = completed_query.count()

    summary = f'articles : {articles} category: {categories} completed : {completed}'
    print(summary)
if __name__ == '__main__':
    # Command-line entry point: the first argument selects the action to run.
    commands = {
        'create_tables': create_tables,
        'get_links': crawl_and_get_links,
        'get_articles': scrape_pages,
        'stats': show_stats,
    }

    # A missing argument used to raise IndexError and an unknown one did
    # nothing; both now produce a usage message and a non-zero exit status.
    command = sys.argv[1] if len(sys.argv) > 1 else None
    handler = commands.get(command)
    if handler is None:
        choices = '|'.join(commands)
        sys.exit(f'usage: {sys.argv[0]} <{choices}>')

    handler()