-
Notifications
You must be signed in to change notification settings - Fork 2
/
arlanglinks.py
72 lines (68 loc) · 1.48 KB
/
arlanglinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
python3 core8/pwb.py dump/arlanglinks
"""
import sys
import json
from pathlib import Path
# ---
# ---
# Project root: four directory levels above this file
# (the original comment gives /data/project/himo as the deployed value).
Himo_Dir = Path(__file__).parent.parent.parent.parent
# ---
# Directory that receives the generated dump files. Kept as a plain string
# because it is interpolated into f-string paths.
Dump_Dir = f"{Himo_Dir}/dumps"
# ---
print(f"Himo_Dir:{Himo_Dir}, Dump_Dir:{Dump_Dir}")
# ---
# ---
from api_sql import wiki_sql
# ---
# Destination of the JSON dump (Dump_Dir is defined earlier in this file).
dump_file = f"{Dump_Dir}/langlinks.json"
# ---
# Base query: one row per namespace-14 page that has an "ar" langlink.
# The CONCAT wrappers embed literal quote/colon/comma characters in the
# returned values, so "en" looks like '"Category:Foo"' and "ar" like ':"bar",'.
# A "limit ... offset ..." clause is appended for every page of results.
qua = """select
CONCAT('"Category:', p1.page_title, '"') AS en, CONCAT(':"',ll_title, '",') AS ar
from page AS p1, langlinks
where p1.page_id = ll_from
AND ll_lang = "ar"
AND p1.page_namespace = 14
"""
# ---
# Maps the "en" value of each row to its "ar" value (underscores -> spaces).
table = {}
# ---
# "test" on the command line: print the paging clauses, run no query.
TEST = "test" in sys.argv
# Exclusive upper bound of the page counter (the loop runs page_limit - 1 times).
page_limit = 20 if TEST else 1000
# ---
limit = 200000
# ---
# NOTE(review): the query has no ORDER BY, so paging relies on the server
# returning rows in the same order for every request - confirm this is acceptable.
for i in range(1, page_limit):
    # Page 1 starts at offset 0; each following page advances by `limit` rows.
    offset = (i - 1) * limit
    # ---
    line = f"limit {limit} offset {offset}"
    # ---
    print(line)
    # ---
    qun = qua + line
    # ---
    if TEST:
        continue
    # ---
    result = wiki_sql.sql_new(qun, wiki="en", printqua=False)
    # ---
    # An empty (or missing) page means all rows have been read.
    if not result:
        print("result is empty...")
        break
    # ---
    for x in result:
        en = x["en"].replace("_", " ")
        ar = x["ar"].replace("_", " ")
        table[en] = ar
    # ---
    print(f"len of table:{len(table)}")
# ---
# NOTE(review): in test mode `table` is still empty here, so the existing dump
# file is overwritten with "{}" - confirm that this is intended.
# The context manager guarantees the file is flushed and closed.
with open(dump_file, "w", encoding="utf-8") as dump_handle:
    json.dump(table, dump_handle)
# ---