Update app.py
Browse files
app.py
CHANGED
@@ -8,8 +8,8 @@ import textwrap
|
|
8 |
import traceback
|
9 |
|
10 |
# 爬虫-------------
|
11 |
-
from save_cookie import save_cookie, get_cookie, cookie_f
|
12 |
-
from scrap_util import getDriver, titleLocInfo, find_key_paragrap, extract_from_driver, table_record_doc
|
13 |
import helium as hm
|
14 |
from postDouyin import senDouyin
|
15 |
# 模型-------------
|
@@ -54,12 +54,12 @@ HEIGHT = 1400
|
|
54 |
WIDTH,HEIGHT = a_.size
|
55 |
|
56 |
# should load from files and build new from file
|
57 |
-
cookie_fns = ["抖音北京人事考试","抖音广东人事考试","抖音四川人事考试","抖音浙江人事考试","抖音江苏人事考试","抖音山东人事考试","抖音河南人事考试"]
|
58 |
-
cookie_fns = os.listdir("./cookie_list/")
|
59 |
-
cookie_fn = cookie_fns[0]
|
60 |
description = "URL--> 爬取-->解析--> 音频--> 图片--> 视频"
|
61 |
|
62 |
-
driver = getDriver()
|
63 |
# sub_url ="https://www.js.msa.gov.cn/art/2023/2/24/art_11436_1391666.html"
|
64 |
# hm.set_driver(driver) # 给它一个selenium driver
|
65 |
# hm.go_to(sub_url)
|
@@ -84,30 +84,6 @@ def custom_predict(context, question):
|
|
84 |
score = answer_result["score"]
|
85 |
return answer, score
|
86 |
|
87 |
-
def get_content_from_driver(task_link, content=None):
    """Load ``task_link`` in a browser and gather the fields for one page.

    The page is opened through helium, the text of every non-empty ``<p>``
    element is joined with newlines, structured fields are pulled from the
    page with ``extract_from_driver``, and ``custom_predict`` is asked four
    questions against the joined text.

    Args:
        task_link: URL of the page to open.
        content: Accepted for backward compatibility; not used by this
            function.

    Returns:
        A list of 13 items, in this order: ``doc["title"]``, ``doc["zwlx"]``,
        ``doc["zwk_sheng"]``, ``doc["zwk_diqu"]``, ``doc["tidy_bm_sj"]``,
        ``doc["tidy_fee_sj"]``, ``doc["tidy_ks_sj"]``, ``doc["tidy_zkz_sj"]``,
        the joined paragraph text, and then the predicted answers for the
        four questions in ``questions`` below (same order).
    """
    # NOTE(review): a fresh driver is created on every call and is not quit
    # here. It is left open on purpose in this rewrite because helium keeps a
    # reference to it; confirm who is responsible for closing it.
    driver = getDriver()
    hm.set_driver(driver)  # hand helium the selenium driver
    hm.go_to(task_link)
    print("hm.go_to(task_link)")
    time.sleep(1)  # fixed pause after navigation; presumably lets the page render

    # The locator-string form works on both Selenium 3 and 4, whereas
    # find_elements_by_xpath was removed in Selenium 4.3.
    paragraphs = driver.find_elements("xpath", "//p")
    # Read .text once per element instead of once to filter and once to keep.
    paragraph_texts = (element.text for element in paragraphs)
    context_to_label = "\n".join(text for text in paragraph_texts if text)

    doc = extract_from_driver(driver)
    doc["url"] = task_link
    doc["content"] = context_to_label

    # Question strings are passed to the model verbatim; only the answer is
    # kept, the score returned alongside it is discarded.
    questions = ("报名时间", "缴费时间", "考试时间", "准考证时间")
    answers = [
        custom_predict(context=context_to_label, question=question)[0]
        for question in questions
    ]

    return [
        doc["title"],
        doc["zwlx"],
        doc["zwk_sheng"],
        doc["zwk_diqu"],
        doc["tidy_bm_sj"],
        doc["tidy_fee_sj"],
        doc["tidy_ks_sj"],
        doc["tidy_zkz_sj"],
        context_to_label,
        *answers,
    ]
|
110 |
-
|
111 |
def image_preview(orimage=None, text="Hello Ai", x=10, y=20, w=500, h=100, bac_color = "#FFbbFF",
|
112 |
txt_color = "#000000",front="simsun.ttc", size = 50):
|
113 |
if orimage is None:
|
@@ -473,8 +449,8 @@ with gr.Blocks() as demo:
|
|
473 |
# outputs=[img_files])
|
474 |
# outputs=[movie_file, img_files])
|
475 |
# exit_.click(fn=exit_func)
|
476 |
-
login_.click(fn=loginDouyin, outputs=[login_qr])
|
477 |
-
login_save.click(fn=run_save_cookie, inputs = [account_name_new])
|
478 |
# post_.click(fn=mySendDouyin, inputs = [account_fn, movie_file])
|
479 |
# 绑定clear点击函数
|
480 |
# clear.click(fn=clear_input, inputs=[], outputs=[context, question, answer, score])
|
|
|
8 |
import traceback
|
9 |
|
10 |
# 爬虫-------------
|
11 |
+
# from save_cookie import save_cookie, get_cookie, cookie_f
|
12 |
+
# from scrap_util import getDriver, titleLocInfo, find_key_paragrap, extract_from_driver, table_record_doc
|
13 |
import helium as hm
|
14 |
from postDouyin import senDouyin
|
15 |
# 模型-------------
|
|
|
54 |
WIDTH,HEIGHT = a_.size
|
55 |
|
56 |
# should load from files and build new from file
|
57 |
+
# cookie_fns = ["抖音北京人事考试","抖音广东人事考试","抖音四川人事考试","抖音浙江人事考试","抖音江苏人事考试","抖音山东人事考试","抖音河南人事考试"]
|
58 |
+
# cookie_fns = os.listdir("./cookie_list/")
|
59 |
+
# cookie_fn = cookie_fns[0]
|
60 |
description = "URL--> 爬取-->解析--> 音频--> 图片--> 视频"
|
61 |
|
62 |
+
# driver = getDriver()
|
63 |
# sub_url ="https://www.js.msa.gov.cn/art/2023/2/24/art_11436_1391666.html"
|
64 |
# hm.set_driver(driver) # 给它一个selenium driver
|
65 |
# hm.go_to(sub_url)
|
|
|
84 |
score = answer_result["score"]
|
85 |
return answer, score
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
def image_preview(orimage=None, text="Hello Ai", x=10, y=20, w=500, h=100, bac_color = "#FFbbFF",
|
88 |
txt_color = "#000000",front="simsun.ttc", size = 50):
|
89 |
if orimage is None:
|
|
|
449 |
# outputs=[img_files])
|
450 |
# outputs=[movie_file, img_files])
|
451 |
# exit_.click(fn=exit_func)
|
452 |
+
# login_.click(fn=loginDouyin, outputs=[login_qr])
|
453 |
+
# login_save.click(fn=run_save_cookie, inputs = [account_name_new])
|
454 |
# post_.click(fn=mySendDouyin, inputs = [account_fn, movie_file])
|
455 |
# 绑定clear点击函数
|
456 |
# clear.click(fn=clear_input, inputs=[], outputs=[context, question, answer, score])
|