first release
Files changed:
- app.py +112 -30
- chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/data_level0.bin +0 -3
- chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/header.bin +0 -3
- chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/length.bin +0 -3
- chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/link_lists.bin +0 -0
- chroma/chroma.sqlite3 +2 -2
- data/DSC_0105.jpg +0 -0
- data/riddles_data +0 -0
- requirements.txt +1 -1
app.py
CHANGED
@@ -4,10 +4,10 @@ from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings

 import gradio as gr
-import torch
-
 import hanzidentifier
-
+import re
+
+import chinese_converter

 # %%
 #Load the LLM model and pipeline directly
@@ -32,6 +32,7 @@ huggingface_embeddings= HuggingFaceEmbeddings(
 # %%
 persist_directory = 'chroma/'
 vectordb = Chroma(embedding_function=huggingface_embeddings,persist_directory=persist_directory)
+print(vectordb._collection.count())

 # %%
 # helper functions for prompt processing for this LLM
@@ -46,8 +47,9 @@ def postprocess(text):

 # get answer from LLM with prompt input
 def answer(text,context=""):
-    text = f"{context}\n{text}\n
+    text = f"{context}\n{text}\n谜底:"
     text = text.strip()
+
     text = preprocess(text)
     out_text = pipe(text)

@@ -64,17 +66,22 @@ def helper_rag(text):
     return context

 # helper function for prompt
-def helper_text(text_input):
+def helper_text(text_input,radio=None):
     chinese_type = "simplified"

     if hanzidentifier.is_traditional(text_input):
-        text_input =
+        text_input = chinese_converter.to_traditional(text_input)
         chinese_type = "traditional"

+    text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
+
     if not any(c in text_input for c in ["猜", "打"]):
-
+        warning = "请给一个提示,提示格式,例子:猜一水果,打一字。"
+        if chinese_type == "traditional" or radio == "繁體中文":
+            warning = chinese_converter.to_traditional(warning)
+        return warning

-    text=f"""谜面:{text_input}
+    text=f"""谜面:{text_input}
 """

     context = helper_rag(text)
@@ -82,63 +89,138 @@ def helper_text(text_input):
     output = answer(text,context=context)

     if chinese_type == "traditional":
-        output =
+        output = chinese_converter.to_traditional(output)
+
+    output = re.split(r'\s+',output)

-    return output
+    return output[0]

 # Gradio function for configure the language of UI
-def change_language(radio):
+def change_language(radio,text_input,text_output,clear_btn,submit_btn,markdown, markdown_msg1):
     if radio == "简体中文":
         index = 0
-        text_input_update=
-        text_output_update=
+        text_input_update=gr.Textbox.update(value = chinese_converter.to_simplified(text_input), label = text_input_label[index])
+        text_output_update=gr.Textbox.update(value = chinese_converter.to_simplified(text_output),label = text_output_label[index])
+        markdown_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown))
+        markdown_msg1_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown_msg1))
     elif radio == "繁體中文":
         index = 1
-        text_input_update=
-        text_output_update=
+        text_input_update=gr.Textbox.update(value = chinese_converter.to_traditional(text_input),label = text_input_label[index])
+        text_output_update=gr.Textbox.update(value = chinese_converter.to_traditional(text_output),label = text_output_label[index])
+        markdown_update=gr.Markdown.update(value = chinese_converter.to_traditional(markdown))
+        markdown_msg1_update=gr.Markdown.update(value = chinese_converter.to_traditional(markdown_msg1))
     elif radio == "English":
         index = 2
-        text_input_update=
-        text_output_update=
+        text_input_update=gr.Textbox.update(label = text_input_label[index])
+        text_output_update=gr.Textbox.update(label = text_output_label[index])
+        markdown_update=gr.Markdown.update(value = markdown)
+        markdown_msg1_update=gr.Markdown.update(value = markdown_msg1)
     else:
         index = 0
-        text_input_update=
-        text_output_update=
+        text_input_update=gr.Textbox.update(label = text_input_label[index])
+        text_output_update=gr.Textbox.update(label = text_output_label[index])
+        markdown_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown))
+        markdown_msg1_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown_msg1))

-    clear_btn_update =
-    submit_btn_update =
+    clear_btn_update = gr.ClearButton.update(value = clear_label[index])
+    submit_btn_update = gr.Button.update(value = submit_label[index])

-    return [text_input_update,text_output_update,clear_btn_update,submit_btn_update]
+    return [text_input_update,text_output_update,clear_btn_update,submit_btn_update,markdown_update, markdown_msg1_update ]

-
-
-
-
+def clear_text():
+    text_input_update=gr.Textbox.update(value=None)
+    text_output_update=gr.Textbox.update(value=None)
+
+    return [text_input_update,text_output_update]
+

+# %%
 text_input_label=["谜面","謎面","Riddle"]
 text_output_label=["谜底","謎底","Answer"]

 clear_label = ["清除","清除","Clear"]
 submit_label = ["提交","提交","Submit"]

+# css = """
+# #markdown { background-image: url("file/data/DSC_0105.jpg");
+#             background-size: cover;
+#             }
+# """
+
 with gr.Blocks() as demo:
     index = 0
     radio = gr.Radio(
         ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
     )
+    markdown = gr.Markdown(
+        """
+# Chinese Lantern Riddles Solver with LLM
+## 用语言大模型来猜灯谜
+""",elem_id="markdown")
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(label=text_input_label[index],
                 value="小家伙穿黄袍,花丛中把房造。飞到西来飞到东,人人夸他爱劳动。(猜一动物)", lines = 2)
             with gr.Row():
-                clear_btn = gr.ClearButton(value=clear_label[index],components=text_input)
+                clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
                 submit_btn = gr.Button(value=submit_label[index], variant = "primary")
+
+            markdown_msg1 = gr.Markdown(
+                """
+灯谜是中华文化特色文娱活动,自北宋盛行。每年逢正月十五元宵节,或是八月十五中秋节,将谜语贴在花灯上,让大家可一起猜谜。
+
+Lantern riddle is a traditional Chinese cultural activity. Being popular since the Song Dynasty (960-1276), it \
+is held in the Lantern Festival (15th day of the first lunar month) or the Mid-Autumn Festival (15th day of \
+the eighth lunar month). When people are viewing the flower lanterns, they can guess the riddles on the lanterns together.
+
+----
+
+# 声明 Disclaimer
+
+本应用输出的文本为机器基于模型生成的结果,不代表任何人观点,请谨慎辨别和参考。请在法律允许的范围内使用。
+
+本应用调用了 [ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) 对话语言大模型,\
+使用本应用前请务必阅读和同意遵守其[使用授权许可证](https://huggingface.co/ClueAI/ChatYuan-large-v2/blob/main/LICENSE)。
+
+本应用仅供非商业用途。
+
+The outputs of this application are machine-generated with a statistical model. \
+The outputs do not reflect any opinions of any human subjects. You must identify the outputs in caution. \
+It is your responsbility to decide whether to accept the outputs. You must use the applicaiton in obedience to the Law.
+
+This application utilizes [ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) \
+Conversational Large Language Model. Before using this application, you must read and accept to follow \
+the [LICENSE](https://huggingface.co/ClueAI/ChatYuan-large-v2/blob/main/LICENSE).
+
+This application is for non-commercial use only.
+
+"""
+            )
         with gr.Column():
             text_output = gr.Textbox(label=text_output_label[index])
+            markdown_msg2 = gr.Markdown(
+                """
+<br/>
+<br/>
+<br/>
+<br/>
+
+
+""")
+
+
+

-    submit_btn.click(fn=helper_text, inputs=text_input, outputs=text_output)
-    radio.change(fn=change_language,inputs=radio,outputs=[text_input,text_output,clear_btn,submit_btn])
+    submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
+    clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
+    radio.change(fn=change_language,inputs=[radio,text_input,text_output,clear_btn,submit_btn,markdown, markdown_msg1],
+                 outputs=[text_input,text_output,clear_btn,submit_btn, markdown, markdown_msg1])

-demo.launch()
+#demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
+#                    flagging_options=["Inappropriate"],allow_flagging="never",
+#                    title="aaa",description="aaa",article="aaa")
+demo.launch()

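Note: change_language above relies on the Gradio 3.x convention that component classmethods such as gr.Textbox.update() and gr.Button.update() return update dictionaries for already-rendered components, one per entry in outputs and in the same order. Below is a minimal, self-contained sketch of that pattern. It is illustrative only, not part of this commit: the LABELS table and switch_labels function are invented for the example, and Gradio 4+ replaces these classmethods with gr.update().

import gradio as gr

# Hypothetical label table for the sketch; the app keeps separate
# text_input_label / submit_label lists indexed by language instead.
LABELS = {"简体中文": ("谜面", "提交"), "繁體中文": ("謎面", "提交"), "English": ("Riddle", "Submit")}

def switch_labels(choice):
    # Return one update dict per output component, in the same order as outputs=[...].
    input_label, submit_label = LABELS.get(choice, LABELS["简体中文"])
    return [gr.Textbox.update(label=input_label), gr.Button.update(value=submit_label)]

with gr.Blocks() as sketch:
    lang = gr.Radio(list(LABELS), value="简体中文", show_label=False)
    riddle = gr.Textbox(label=LABELS["简体中文"][0])
    submit = gr.Button(value=LABELS["简体中文"][1])
    # Changing the radio selection relabels both components.
    lang.change(fn=switch_labels, inputs=lang, outputs=[riddle, submit])

sketch.launch()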
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/data_level0.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:207a5df9a46016ace6d0ceb2102ed87cb5d858741dee310d15add55db6a2f72e
-size 3212000
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/header.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
-size 100
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/length.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fc19b1997119425765295aeab72d76faa6927d4f83985d328c26f20468d6cc76
-size 4000
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/link_lists.bin
DELETED
File without changes
chroma/chroma.sqlite3
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:400671ea32a93c4b82d6a518a1f50021ecb8e356da6712b00bfb1dc35602b807
+size 6164480
data/DSC_0105.jpg
ADDED
data/riddles_data
CHANGED
The diff for this file is too large to render.
See raw diff
requirements.txt
CHANGED
@@ -5,4 +5,4 @@ chromadb
 sentence-transformers
 sentencepiece
 hanzidentifier
-
+chinese-converter
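helper_rag itself is untouched by this diff, but for context, here is a plausible minimal sketch of the retrieval it performs against the persisted chroma/ store, written with the same langchain-era classes app.py imports. The embedding model_name below is an assumption for illustration, not taken from the repo.

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Mirror the app.py construction: an embedding function plus the persisted chroma/ directory
# shipped in this repo. The model name is a placeholder; app.py configures its own model.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
vectordb = Chroma(embedding_function=embeddings, persist_directory="chroma/")

def retrieve_context(riddle: str, k: int = 3) -> str:
    # Fetch the k most similar stored riddles and join them as few-shot context for the prompt.
    docs = vectordb.similarity_search(riddle, k=k)
    return "\n".join(doc.page_content for doc in docs)

print(retrieve_context("小家伙穿黄袍,花丛中把房造。(猜一动物)"))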