3v324v23 committed on
Commit
9f3d19f
·
1 Parent(s): 804bcc2

first release

Browse files
app.py CHANGED
@@ -4,10 +4,10 @@ from langchain.vectorstores import Chroma
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
 
6
  import gradio as gr
7
- import torch
8
-
9
  import hanzidentifier
10
- from hanziconv import HanziConv
 
 
11
 
12
  # %%
13
  #Load the LLM model and pipeline directly
@@ -32,6 +32,7 @@ huggingface_embeddings= HuggingFaceEmbeddings(
32
  # %%
33
  persist_directory = 'chroma/'
34
  vectordb = Chroma(embedding_function=huggingface_embeddings,persist_directory=persist_directory)
 
35
 
36
  # %%
37
  # helper functions for prompt processing for this LLM
@@ -46,8 +47,9 @@ def postprocess(text):
46
 
47
  # get answer from LLM with prompt input
48
  def answer(text,context=""):
49
- text = f"{context}\n{text}\n答案:"
50
  text = text.strip()
 
51
  text = preprocess(text)
52
  out_text = pipe(text)
53
 
@@ -64,17 +66,22 @@ def helper_rag(text):
64
  return context
65
 
66
  # helper function for prompt
67
- def helper_text(text_input):
68
  chinese_type = "simplified"
69
 
70
  if hanzidentifier.is_traditional(text_input):
71
- text_input = HanziConv.toSimplified(text_input)
72
  chinese_type = "traditional"
73
 
 
 
74
  if not any(c in text_input for c in ["猜", "打"]):
75
- return "请给一个提示,提示格式,例子:猜一水果。"
 
 
 
76
 
77
- text=f"""谜面:{text_input} 谜底:
78
  """
79
 
80
  context = helper_rag(text)
@@ -82,63 +89,138 @@ def helper_text(text_input):
82
  output = answer(text,context=context)
83
 
84
  if chinese_type == "traditional":
85
- output = HanziConv.toTraditional(output)
 
 
86
 
87
- return output
88
 
89
  # Gradio function for configuring the language of the UI
90
- def change_language(radio):
91
  if radio == "简体中文":
92
  index = 0
93
- text_input_update=text_input.update(value = HanziConv.toSimplified(text_input.value), label = text_input_label[index])
94
- text_output_update=text_output.update(value = HanziConv.toSimplified(text_output.value),label = text_output_label[index])
 
 
95
  elif radio == "繁體中文":
96
  index = 1
97
- text_input_update=text_input.update(value = HanziConv.toTraditional(text_input.value),label = text_input_label[index])
98
- text_output_update=text_output.update(value = HanziConv.toTraditional(text_output.value),label = text_output_label[index])
 
 
99
  elif radio == "English":
100
  index = 2
101
- text_input_update=text_input.update(label = text_input_label[index])
102
- text_output_update=text_output.update(label = text_output_label[index])
 
 
103
  else:
104
  index = 0
105
- text_input_update=text_input.update(label = text_input_label[index])
106
- text_output_update=text_output.update(label = text_output_label[index])
 
 
107
 
108
- clear_btn_update = clear_btn.update(value = clear_label[index])
109
- submit_btn_update = submit_btn.update(value = submit_label[index])
110
 
111
- return [text_input_update,text_output_update,clear_btn_update,submit_btn_update]
112
 
113
- # %%
114
- # index==0: Simplified Chinese
115
- # index==1: Traditional Chinese
116
- # index==2: English
 
 
117
 
 
118
  text_input_label=["谜面","謎面","Riddle"]
119
  text_output_label=["谜底","謎底","Answer"]
120
 
121
  clear_label = ["清除","清除","Clear"]
122
  submit_label = ["提交","提交","Submit"]
123
 
 
 
 
 
 
 
124
  with gr.Blocks() as demo:
125
  index = 0
126
  radio = gr.Radio(
127
  ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
128
  )
 
 
 
 
 
129
  with gr.Row():
130
  with gr.Column():
131
  text_input = gr.Textbox(label=text_input_label[index],
132
  value="小家伙穿黄袍,花丛中把房造。飞到西来飞到东,人人夸他爱劳动。(猜一动物)", lines = 2)
133
  with gr.Row():
134
- clear_btn = gr.ClearButton(value=clear_label[index],components=text_input)
135
  submit_btn = gr.Button(value=submit_label[index], variant = "primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  with gr.Column():
137
  text_output = gr.Textbox(label=text_output_label[index])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- submit_btn.click(fn=helper_text, inputs=text_input, outputs=text_output)
140
- radio.change(fn=change_language,inputs=radio,outputs=[text_input,text_output,clear_btn,submit_btn])
141
 
142
- demo.launch()
143
 
144
 
 
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
 
6
  import gradio as gr
 
 
7
  import hanzidentifier
8
+ import re
9
+
10
+ import chinese_converter
11
 
12
  # %%
13
  #Load the LLM model and pipeline directly
 
32
  # %%
33
  persist_directory = 'chroma/'
34
  vectordb = Chroma(embedding_function=huggingface_embeddings,persist_directory=persist_directory)
35
+ print(vectordb._collection.count())
36
 
37
  # %%
38
  # helper functions for prompt processing for this LLM
 
47
 
48
  # get answer from LLM with prompt input
49
  def answer(text,context=""):
50
+ text = f"{context}\n{text}\n谜底:"
51
  text = text.strip()
52
+
53
  text = preprocess(text)
54
  out_text = pipe(text)
55
 
 
66
  return context
67
 
68
  # helper function for prompt
69
+ def helper_text(text_input,radio=None):
70
  chinese_type = "simplified"
71
 
72
  if hanzidentifier.is_traditional(text_input):
73
+ text_input = chinese_converter.to_traditional(text_input)
74
  chinese_type = "traditional"
75
 
76
+ text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
77
+
78
  if not any(c in text_input for c in ["猜", "打"]):
79
+ warning = "请给一个提示,提示格式,例子:猜一水果,打一字。"
80
+ if chinese_type == "traditional" or radio == "繁體中文":
81
+ warning = chinese_converter.to_traditional(warning)
82
+ return warning
83
 
84
+ text=f"""谜面:{text_input}
85
  """
86
 
87
  context = helper_rag(text)
 
89
  output = answer(text,context=context)
90
 
91
  if chinese_type == "traditional":
92
+ output = chinese_converter.to_traditional(output)
93
+
94
+ output = re.split(r'\s+',output)
95
 
96
+ return output[0]
97
 
98
  # Gradio function for configuring the language of the UI
99
+ def change_language(radio,text_input,text_output,clear_btn,submit_btn,markdown, markdown_msg1):
100
  if radio == "简体中文":
101
  index = 0
102
+ text_input_update=gr.Textbox.update(value = chinese_converter.to_simplified(text_input), label = text_input_label[index])
103
+ text_output_update=gr.Textbox.update(value = chinese_converter.to_simplified(text_output),label = text_output_label[index])
104
+ markdown_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown))
105
+ markdown_msg1_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown_msg1))
106
  elif radio == "繁體中文":
107
  index = 1
108
+ text_input_update=gr.Textbox.update(value = chinese_converter.to_traditional(text_input),label = text_input_label[index])
109
+ text_output_update=gr.Textbox.update(value = chinese_converter.to_traditional(text_output),label = text_output_label[index])
110
+ markdown_update=gr.Markdown.update(value = chinese_converter.to_traditional(markdown))
111
+ markdown_msg1_update=gr.Markdown.update(value = chinese_converter.to_traditional(markdown_msg1))
112
  elif radio == "English":
113
  index = 2
114
+ text_input_update=gr.Textbox.update(label = text_input_label[index])
115
+ text_output_update=gr.Textbox.update(label = text_output_label[index])
116
+ markdown_update=gr.Markdown.update(value = markdown)
117
+ markdown_msg1_update=gr.Markdown.update(value = markdown_msg1)
118
  else:
119
  index = 0
120
+ text_input_update=gr.Textbox.update(label = text_input_label[index])
121
+ text_output_update=gr.Textbox.update(label = text_output_label[index])
122
+ markdown_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown))
123
+ markdown_msg1_update=gr.Markdown.update(value = chinese_converter.to_simplified(markdown_msg1))
124
 
125
+ clear_btn_update = gr.ClearButton.update(value = clear_label[index])
126
+ submit_btn_update = gr.Button.update(value = submit_label[index])
127
 
128
+ return [text_input_update,text_output_update,clear_btn_update,submit_btn_update,markdown_update, markdown_msg1_update ]
129
 
130
+ def clear_text():
131
+ text_input_update=gr.Textbox.update(value=None)
132
+ text_output_update=gr.Textbox.update(value=None)
133
+
134
+ return [text_input_update,text_output_update]
135
+
136
 
137
+ # %%
138
  text_input_label=["谜面","謎面","Riddle"]
139
  text_output_label=["谜底","謎底","Answer"]
140
 
141
  clear_label = ["清除","清除","Clear"]
142
  submit_label = ["提交","提交","Submit"]
143
 
144
+ # css = """
145
+ # #markdown { background-image: url("file/data/DSC_0105.jpg");
146
+ # background-size: cover;
147
+ # }
148
+ # """
149
+
150
  with gr.Blocks() as demo:
151
  index = 0
152
  radio = gr.Radio(
153
  ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
154
  )
155
+ markdown = gr.Markdown(
156
+ """
157
+ # Chinese Lantern Riddles Solver with LLM
158
+ ## 用语言大模型来猜灯谜
159
+ """,elem_id="markdown")
160
  with gr.Row():
161
  with gr.Column():
162
  text_input = gr.Textbox(label=text_input_label[index],
163
  value="小家伙穿黄袍,花丛中把房造。飞到西来飞到东,人人夸他爱劳动。(猜一动物)", lines = 2)
164
  with gr.Row():
165
+ clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
166
  submit_btn = gr.Button(value=submit_label[index], variant = "primary")
167
+
168
+ markdown_msg1 = gr.Markdown(
169
+ """
170
+ 灯谜是中华文化特色文娱活动,自北宋盛行。每年逢正月十五元宵节,或是八月十五中秋节,将谜语贴在花灯上,让大家可一起猜谜。
171
+
172
+ Lantern riddle guessing is a traditional Chinese cultural activity. Popular since the Song Dynasty (960-1276), it \
173
+ is held in the Lantern Festival (15th day of the first lunar month) or the Mid-Autumn Festival (15th day of \
174
+ the eighth lunar month). When people are viewing the flower lanterns, they can guess the riddles on the lanterns together.
175
+
176
+ ----
177
+
178
+ # 声明 Disclaimer
179
+
180
+ 本应用输出的文本为机器基于模型生成的结果,不代表任何人观点,请谨慎辨别和参考。请在法律允许的范围内使用。
181
+
182
+ 本应用调用了 [ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) 对话语言大模型,\
183
+ 使用本应用前请务必阅读和同意遵守其[使用授权许可证](https://huggingface.co/ClueAI/ChatYuan-large-v2/blob/main/LICENSE)。
184
+
185
+ 本应用仅供非商业用途。
186
+
187
+ The outputs of this application are machine-generated with a statistical model. \
188
+ The outputs do not reflect the opinions of any person. You must assess the outputs with caution. \
189
+ It is your responsibility to decide whether to accept the outputs. You must use the application in accordance with the law.
190
+
191
+ This application utilizes [ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) \
192
+ Conversational Large Language Model. Before using this application, you must read and agree to follow \
193
+ the [LICENSE](https://huggingface.co/ClueAI/ChatYuan-large-v2/blob/main/LICENSE).
194
+
195
+ This application is for non-commercial use only.
196
+
197
+ """
198
+ )
199
  with gr.Column():
200
  text_output = gr.Textbox(label=text_output_label[index])
201
+ markdown_msg2 = gr.Markdown(
202
+ """
203
+ <br/>
204
+ <br/>
205
+ <br/>
206
+ <br/>
207
+
208
+ ![lantern](file/data/DSC_0105.jpg)
209
+ """)
210
+
211
+
212
+
213
+
214
+ submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
215
+ clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
216
+ radio.change(fn=change_language,inputs=[radio,text_input,text_output,clear_btn,submit_btn,markdown, markdown_msg1],
217
+ outputs=[text_input,text_output,clear_btn,submit_btn, markdown, markdown_msg1])
218
+
219
+ #demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
220
+ # flagging_options=["Inappropriate"],allow_flagging="never",
221
+ # title="aaa",description="aaa",article="aaa")
222
+ demo.launch()
223
 
 
 
224
 
 
225
 
226
 
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/data_level0.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:207a5df9a46016ace6d0ceb2102ed87cb5d858741dee310d15add55db6a2f72e
3
- size 3212000
 
 
 
 
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/header.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
3
- size 100
 
 
 
 
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/length.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc19b1997119425765295aeab72d76faa6927d4f83985d328c26f20468d6cc76
3
- size 4000
 
 
 
 
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/link_lists.bin DELETED
File without changes
chroma/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bb4311d4e7478489f9ee4fe2bdaa140ea342b7ab3a77dd14e62936aee60ede6
3
- size 1540096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:400671ea32a93c4b82d6a518a1f50021ecb8e356da6712b00bfb1dc35602b807
3
+ size 6164480
data/DSC_0105.jpg ADDED
data/riddles_data CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -5,4 +5,4 @@ chromadb
5
  sentence-transformers
6
  sentencepiece
7
  hanzidentifier
8
- hanziconv
 
5
  sentence-transformers
6
  sentencepiece
7
  hanzidentifier
8
+ chinese-converter