Spaces:

ytyeung
/

chinese_lantern_riddles

Runtime error

App Files Files

3v324v23 committed on Sep 26, 2023

Commit

9f3d19f

1 Parent(s): 804bcc2

first release

Browse files

Files changed (9) hide show

app.py +112 -30
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/data_level0.bin +0 -3
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/header.bin +0 -3
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/length.bin +0 -3
chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/link_lists.bin +0 -0
chroma/chroma.sqlite3 +2 -2
data/DSC_0105.jpg +0 -0
data/riddles_data +0 -0
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -4,10 +4,10 @@ from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 import gradio as gr
-import torch
 import hanzidentifier
-from hanziconv import HanziConv
 # %%
 #Load the LLM model and pipeline directly
@@ -32,6 +32,7 @@ huggingface_embeddings= HuggingFaceEmbeddings(
 # %%
 persist_directory = 'chroma/'
 vectordb = Chroma(embedding_function=huggingface_embeddings,persist_directory=persist_directory)
 # %%
 # helper functions for prompt processing for this LLM
@@ -46,8 +47,9 @@ def postprocess(text):
 # get answer from LLM with prompt input
 def answer(text,context=""):
-  text = f"{context}\n{text}\n答案:"
   text = text.strip()
   text = preprocess(text)
   out_text = pipe(text)
@@ -64,17 +66,22 @@ def helper_rag(text):
     return context
 # helper function for prompt
-def helper_text(text_input):
     chinese_type = "simplified"
     if hanzidentifier.is_traditional(text_input):
-        text_input = HanziConv.toSimplified(text_input)
         chinese_type = "traditional"
     if not any(c in text_input for c in ["猜", "打"]):
-        return "请给一个提示，提示格式，例子：猜一水果。"
-    text=f"""谜面：{text_input} 谜底：
     """
     context = helper_rag(text)
@@ -82,63 +89,138 @@ def helper_text(text_input):
     output = answer(text,context=context)
     if chinese_type == "traditional":
-        output = HanziConv.toTraditional(output)
-    return output
 # Gradio function for configure the language of UI
-def change_language(radio):
     if radio == "简体中文":
         index = 0
-        text_input_update=text_input.update(value = HanziConv.toSimplified(text_input.value), label = text_input_label[index])
-        text_output_update=text_output.update(value = HanziConv.toSimplified(text_output.value),label = text_output_label[index])
     elif radio == "繁體中文":
         index = 1
-        text_input_update=text_input.update(value = HanziConv.toTraditional(text_input.value),label = text_input_label[index])
-        text_output_update=text_output.update(value = HanziConv.toTraditional(text_output.value),label = text_output_label[index])
     elif radio == "English":
         index = 2
-        text_input_update=text_input.update(label = text_input_label[index])
-        text_output_update=text_output.update(label = text_output_label[index])
     else:
         index = 0
-        text_input_update=text_input.update(label = text_input_label[index])
-        text_output_update=text_output.update(label = text_output_label[index])
-    clear_btn_update = clear_btn.update(value = clear_label[index])
-    submit_btn_update = submit_btn.update(value = submit_label[index])
-    return [text_input_update,text_output_update,clear_btn_update,submit_btn_update]
-# %%
-# index==0: Simpified Chinese
-# index==1: Traditional Chinese
-# index==2: English
 text_input_label=["谜面","謎面","Riddle"]
 text_output_label=["谜底","謎底","Answer"]
 clear_label = ["清除","清除","Clear"]
 submit_label = ["提交","提交","Submit"]
 with gr.Blocks() as demo:
     index = 0
     radio = gr.Radio(
         ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
     )
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(label=text_input_label[index],
                          value="小家伙穿黄袍，花丛中把房造。飞到西来飞到东，人人夸他爱劳动。（猜一动物）", lines = 2)
             with gr.Row():
-                clear_btn = gr.ClearButton(value=clear_label[index],components=text_input)
                 submit_btn = gr.Button(value=submit_label[index], variant = "primary")
         with gr.Column():
             text_output = gr.Textbox(label=text_output_label[index])
-    submit_btn.click(fn=helper_text, inputs=text_input, outputs=text_output)
-    radio.change(fn=change_language,inputs=radio,outputs=[text_input,text_output,clear_btn,submit_btn])
-demo.launch()

 from langchain.embeddings import HuggingFaceEmbeddings
 import gradio as gr
 import hanzidentifier
+import re
+import chinese_converter
 # %%
 #Load the LLM model and pipeline directly
 # %%
 persist_directory = 'chroma/'
 vectordb = Chroma(embedding_function=huggingface_embeddings,persist_directory=persist_directory)
+print(vectordb._collection.count())
 # %%
 # helper functions for prompt processing for this LLM
 # get answer from LLM with prompt input
 def answer(text,context=""):
+  text = f"{context}\n{text}\n谜底："
   text = text.strip()
   text = preprocess(text)
   out_text = pipe(text)
     return context
 # helper function for prompt
def helper_text(text_input,radio=None):
    """Turn a riddle typed by the user into a prompt and return the model's answer.

    Args:
        text_input: The riddle text. It must contain a hint introduced by
            "猜" or "打"; the English word "hint" (any case) is rewritten
            to "猜" before the check.
        radio: The UI language currently selected in the radio group. It is
            only used to decide whether the warning message is shown in
            Traditional Chinese. Defaults to None.

    Returns:
        The first whitespace-delimited token of the model output (an empty
        string if the output holds no token), converted to Traditional
        Chinese when the riddle was typed in Traditional Chinese. If the
        riddle carries no hint, a warning string is returned instead.
    """
    chinese_type = "simplified"
    if hanzidentifier.is_traditional(text_input):
        # Normalise to Simplified so the riddle matches the Simplified prompt
        # template built below; the answer is converted back before returning.
        # (Converting Traditional text *to Traditional* here was a no-op.)
        text_input = chinese_converter.to_simplified(text_input)
        chinese_type = "traditional"
    # Accept the English keyword "hint" as a synonym for the "猜" marker.
    text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
    if not any(c in text_input for c in ["猜", "打"]):
        warning = "请给一个提示，提示格式，例子：猜一水果，打一字。"
        if chinese_type == "traditional" or radio == "繁體中文":
            warning = chinese_converter.to_traditional(warning)
        return warning
    text=f"""谜面：{text_input}
    """
    context = helper_rag(text)
    output = answer(text,context=context)
    if chinese_type == "traditional":
        output = chinese_converter.to_traditional(output)
    # Keep only the first token. str.split() ignores leading whitespace, so a
    # padded model output no longer yields an empty answer.
    tokens = output.split()
    return tokens[0] if tokens else ""
 # Gradio function for configure the language of UI
def change_language(radio,text_input,text_output,clear_btn,submit_btn,markdown, markdown_msg1):
    """Build the Gradio updates that switch the UI to the language picked in ``radio``.

    Args:
        radio: Selected language ("简体中文", "繁體中文" or "English").
        text_input: Current content of the riddle textbox.
        text_output: Current content of the answer textbox.
        clear_btn: Current value of the clear button (not read here).
        submit_btn: Current value of the submit button (not read here).
        markdown: Current text of the title markdown.
        markdown_msg1: Current text of the description/disclaimer markdown.

    Returns:
        A list of six updates, in order: riddle textbox, answer textbox,
        clear button, submit button, title markdown, description markdown.
    """
    # Label index: 0 = Simplified Chinese, 1 = Traditional Chinese, 2 = English.
    if radio == "繁體中文":
        index = 1
        convert = chinese_converter.to_traditional
        rewrite_textboxes = True
    elif radio == "English":
        index = 2
        convert = None
        rewrite_textboxes = False
    else:
        # The explicit Simplified choice and any unexpected value share the
        # Simplified labels; only the explicit choice rewrites textbox content.
        index = 0
        convert = chinese_converter.to_simplified
        rewrite_textboxes = radio == "简体中文"

    if rewrite_textboxes:
        riddle_box = gr.Textbox.update(value = convert(text_input), label = text_input_label[index])
        answer_box = gr.Textbox.update(value = convert(text_output), label = text_output_label[index])
    else:
        riddle_box = gr.Textbox.update(label = text_input_label[index])
        answer_box = gr.Textbox.update(label = text_output_label[index])

    if convert is None:
        # English: the markdown text is passed through unchanged.
        title_md = gr.Markdown.update(value = markdown)
        message_md = gr.Markdown.update(value = markdown_msg1)
    else:
        title_md = gr.Markdown.update(value = convert(markdown))
        message_md = gr.Markdown.update(value = convert(markdown_msg1))

    clear_update = gr.ClearButton.update(value = clear_label[index])
    submit_update = gr.Button.update(value = submit_label[index])
    return [riddle_box, answer_box, clear_update, submit_update, title_md, message_md]
def clear_text():
    """Return two textbox updates that reset their values to None.

    Returns:
        A list of two ``gr.Textbox.update(value=None)`` results, one per
        output component the caller wires this handler to.
    """
    return [gr.Textbox.update(value=None) for _ in range(2)]
+# %%
 text_input_label=["谜面","謎面","Riddle"]
 text_output_label=["谜底","謎底","Answer"]
 clear_label = ["清除","清除","Clear"]
 submit_label = ["提交","提交","Submit"]
+# css = """
+# #markdown { background-image: url("file/data/DSC_0105.jpg");
+#             background-size: cover;
+#           }
+# """
 with gr.Blocks() as demo:
     index = 0
     radio = gr.Radio(
         ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
     )
+    markdown = gr.Markdown(
+            """
+            # Chinese Lantern Riddles Solver with LLM
+            ## 用语言大模型来猜灯谜
+            """,elem_id="markdown")
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(label=text_input_label[index],
                          value="小家伙穿黄袍，花丛中把房造。飞到西来飞到东，人人夸他爱劳动。（猜一动物）", lines = 2)
             with gr.Row():
+                clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
                 submit_btn = gr.Button(value=submit_label[index], variant = "primary")
+            markdown_msg1 = gr.Markdown(
+                """
+                灯谜是中华文化特色文娱活动，自北宋盛行。每年逢正月十五元宵节，或是八月十五中秋节，将谜语贴在花灯上，让大家可一起猜谜。
+                Lantern riddle is a traditional Chinese cultural activity. Being popular since the Song Dynasty (960-1276), it \
+                is held in the Lantern Festival (15th day of the first lunar month) or the Mid-Autumn Festival (15th day of \
+                the eighth lunar month). When people are viewing the flower lanterns, they can guess the riddles on the lanterns together.
+                ----
+                # 声明 Disclaimer
+                本应用输出的文本为机器基于模型生成的结果，不代表任何人观点，请谨慎辨别和参考。请在法律允许的范围内使用。
+                本应用调用了 [ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) 对话语言大模型，\
+                使用本应用前请务必阅读和同意遵守其[使用授权许可证](https://huggingface.co/ClueAI/ChatYuan-large-v2/blob/main/LICENSE)。
+                本应用仅供非商业用途。
+                The outputs of this application are machine-generated with a statistical model. \
+                The outputs do not reflect any opinions of any human subjects. You must identify the outputs in caution. \
+                It is your responsibility to decide whether to accept the outputs. You must use the application in obedience to the Law.
+                This application utilizes [ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) \
+                Conversational Large Language Model. Before using this application, you must read and accept to follow \
+                the [LICENSE](https://huggingface.co/ClueAI/ChatYuan-large-v2/blob/main/LICENSE).
+                This application is for non-commercial use only.
+                """
+            )
         with gr.Column():
             text_output = gr.Textbox(label=text_output_label[index])
+            markdown_msg2 = gr.Markdown(
+            """
+            <br/>
+            <br/>
+            <br/>
+            <br/>
+            ![lantern](file/data/DSC_0105.jpg)
+            """)
+    submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
+    clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
+    radio.change(fn=change_language,inputs=[radio,text_input,text_output,clear_btn,submit_btn,markdown, markdown_msg1],
+                 outputs=[text_input,text_output,clear_btn,submit_btn, markdown, markdown_msg1])
+    #demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
+    #                      flagging_options=["Inappropriate"],allow_flagging="never",
+    #                      title="aaa",description="aaa",article="aaa")
+demo.launch()

chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/data_level0.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:207a5df9a46016ace6d0ceb2102ed87cb5d858741dee310d15add55db6a2f72e
-size 3212000

chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/header.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
-size 100

chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/length.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fc19b1997119425765295aeab72d76faa6927d4f83985d328c26f20468d6cc76
-size 4000

chroma/b2eaf580-4fb0-4664-98fc-9fb9dabaa035/link_lists.bin DELETED Viewed

File without changes

chroma/chroma.sqlite3 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bb4311d4e7478489f9ee4fe2bdaa140ea342b7ab3a77dd14e62936aee60ede6
-size 1540096

 version https://git-lfs.github.com/spec/v1
+oid sha256:400671ea32a93c4b82d6a518a1f50021ecb8e356da6712b00bfb1dc35602b807
+size 6164480

data/DSC_0105.jpg ADDED Viewed

data/riddles_data CHANGED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -5,4 +5,4 @@ chromadb
 sentence-transformers
 sentencepiece
 hanzidentifier
-hanziconv

 sentence-transformers
 sentencepiece
 hanzidentifier
+chinese-converter