Spaces:

raynardj
/

duguwen-classical-chinese-to-morden-translate

Runtime error

App Files Files Community

raynardj committed on Jan 4, 2022

Commit

e0a6c5a

1 Parent(s): cf33f35

🪁 a decent version

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +78 -53

README.md CHANGED Viewed

@@ -11,5 +11,5 @@ pinned: true
 ## 随无涯
 > 强大的Transformer翻译 + 殆知阁的文言文库
-* 朕亲自下厨的[🤗 翻译模型](https://github.com/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 训练笔记](https://github.com/raynardj/yuan)
 * 📚 书籍来自 [殆知阁](http://www.daizhige.org/)，只为了便于展示翻译，喜欢请访问网站，书籍[github文件链接](https://github.com/garychowcmu/daizhigev20)

 ## 随无涯
 > 强大的Transformer翻译 + 殆知阁的文言文库
+* 朕亲自下厨的[🤗 翻译模型](https://huggingface.co/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 训练笔记](https://github.com/raynardj/yuan)
 * 📚 书籍来自 [殆知阁](http://www.daizhige.org/)，只为了便于展示翻译，喜欢请访问网站，书籍[github文件链接](https://github.com/garychowcmu/daizhigev20)

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ import torch
 st.set_page_config(layout="wide")
 @st.cache(allow_output_mutation=True)
 def load_model():
     from transformers import (
@@ -53,13 +52,15 @@ def get_file_df():
 file_df = get_file_df()
-st.sidebar.title("【随无涯】")
 st.sidebar.markdown("""
-* 朕亲自下厨的[🤗 翻译模型](https://github.com/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 训练笔记](https://github.com/raynardj/yuan)
-* 📚 书籍来自 [殆知阁](http://www.daizhige.org/)，文本的[github api](https://github.com/garychowcmu/daizhigev20)
 """)
 c2 = st.container()
 c = st.container()
 USER_ID = st.secrets["USER_ID"]
@@ -84,7 +85,7 @@ def show_file_size(size: int):
     else:
         return f"{size/1024//1024} MB"
 def fetch_file(path):
     # reading from local path first
     if (Path("data")/path).exists():
@@ -104,6 +105,7 @@ def fetch_file(path):
         r.raise_for_status()
 def fetch_from_df(sub_paths: str = ""):
     sub_df = file_df.copy()
     for idx, step in enumerate(sub_paths):
@@ -112,71 +114,94 @@ def fetch_from_df(sub_paths: str = ""):
             return None
     return list(sub_df[f"col_{len(sub_paths)}"].unique())
-# root_data = fetch_from_github()
-if 'pathway' in st.session_state:
-    pass
-else:
-    st.session_state.pathway = []
-path_text = st.sidebar.text("/".join(st.session_state.pathway))
-def reset_path():
-    st.session_state.pathway = []
-    path_text.text(st.session_state.pathway)
-if st.sidebar.button("回到根目录"):
-    reset_path()
-st.session_state.translating = False
-def display_tree():
-    sublist = fetch_from_df(st.session_state.pathway)
-    dropdown = st.sidebar.selectbox("【选书】", options=sublist)
-    with st.spinner("加载中..."):
-        st.session_state.pathway.append(dropdown)
-        if dropdown.endswith('.txt'):
-            filepath = "/".join(st.session_state.pathway)
-            file_size = file_size_map[filepath]
-            with st.spinner(f"loading file:{filepath},({show_file_size(file_size)})"):
-                # if file size is too large, we will not load it
-                if file_size > 3*1024*1024:
-                    urlpath = filepath.replace(".txt", ".html")
-                    dzg = f"http://www.daizhige.org/{urlpath}"
-                    st.markdown(f"文件太大，[前往殆知阁页面]({dzg}), 或挑挑其他的书吧")
-                    reset_path()
-                    return None
-                path_text.text(filepath)
-                text = fetch_file(filepath)
-                # create markdown with max heights
-                c.markdown(
-                    f"""<pre style='max-height:300px;overflow-y:auto'>{text}</pre>""", unsafe_allow_html=True
-                    )
-                reset_path()
-        else:
-            sub_list = fetch_from_df(
-                st.session_state.pathway)
-            path_text.text("/".join(st.session_state.pathway))
-            display_tree()
-if st.session_state.translating == False:
-    display_tree()
 def translate_text():
     st.session_state.translating = True
-    if c2.button("【翻译】"):
         if cc:
             if len(cc) > 168:
-                c2.write(f"句子太长，最多168个字符")
             else:
                 c2.markdown(f"""```{inference(cc)}```""")
         else:
-            c2.write("请输入文本")
     st.session_state.translating = False
-cc = c2.text_area("【输入文本】", height=150)
 translate_text()

 st.set_page_config(layout="wide")
 @st.cache(allow_output_mutation=True)
 def load_model():
     from transformers import (
 file_df = get_file_df()
+st.sidebar.title("【隨無涯】")
 st.sidebar.markdown("""
+* 朕自庖[🤗 模型](https://huggingface.co/raynardj/wenyanwen-ancient-translate-to-modern), [⭐️ 訓習處](https://github.com/raynardj/yuan)
+* 📚 充棟汗牛，取自[殆知閣](http://www.daizhige.org/)，[github api](https://github.com/garychowcmu/daizhigev20)
 """)
 c2 = st.container()
+c2.write("The entirety of ancient Chinese literature, with a modern translator at your side.")
+st.markdown("""---""")
 c = st.container()
 USER_ID = st.secrets["USER_ID"]
     else:
         return f"{size/1024//1024} MB"
+@st.cache(max_entries=100, allow_output_mutation=True)
 def fetch_file(path):
     # reading from local path first
     if (Path("data")/path).exists():
         r.raise_for_status()
+@st.cache(allow_output_mutation=True, max_entries=100)
 def fetch_from_df(sub_paths: str = ""):
     sub_df = file_df.copy()
     for idx, step in enumerate(sub_paths):
             return None
     return list(sub_df[f"col_{len(sub_paths)}"].unique())
+def show_filepath(filepath: str):
+    text = fetch_file(filepath)
+    c.markdown(
+            f"""<pre style='max-height:300px;overflow-y:auto'>{text}</pre>""", unsafe_allow_html=True)
+if st.sidebar.selectbox(label="何以尋跡 How to search",options=["以類尋書 category","書名求書 search"])=="以類尋書 category":
+    st.session_state.translating = False
+    # root_data = fetch_from_github()
+    if 'pathway' in st.session_state:
+        pass
+    else:
+        st.session_state.pathway = []
+    path_text = st.sidebar.text("/".join(st.session_state.pathway))
+    def reset_path():
+        st.session_state.pathway = []
+        path_text.text(st.session_state.pathway)
+    if st.sidebar.button("還至初錄(back to root)"):
+        reset_path()
+    def display_tree():
+        sublist = fetch_from_df(st.session_state.pathway)
+        dropdown = st.sidebar.selectbox("【擇書 choose】", options=sublist)
+        with st.spinner("書非借不能讀也..."):
+            st.session_state.pathway.append(dropdown)
+            if dropdown.endswith('.txt'):
+                filepath = "/".join(st.session_state.pathway)
+                file_size = file_size_map[filepath]
+                with st.spinner(f"Load 載文:{filepath},({show_file_size(file_size)})"):
+                    # if file size is too large, we will not load it
+                    if file_size > 3*1024*1024:
+                        urlpath = filepath.replace(".txt", ".html")
+                        dzg = f"http://www.daizhige.org/{urlpath}"
+                        st.markdown(f"File too big 其文碩而難載，不能為之，[往 殆知閣]({dzg}), 或擇他書")
+                        reset_path()
+                        return None
+                    path_text.text(filepath)
+                    text = fetch_file(filepath)
+                    # create markdown with max heights
+                    c.markdown(
+                        f"""<pre style='max-height:300px;overflow-y:auto'>{text}</pre>""", unsafe_allow_html=True
+                        )
+                    reset_path()
+            else:
+                sub_list = fetch_from_df(
+                    st.session_state.pathway)
+                path_text.text("/".join(st.session_state.pathway))
+                display_tree()
+    if st.session_state.translating == False:
+        display_tree()
+else:
+    def search_kw():
+        result = file_df[file_df.filepath.str.contains(st.session_state.kw)].reset_index(drop=True)
+        if len(result) == 0:
+            st.sidebar.write(f"尋之不得:{st.session_state.kw}")
+        else:
+            filepath = st.sidebar.selectbox("選一書名", options=list(result.head(15).filepath))
+            show_filepath(filepath)
+    def loading_with_search():
+        kw = st.sidebar.text_input("書名求書 Search", value="楞伽经")
+        st.session_state.kw = kw
+        search_kw()
+    if st.session_state.translating == False:
+        loading_with_search()
 def translate_text():
     st.session_state.translating = True
+    if c2.button("【曉文達義 Translate】"):
         if cc:
             if len(cc) > 168:
+                c2.write(f"句甚長 不得過百又六十八字 Sentence too long, should be less than 168 characters")
             else:
                 c2.markdown(f"""```{inference(cc)}```""")
         else:
+            c2.write("【入難曉之文字 Please input sentence for translating】")
     st.session_state.translating = False
+cc = c2.text_area("【入難曉之文字 Input sentence】", height=150)
 translate_text()