Spaces:

javiercha
/

HanmunRoBERTa_century

Runtime error

App Files Files Community

javiercha committed on Mar 29, 2024

Commit

ae0961d

verified ·

1 Parent(s): ab28271

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -4

app.py CHANGED Viewed

@@ -2,10 +2,10 @@ import gradio as gr
 import os
 from transformers import pipeline
 import re
-import pandas as pd
 def preprocess_text(text):
-    text = re.sub(r'[^\u4e00-\u9fff]', '', text)
     return text
 os.environ['HF_TOKEN'] = os.environ['Century_Test']
@@ -16,7 +16,21 @@ def predict_century(text):
     preprocessed_input = preprocess_text(text)
     result = nlp(preprocessed_input)
     result.sort(key=lambda x: x['score'], reverse=True)
-    return {item['label']: item['score'] for item in result}
-iface = gr.Interface(fn=predict_century, inputs="text", outputs="json")
 iface.launch()

 import os
 from transformers import pipeline
 import re
+import matplotlib.pyplot as plt
 def preprocess_text(text):
+    text = re.sub(r'[^\u4e00-\u9fff]', '', text)
     return text
 os.environ['HF_TOKEN'] = os.environ['Century_Test']
     preprocessed_input = preprocess_text(text)
     result = nlp(preprocessed_input)
     result.sort(key=lambda x: x['score'], reverse=True)
+    scores = {f"{i}th century": 0 for i in range(15, 20)}
+    for item in result:
+        scores[f"{item['label']}th century"] = item['score']
+    scores_text = "\n".join([f"{century}: {score*100:.2f}%" for century, score in scores.items()])
+    return preprocessed_input, scores_text
+iface = gr.Interface(fn=predict_century,
+                     inputs=gr.Textbox(label="Enter your text here:"),
+                     outputs=[
+                         gr.Textbox(label="Processed text (non-Sinitic and special characters removed):"),
+                         "text"
+                     ],
+                     description="This Gradio web app uses the HanmunRoBERTa model \(March 2024 version\) to predict the century during which the inputted text was written. HanmunRoBERTa is a transformer model trained exclusively on literary Sinitic corpora written by Koreans before the 20th century. Please note that this is an early prototype optimised using the Veritable Records and the Diary of the Royal Secretariat data sets. The model is likely overfitted and requires fine-tuning and refinement.")
 iface.launch()