Spaces:
Runtime error
Runtime error
create app
Browse files
app.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import typing as t
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
from huggingface_hub import InferenceClient
|
6 |
+
|
7 |
+
|
8 |
+
# Access token for the Hugging Face Inference API, taken from the environment.
# The app refuses to start without it (missing and empty are treated alike).
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if HF_API_TOKEN is None or HF_API_TOKEN == "":
    raise ValueError(
        "Error: Hugging Face API token is not set. "
        "Please set the HF_API_TOKEN environment variable."
    )

# Hub repository id of the model that performs the simplification.
MODEL_REPO_ID = "Qwen/Qwen2.5-72B-Instruct"
|
13 |
+
|
14 |
+
|
15 |
+
def format_messages(user_text: str, level: int) -> t.List[t.Dict[str, str]]:
    """Build the chat messages for a simplification request.

    Args:
        user_text: The Russian text to simplify.
        level: Simplification level; 1 is the lightest rewrite, 3 the
            strongest.

    Returns:
        A two-element list of chat messages: the system prompt for the
        chosen level, followed by the user message wrapping ``user_text``.

    Raises:
        ValueError: If ``level`` is not 1, 2, or 3.
    """
    if level == 1:
        system_message = (
            "You are a text simplification tool for the Russian language. "
            "You are given a text in Russian and you need to give a simple version of it in Russian. "
            "Your task is to simplify complex sentences into simple ones. "
            "Avoid cramming too many details into one sentence; distribute them across multiple sentences where needed. "
            "Rephrase sentences to remove participial and gerundial constructions. "
            "Where possible, replace passive voice with active voice. "
            "If a sentence consists of only a noun, add a verb. "
            "Replace rare or low-frequency words with more common ones. "
            "Where appropriate, remove or replace foreign words. "
            "Clarify ambiguous phrases by replacing them with more concrete, easily understandable words. "
            "Where possible, avoid words that have paronyms. "
            "Use only Russian, English is forbidden at any cost."
        )
    elif level == 2:
        system_message = (
            "You are a text simplification tool for the Russian language. "
            "You are given a text in Russian and you need to give a simple version of it in Russian. "
            "Simplify complex or compound sentences by breaking them into shorter ones, aiming for a sentence length of no more than seven words. "
            "Ensure each sentence contains only one idea. "
            "Avoid participial and gerundial constructions, and prefer active voice over passive voice. "
            "Keep essential information like names, nationalities, and roles. Do not remove important details. "
            "Remove unnecessary foreign words (like brand names) and replace rare or long words with simpler, shorter ones. "
            "Simplify ambiguous phrases by using more concrete, clear words. "
            "Remove minor details that do not add significant meaning, but ensure key information remains intact. "
            "Use only Russian, English is forbidden at any cost."
        )
    elif level == 3:
        system_message = (
            "You are a text simplification assistant for the Russian language. "
            "You are given a text in Russian and you need to give a simple version of it in Russian. "
            "Your task is to make the text as simple as possible. "
            "Each sentence should contain only one idea and be no longer than five words. "
            "Remove or replace foreign words (such as names, places, or brands), and avoid minor details. "
            "Eliminate numbers and remove any unnecessary details. "
            "Focus on using the nominative and genitive cases for nouns, and only the present or past tense for verbs. "
            "Avoid passive voice and inverted word order. "
            "Replace rare or low-frequency words with more common ones. "
            "Where possible, replace complex phrases with common expressions, clichés, or idioms. "
            # The trailing space keeps this sentence from fusing with the next
            # literal when the adjacent strings are concatenated.
            "Remove any extraneous details (if it is possible to without removing original semantics of the sentence) and simplify ambiguous phrases as much as possible. "
            "Use only Russian, English is forbidden at any cost."
        )
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unsupported simplification level: {level!r}; expected 1, 2, or 3."
        )

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Simplify the following text in Russian: {user_text}"},
    ]
    return messages
|
65 |
+
|
66 |
+
|
67 |
+
def simplify_text(user_text: str, level: int) -> str:
    """Simplify Russian text with the hosted chat model.

    Args:
        user_text: The Russian text to simplify.
        level: Simplification level forwarded to ``format_messages``.

    Returns:
        The model's simplified text, or an empty string when ``user_text``
        is empty or only whitespace.
    """
    # Nothing to simplify: skip the remote call entirely.
    if not user_text or not user_text.strip():
        return ""

    client = InferenceClient(token=HF_API_TOKEN)
    messages = format_messages(user_text, level)

    # Role-tagged messages go through the chat-completion endpoint;
    # text_generation takes a plain prompt string and accepts neither
    # `inputs` nor `parameters` keyword arguments.
    response = client.chat_completion(
        messages=messages,
        model=MODEL_REPO_ID,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
    )
    return response.choices[0].message.content
|
77 |
+
|
78 |
+
|
79 |
+
# Gradio UI: input text and level selector, an output box, and a button that
# runs simplify_text on click.
# NOTE(review): nesting was reconstructed from a flattened listing — confirm
# that the output box and button are meant to sit outside the input row.
with gr.Blocks() as demo:
    gr.Markdown("# Text Simplification Tool for People with Aphasia")
    gr.Markdown(
        "Simplify Russian text into easier-to-understand Russian based on the chosen level of simplification."
    )

    with gr.Row():
        user_text = gr.Textbox(
            label="Input Text (Russian)",
            lines=5,
            placeholder="Введите текст здесь...",
        )
        level = gr.Radio(
            choices=[1, 2, 3],
            value=1,
            label="Simplification Level",
            info="Choose the level of simplification:\n1 = Basic, 2 = Intermediate, 3 = Advanced",
        )

    output_text = gr.Textbox(label="Simplified Text", lines=5, interactive=False)
    simplify_button = gr.Button("Simplify")

    # Wire the button: (input text, level) -> simplified text.
    simplify_button.click(
        fn=simplify_text,
        inputs=[user_text, level],
        outputs=output_text,
    )


demo.launch()
|