z00logist committed on
Commit
8a86c13
·
verified ·
1 Parent(s): 0147611

create app

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import typing as t
3
+
4
+ import gradio as gr
5
+ from huggingface_hub import InferenceClient
6
+
7
+
8
# Access token for the Hugging Face API, read from the environment at import
# time. A missing or empty value aborts start-up immediately.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if HF_API_TOKEN is None or HF_API_TOKEN == "":
    raise ValueError(
        "Error: Hugging Face API token is not set. "
        "Please set the HF_API_TOKEN environment variable."
    )

# Hub repository id of the model that performs the simplification.
MODEL_REPO_ID = "Qwen/Qwen2.5-72B-Instruct"
13
+
14
+
15
# System prompts keyed by simplification level. Each level has its own
# instruction set; all of them end by forbidding English in the output.
_SYSTEM_PROMPTS: t.Dict[int, str] = {
    1: (
        "You are a text simplification tool for the Russian language. "
        "You are given a text in Russian and you need to give a simple version of it in Russian. "
        "Your task is to simplify complex sentences into simple ones. "
        "Avoid cramming too many details into one sentence; distribute them across multiple sentences where needed. "
        "Rephrase sentences to remove participial and gerundial constructions. "
        "Where possible, replace passive voice with active voice. "
        "If a sentence consists of only a noun, add a verb. "
        "Replace rare or low-frequency words with more common ones. "
        "Where appropriate, remove or replace foreign words. "
        "Clarify ambiguous phrases by replacing them with more concrete, easily understandable words. "
        "Where possible, avoid words that have paronyms. "
        "Use only Russian, English is forbidden at any cost."
    ),
    2: (
        "You are a text simplification tool for the Russian language. "
        "You are given a text in Russian and you need to give a simple version of it in Russian. "
        "Simplify complex or compound sentences by breaking them into shorter ones, aiming for a sentence length of no more than seven words. "
        "Ensure each sentence contains only one idea. "
        "Avoid participial and gerundial constructions, and prefer active voice over passive voice. "
        "Keep essential information like names, nationalities, and roles. Do not remove important details. "
        "Remove unnecessary foreign words (like brand names) and replace rare or long words with simpler, shorter ones. "
        "Simplify ambiguous phrases by using more concrete, clear words. "
        "Remove minor details that do not add significant meaning, but ensure key information remains intact. "
        "Use only Russian, English is forbidden at any cost."
    ),
    3: (
        "You are a text simplification assistant for the Russian language. "
        "You are given a text in Russian and you need to give a simple version of it in Russian. "
        "Your task is to make the text as simple as possible. "
        "Each sentence should contain only one idea and be no longer than five words. "
        "Remove or replace foreign words (such as names, places, or brands), and avoid minor details. "
        "Eliminate numbers and remove any unnecessary details. "
        "Focus on using the nominative and genitive cases for nouns, and only the present or past tense for verbs. "
        "Avoid passive voice and inverted word order. "
        "Replace rare or low-frequency words with more common ones. "
        "Where possible, replace complex phrases with common expressions, clichés, or idioms. "
        # The trailing space below was missing, which glued this sentence to
        # the next one ("...as possible.Use only Russian...").
        "Remove any extraneous details (if it is possible to without removing original semantics of the sentence) and simplify ambiguous phrases as much as possible. "
        "Use only Russian, English is forbidden at any cost."
    ),
}


def format_messages(user_text: str, level: int) -> t.List[t.Dict[str, str]]:
    """Build the chat messages for a simplification request.

    Args:
        user_text: The Russian text to simplify; it is embedded verbatim in
            the user message.
        level: Simplification level; must be 1, 2 or 3. It selects the
            system prompt.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system prompt for ``level`` followed by the user request.

    Raises:
        ValueError: If ``level`` is not one of the supported levels.
    """
    try:
        system_message = _SYSTEM_PROMPTS[level]
    except KeyError:
        # Fail with a clear message instead of an UnboundLocalError further down.
        supported = ", ".join(str(key) for key in sorted(_SYSTEM_PROMPTS))
        raise ValueError(
            f"Unsupported simplification level: {level!r}. Expected one of: {supported}."
        ) from None

    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Simplify the following text in Russian: {user_text}"},
    ]
65
+
66
+
67
def simplify_text(user_text: str, level: int) -> str:
    """Simplify Russian text by querying the hosted chat model.

    Args:
        user_text: The Russian text to simplify.
        level: Simplification level forwarded to ``format_messages``.

    Returns:
        The text content of the model's first reply, or an empty string when
        ``user_text`` is empty or whitespace-only.
    """
    # Nothing to simplify: skip the network round-trip for blank input.
    if not user_text or not user_text.strip():
        return ""

    client = InferenceClient(token=HF_API_TOKEN)
    messages = format_messages(user_text, level)

    # The prompt is a list of role-tagged chat messages, so it has to go
    # through the chat-completion API. text_generation() expects a single
    # prompt string and accepts no `inputs`/`parameters` keyword arguments,
    # so the previous call failed with a TypeError.
    response = client.chat_completion(
        messages=messages,
        model=MODEL_REPO_ID,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
    )
    return response.choices[0].message.content
77
+
78
+
79
# NOTE(review): the nesting below is reconstructed from blank lines and syntax,
# because the original indentation was not preserved — confirm the layout.
with gr.Blocks() as demo:
    # Page heading and a short description of what the tool does.
    gr.Markdown("# Text Simplification Tool for People with Aphasia")
    gr.Markdown(
        "Simplify Russian text into easier-to-understand Russian "
        "based on the chosen level of simplification."
    )

    # Inputs: the source text and the simplification level selector.
    with gr.Row():
        user_text = gr.Textbox(
            label="Input Text (Russian)",
            lines=5,
            placeholder="Введите текст здесь...",
        )
        level = gr.Radio(
            choices=[1, 2, 3],
            value=1,
            label="Simplification Level",
            info=(
                "Choose the level of simplification:\n"
                "1 = Basic, 2 = Intermediate, 3 = Advanced"
            ),
        )

    # Read-only output box and the button that triggers simplification.
    output_text = gr.Textbox(label="Simplified Text", lines=5, interactive=False)
    simplify_button = gr.Button("Simplify")

    # Clicking the button passes the text and level to simplify_text and
    # shows its return value in the output box.
    simplify_button.click(
        fn=simplify_text,
        inputs=[user_text, level],
        outputs=output_text,
    )


demo.launch()