|
import os |
|
import typing as t |
|
|
|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
# The Hugging Face access token is read from the environment. Fail at import
# time when it is missing or empty, so a misconfigured deployment stops
# immediately instead of failing on the first inference request.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if not HF_API_TOKEN:
    raise ValueError(
        "Error: Hugging Face API token is not set. "
        "Please set the HF_API_TOKEN environment variable."
    )

# Repository id of the chat model used for every simplification request.
MODEL_REPO_ID = "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
|
# System prompts keyed by simplification level (1 = lightest, 3 = strongest).
_SYSTEM_MESSAGES: t.Dict[int, str] = {
    1: (
        "You are a text simplification tool for the Russian language. "
        "You are given a text in Russian and you need to give a simple version of it in Russian. "
        "Your task is to simplify complex sentences into simple ones. "
        "Avoid cramming too many details into one sentence; distribute them across multiple sentences where needed. "
        "Rephrase sentences to remove participial and gerundial constructions. "
        "Where possible, replace passive voice with active voice. "
        "If a sentence consists of only a noun, add a verb. "
        "Replace rare or low-frequency words with more common ones. "
        "Where appropriate, remove or replace foreign words. "
        "Clarify ambiguous phrases by replacing them with more concrete, easily understandable words. "
        "Where possible, avoid words that have paronyms. "
        "Use only Russian, English is forbidden at any cost."
    ),
    2: (
        "You are a text simplification tool for the Russian language. "
        "You are given a text in Russian and you need to give a simple version of it in Russian. "
        "Simplify complex or compound sentences by breaking them into shorter ones, aiming for a sentence length of no more than seven words. "
        "Ensure each sentence contains only one idea. "
        "Avoid participial and gerundial constructions, and prefer active voice over passive voice. "
        "Keep essential information like names, nationalities, and roles. Do not remove important details. "
        "Remove unnecessary foreign words (like brand names) and replace rare or long words with simpler, shorter ones. "
        "Simplify ambiguous phrases by using more concrete, clear words. "
        "Remove minor details that do not add significant meaning, but ensure key information remains intact. "
        "Use only Russian, English is forbidden at any cost."
    ),
    3: (
        "You are a text simplification assistant for the Russian language. "
        "You are given a text in Russian and you need to give a simple version of it in Russian. "
        "Your task is to make the text as simple as possible. "
        "Each sentence should contain only one idea and be no longer than five words. "
        "Remove or replace foreign words (such as names, places, or brands), and avoid minor details. "
        "Eliminate numbers and remove any unnecessary details. "
        "Focus on using the nominative and genitive cases for nouns, and only the present or past tense for verbs. "
        "Avoid passive voice and inverted word order. "
        "Replace rare or low-frequency words with more common ones. "
        "Where possible, replace complex phrases with common expressions, clichés, or idioms. "
        # The trailing space keeps this sentence separate from the next one
        # once the adjacent literals are concatenated.
        "Remove any extraneous details (if it is possible to without removing original semantics of the sentence) and simplify ambiguous phrases as much as possible. "
        "Use only Russian, English is forbidden at any cost."
    ),
}


def format_messages(user_text: str, level: int) -> t.List[t.Dict[str, str]]:
    """Build the chat messages for one simplification request.

    Args:
        user_text: The Russian text to simplify. It is embedded verbatim in
            the user message.
        level: Simplification level; must be 1, 2 or 3. It selects which
            system prompt is sent.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dictionaries:
        the system message for the chosen level, followed by the user message.

    Raises:
        ValueError: If ``level`` is not one of the supported levels.
    """
    try:
        system_message = _SYSTEM_MESSAGES[level]
    except (KeyError, TypeError):
        # Report an unknown level explicitly rather than failing later on an
        # unbound local variable.
        supported = ", ".join(str(key) for key in sorted(_SYSTEM_MESSAGES))
        raise ValueError(
            f"Unsupported simplification level: {level!r}. "
            f"Expected one of: {supported}."
        ) from None

    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Simplify the following text in Russian: {user_text}"},
    ]
|
|
|
|
|
def simplify_text(user_text: str, level: int) -> str:
    """Simplify Russian text by sending it to the hosted chat model.

    Args:
        user_text: The Russian text to simplify.
        level: Simplification level forwarded to ``format_messages``.

    Returns:
        The content of the first completion choice, or the string
        ``"Error: No valid response generated."`` when the response has no
        usable choice or the choice carries no content.
    """
    no_response = "Error: No valid response generated."

    client = InferenceClient(model=MODEL_REPO_ID, token=HF_API_TOKEN)
    messages = format_messages(user_text, level)

    response = client.chat_completion(messages, max_tokens=512)

    # A missing response, a missing `choices` attribute and an empty list are
    # all treated alike; indexing an empty list would raise IndexError.
    choices = getattr(response, "choices", None)
    if not choices:
        return no_response

    content = choices[0].message.content
    # Content can be absent on a choice; never return None to the UI.
    if content is None:
        return no_response
    return content
|
|
|
|
|
|
|
# Assemble the web UI: a heading, an input row (text box and level selector),
# an output box, and a button that runs the simplification.
with gr.Blocks() as demo:
    gr.Markdown("# Text Simplification Tool for People with Aphasia")
    gr.Markdown(
        "Simplify Russian text into easier-to-understand Russian based on the chosen level of simplification."
    )

    # Inputs sit side by side in a single row.
    with gr.Row():
        user_text = gr.Textbox(
            label="Input Text (Russian)",
            lines=5,
            placeholder="Введите текст здесь...",
        )
        level = gr.Radio(
            choices=[1, 2, 3],
            value=1,
            label="Simplification Level",
            info="Choose the level of simplification:\n1 = Basic, 2 = Intermediate, 3 = Advanced",
        )

    # Read-only box that receives the model output.
    output_text = gr.Textbox(
        label="Simplified Text",
        lines=5,
        interactive=False,
    )
    simplify_button = gr.Button("Simplify")

    # Clicking the button passes the text and level to simplify_text and
    # writes the returned string into the output box.
    simplify_button.click(
        fn=simplify_text,
        inputs=[user_text, level],
        outputs=output_text,
    )

# Start the Gradio server with default settings.
demo.launch()
|
|