|
"""Module for gradio chat-based translation agent interface.""" |
|
|
|
import os |
|
import re |
|
from pathlib import Path |
|
|
|
import gradio as gr |
|
|
|
from agent.workflow import ( |
|
report_translation_target_files, |
|
translate_docs_interactive, |
|
generate_github_pr, |
|
) |
|
from pr_generator.searcher import find_reference_pr_simple_stream |
|
|
|
|
|
|
|
class ChatState:
    """Mutable, in-memory record of where the chat workflow currently stands.

    Building a new instance yields the default values for every field.
    """

    def __init__(self):
        # Workflow position; a new state starts at the welcome step.
        self.step = "welcome"

        # Search settings: target language code and the file-count value
        # handed to the file search.
        self.target_language = "ko"
        self.k_files = 10

        # Results carried from one step to the next.
        self.files_to_translate = []
        self.current_file_content = dict(translated="")
        self.pr_result = None

        # GitHub settings consulted when a pull request is generated.
        self.github_config = dict(
            token="",
            owner="",
            repo_name="",
            reference_pr_url="https://github.com/huggingface/transformers/pull/24968",
        )


# Module-level state object read and mutated by the handlers in this file.
state = ChatState()
|
|
|
|
|
def _extract_content_for_display(content: str) -> str:
    """Reduce a document to the text shown in the chat preview.

    Applied in order: drop the first HTML comment, trim surrounding
    whitespace, remove triple-backtick fenced spans, remove lines that both
    start and end with ``|``, and collapse runs of blank lines into one.
    """
    # Only the first ``<!-- ... -->`` is removed (count=1).
    without_comment = re.sub(r"<!--.*?-->", "", content, count=1, flags=re.DOTALL)
    text = without_comment.strip()

    # (pattern, replacement, flags), applied one after another.
    substitutions = (
        (r"```.*?```", "", re.DOTALL),
        (r"^\|.*\|$\n?", "", re.MULTILINE),
        (r"\n\n+", "\n\n", 0),
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)

    return text
|
|
|
|
|
def get_welcome_message():
    """Return the markdown greeting used as the opening chat message."""
    # NOTE(review): the "π" characters look like mis-decoded emoji; they are
    # reproduced as found.
    paragraphs = (
        "**π Welcome to π Hugging Face i18n Translation Agent!**",
        "I'll help you find files that need translation and translate them in a streamlined workflow.",
        "**π Let's start by finding files that need translation.**",
        "Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.",
    )
    # Paragraphs are separated by one blank line; the text ends with a newline.
    return "\n\n".join(paragraphs) + "\n"
|
|
|
|
|
def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
    """Run the file search and report the outcome in the chat.

    Stores ``lang`` and ``k`` on the shared state, marks the step as
    ``"find_files"``, calls ``report_translation_target_files(lang, k)`` and
    appends a summary (at most five paths listed) to ``history``.

    Returns:
        ``(history, "", status_html, gr.Tabs(...))``. The selected tab index
        is 1 when at least one file was found, otherwise 0.
    """
    global state
    state.step = "find_files"
    state.target_language = lang
    state.k_files = k

    status_report, files_list = report_translation_target_files(lang, k)
    # The first element of every entry is what gets queued for translation.
    found = [entry[0] for entry in files_list] if files_list else []
    state.files_to_translate = found

    # NOTE(review): the symbol prefixes look like mis-decoded emoji; "\x85"
    # spells out the control character that follows "β" in the source.
    parts = [
        f"""**β\x85 File search completed!**

**Status Report:**
{status_report}

**π Found first {len(found)} files to translate:**
"""
    ]

    preview_limit = 5
    if found:
        parts.extend(
            f"\n{position}. `{path}`"
            for position, path in enumerate(found[:preview_limit], 1)
        )
        hidden = len(found) - preview_limit
        if hidden > 0:
            parts.append(f"\n... and {hidden} more files")
        parts.append(
            "\n\n**π Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
        )
    else:
        parts.append("\nNo files found that need translation.")

    history.append(["Please find files that need translation", "".join(parts)])
    tab_index = 1 if found else 0

    return history, "", update_status(), gr.Tabs(selected=tab_index)
|
|
|
|
|
def start_translation_process():
    """Translate the first queued file and build the chat message for it.

    Calls ``translate_docs_interactive`` for ``state.files_to_translate[0]``,
    stores the translation on ``state.current_file_content`` and writes it to
    ``translation_result/<file>`` under the parent of this module's directory.

    Returns:
        A markdown message holding the link to the original file, a fenced
        preview of the translation and the status text. If no file is queued
        or any step raises, an error message is returned instead.
    """
    if not state.files_to_translate:
        return "β No files available for translation."

    current_file = state.files_to_translate[0]

    try:
        status, translated = translate_docs_interactive(
            state.target_language, [[current_file]]
        )
        state.current_file_content = {"translated": translated}

        output_path = (
            Path(__file__).resolve().parent.parent
            / f"translation_result/{current_file}"
        )
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(translated, encoding="utf-8")

        original_file_link = (
            "https://github.com/huggingface/transformers/blob/main/" + current_file
        )
        preview = _extract_content_for_display(translated)
        # NOTE(review): the "π"/"β" prefixes look like mis-decoded emoji; they
        # are reproduced as found.
        response = (
            # Bold markers are balanced so the heading renders as intended.
            f"**π Translation for: `{current_file}`**\n"
            "**π Original Content Link:**\n"
            f"{original_file_link}\n"
            "**π Translated Content:**\n"
            # The closing fence needs its own line, otherwise the code block
            # is not terminated when the preview lacks a trailing newline.
            f"\n```\n\n{preview}\n```\n"
            f"{status}\n"
        )
    except Exception as e:
        # UI boundary: report the failure in the chat rather than raising.
        response = f"β Translation failed: {str(e)}"
        response += "\n**β‘οΈ Please try from the beginning.**"

    return response
|
|
|
|
|
def handle_general_message(message):
    """Answer a chat message that does not advance the translation flow.

    Matching is a case-insensitive substring test:
    - ``help`` / ``what`` / ``how`` returns the capability overview;
    - otherwise ``restart`` replaces the shared state and returns the
      welcome message;
    - anything else returns a generic hint.
    """
    global state
    lowered = message.lower()

    # NOTE(review): the "π" characters look like mis-decoded emoji; they are
    # reproduced as found.
    if "help" in lowered or "what" in lowered or "how" in lowered:
        return """**π€ I'm your Hugging Face i18n Translation Agent!**

I can help you:
1. **π Find files** that need translation
2. **π Translate documents** using AI
3. **π Review translations** for quality
4. **π Create GitHub PR** for translation

Currently available actions with quick controls:
- "find files" - Search for files needing translation
- "translate" - Start translation process
- "review" - Review current translation
- "github" - Create GitHub Pull Request
- "restart" - Start over"""

    if "restart" in lowered:
        state = ChatState()
        return get_welcome_message()

    return """I understand you want to work on translations!

To get started, please use the controls above to configure your translation settings and find files that need translation.
"""
|
|
|
|
|
|
|
def handle_user_message(message, history):
    """Route one chat message and append the exchange to ``history``.

    A whitespace-only message changes nothing. While the step is
    ``"find_files"``, a message containing one of the confirmation words
    starts the translation (or reports that no files are queued). Every
    other message is answered by ``handle_general_message``.

    Returns:
        ``(history, "")``; the empty string clears the input box.
    """
    global state

    # Blank input: leave the conversation untouched.
    if not message.strip():
        return history, ""

    confirm_words = ("yes", "proceed", "start", "translate", "translation")
    lowered = message.lower()
    wants_translation = state.step == "find_files" and any(
        word in lowered for word in confirm_words
    )

    if not wants_translation:
        reply = handle_general_message(message)
    elif state.files_to_translate:
        state.step = "translate"
        reply = start_translation_process()
    else:
        reply = "β No files available for translation. Please search for files first."

    history.append([message, reply])
    return history, ""
|
|
|
|
|
def update_status():
    """Render the status panel as an HTML string from the shared state.

    At the ``"welcome"`` step a fixed panel is returned (0 files, ``ko``,
    ``Ready``) without a GitHub row. For every other step the panel shows the
    step label, the number of queued files, the target language, a progress
    label and whether the GitHub token, owner and repository are all set.

    Values that originate from user input (language, owner, repository) are
    HTML-escaped before being interpolated into the markup.

    NOTE(review): ``state`` is a module-level object, so it is presumably
    shared by every session served by this process — confirm.
    """
    import html  # local import: only this function needs it

    # NOTE(review): the "π"/"β" prefixes look like mis-decoded emoji; they are
    # reproduced as found.
    if state.step == "welcome":
        return """
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
<div><strong>π Step:</strong> Welcome</div>
<div><strong>π Files:</strong> 0</div>
<div><strong>π Language:</strong> ko</div>
<div><strong>β³ Progress:</strong> Ready</div>
</div>
"""

    step_map = {
        "welcome": "Welcome",
        "find_files": "Finding Files",
        "translate": "Translating",
        "review": "Reviewing",
        "create_github_pr": "Creating PR",
    }

    progress_map = {
        "welcome": "Ready to start",
        "find_files": "Files found",
        "translate": f"{len(state.files_to_translate)} remaining",
        "review": "Review complete",
        "create_github_pr": "PR generation in progress",
    }

    github_status = "β Not configured"
    if all(
        [
            state.github_config["token"],
            state.github_config["owner"],
            state.github_config["repo_name"],
        ]
    ):
        owner = html.escape(str(state.github_config["owner"]))
        repo_name = html.escape(str(state.github_config["repo_name"]))
        # "\x85" spells out the control character that follows "β" in the source.
        github_status = f"β\x85 {owner}/{repo_name}"

    language = html.escape(str(state.target_language))

    status_html = f"""
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
<div><strong>π Step:</strong> {step_map.get(state.step, state.step)}</div>
<div><strong>π Files:</strong> {len(state.files_to_translate)}</div>
<div><strong>π Language:</strong> {language}</div>
<div><strong>β³ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
<div><strong>π§ GitHub:</strong> {github_status}</div>
</div>
"""

    return status_html
|
|
|
|
|
|
|
|
|
|
|
def sync_language_displays(lang):
    """Return ``lang`` unchanged.

    NOTE(review): presumably used as a UI callback that copies one language
    selector's value to another component — confirm at the call site.
    """
    selected_language = lang
    return selected_language
|
|
|
|
|
def update_github_config(token, owner, repo, reference_pr_url):
    """Store the GitHub settings on the shared state.

    A non-empty ``token`` is also exported as the ``GITHUB_TOKEN`` environment
    variable. An empty ``reference_pr_url`` leaves the stored URL unchanged.

    Returns:
        A confirmation message naming ``owner/repo``.
    """
    global state

    if token:
        os.environ["GITHUB_TOKEN"] = token

    config = state.github_config
    config["token"] = token
    config["owner"] = owner
    config["repo_name"] = repo
    # Fall back to the URL already stored when none is supplied.
    config["reference_pr_url"] = reference_pr_url or config["reference_pr_url"]

    # NOTE(review): "β" looks like a mis-decoded emoji; "\x85" spells out the
    # control character that follows it in the source.
    return f"β\x85 GitHub configuration updated: {owner}/{repo}"
|
|
|
|
|
def send_message(message, history):
    """Process one chat message and return the refreshed UI values.

    Returns:
        ``(history, cleared_input, status_html)``.
    """
    chat_result = handle_user_message(message, history)
    updated_history, input_value = chat_result
    # The status is rendered after the message was handled, so it reflects
    # any step change the message caused.
    return updated_history, input_value, update_status()
|
|
|
|
|
|
|
def start_translate_handler(history, anthropic_key):
    """Start the translation from the quick-control button.

    Exports ``anthropic_key`` as ``ANTHROPIC_API_KEY`` when one is supplied,
    then sends the fixed message ``"start translation"`` through
    ``handle_user_message``.

    Returns:
        ``(history, cleared_input, status_html, gr.Tabs(...))``. The selected
        tab index is 2 when the shared state holds translated content,
        otherwise 0.
    """
    # Only export a key that was actually provided: assigning None to
    # os.environ raises TypeError, and an empty string would overwrite a key
    # already present in the environment.
    if anthropic_key:
        os.environ["ANTHROPIC_API_KEY"] = anthropic_key

    new_hist, cleared_input = handle_user_message("start translation", history)
    selected_tab = 2 if state.current_file_content["translated"] else 0
    return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tab)
|
|
|
|
|
def _search_reference_pr_url(target_language):
    """Ask the PR search agent for a reference PR and return its URL.

    Raises:
        ValueError: if the agent does not report success, or its result text
            contains no ``https://github.com/...`` URL.
    """
    stream_gen = find_reference_pr_simple_stream(
        target_language=target_language,
        context="documentation translation",
    )

    # Drain the generator; its return value arrives on StopIteration.
    final_result = None
    try:
        while True:
            next(stream_gen)
    except StopIteration as stop:
        final_result = stop.value

    if not final_result or final_result.get("status") != "success":
        # The generator may finish without a result; avoid calling .get on None.
        details = final_result or {}
        error_message = details.get("message") or details.get(
            "result", "Unknown error"
        )
        raise ValueError(f"Agent failed to find a PR. Reason: {error_message}")

    match = re.search(r"https://github.com/[^\s]+", final_result.get("result", ""))
    if not match:
        raise ValueError("Could not extract a valid PR URL from agent's response.")
    return match.group(0)


def approve_handler(history, owner, repo, reference_pr_url):
    """Handles the request to generate a GitHub PR.

    Copies ``owner``, ``repo`` and ``reference_pr_url`` onto the shared GitHub
    configuration and checks that token, owner and repository are present.
    When no reference PR URL is set, the search agent is asked for one. A PR
    is then generated for the first queued file if a translation exists.

    Returns:
        ``(history, "", status_html)``. ``history`` gains one entry that
        describes the outcome or the reason the request stopped.
    """
    global state
    state.step = "create_github_pr"

    # Take over the latest values passed in by the caller.
    state.github_config["owner"] = owner
    state.github_config["repo_name"] = repo
    state.github_config["reference_pr_url"] = reference_pr_url

    github_config = state.github_config
    if not all([github_config.get("token"), owner, repo]):
        response = "β GitHub configuration incomplete. Please provide GitHub Token, Owner, and Repository Name in Tab 3."
        history.append(["GitHub PR creation request", response])
        return history, "", update_status()

    # Start empty so the message can be extended below even when a reference
    # URL was supplied and the search branch is skipped.
    response = ""

    # NOTE(review): the "π"/"β" prefixes look like mis-decoded emoji; "\x85"
    # spells out the control character that follows "β" in the source.
    if not github_config.get("reference_pr_url"):
        response = "π€ **Reference PR URL not found. The agent will now search for a suitable one...**"
        try:
            found_url = _search_reference_pr_url(state.target_language)
        except Exception as e:
            # UI boundary: report the failure in the chat rather than raising.
            response += f"\nβ **Agent failed to find a reference PR.**\nReason: {e}\n\nPlease provide a reference PR URL manually in Tab 3 and try again."
            history.append(["Agent searching for PR", response])
            return history, "", update_status()
        state.github_config["reference_pr_url"] = found_url
        response += f"\nβ\x85 **Agent found a reference PR:** {found_url}"

    if state.files_to_translate and state.current_file_content.get("translated"):
        current_file = state.files_to_translate[0]
        translated_content = state.current_file_content["translated"]
        # Separate from the search report only when there is one.
        if response:
            response += "\n\n"
        response += "π **Generating GitHub PR...**"

        pr_response = generate_github_pr(
            target_language=state.target_language,
            filepath=current_file,
            translated_content=translated_content,
            github_config=state.github_config,
        )
        response += f"\n{pr_response}"
    else:
        response = "β No translated file available. Please complete the translation process first."

    history.append(["GitHub PR creation request", response])
    return history, "", update_status()
|
|
|
|
|
def restart_handler(history):
    """Discard the current state and return the UI to its initial view.

    The incoming ``history`` is not used; the returned history contains only
    the welcome message.

    Returns:
        ``(history, "", status_html, gr.Tabs(selected=0))``.
    """
    global state
    state = ChatState()
    fresh_history = [[None, get_welcome_message()]]
    return fresh_history, "", update_status(), gr.Tabs(selected=0)
|
|