harheem's picture
Upload project files
3bfe3dd verified
"""Module for gradio chat-based translation agent interface."""
import os
import re
from pathlib import Path
import gradio as gr
from agent.workflow import (
report_translation_target_files,
translate_docs_interactive,
generate_github_pr,
)
from pr_generator.searcher import find_reference_pr_simple_stream
# State management
class ChatState:
    """Mutable record of where the chat-driven translation workflow stands.

    A fresh instance starts at the ``"welcome"`` step with Korean (``"ko"``)
    as the target language, no files selected, no translation stored and an
    empty GitHub configuration (apart from a default reference PR URL).
    """

    def __init__(self):
        # Workflow position: welcome -> find_files -> translate -> create_github_pr
        self.step = "welcome"

        # Search settings chosen by the user.
        self.target_language = "ko"
        self.k_files = 10

        # Results accumulated while the workflow runs.
        self.files_to_translate = []
        self.current_file_content = {"translated": ""}
        self.pr_result = None  # Store PR creation result

        # GitHub configuration
        self.github_config = dict(
            token="",
            owner="",
            repo_name="",
            reference_pr_url="https://github.com/huggingface/transformers/pull/24968",
        )
# Single module-level state object; the handlers in this module read and mutate it.
state = ChatState()
def _extract_content_for_display(content: str) -> str:
"""Extract text from document for display."""
# Remove Copyright header
to_translate = re.sub(r"<!--.*?-->", "", content, count=1, flags=re.DOTALL)
to_translate = to_translate.strip()
## remove code blocks from text
to_translate = re.sub(r"```.*?```", "", to_translate, flags=re.DOTALL)
## remove markdown tables from text
to_translate = re.sub(r"^\|.*\|$\n?", "", to_translate, flags=re.MULTILINE)
## remove empty lines from text
to_translate = re.sub(r"\n\n+", "\n\n", to_translate)
return to_translate
def get_welcome_message():
    """Return the markdown greeting that opens a chat session."""
    lines = (
        "**πŸ‘‹ Welcome to 🌐 Hugging Face i18n Translation Agent!**",
        "I'll help you find files that need translation and translate them in a streamlined workflow.",
        "**πŸ”Ž Let's start by finding files that need translation.**",
        "Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.",
        "",  # trailing empty entry so the message ends with a newline
    )
    return "\n".join(lines)
def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
    """Run the file search and report the outcome in the chat.

    Stores ``lang`` and ``k`` on the module state, moves the workflow to the
    ``"find_files"`` step, calls ``report_translation_target_files(lang, k)``
    and keeps the first element of each returned entry as a file to translate.

    Returns:
        A 4-tuple of the (mutated) ``history``, an empty string for the input
        box, the status HTML, and a ``gr.Tabs`` update selecting tab 1 when
        files were found and tab 0 otherwise.
    """
    state.target_language = lang
    state.k_files = k
    state.step = "find_files"

    status_report, files_list = report_translation_target_files(lang, k)
    found = [entry[0] for entry in files_list] if files_list else []
    state.files_to_translate = found

    pieces = [
        "**βœ… File search completed!**\n",
        "**Status Report:**\n",
        f"{status_report}\n",
        f"**πŸ“ Found first {len(found)} files to translate:**\n",
    ]

    if not found:
        pieces.append("\nNo files found that need translation.")
    else:
        preview_limit = 5  # Show first 5
        pieces.extend(
            f"\n{position}. `{name}`"
            for position, name in enumerate(found[:preview_limit], 1)
        )
        if len(found) > preview_limit:
            pieces.append(f"\n... and {len(found) - preview_limit} more files")
        pieces.append(
            "\n\n**πŸš€ Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
        )

    response = "".join(pieces)

    # Add to history
    history.append(["Please find files that need translation", response])

    status_html = update_status()
    return history, "", status_html, gr.Tabs(selected=1 if found else 0)
def start_translation_process():
    """Translate the first pending file and return a chat message about it.

    The translation is stored on the module state and also written to
    ``translation_result/<file>`` two directory levels above this module.
    Any exception raised along the way is turned into an error message
    instead of propagating.
    """
    if not state.files_to_translate:
        return "❌ No files available for translation."

    current_file = state.files_to_translate[0]

    # Call translation function (simplified for demo)
    try:
        status, translated = translate_docs_interactive(
            state.target_language, [[current_file]]
        )
        state.current_file_content = {"translated": translated}

        # Persist the translated text next to the project root.
        project_root = Path(__file__).resolve().parent.parent
        out_file = Path(project_root / f"translation_result/{current_file}")
        out_file.parent.mkdir(parents=True, exist_ok=True)
        out_file.write_text(translated, encoding="utf-8")

        link_prefix = "https://github.com/huggingface/transformers/blob/main/"
        original_file_link = link_prefix + current_file

        displayed = _extract_content_for_display(translated)
        response = "".join(
            [
                f"πŸ”„ Translation for: `{current_file}`**\n",
                "**πŸ“„ Original Content Link:**\n",
                f"{original_file_link}\n",
                "**🌐 Translated Content:**\n",
                f"\n```\n\n{displayed}```\n",
                f"{status}\n",
            ]
        )
        print("translated:")
        print(translated)
        print("extracted")
    except Exception as e:
        response = (
            f"❌ Translation failed: {str(e)}"
            "\n**➑️ Please try from the beginning.**"
        )

    return response
def handle_general_message(message):
    """Answer a chat message that is not part of the translation flow.

    Messages containing "help", "what" or "how" (case-insensitive substring
    match) get the capability overview; messages containing "restart" reset
    the module state and return the welcome message; anything else gets a
    short pointer to the controls.
    """
    global state

    lowered = message.lower()

    if "help" in lowered or "what" in lowered or "how" in lowered:
        overview = [
            "**πŸ€– I'm your Hugging Face i18n Translation Agent!**",
            "I can help you:",
            "1. **πŸ” Find files** that need translation",
            "2. **🌐 Translate documents** using AI",
            "3. **πŸ“‹ Review translations** for quality",
            "4. **πŸš€ Create GitHub PR** for translation",
            "Currently available actions with quick controls:",
            '- "find files" - Search for files needing translation',
            '- "translate" - Start translation process',
            '- "review" - Review current translation',
            '- "github" - Create GitHub Pull Request',
            '- "restart" - Start over',
        ]
        return "\n".join(overview)

    if "restart" in lowered:
        state = ChatState()
        return get_welcome_message()

    return (
        "I understand you want to work on translations!\n"
        "To get started, please use the controls above to configure your translation settings and find files that need translation.\n"
    )
# Main handler
def handle_user_message(message, history):
    """Route a chat message and append the exchange to ``history``.

    Blank messages are ignored. While the workflow is at the ``"find_files"``
    step, a message containing one of the confirmation words starts the
    translation (or reports that there are no files). Every other message is
    answered by ``handle_general_message``.

    Returns:
        ``(history, "")`` where the empty string clears the input box.
    """
    if not message.strip():
        return history, ""

    confirmation_words = ("yes", "proceed", "start", "translate", "translation")
    wants_translation = state.step == "find_files" and any(
        word in message.lower() for word in confirmation_words
    )

    if not wants_translation:
        # General response
        response = handle_general_message(message)
    elif state.files_to_translate:
        # User wants to start translation
        state.step = "translate"
        response = start_translation_process()
    else:
        response = (
            "❌ No files available for translation. Please search for files first."
        )

    history.append([message, response])
    return history, ""
def update_status():
    """Render the status panel HTML for the current module state.

    At the ``"welcome"`` step a fixed panel is returned. For every other step
    the panel shows the step label, the number of files, the target language,
    a progress text and whether token, owner and repository name are all set
    in the GitHub configuration.
    """
    current_step = state.step

    if current_step == "welcome":
        return """
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
<div><strong>πŸ”„ Step:</strong> Welcome</div>
<div><strong>πŸ“ Files:</strong> 0</div>
<div><strong>🌍 Language:</strong> ko</div>
<div><strong>⏳ Progress:</strong> Ready</div>
</div>
"""

    file_count = len(state.files_to_translate)

    step_labels = {
        "welcome": "Welcome",
        "find_files": "Finding Files",
        "translate": "Translating",
        "review": "Reviewing",
        "create_github_pr": "Creating PR",
    }
    progress_labels = {
        "welcome": "Ready to start",
        "find_files": "Files found",
        "translate": f"{file_count} remaining",
        "review": "Review complete",
        "create_github_pr": "PR generation in progress",
    }

    # Check GitHub configuration status
    cfg = state.github_config
    required_values = (cfg["token"], cfg["owner"], cfg["repo_name"])
    if all(required_values):
        github_status = f"βœ… {cfg['owner']}/{cfg['repo_name']}"
    else:
        github_status = "❌ Not configured"

    # Unknown steps fall back to the raw step name / a generic progress text.
    step_text = step_labels.get(current_step, current_step)
    progress_text = progress_labels.get(current_step, "In progress")

    return f"""
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
<div><strong>πŸ”„ Step:</strong> {step_text}</div>
<div><strong>πŸ“ Files:</strong> {file_count}</div>
<div><strong>🌍 Language:</strong> {state.target_language}</div>
<div><strong>⏳ Progress:</strong> {progress_text}</div>
<div><strong>πŸ”§ GitHub:</strong> {github_status}</div>
</div>
"""
# Event handlers
def sync_language_displays(lang):
    """Return ``lang`` unchanged so the same value can be written to another output."""
    return lang
def update_github_config(token, owner, repo, reference_pr_url):
    """Store the GitHub settings on the module state.

    A non-empty ``token`` is also exported as the ``GITHUB_TOKEN`` environment
    variable. An empty ``reference_pr_url`` leaves the previously stored
    reference PR URL in place.

    Returns:
        A confirmation message naming ``owner/repo``.
    """
    # Set GitHub token in environment variables
    if token:
        os.environ["GITHUB_TOKEN"] = token

    # Save GitHub configuration to state
    cfg = state.github_config
    effective_reference_url = reference_pr_url or cfg["reference_pr_url"]
    cfg["token"] = token
    cfg["owner"] = owner
    cfg["repo_name"] = repo
    cfg["reference_pr_url"] = effective_reference_url

    return f"βœ… GitHub configuration updated: {owner}/{repo}"
def send_message(message, history):
    """Handle a submitted chat message and return the refreshed UI values.

    Returns:
        ``(history, cleared_input, status_html)``.
    """
    handled = handle_user_message(message, history)
    updated_history, cleared_input = handled
    return (updated_history, cleared_input, update_status())
# Button handlers with tab switching
def start_translate_handler(history, anthropic_key):
    """Start the translation from the quick-control button.

    Args:
        history: Chat history list; the exchange is appended to it.
        anthropic_key: API key entered in the UI. It is exported as
            ``ANTHROPIC_API_KEY`` only when non-empty, matching how
            ``update_github_config`` treats the GitHub token.

    Returns:
        ``(history, cleared_input, status_html, tabs_update)`` where the tabs
        update selects tab 2 when a translation is stored on the state and
        tab 0 otherwise.
    """
    # An empty/None key must not overwrite a key that is already configured in
    # the environment, and assigning None to os.environ raises TypeError.
    if anthropic_key:
        os.environ["ANTHROPIC_API_KEY"] = anthropic_key

    new_hist, cleared_input = handle_user_message("start translation", history)
    selected_tabs = 2 if state.current_file_content["translated"] else 0
    return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)
def _drain_generator(stream_gen):
    """Exhaust ``stream_gen`` and return the value carried by its ``StopIteration``."""
    try:
        while True:
            # The streamed messages are not needed here, only the final result.
            next(stream_gen)
    except StopIteration as stop:
        return stop.value


def _search_reference_pr_url(target_language):
    """Ask the PR-search agent for a reference PR and return the first GitHub URL found.

    Raises:
        ValueError: If the agent returns nothing, reports a non-success
            status, or its result text contains no GitHub URL.
    """
    final_result = _drain_generator(
        find_reference_pr_simple_stream(
            target_language=target_language,
            context="documentation translation",
        )
    )

    if not final_result or final_result.get("status") != "success":
        # final_result may be None when the generator returns nothing; treat it
        # like an empty result so the user sees a readable reason.
        details = final_result or {}
        error_message = details.get("message") or details.get(
            "result", "Unknown error"
        )
        raise ValueError(f"Agent failed to find a PR. Reason: {error_message}")

    match = re.search(r"https://github.com/[^\s]+", final_result.get("result", ""))
    if not match:
        raise ValueError("Could not extract a valid PR URL from agent's response.")
    return match.group(0)


def approve_handler(history, owner, repo, reference_pr_url):
    """Handles the request to generate a GitHub PR.

    Moves the workflow to the ``"create_github_pr"`` step, refreshes the
    GitHub configuration from the UI values, and then:

    1. stops with an error message if token, owner or repository is missing;
    2. searches for a reference PR via the agent when none was supplied, and
       stops with an error message if that search fails;
    3. calls ``generate_github_pr`` for the first pending file when a
       translation is stored, otherwise reports that none is available.

    Args:
        history: Chat history list; the exchange is appended to it.
        owner: GitHub owner from the UI.
        repo: GitHub repository name from the UI.
        reference_pr_url: Reference PR URL from the UI (may be empty).

    Returns:
        ``(history, "", status_html)``.
    """
    state.step = "create_github_pr"

    # Update github config from the latest UI values
    github_config = state.github_config
    github_config["owner"] = owner
    github_config["repo_name"] = repo
    github_config["reference_pr_url"] = reference_pr_url

    # Validate GitHub configuration
    if not all([github_config.get("token"), owner, repo]):
        response = "❌ GitHub configuration incomplete. Please provide GitHub Token, Owner, and Repository Name in Tab 3."
        history.append(["GitHub PR creation request", response])
        return history, "", update_status()

    # Start from an empty message so the PR-generation step below can append
    # to it even when a reference PR URL was supplied and no search ran.
    response = ""

    # If reference PR is not provided, use the agent to find one
    if not github_config.get("reference_pr_url"):
        response = "πŸ€– **Reference PR URL not found. The agent will now search for a suitable one...**"
        try:
            found_url = _search_reference_pr_url(state.target_language)
        except Exception as e:
            response += f"\n❌ **Agent failed to find a reference PR.**\nReason: {e}\n\nPlease provide a reference PR URL manually in Tab 3 and try again."
            history.append(["Agent searching for PR", response])
            return history, "", update_status()
        state.github_config["reference_pr_url"] = found_url
        response += f"\nβœ… **Agent found a reference PR:** {found_url}"

    # Proceed with PR generation
    if state.files_to_translate and state.current_file_content.get("translated"):
        current_file = state.files_to_translate[0]
        translated_content = state.current_file_content["translated"]

        generating = "πŸš€ **Generating GitHub PR...**"
        # Only separate from earlier text when there is earlier text.
        response = f"{response}\n\n{generating}" if response else generating

        pr_response = generate_github_pr(
            target_language=state.target_language,
            filepath=current_file,
            translated_content=translated_content,
            github_config=state.github_config,
        )
        response += f"\n{pr_response}"
    else:
        response = "❌ No translated file available. Please complete the translation process first."

    history.append(["GitHub PR creation request", response])
    return history, "", update_status()
def restart_handler(history):
    """Resets the state and UI.

    Replaces the module state with a fresh ``ChatState`` and returns a new
    history holding only the welcome message, an empty input value, the
    status HTML and a ``gr.Tabs`` update selecting tab 0. The incoming
    ``history`` is not used.
    """
    global state
    state = ChatState()

    fresh_history = [[None, get_welcome_message()]]
    status_html = update_status()
    return fresh_history, "", status_html, gr.Tabs(selected=0)