File size: 13,656 Bytes
3bfe3dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
"""Module for gradio chat-based translation agent interface."""

import os
import re
from pathlib import Path

import gradio as gr

from agent.workflow import (
    report_translation_target_files,
    translate_docs_interactive,
    generate_github_pr,
)
from pr_generator.searcher import find_reference_pr_simple_stream


# State management
class ChatState:
    """Mutable record of where the chat workflow is and what it is working on."""

    def __init__(self):
        # Workflow stage: welcome -> find_files -> translate -> create_github_pr
        self.step = "welcome"
        # Language code used for file search and translation
        self.target_language = "ko"
        # Number of files requested from the file search
        self.k_files = 10
        # Paths of the files found by the search
        self.files_to_translate = []
        # Latest translation output, stored under the "translated" key
        self.current_file_content = dict(translated="")
        # Placeholder for the PR creation result
        self.pr_result = None
        # GitHub settings used when generating the pull request
        self.github_config = dict(
            token="",
            owner="",
            repo_name="",
            reference_pr_url="https://github.com/huggingface/transformers/pull/24968",
        )


# Module-level state instance read and mutated by the handlers below; the
# restart paths rebind this name to a fresh ChatState via `global state`.
# NOTE(review): being module-level, this is presumably shared by every session
# served by this process - confirm that is intended.
state = ChatState()


def _extract_content_for_display(content: str) -> str:
    """Extract text from document for display."""
    # Remove Copyright header
    to_translate = re.sub(r"<!--.*?-->", "", content, count=1, flags=re.DOTALL)
    to_translate = to_translate.strip()
    ## remove code blocks from text
    to_translate = re.sub(r"```.*?```", "", to_translate, flags=re.DOTALL)
    ## remove markdown tables from text
    to_translate = re.sub(r"^\|.*\|$\n?", "", to_translate, flags=re.MULTILINE)
    ## remove empty lines from text
    to_translate = re.sub(r"\n\n+", "\n\n", to_translate)

    return to_translate


def get_welcome_message():
    """Return the markdown greeting shown when a chat session starts or restarts."""
    lines = [
        "**πŸ‘‹ Welcome to 🌐 Hugging Face i18n Translation Agent!**",
        "",
        "I'll help you find files that need translation and translate them in a streamlined workflow.",
        "",
        "**πŸ”Ž Let's start by finding files that need translation.**",
        "",
        "Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.",
        "",  # keeps the trailing newline of the message
    ]
    return "\n".join(lines)


def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
    """Run the file search and report the outcome in the chat.

    Stores the language, the requested count and the found file paths on the
    module state, then appends a summary exchange to ``history``.

    Returns:
        A tuple of (history, empty textbox value, status HTML, ``gr.Tabs``
        selecting tab 1 when files were found and tab 0 otherwise).
    """
    global state
    state.step = "find_files"
    state.target_language = lang
    state.k_files = k

    status_report, files_list = report_translation_target_files(lang, k)

    # Each entry's first element is kept as the file path.
    pending = []
    if files_list:
        pending = [entry[0] for entry in files_list]
    state.files_to_translate = pending

    pieces = [
        f"""**βœ… File search completed!**

**Status Report:**
{status_report}

**πŸ“ Found first {len(pending)} files to translate:**
"""
    ]

    if pending:
        # List at most five files; summarise the rest as a count.
        pieces.extend(
            f"\n{position}. `{path}`" for position, path in enumerate(pending[:5], 1)
        )
        overflow = len(pending) - 5
        if overflow > 0:
            pieces.append(f"\n... and {overflow} more files")
        pieces.append(
            "\n\n**πŸš€ Ready to start translation?**\nI can begin translating these files one by one. Would you like to proceed?"
        )
    else:
        pieces.append("\nNo files found that need translation.")

    response = "".join(pieces)
    history.append(["Please find files that need translation", response])

    next_tab = 1 if pending else 0
    return history, "", update_status(), gr.Tabs(selected=next_tab)


def start_translation_process():
    """Translate the first pending file and build the chat response for it.

    The translation is stored on the module state and written to
    ``translation_result/<file>`` under the parent of this module's directory.

    Returns:
        A markdown string: the translation report on success, or an error
        message when no file is pending or any step fails.
    """
    if not state.files_to_translate:
        return "❌ No files available for translation."

    current_file = state.files_to_translate[0]

    # Any failure (translation call or file write) is reported in the chat
    # instead of propagating into the UI callback.
    try:
        status, translated = translate_docs_interactive(
            state.target_language, [[current_file]]
        )

        state.current_file_content = {"translated": translated}
        output_path = (
            Path(__file__).resolve().parent.parent
            / f"translation_result/{current_file}"
        )
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(translated, encoding="utf-8")

        original_file_link = (
            "https://github.com/huggingface/transformers/blob/main/" + current_file
        )
        response = (
            # Opening "**" added so the bold heading is balanced.
            f"**πŸ”„ Translation for: `{current_file}`**\n"
            "**πŸ“„ Original Content Link:**\n"
            f"{original_file_link}\n"
            "**🌐 Translated Content:**\n"
            # The closing fence goes on its own line; glued to the last content
            # line it may not close the block and can swallow the status text.
            f"\n```\n\n{_extract_content_for_display(translated)}\n```\n"
            f"{status}\n"
        )

    except Exception as e:
        response = f"❌ Translation failed: {e}"
        response += "\n**➑️ Please try from the beginning.**"

    return response


def handle_general_message(message):
    """Answer a message that is not part of the translation flow.

    Messages containing "help", "what" or "how" get the capability overview;
    messages containing "restart" reset the module state and return the welcome
    text; anything else gets a nudge towards the controls. Matching is by
    case-insensitive substring, and the help check comes first.
    """
    global state
    lowered = message.lower()

    if "help" in lowered or "what" in lowered or "how" in lowered:
        return """**πŸ€– I'm your Hugging Face i18n Translation Agent!**

I can help you:
1. **πŸ” Find files** that need translation
2. **🌐 Translate documents** using AI
3. **πŸ“‹ Review translations** for quality
4. **πŸš€ Create GitHub PR** for translation

Currently available actions with quick controls:
- "find files" - Search for files needing translation
- "translate" - Start translation process  
- "review" - Review current translation
- "github" - Create GitHub Pull Request
- "restart" - Start over"""

    if "restart" in lowered:
        # Discard all progress by rebinding the module state.
        state = ChatState()
        return get_welcome_message()

    return """I understand you want to work on translations! 

To get started, please use the controls above to configure your translation settings and find files that need translation.
"""


# Main handler
def handle_user_message(message, history):
    """Route a chat message and append the exchange to ``history``.

    Blank messages are ignored. While in the ``find_files`` step, a message
    containing one of the start keywords begins the translation; every other
    message goes to ``handle_general_message``.

    Args:
        message: Text typed by the user.
        history: List of ``[user, bot]`` pairs; mutated in place.

    Returns:
        A tuple of (history, empty string used to clear the input box).
    """
    global state

    if not message.strip():
        return history, ""

    lowered = message.lower()
    start_keywords = ("yes", "proceed", "start", "translate", "translation")
    # Keywords match by substring and "restart" contains "start"; without this
    # exclusion a restart request in the find_files step starts a translation.
    wants_translation = "restart" not in lowered and any(
        word in lowered for word in start_keywords
    )

    if state.step == "find_files" and wants_translation:
        # User wants to start translation
        if state.files_to_translate:
            state.step = "translate"
            response = start_translation_process()
        else:
            response = (
                "❌ No files available for translation. Please search for files first."
            )
    else:
        # PR creation is not handled here; approve_handler is its entry point.
        response = handle_general_message(message)

    history.append([message, response])
    return history, ""


def update_status():
    """Render the status panel HTML for the current module state.

    In the ``welcome`` step a fixed panel is returned; otherwise the panel shows
    the step label, file count, language, progress text and whether the GitHub
    token, owner and repository name are all set.
    """
    if state.step == "welcome":
        return """
        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
            <div><strong>πŸ”„ Step:</strong> Welcome</div>
            <div><strong>πŸ“ Files:</strong> 0</div>
            <div><strong>🌍 Language:</strong> ko</div>
            <div><strong>⏳ Progress:</strong> Ready</div>
        </div>
        """

    file_count = len(state.files_to_translate)

    step_labels = {
        "welcome": "Welcome",
        "find_files": "Finding Files",
        "translate": "Translating",
        "review": "Reviewing",
        "create_github_pr": "Creating PR",
    }
    progress_labels = {
        "welcome": "Ready to start",
        "find_files": "Files found",
        "translate": f"{file_count} remaining",
        "review": "Review complete",
        "create_github_pr": "PR generation in progress",
    }

    # Unknown steps fall back to the raw step name / a generic progress text.
    step_text = step_labels.get(state.step, state.step)
    progress_text = progress_labels.get(state.step, "In progress")

    # GitHub counts as configured only when all three values are non-empty.
    config = state.github_config
    required_values = [config[key] for key in ("token", "owner", "repo_name")]
    if all(required_values):
        github_status = f"βœ… {config['owner']}/{config['repo_name']}"
    else:
        github_status = "❌ Not configured"

    return f"""
    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
        <div><strong>πŸ”„ Step:</strong> {step_text}</div>
        <div><strong>πŸ“ Files:</strong> {file_count}</div>
        <div><strong>🌍 Language:</strong> {state.target_language}</div>
        <div><strong>⏳ Progress:</strong> {progress_text}</div>
        <div><strong>πŸ”§ GitHub:</strong> {github_status}</div>
    </div>
    """


# Event handlers


def sync_language_displays(lang):
    """Return ``lang`` unchanged.

    NOTE(review): presumably wired as a Gradio callback that copies one
    language selector's value into another - confirm against the UI code.
    """
    selected_language = lang
    return selected_language


def update_github_config(token, owner, repo, reference_pr_url):
    """Store the GitHub settings on the module state.

    A non-empty ``token`` is also exported as the ``GITHUB_TOKEN`` environment
    variable. An empty ``reference_pr_url`` keeps the URL already stored.

    Returns:
        A confirmation message naming ``owner/repo``.
    """
    global state

    # Export only a real token; an empty value leaves the environment alone.
    if token:
        os.environ["GITHUB_TOKEN"] = token

    config = state.github_config
    # Resolve the fallback before touching any key.
    effective_reference = reference_pr_url or config["reference_pr_url"]

    config["token"] = token
    config["owner"] = owner
    config["repo_name"] = repo
    config["reference_pr_url"] = effective_reference

    return f"βœ… GitHub configuration updated: {owner}/{repo}"


def send_message(message, history):
    """Process a chat message and return (history, cleared input, status HTML)."""
    outcome = handle_user_message(message, history)
    updated_history, textbox_value = outcome
    return updated_history, textbox_value, update_status()


# Button handlers with tab switching
def start_translate_handler(history, anthropic_key):
    """Start the translation from the button and pick the tab to show.

    Args:
        history: Chat history, mutated by ``handle_user_message``.
        anthropic_key: API key from the UI; exported as ``ANTHROPIC_API_KEY``
            when non-empty.

    Returns:
        A tuple of (history, cleared input, status HTML, ``gr.Tabs`` selecting
        tab 2 when a translation is stored on the state and tab 0 otherwise).
    """
    # Export only a real key: assigning None to os.environ raises TypeError,
    # and an empty value would overwrite a key already set in the environment.
    if anthropic_key:
        os.environ["ANTHROPIC_API_KEY"] = anthropic_key
    new_hist, cleared_input = handle_user_message("start translation", history)
    selected_tabs = 2 if state.current_file_content["translated"] else 0
    return new_hist, cleared_input, update_status(), gr.Tabs(selected=selected_tabs)


def approve_handler(history, owner, repo, reference_pr_url):
    """Handle the request to generate a GitHub PR for the translated file.

    Copies the owner, repository and reference PR URL from the UI into the
    module state, validates the configuration, searches for a reference PR
    when none was given, and then calls ``generate_github_pr`` for the first
    pending file.

    Args:
        history: List of ``[user, bot]`` pairs; mutated in place.
        owner: GitHub owner from the UI.
        repo: GitHub repository name from the UI.
        reference_pr_url: Reference PR URL from the UI; may be empty.

    Returns:
        A tuple of (history, empty string for the input box, status HTML).
    """
    global state
    state.step = "create_github_pr"

    # Update github config from the latest UI values
    state.github_config["owner"] = owner
    state.github_config["repo_name"] = repo
    state.github_config["reference_pr_url"] = reference_pr_url

    # Validate GitHub configuration
    github_config = state.github_config
    if not all([github_config.get("token"), owner, repo]):
        response = "❌ GitHub configuration incomplete. Please provide GitHub Token, Owner, and Repository Name in Tab 3."
        history.append(["GitHub PR creation request", response])
        return history, "", update_status()

    # Must exist even when the search below is skipped; otherwise appending
    # the progress text raises UnboundLocalError.
    response = ""

    # If reference PR is not provided, use the agent to find one
    if not github_config.get("reference_pr_url"):
        response = "πŸ€– **Reference PR URL not found. The agent will now search for a suitable one...**"
        try:
            stream_gen = find_reference_pr_simple_stream(
                target_language=state.target_language,
                context="documentation translation",
            )
            # Drain the generator: the streamed messages are ignored, only the
            # value carried by StopIteration (the generator's return) is used.
            final_result = None
            try:
                while True:
                    next(stream_gen)
            except StopIteration as e:
                final_result = e.value

            if final_result and final_result.get("status") == "success":
                result_text = final_result.get("result", "")
                match = re.search(r"https://github.com/[^\s]+", result_text)
                if match:
                    found_url = match.group(0)
                    state.github_config["reference_pr_url"] = found_url
                    response += f"\nβœ… **Agent found a reference PR:** {found_url}"
                else:
                    raise ValueError(
                        "Could not extract a valid PR URL from agent's response."
                    )
            else:
                # The generator may return nothing; avoid calling .get on None
                # so the reported reason stays meaningful.
                details = final_result or {}
                error_message = details.get("message") or details.get(
                    "result", "Unknown error"
                )
                raise ValueError(f"Agent failed to find a PR. Reason: {error_message}")
        except Exception as e:
            response += f"\n❌ **Agent failed to find a reference PR.**\nReason: {e}\n\nPlease provide a reference PR URL manually in Tab 3 and try again."
            history.append(["Agent searching for PR", response])
            return history, "", update_status()

    # Proceed with PR generation
    if state.files_to_translate and state.current_file_content.get("translated"):
        current_file = state.files_to_translate[0]
        translated_content = state.current_file_content["translated"]
        # Separate from the search report only when there is one.
        separator = "\n\n" if response else ""
        response += f"{separator}πŸš€ **Generating GitHub PR...**"

        pr_response = generate_github_pr(
            target_language=state.target_language,
            filepath=current_file,
            translated_content=translated_content,
            github_config=state.github_config,
        )
        response += f"\n{pr_response}"
    else:
        response = "❌ No translated file available. Please complete the translation process first."

    history.append(["GitHub PR creation request", response])
    return history, "", update_status()


def restart_handler(history):
    """Reset the module state and return a fresh UI.

    The incoming ``history`` is discarded. Returns a tuple of (history holding
    only the welcome message, cleared input, status HTML, ``gr.Tabs`` on tab 0).
    """
    global state
    state = ChatState()
    fresh_history = [[None, get_welcome_message()]]
    return fresh_history, "", update_status(), gr.Tabs(selected=0)