translation

Sleeping

File size: 9,577 Bytes

717452a
 
 
e9bec21
 
717452a
fcc0568
4fe238f
fcc0568
4fe238f
717452a
 
 
d117130
 
717452a
 
7a8369a
717452a
fcc0568
4fe238f
fcc0568
717452a
fcc0568
 
 
 
 
 
 
 
 
 
4fe238f
 
0ed057c
4fe238f
717452a
fcc0568
717452a
 
4fe238f
fcc0568
717452a
 
fcc0568
 
e9bec21
4fe238f
e9bec21
fcc0568
e9bec21
fcc0568
 
 
 
 
 
 
4fe238f
fcc0568
 
 
 
 
 
 
 
 
 
717452a
fcc0568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fe238f
fcc0568
 
 
 
 
 
 
 
 
4fe238f
fcc0568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aeaf737
fcc0568
 
 
 
 
 
 
25cc8b2
fcc0568
 
4fe238f
fcc0568
 
 
 
 
4fe238f
fcc0568
 
aeaf737
fcc0568
 
 
 
 
 
4fe238f
fcc0568
 
 
 
 
717452a
e9bec21
fcc0568
 
 
 
 
 
717452a
4fe238f

import os
from threading import Thread, Event
from typing import Iterator
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, GemmaTokenizerFast, TextIteratorStreamer
from gradio import ChatMessage

# Constants and model initialization
path = "TenzinGayche/tpo_v1.0.0_dpo_2_3ep_ft"
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Load the model and tokenizer
tokenizer = GemmaTokenizerFast.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.float16).to("cuda")
model.config.sliding_window = 4096
model.eval()
model.config.use_cache = True

stop_event = Event()

def stream_translation(user_message: str, messages: list) -> Iterator[list]:
    stop_event.clear()
    message = user_message.replace('\n', ' ')
    
    # Initialize the chat history if empty
    if not messages:
        messages = []
    
    # Add user message if not already present
    if not messages or (isinstance(messages[-1], dict) and messages[-1]["role"] != "user"):
        messages.append({"role": "user", "content": message})
    
    # Prepare input for the model
    conversation = [
        {"role": "user", "content": f"Please translate the following into English: {message} Translation:"}
    ]
    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Input trimmed as it exceeded {MAX_INPUT_TOKEN_LENGTH} tokens.")
    
    input_ids = input_ids.to(model.device)
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    
    # Generation parameters
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=2000,
    )
    
    # Start generation in a separate thread
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    
    # Initialize response tracking
    thought_buffer = ""
    translation_buffer = ""
    in_translation = False
    accumulated_text = ""
    
    # Add initial thinking message
    messages.append({
        "role": "assistant",
        "content": "",
        "metadata": {"title": "🤔 Thinking...", "status": "pending"}
    })
    yield messages
    
    for text in streamer:
        accumulated_text += text
        
        # Check for the marker in the accumulated text
        if "#Final Translation:" in accumulated_text and not in_translation:
            # Split at the marker and handle both parts
            parts = accumulated_text.split("#Final Translation:", 1)
            thought_buffer = parts[0].strip()
            translation_start = parts[1] if len(parts) > 1 else ""
            
            # Complete the thinking phase
            messages[-1] = {
                "role": "assistant",
                "content": thought_buffer,
                "metadata": {"title": "🤔 Thought Process", "status": "done"},
                 "collapsed": True 
            }
            yield messages
            thought_buffer=""
            
            # Start translation phase as a normal message
            in_translation = True
            messages.append({
                "role": "assistant",
                "content": translation_start.strip()  # No metadata for normal response
            })
            translation_buffer = translation_start
            yield messages
            
            continue
            
        if in_translation:
            translation_buffer += text
            messages[-1] = {
                "role": "assistant",
                "content": translation_buffer.strip()  # No metadata for normal response
            }
        else:
            thought_buffer += text
            messages[-1] = {
                "role": "assistant",
                "content": thought_buffer.strip(),
                "metadata": {"title": "🤔 Thinking...", "status": "pending"}
            }
        
        yield messages

with gr.Blocks(title="Monlam Translation System", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 💭 Samloe Melong Translate")
    gr.Markdown("It's a proof of concept. The model first generates detailed reasoning and then provides the translation. It only works for Tibetan to English (for now)!!")
    
    chatbot = gr.Chatbot(
        type="messages",
        show_label=False,
        render_markdown=True,
        scale=1
    )
    
    with gr.Row():
        input_box = gr.Textbox(
            lines=3,
            label="Enter Tibetan text",
            placeholder="Type Tibetan text here...",
            show_label=True,
        )
        submit_btn = gr.Button("Translate", variant="primary", scale=0.15)
    

    examples = [
        ["རྟག་པར་མངོན་ཞེན་གྱིས་བསླང་བའམ། །གཉེན་པོ་ཡིས་ནི་བསླང་བ་ཉིད། །ཡོན་ཏན་དང་ནི་ཕན་འདོགས་ཞིང་། །སྡུག་བསྔལ་བ་ལ་དགེ་ཆེན་འགྱུར། "],
        ["ད་ཆ་ཨ་རིའི་ཚོང་རའི་ནང་དུ་གླེང་གཞི་ཤུགས་ཆེར་འགྱུར་བཞིན་པའི་ Deep Seek ཞེས་རྒྱ་ནག་གི་མི་བཟོས་རིག་ནུས་མཉེན་ཆས་དེས་བོད་ནང་དུ་དེ་སྔ་ནས་དམ་དྲག་ཤུགས་ཆེ་ཡོད་པའི་ཐོག་ད་དུང་ཤུགས་ཆེ་རུ་གཏོང་སྲིད་པ་གསུངས་སོང་།"],
        ["མཉེན་ཆས་འདི་བཞིན་ཨ་རི་དང་རྒྱ་ནག་གཉིས་དབར་ཚོང་འབྲེལ་བཀག་སྡོམ་གྱི་གནད་དོན་ཁྲོད་ཨ་རིའི་མི་བཟོས་རིག་ནུས་ཀྱི་ Chips ཅིབ་སེ་མ་ལག་རྒྱ་ནག་ནང་དུ་ཚོང་འགྲེམ་བཀག་སྡོམ་བྱས་མིན་ལ་མ་ལྟོས་པར། ཚོང་འབྲེལ་བཀག་སྡོམ་གྱི་སྔོན་ཚུད་ནས་རྒྱ་ནག་གི་ཉོ་ཚོང་བྱས་པའི་ཅིབ་སེ་མ་ལག་དོན་ཐེངས་རྙིང་པའི་ཐོག་བཟོ་བསྐྲུན་བྱས་པ་དང་། ཨ་སྒོར་ཐེར་འབུམ་མང་པོའི་འགྲོ་གྲོན་ཐོག་བཟོ་བསྐྲུན་བྱས་པའི་ AI འམ་མི་བཟོས་རིག་ནུས་ཀྱི་མཉེན་ཆས་གཞན་དང་མི་འདྲ་བར་ Deep seek མཉེན་ཆས་དེ་བཞིན་ཨ་སྒོར་ས་ཡ་ ༦ ཁོ་ནའི་འགྲོ་གྲོན་ཐོག་བཟོ་བསྐྲུན་བྱས་པའི་གནད་དོན་སོགས་ཀྱི་རྐྱེན་པས་ཁ་སང་ཨ་རིའི་ཚོང་རའི་ནང་དུ་མི་བཟོས་རིག་ནུས་མཉེན་ཆས་འཕྲུལ་རིག་གི་ Chips ཅིབ་སེ་མ་ལག་བཟོ་བསྐྲུན་བྱས་མཁན་ NVidia ལྟ་བུར་ཨ་སྒོར་ཐེར་འབུམ་ ༦ མིན་ཙམ་གྱི་གྱོན་རྒུད་ཕོག་པའི་གནས་ཚུལ་བྱུང་ཡོད་འདུག"],
        ["དེ་ཡང་དེ་རིང་ BBC དང་ Reuters སོགས་རྒྱལ་སྤྱིའི་བརྒྱུད་ལམ་ཁག་གི་གནས་ཚུལ་སྤེལ་བར་གཞིགས་ན། རྒྱ་ནག་གི་ Huangzhou གྲོང་ཁྱེར་ནང་དུ་བཟོ་བསྐྲུན་བྱས་པའི་ Deep Seek མི་བཟོས་རིག་ནུས་མཉེན་ཆས་དེ་བཞིན་ ChatGPT དང་ Gemini སོགས་མི་བཟོས་རིག་ནུས་ཀྱི་མཉེན་ཆས་གཞན་དང་བསྡུར་ན་མགྱོགས་ཚད་དང་ནུས་པའི་ཆ་ནས་གཅིག་མཚུངས་ཡོད་པ་མ་ཟད། མཉེན་ཆས་དེ་ཉིད་རིན་མེད་ཡིན་པའི་ཆ་ནས་ཨ་རི་དང་ཨིན་ཡུལ། དེ་བཞིན་རྒྱ་ནག་གསུམ་གྱི་ནང་དུ་སྐུ་ཤུ་རྟགས་ཅན་འཕྲུལ་རིག་གི་ App Store གཉེན་ཆས་བངས་མཛོད་ནང་ནས་ Deep Seek དེ་ཉིད་མཉེན་ཆས་ཕབ་ལེན་མང་ཤོས་བྱས་པ་ཞིག་ཆགས་ཡོད་པ་རེད་འདུག"],
    ]

    gr.Examples(
        examples=examples,
        inputs=[input_box],
        label="Try these examples",
        examples_per_page=3
    )

    # Connect components with correct inputs
    submit_btn.click(
        fn=stream_translation,
        inputs=[input_box, chatbot],
        outputs=chatbot
    )
if __name__ == "__main__":
    demo.queue(max_size=20).launch(share=True)