Spaces:

alakxender
/

dv-gf-textgen

Sleeping

File size: 2,878 Bytes

cb39169
76fc39a
cb39169
4fb5fb4
cb39169
76fc39a
32dcc58
cb39169
ededc7b
 
32dcc58
cb39169
ededc7b
 
 
 
e17ea47
cb39169
 
 
ededc7b
cb39169
e17ea47
cb39169
 
4fb5fb4
cb39169
 
 
 
 
 
76fc39a
 
 
cb39169
76fc39a
cb39169
76fc39a
cb39169
 
 
76fc39a

# Import necessary libraries
import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline


# Goldfish checkpoint used by this demo (Dhivehi) and the local download cache
model_name = 'div_thaa_full'
HF_CACHE = '.hf_cache'

# Build the Hub repository id, then fetch config, tokenizer and weights
# through the same cache directory
goldfish_model = f'goldfish-models/{model_name}'
config = AutoConfig.from_pretrained(goldfish_model, cache_dir=HF_CACHE)
tokenizer = AutoTokenizer.from_pretrained(goldfish_model, cache_dir=HF_CACHE)
model = AutoModelForCausalLM.from_pretrained(
    goldfish_model,
    config=config,
    cache_dir=HF_CACHE,
)

# Decide once where inference runs: CUDA device 0 when a GPU is visible,
# otherwise the pipeline is given device -1
if torch.cuda.is_available():
    model = model.cuda()
    pipeline_device = 0
else:
    pipeline_device = -1


# Text-generation pipeline shared by every request
text_generator = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device=pipeline_device,
)

# Function to generate text
@spaces.GPU
def generate_text(input_text):
    """Continue a prompt using the module-level text-generation pipeline.

    Args:
        input_text: Prompt string supplied by the UI (typed or from an example).

    Returns:
        The ``generated_text`` field of the first pipeline result, or an
        empty string when the prompt is missing or contains only whitespace.
    """
    # Special tokens are disabled below, so a blank prompt is expected to
    # encode to no tokens at all and give the model nothing to continue
    # from. Return early instead of letting that surface as a UI error.
    if not input_text or not input_text.strip():
        return ""

    # Sampling is enabled, so the same prompt may yield different text
    # on different calls.
    output = text_generator(
        input_text,
        max_new_tokens=25,
        add_special_tokens=False,
        do_sample=True,
    )
    return output[0]['generated_text']

# Create Gradio interface

# Stylesheet for elements tagged "thaana-textbox": larger type, a stack of
# Thaana-capable fonts, roomier line spacing and right-to-left direction.
# Written as explicit pieces so every newline in the CSS is visible.
custom_css = (
    "\n"
    ".thaana-textbox textarea {\n"
    "    font-size: 18px !important;\n"
    "    font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma', 'Noto Sans Thaana', 'MV Boli' !important;\n"
    "    line-height: 1.8 !important;\n"
    "    direction: rtl !important;\n"
    "}\n"
)

# Sample prompts offered in the UI; each one becomes a single-input row
_example_prompts = (
    "ދިވެހިރާއްޖެ",
    "އެމެރިކާ އިންތިޚާބު",
    "ސަލާމް",
    "ދުނިޔޭގެ ސިއްޙަތު ޖަމްޢިއްޔާ",
    "ޤަދީމީ ސަގާފަތް",
    "ޑިމޮކްރަސީ",
)
examples = [[prompt] for prompt in _example_prompts]

# Build the Gradio page: heading, prompt/result boxes, a trigger button,
# clickable examples and a short note about the model.
def _thaana_textbox(label):
    """Return a two-line, right-to-left textbox carrying the Thaana CSS class."""
    return gr.Textbox(
        label=label,
        lines=2,
        elem_classes=["thaana-textbox"],
        rtl=True,
    )


demo = gr.Blocks(css=custom_css)
with demo:
    gr.Markdown("# Demo Dhivehi Text Generator")
    gr.Markdown("Generate text in Dhivehi language. This model is trained to generate coherent text based on the input prompt.")

    # Prompt on one side, generated continuation on the other
    with gr.Row():
        input_text = _thaana_textbox("Enter Dhivehi Text")
        output_text = _thaana_textbox("Generated Text")

    # Clicking the button sends the prompt through generate_text
    generate_btn = gr.Button("Generate")
    generate_btn.click(generate_text, input_text, output_text)

    # Example prompts; caching is requested for their outputs
    gr.Examples(
        examples=examples,
        fn=generate_text,
        inputs=input_text,
        outputs=output_text,
        cache_examples=True,
    )

    # Footer text, spelled out piecewise so the trailing spaces stay intact
    model_info = (
        "\n"
        "    ### Model Information\n"
        "    Model: Goldfish is a suite of monolingual language models trained for 350 languages. \n"
        "    This model is the Dhivehi (Thaana script). For more details, visit the \n"
        "    [Goldfish Models GitHub repository](https://github.com/tylerachang/goldfish).\n"
        "    "
    )
    gr.Markdown(model_info)

# Start the server only when executed as a script
if __name__ == "__main__":
    demo.launch()