Spaces:

darshankr
/

trans-en-indic

Runtime error

App Files Files Community

darshankr commited on Oct 27, 2024

Commit

2b53f2b

verified ·

1 Parent(s): b864750

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -74

app.py CHANGED Viewed

@@ -1,84 +1,52 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import List
 import torch
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from IndicTransToolkit import IndicProcessor
-# Initialize FastAPI app
-app = FastAPI(
-    title="Indic Translation API",
-    description="API for translating text between English and Indic languages",
-    version="1.0.0"
 )
-# Define request body model
 class InputData(BaseModel):
     sentences: List[str]
     target_lang: str
-    class Config:
-        schema_extra = {
-            "example": {
-                "sentences": ["Hello, how are you?", "What is your name?"],
-                "target_lang": "hin_Deva"
-            }
-        }
-# Initialize models and processors
-try:
-    model = AutoModelForSeq2SeqLM.from_pretrained(
-        "ai4bharat/indictrans2-en-indic-1B",
-        trust_remote_code=True
-    )
-    tokenizer = AutoTokenizer.from_pretrained(
-        "ai4bharat/indictrans2-en-indic-1B",
-        trust_remote_code=True
-    )
-    ip = IndicProcessor(inference=True)
-    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-    model = model.to(DEVICE)
-except Exception as e:
-    raise RuntimeError(f"Failed to load models: {str(e)}")
-@app.get("/")
-async def root():
-    """Root endpoint returning API information"""
-    return {
-        "message": "Welcome to the Indic Translation API",
-        "status": "active",
-        "supported_languages": [
-            "hin_Deva",  # Hindi
-            "ben_Beng",  # Bengali
-            "tam_Taml",  # Tamil
-            # Add other supported languages here
-        ]
-    }
 @app.post("/translate/")
 async def translate(input_data: InputData):
-    """
-    Translate text from English to specified Indic language
-    Args:
-        input_data: InputData object containing sentences and target language
-    Returns:
-        Dictionary containing translated text
-    """
     try:
-        # Source language is always English
         src_lang = "eng_Latn"
         tgt_lang = input_data.target_lang
-        # Preprocess the input sentences
         batch = ip.preprocess_batch(
             input_data.sentences,
             src_lang=src_lang,
             tgt_lang=tgt_lang
         )
-        # Tokenize the sentences
         inputs = tokenizer(
             batch,
             truncation=True,
@@ -86,8 +54,7 @@ async def translate(input_data: InputData):
             return_tensors="pt",
             return_attention_mask=True
         ).to(DEVICE)
-        # Generate translations
         with torch.no_grad():
             generated_tokens = model.generate(
                 **inputs,
@@ -97,32 +64,77 @@ async def translate(input_data: InputData):
                 num_beams=5,
                 num_return_sequences=1
             )
-        # Decode the generated tokens
         with tokenizer.as_target_tokenizer():
             generated_tokens = tokenizer.batch_decode(
                 generated_tokens.detach().cpu().tolist(),
                 skip_special_tokens=True,
                 clean_up_tokenization_spaces=True
             )
-        # Postprocess the translations
         translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
         return {
             "translations": translations,
             "source_language": src_lang,
             "target_language": tgt_lang
         }
     except Exception as e:
-        raise HTTPException(
-            status_code=500,
-            detail=f"Translation error: {str(e)}"
-        )
-# Add health check endpoint
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {"status": "healthy"}

+# app.py
+import streamlit as st
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import List
 import torch
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from IndicTransToolkit import IndicProcessor
+import uvicorn
+import nest_asyncio
+import threading
+# Initialize FastAPI
+app = FastAPI()
+# Initialize models and processors
+model = AutoModelForSeq2SeqLM.from_pretrained(
+    "ai4bharat/indictrans2-en-indic-1B",
+    trust_remote_code=True
+)
+tokenizer = AutoTokenizer.from_pretrained(
+    "ai4bharat/indictrans2-en-indic-1B",
+    trust_remote_code=True
 )
+ip = IndicProcessor(inference=True)
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(DEVICE)
 class InputData(BaseModel):
     sentences: List[str]
     target_lang: str
+# FastAPI endpoints
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy"}
 @app.post("/translate/")
 async def translate(input_data: InputData):
     try:
         src_lang = "eng_Latn"
         tgt_lang = input_data.target_lang
         batch = ip.preprocess_batch(
             input_data.sentences,
             src_lang=src_lang,
             tgt_lang=tgt_lang
         )
         inputs = tokenizer(
             batch,
             truncation=True,
             return_tensors="pt",
             return_attention_mask=True
         ).to(DEVICE)
         with torch.no_grad():
             generated_tokens = model.generate(
                 **inputs,
                 num_beams=5,
                 num_return_sequences=1
             )
         with tokenizer.as_target_tokenizer():
             generated_tokens = tokenizer.batch_decode(
                 generated_tokens.detach().cpu().tolist(),
                 skip_special_tokens=True,
                 clean_up_tokenization_spaces=True
             )
         translations = ip.postprocess_batch(generated_tokens, lang=tgt_lang)
         return {
             "translations": translations,
             "source_language": src_lang,
             "target_language": tgt_lang
         }
     except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# Streamlit interface
+def main():
+    st.title("Indic Language Translator")
+    # Input text
+    text_input = st.text_area("Enter text to translate:", "Hello, how are you?")
+    # Language selection
+    target_languages = {
+        "Hindi": "hin_Deva",
+        "Bengali": "ben_Beng",
+        "Tamil": "tam_Taml",
+        "Telugu": "tel_Telu",
+        "Marathi": "mar_Deva",
+        "Gujarati": "guj_Gujr",
+        "Kannada": "kan_Knda",
+        "Malayalam": "mal_Mlym",
+        "Punjabi": "pan_Guru",
+        "Odia": "ori_Orya"
+    }
+    target_lang = st.selectbox(
+        "Select target language:",
+        options=list(target_languages.keys())
+    )
+    if st.button("Translate"):
+        try:
+            # Prepare input data
+            input_data = InputData(
+                sentences=[text_input],
+                target_lang=target_languages[target_lang]
+            )
+            # Call translation function directly
+            result = translate(input_data)
+            # Display result
+            st.success("Translation:")
+            st.write(result["translations"][0])
+        except Exception as e:
+            st.error(f"Translation failed: {str(e)}")
+def run_fastapi():
+    nest_asyncio.apply()
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+if __name__ == "__main__":
+    # Start FastAPI in a separate thread
+    api_thread = threading.Thread(target=run_fastapi, daemon=True)
+    api_thread.start()
+    # Run Streamlit interface
+    main()