Aswinthmani committed on
Commit
9487f03
·
verified ·
1 Parent(s): b720f2d

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +108 -0
  2. requirements.txt +13 -0
main.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, Form
2
+ from fastapi.responses import JSONResponse
3
+ from enum import Enum
4
+ from transformers import pipeline, MarianMTModel, MarianTokenizer
5
+ import shutil
6
+ import os
7
+ import uuid
8
+ from googletrans import Translator
9
+
10
# ASGI application instance; the @app.post / @app.get decorators further down
# register their handlers on it.
app = FastAPI()
11
+
12
# Supported target languages, exposed as a dropdown in the Swagger UI.
class LanguageEnum(str, Enum):
    # Each member's name and value are the same two-letter language code.
    # Declaration order is the order in which the choices are listed.

    # Tamil
    ta = "ta"
    # French
    fr = "fr"
    # Spanish
    es = "es"
    # German
    de = "de"
    # Italian
    it = "it"
    # Hindi
    hi = "hi"
    # Russian
    ru = "ru"
    # Chinese
    zh = "zh"
    # Arabic
    ar = "ar"
23
+
24
# Target language code -> Hugging Face translation model (English source).
# All Helsinki-NLP checkpoints share one naming pattern, so they are generated
# from the language code; insertion order matches the original literal.
model_map = {
    lang: f"Helsinki-NLP/opus-mt-en-{lang}"
    for lang in ("fr", "es", "de", "it", "hi", "ru", "zh", "ar")
}
# Tamil uses a checkpoint from a different publisher. Note that translate_text
# handles "ta" through googletrans before it ever consults this table.
model_map["ta"] = "gsarti/opus-mt-en-ta"
36
+
37
# Tokenizer/model pairs that have already been loaded, keyed by model name.
_marian_cache = {}


def _load_marian(model_name):
    """Return the ``(tokenizer, model)`` pair for *model_name*, loading it once."""
    if model_name not in _marian_cache:
        _marian_cache[model_name] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _marian_cache[model_name]


def translate_text(text, target_lang):
    """Translate *text* into *target_lang*.

    Tamil ("ta") is sent to googletrans; every other language is translated
    with the Marian model registered for it in ``model_map``.

    Args:
        text: The text to translate.
        target_lang: Language code of the desired output (e.g. "fr").

    Returns:
        The translated string. Empty or whitespace-only input is returned
        unchanged. Failures are reported best-effort as a message string
        ("Google Translate failed: ..." or "No model for language: ...")
        rather than raised, so callers always get something to display.
    """
    # Nothing to translate: skip the network round-trip / model inference.
    if not text or not text.strip():
        return text

    if target_lang == "ta":
        try:
            return Translator().translate(text, dest="ta").text
        except Exception as e:
            # Deliberate best-effort: the failure is surfaced in the result.
            return f"Google Translate failed: {str(e)}"

    if target_lang not in model_map:
        return f"No model for language: {target_lang}"

    # Loading a checkpoint is expensive, so each one is loaded once and reused
    # instead of being re-read on every call.
    tokenizer, model = _load_marian(model_map[target_lang])
    # truncation=True caps over-long input at the model's maximum length
    # (the tail is dropped) instead of feeding it more tokens than it supports.
    encoded = tokenizer([text], return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**encoded)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
55
+
56
# Text-generation pipeline, created on first use and then reused.
_text_generator = None


def _get_text_generator():
    """Return the shared distilgpt2 text-generation pipeline, building it once."""
    global _text_generator
    if _text_generator is None:
        _text_generator = pipeline("text-generation", model="distilgpt2")
    return _text_generator


# Generate an English sentence from a prompt
def generate_random_sentence(prompt="Daily conversation", max_length=30):
    """Generate English text continuing *prompt* with distilgpt2.

    Args:
        prompt: Seed text handed to the text-generation pipeline.
        max_length: Forwarded to the pipeline as its ``max_length`` argument.

    Returns:
        The ``generated_text`` of the single returned sequence, with leading
        and trailing whitespace stripped.
    """
    # Building the pipeline loads the model, so it is done once rather than
    # on every call.
    generator = _get_text_generator()
    result = generator(prompt, max_length=max_length, num_return_sequences=1)
    return result[0]["generated_text"].strip()
61
+
62
# Whisper speech-recognition pipeline shared by /transcribe and /process.
# Created on first use: loading whisper-medium per request is very costly.
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the shared Whisper ASR pipeline, building it on first use."""
    global _asr_pipeline
    if _asr_pipeline is None:
        _asr_pipeline = pipeline(
            "automatic-speech-recognition", model="openai/whisper-medium"
        )
    return _asr_pipeline


def _transcribe_upload(audio):
    """Save the uploaded audio to a temporary file and return its transcript.

    The temporary file is removed whether or not saving or transcription
    succeeds.
    """
    temp_filename = f"temp_{uuid.uuid4().hex}.wav"
    try:
        # The write happens inside the try so that a failed or partial copy
        # does not leave the temp file behind.
        with open(temp_filename, "wb") as f:
            shutil.copyfileobj(audio.file, f)
        result = _get_asr_pipeline()(temp_filename)
        return result["text"]
    finally:
        # open() itself may have failed, in which case there is nothing to remove.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)


# Transcription endpoint
@app.post("/transcribe")
async def transcribe(audio: UploadFile = File(...)):
    # Responds with {"transcribed_text": <text recognised in the upload>}.
    # NOTE(review): inference runs synchronously inside this coroutine and so
    # blocks the event loop for its duration; consider offloading to a worker.
    transcribed_text = _transcribe_upload(audio)
    return JSONResponse(content={"transcribed_text": transcribed_text})


# Translation endpoint
@app.post("/translate")
async def translate(text: str = Form(...), target_lang: LanguageEnum = Form(...)):
    # Responds with {"translated_text": <text translated into target_lang>}.
    translated = translate_text(text, target_lang.value)
    return JSONResponse(content={"translated_text": translated})


# Combined endpoint (speech-to-translation)
@app.post("/process")
async def process(audio: UploadFile = File(...), target_lang: LanguageEnum = Form(...)):
    # Transcribes the upload, then translates the transcript; responds with
    # both under "transcribed_text" and "translated_text".
    # NOTE(review): same event-loop blocking caveat as /transcribe.
    transcribed_text = _transcribe_upload(audio)
    translated_text = translate_text(transcribed_text, target_lang.value)
    return JSONResponse(content={
        "transcribed_text": transcribed_text,
        "translated_text": translated_text,
    })
98
+
99
# Generate + Translate endpoint
@app.get("/generate")
def generate(prompt: str = "Daily conversation", target_lang: LanguageEnum = LanguageEnum.it):
    # Produce an English sentence from the prompt, then translate it into the
    # requested language (Italian unless the caller says otherwise). The reply
    # echoes the prompt alongside both versions of the sentence.
    sentence = generate_random_sentence(prompt)
    response = {"prompt": prompt, "english": sentence}
    response["translated"] = translate_text(sentence, target_lang.value)
    return response
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+ googletrans==4.0.0-rc1
6
+ python-multipart
7
+ streamlit
8
+ requests
9
+ streamlit-webrtc
10
+ av
11
+ aiohttp
12
+ numpy
13
+