nurfarah57 committed on
Commit
00903db
·
verified ·
1 Parent(s): aa1c4c9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io
import os

# Use /tmp for the Hugging Face cache to avoid permission errors.
# The Hugging Face libraries resolve their cache location from HF_HOME when
# they are first imported, so this assignment has to run BEFORE transformers
# is imported; setting it afterwards does not redirect the cache.
os.environ["HF_HOME"] = "/tmp"

import torch  # noqa: E402
from fastapi import FastAPI  # noqa: E402
from fastapi.responses import StreamingResponse  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import AutoProcessor, VitsForConditionalGeneration  # noqa: E402
11
+
12
# ASGI application object that the route decorators attach to.
app = FastAPI()

# Hugging Face Hub id of the checkpoint served by this app.
model_name = "Somali-tts/somali_tts_model"

# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

# Processor and model are loaded a single time at import, not per request.
# NOTE(review): confirm that the installed transformers release exports
# VitsForConditionalGeneration and that this checkpoint matches it; the
# transformers VITS documentation lists the model class as VitsModel.
processor = AutoProcessor.from_pretrained(model_name)
model = VitsForConditionalGeneration.from_pretrained(model_name)
model.to(device)
20
+
21
# Request body accepted by POST /synthesize.
class TextInput(BaseModel):
    # Text that should be converted to speech.
    inputs: str
23
+
24
@app.post("/synthesize")
async def synthesize_tts(data: TextInput):
    """Synthesize speech for the submitted text and return it as WAV audio.

    Args:
        data: Request body; its ``inputs`` field holds the text to speak.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` that streams an
        in-memory WAV file containing the generated waveform.

    Raises:
        HTTPException: With status 400 when ``inputs`` is empty or contains
            only whitespace.
    """
    # Function-scope imports, following the original's local soundfile import.
    import soundfile as sf
    from fastapi import HTTPException

    # Reject blank text up front instead of handing the model an empty
    # sequence and surfacing whatever error it raises as a 500.
    if not data.inputs.strip():
        raise HTTPException(status_code=400, detail="'inputs' must not be empty")

    inputs = processor(data.inputs, return_tensors="pt").to(device)
    with torch.no_grad():
        audio = model.generate(**inputs)
        audio = audio.squeeze().cpu().numpy()

    # Write the WAV header with the rate the model was configured for; a
    # mismatched rate plays back at the wrong speed and pitch. Fall back to
    # the previous hard-coded 22050 Hz when the config does not declare one.
    sample_rate = getattr(model.config, "sampling_rate", None) or 22050

    # Convert to WAV bytes in-memory.
    buf = io.BytesIO()
    sf.write(buf, audio, samplerate=sample_rate, format="WAV")
    buf.seek(0)

    return StreamingResponse(buf, media_type="audio/wav")