|
from fastapi import FastAPI, HTTPException |
|
from pydantic import BaseModel |
|
from typing import List, Tuple |
|
from huggingface_hub import InferenceClient |
|
import edge_tts |
|
import tempfile |
|
import asyncio |
|
import os |
|
from fastapi.responses import FileResponse |
|
from groq import Groq |
|
|
|
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI()

# The Groq API key is taken from the environment so the secret never lives in
# source control. A missing variable fails fast at import time with a KeyError
# naming the variable, rather than as an authentication error on first use.
# NOTE(review): the key that used to be committed on this line must be treated
# as leaked and rotated in the Groq console.
client = Groq(
    api_key=os.environ["GROQ_API_KEY"],
)
|
|
|
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* with edge-tts into temporary audio and subtitle files.

    Args:
        text: The text to speak.
        voice: Voice label; only the part before the first ``" - "`` is
            passed to edge-tts as the voice name.
        rate: Integer speaking-rate offset, rendered as a signed percentage
            (for example ``+0%``).
        pitch: Integer pitch offset, rendered as a signed value in Hz.

    Returns:
        A 3-tuple ``(audio_path, subtitle_path, warning)``. ``audio_path`` is
        the ``.mp3`` temp file holding the streamed audio, ``subtitle_path``
        is the ``.txt`` temp file holding the output of
        ``SubMaker.generate_subs()``, and ``warning`` is always ``None``.

    Both files are created with ``delete=False``, so on success the caller
    owns them and is responsible for removing them. If synthesis fails or is
    cancelled, any file created so far is removed before the error propagates.
    """
    voice_short_name = voice.split(" - ")[0]
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"
    communicate = edge_tts.Communicate(
        text, voice_short_name, rate=rate_str, pitch=pitch_str
    )
    submaker = edge_tts.SubMaker()

    # Paths registered here are deleted if anything below raises.
    created_paths = []
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
            tmp_path = audio_file.name
            created_paths.append(tmp_path)

            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_file.write(chunk["data"])
                elif chunk["type"] == "WordBoundary":
                    submaker.create_sub(
                        (chunk["offset"], chunk["duration"]), chunk["text"]
                    )

        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".txt", mode='w+', encoding='utf-8'
        ) as subtitle_file:
            tmp_vtt_path = subtitle_file.name
            created_paths.append(tmp_vtt_path)
            subtitle_file.write(submaker.generate_subs())
    except BaseException:
        # BaseException so that task cancellation also triggers cleanup.
        # Both ``with`` blocks have exited by now, so the files are closed.
        for leftover in created_paths:
            try:
                os.unlink(leftover)
            except OSError:
                pass  # best effort: never mask the original error
        raise

    return tmp_path, tmp_vtt_path, None
|
|
|
def tts_interface(text, voice, rate, pitch):
    """Run ``text_to_speech`` to completion from synchronous code.

    Drives the coroutine with ``asyncio.run`` and hands back its three
    results unchanged as ``(audio, vtt, warning)``.
    """
    synthesis = text_to_speech(text, voice, rate, pitch)
    audio_path, subtitle_path, warning = asyncio.run(synthesis)
    return audio_path, subtitle_path, warning
|
|
|
@app.get("/")
def greet_json():
    """Serve a fixed greeting payload at the root path."""
    greeting = {"Hello": "World!"}
    return greeting
|
|
|
|
|
|
|
class ChatRequest(BaseModel):
    """Request body for the ``/chat`` endpoint."""

    # Newest user message; sent as the final "user" entry of the conversation.
    message: str

    # Earlier turns as (user_text, assistant_text) pairs. An empty string in
    # either position is skipped when the conversation is assembled.
    history: List[Tuple[str, str]] = []

    # Content of the leading "system" message.
    system_message: str

    # Forwarded unchanged as the completion call's ``max_tokens``.
    max_tokens: int = 512

    # Forwarded unchanged as the completion call's ``temperature``.
    temperature: float = 0.7

    # Forwarded unchanged as the completion call's ``top_p``.
    top_p: float = 0.95
|
|
|
|
|
def _resolve_temp_file(path, suffix):
    """Return the real path of *path* if it may be served, else raise 404.

    A path is served only when, after symlink resolution, it lies inside the
    system temp directory, ends with *suffix*, and names an existing regular
    file. Without this check the endpoints below would hand any file readable
    by the server process to whoever asks for it.
    """
    temp_root = os.path.realpath(tempfile.gettempdir())
    try:
        resolved = os.path.realpath(path)
        allowed = (
            os.path.commonpath([temp_root, resolved]) == temp_root
            and resolved.endswith(suffix)
            and os.path.isfile(resolved)
        )
    except ValueError:
        # Raised for embedded NUL bytes, or for paths on a different drive
        # than the temp directory on Windows.
        allowed = False

    if not allowed:
        # 404 rather than 403 so the response does not confirm that a file
        # outside the allowed area exists.
        raise HTTPException(status_code=404, detail="File not found")
    return resolved


@app.get("/file/")
def file(path: str):
    """Send a generated ``.mp3`` temp file as an ``audio.mp3`` download.

    Args:
        path: Location of the audio file, passed as a query parameter.

    Raises:
        HTTPException: 404 when *path* is not an existing ``.mp3`` file
            inside the system temp directory.
    """
    audio_path = _resolve_temp_file(path, ".mp3")
    return FileResponse(audio_path, media_type="audio/mpeg", filename="audio.mp3")


@app.get("/file-vtt/")
def fileVtt(path: str):
    """Send a generated ``.txt`` subtitle temp file.

    Args:
        path: Location of the subtitle file, passed as a query parameter.

    Raises:
        HTTPException: 404 when *path* is not an existing ``.txt`` file
            inside the system temp directory.
    """
    subtitle_path = _resolve_temp_file(path, ".txt")
    return FileResponse(subtitle_path)
|
|
|
|
|
@app.post("/chat")
def chat(request: ChatRequest):
    """Generate a chat reply and synthesize it to speech.

    Builds the conversation from the system message, the non-empty history
    turns and the new user message, asks the Groq client for a completion,
    strips ``**`` markers from the reply, and passes the result to
    ``tts_interface``.

    Returns:
        A dict with ``"text"`` (the reply with ``**`` removed), ``"audio"``
        (path of the generated audio file) and ``"vtt"`` (path of the
        generated subtitle file).

    Raises:
        HTTPException: 500 when the completion or speech synthesis fails, or
            when synthesis reports an audio path that does not exist.
    """
    messages = [{"role": "system", "content": request.system_message}]

    for user_turn, assistant_turn in request.history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": request.message})

    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=messages,
            max_tokens=request.max_tokens,
            stream=False,
            stop=None,
            temperature=request.temperature,
            top_p=request.top_p,
        )

        # Clean the reply once so the spoken text and the returned text are
        # guaranteed to match.
        reply_text = response.choices[0].message.content.replace('**', '')

        audio_path, vtt_path, _warning = tts_interface(
            reply_text, 'en-GB-MaisieNeural - en-GB (Female)', 0, 0
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Checked outside the try block so this HTTPException is not caught and
    # re-wrapped. Previously a missing file fell through and returned null
    # with status 200.
    if not os.path.exists(audio_path):
        raise HTTPException(
            status_code=500,
            detail="Speech synthesis did not produce an audio file",
        )

    return {
        "text": reply_text,
        "audio": audio_path,
        "vtt": vtt_path,
    }