AyushS9020 commited on
Commit
0f7e22b
·
verified ·
1 Parent(s): 6d68b80

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +22 -0
  2. __pycache__/app.cpython-39.pyc +0 -0
  3. app.py +114 -0
  4. docker-compose.yml +26 -0
  5. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Install native libraries required by PyTorch and torchaudio
RUN apt-get update && apt-get install -y \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Set the working directory in the container
WORKDIR /app

# Copy only the requirements first so the (slow) dependency install layer
# is cached and not rebuilt every time application code changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container at /app
COPY . /app

# Expose port 8000 to the outside world (matches uvicorn.run in app.py)
EXPOSE 8000

# Run the FastAPI app using python app.py
CMD ["python", "app.py"]
__pycache__/app.cpython-39.pyc ADDED
Binary file (3.4 kB). View file
 
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import asyncio
4
+ from fastapi import FastAPI, File, UploadFile, Response
5
+ import uvicorn
6
+ from groq import Groq
7
+ from transformers import VitsModel, AutoTokenizer
8
+ import torch
9
+ import torchaudio
10
+ from io import BytesIO
11
+
12
# Preload the TTS model and tokenizer once at import time so individual
# requests don't pay the model-loading cost.
tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")

# Ensure the models are using GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tts_model = tts_model.to(device)

# Read the Groq API key from the environment instead of hard-coding a live
# secret in source control (docker-compose already supplies API_KEY).
# If API_KEY is unset, passing None lets the Groq client fall back to its
# own GROQ_API_KEY environment lookup.
api_key = os.environ.get("API_KEY")
chat_model = "llama3-8b-8192"
client = Groq(api_key=api_key)

# Initialize FastAPI app
app = FastAPI()
27
+
28
# Convert audio to text using Groq's transcription API
async def audio_to_text(file: UploadFile):
    """Read the uploaded file into memory and return its Whisper transcription."""
    raw_bytes = await file.read()
    result = client.audio.transcriptions.create(
        file=(file.filename, raw_bytes),
        model="whisper-large-v3",  # The Whisper model for transcription
        prompt="Specify context or spelling",  # Optional: customize transcription context
        response_format="json",
        language="en",
        temperature=0.0,
    )
    return result.text
40
+
41
# Ask the Groq chat API for a reply to the transcribed user message
async def get_chat_response(api_key, model, user_message, temperature=0.5, max_tokens=258, top_p=1, stop=None):
    """Return the assistant's reply text for *user_message* from the Groq chat API."""
    groq_client = Groq(api_key=api_key)

    # System message keeps replies short and TTS-friendly (no digit-form numbers).
    system_prompt = "You are a virtual human assistant in an AR and VR environment. Your responses should be short, concise, and suitable for text-to-speech conversion. Avoid numbers in digit form."

    completion = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stop=stop,
        stream=False,
    )

    return completion.choices[0].message.content
60
+
61
# Convert text to speech using the Vits TTS model
async def text_to_speech(text, filename="output.wav"):
    """Synthesize *text* into an in-memory WAV stream.

    Args:
        text: Text to synthesize; must be non-empty.
        filename: Unused; kept only for backward compatibility with callers.

    Returns:
        BytesIO positioned at offset 0, containing WAV-encoded audio.

    Raises:
        ValueError: If *text* is empty or whitespace-only.
    """
    if not text or text.strip() == "":
        raise ValueError("Input text is empty or invalid")

    # Tokenize the input text and move tensors to the model's device
    inputs = tokenizer(text, return_tensors="pt")
    inputs['input_ids'] = inputs['input_ids'].to(torch.long)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate waveform from text (inference only, no gradients)
    with torch.no_grad():
        waveform = tts_model(**inputs).waveform

    # Encode straight into an in-memory buffer. The original wrote a
    # NamedTemporaryFile(delete=False) and removed it manually, which leaked
    # the file on disk if torchaudio.save raised; torchaudio.save accepts a
    # file-like object when `format` is given, so no temp file is needed.
    audio_buffer = BytesIO()
    torchaudio.save(
        audio_buffer,
        waveform.cpu(),
        sample_rate=tts_model.config.sampling_rate,
        format="wav",
    )

    audio_buffer.seek(0)  # Rewind the buffer so callers read from the start
    return audio_buffer
90
+
91
# Main API endpoint: audio upload -> transcription -> chat reply -> speech
@app.post("/processaudio")
async def process_audio(audio_file: UploadFile = File(...)):
    """Run the full pipeline: transcribe the upload, ask Groq for a reply, speak it back."""
    # Speech-to-text on the uploaded audio
    user_message = await audio_to_text(audio_file)

    # Chat reply for the transcribed message
    response_text = await get_chat_response(api_key, chat_model, user_message)

    # Bail out early if the model produced nothing to speak
    if not response_text:
        return Response(content="Error: Generated response text is empty or invalid.", media_type="text/plain")

    # Text-to-speech on the chat reply
    audio_output = await text_to_speech(response_text)

    # Ship the synthesized speech back as a downloadable WAV attachment
    wav_bytes = audio_output.read()
    headers = {"Content-Disposition": "attachment; filename=response.wav"}
    return Response(content=wav_bytes, media_type="audio/wav", headers=headers)
111
+
112
# Start the Uvicorn server for FastAPI when executed as a script
if __name__ == "__main__":
    # Bind on all interfaces at port 8000 (matches the Dockerfile's EXPOSE)
    bind_host, bind_port = "0.0.0.0", 8000
    uvicorn.run(app, host=bind_host, port=bind_port)
docker-compose.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
version: "3.8"

services:
  fastapi-audio-app:
    build: .
    container_name: fastapi-audio-app
    ports:
      - "8000:8000" # Map container's port 8000 to host's port 8000
    volumes:
      - .:/app # Mount the current directory to the container's /app folder
    environment:
      # Read the Groq key from the host environment (or a .env file) via
      # compose variable substitution instead of committing a live secret.
      - API_KEY=${API_KEY}
    depends_on:
      - redis
    command: ["python", "app.py"] # Use `python app.py` to start the FastAPI app

  redis:
    image: "redis:alpine"
    container_name: redis
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data

volumes:
  redis-data:
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ speechrecognition
4
+ groq
5
+ transformers
6
+ torch
7
+ torchaudio
8
+ python-multipart