GoodML committed on
Commit
d509284
·
verified ·
1 Parent(s): 9893fb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -16
app.py CHANGED
@@ -5,6 +5,8 @@ import asyncio
5
  import aiohttp # For making async HTTP requests
6
  from quart import Quart, request, jsonify, render_template
7
  from dotenv import load_dotenv
 
 
8
  import warnings
9
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
10
 
@@ -18,11 +20,17 @@ print("ENV LOADED, ANIKET")
18
 
19
  # Fetch the API key from the .env file
20
  API_KEY = os.getenv("FIRST_API_KEY")
 
21
 
22
  # Ensure the API key is loaded correctly
23
  if not API_KEY:
24
  raise ValueError("API Key not found. Make sure it is set in the .env file.")
25
 
 
 
 
 
 
26
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
27
  GEMINI_API_KEY = API_KEY
28
 
@@ -77,26 +85,96 @@ async def process_audio():
77
  return jsonify({"error": str(e)}), 500
78
 
79
 
80
- async def transcribe_audio(audio_file):
81
- """
82
- Transcribe audio using Whisper AI (async function).
83
- """
84
- print("CAME IN THE transcribe audio function")
85
- try:
86
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
87
- audio_file.save(temp_audio_file.name)
88
- print(f"Temporary audio file saved: {temp_audio_file.name}")
89
 
90
- # Run Whisper transcription asynchronously
91
- loop = asyncio.get_event_loop()
92
- result = await loop.run_in_executor(None, whisper_model.transcribe, temp_audio_file.name)
93
- print("THE RESULTS ARE", result)
94
 
95
- return result.get("text", "").strip()
 
 
 
 
 
 
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  except Exception as e:
98
- print(f"Error in transcription: {e}")
99
- return None
 
 
 
 
100
 
101
 
102
  async def query_gemini_api(transcription):
 
5
  import aiohttp # For making async HTTP requests
6
  from quart import Quart, request, jsonify, render_template
7
  from dotenv import load_dotenv
8
+ from deepgram import DeepgramClient, PrerecordedOptions
9
+
10
  import warnings
11
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
12
 
 
20
 
21
  # Fetch the API key from the .env file
22
  API_KEY = os.getenv("FIRST_API_KEY")
23
+ DEEPGRAM_API_KEY = os.getenv("SECOND_API_KEY")
24
 
25
  # Ensure the API key is loaded correctly
26
  if not API_KEY:
27
  raise ValueError("API Key not found. Make sure it is set in the .env file.")
28
 
29
+ # Ensure the API key is loaded correctly
30
+ if not DEEPGRAM_API_KEY:
31
+ raise ValueError("DEEPGRAM_API_KEY not found. Make sure it is set in the .env file.")
32
+
33
+
34
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
35
  GEMINI_API_KEY = API_KEY
36
 
 
85
  return jsonify({"error": str(e)}), 500
86
 
87
 
88
+ import subprocess
89
+ import os
90
+ import json
91
+ from deepgram.clients import DeepgramClient
92
+ from deepgram.options import PrerecordedOptions
 
 
 
 
93
 
94
# NOTE: DEEPGRAM_API_KEY is read from the environment (SECOND_API_KEY) and
# validated near the top of this file. It must not be reassigned here: a
# hard-coded placeholder would silently override the real key and make every
# Deepgram request fail authentication.


async def transcribe_audio(video_file_path, wav_file_path):
    """
    Transcribe the audio track of a video file using Deepgram (model "nova-2").

    The video is first converted to an audio file with FFmpeg, the audio is
    sent to Deepgram's prerecorded transcription endpoint, and the resulting
    transcript is written to "deepGramNovaTranscript.txt" in the current
    working directory. The intermediate WAV file is always deleted before
    returning.

    Blocking work (the FFmpeg subprocess and the synchronous Deepgram call)
    runs in the default executor so the event loop stays responsive.

    Args:
        video_file_path (str): Path to the input video file.
        wav_file_path (str): Path to save the converted WAV file. Treated as
            temporary: overwritten if it exists and removed on exit.

    Returns:
        dict: On success, {"status": "success", "transcript": ...,
        "file_path": ...}. On failure, {"status": "error", "message": ...}.
        This function does not raise; every failure is reported in the dict.
    """
    print("Entered the transcribe_audio function")
    loop = asyncio.get_running_loop()
    try:
        # Fail fast with an accurate message. FFmpeg itself reports a missing
        # input as a non-zero exit code, not as FileNotFoundError.
        if not os.path.exists(video_file_path):
            return {"status": "error", "message": f"Video file not found: {video_file_path}"}

        # Initialize Deepgram client
        deepgram = DeepgramClient(DEEPGRAM_API_KEY)

        # Convert video to audio in WAV format using FFmpeg.
        # "-y" overwrites an existing output instead of prompting on stdin,
        # which would otherwise hang the request forever.
        print("Converting video to audio (WAV format)...")
        ffmpeg_command = [
            "ffmpeg", "-y", "-i", video_file_path, "-q:a", "0", "-map", "a", wav_file_path
        ]
        await loop.run_in_executor(
            None, lambda: subprocess.run(ffmpeg_command, check=True)
        )
        print(f"Conversion successful! WAV file saved at: {wav_file_path}")

        # Configure transcription options
        options = PrerecordedOptions(
            smart_format=True, model="nova-2", language="en-US"
        )

        # Keep the file open for the whole upload; the SDK reads from it
        # while the request is being sent.
        with open(wav_file_path, "rb") as buffer_data:
            payload = {"buffer": buffer_data}
            response = await loop.run_in_executor(
                None,
                lambda: deepgram.listen.prerecorded.v("1").transcribe_file(payload, options),
            )

        # Check if the response is valid
        if not response:
            return {"status": "error", "message": "Invalid response from Deepgram."}
        print("Request successful! Processing response.")

        # Convert response to JSON string
        try:
            data_str = response.to_json(indent=4)
        except AttributeError as e:
            return {"status": "error", "message": f"Error converting response to JSON: {e}"}

        # Parse the JSON string to a Python dictionary
        try:
            data = json.loads(data_str)
        except json.JSONDecodeError as e:
            return {"status": "error", "message": f"Error parsing JSON string: {e}"}

        # Extract the transcript. Empty "channels"/"alternatives" lists raise
        # IndexError and a null section raises TypeError, so handle those
        # alongside a missing key.
        try:
            transcript = data["results"]["channels"][0]["alternatives"][0]["transcript"]
        except (KeyError, IndexError, TypeError) as e:
            return {"status": "error", "message": f"Error extracting transcript: {e}"}

        # Write the transcript to the text file.
        # NOTE(review): fixed file name in the working directory; concurrent
        # requests will overwrite each other's output.
        output_text_file = "deepGramNovaTranscript.txt"
        with open(output_text_file, "w", encoding="utf-8") as file:
            file.write(transcript)

        print(f"Transcript saved to: {output_text_file}")
        return {"status": "success", "transcript": transcript, "file_path": output_text_file}

    except FileNotFoundError as e:
        # The input video was checked above, so this is either a missing
        # ffmpeg executable or a WAV file that was never produced.
        return {
            "status": "error",
            "message": f"ffmpeg executable or converted WAV file not found: {e}",
        }
    except subprocess.CalledProcessError as e:
        return {"status": "error", "message": f"Error during audio conversion: {e}"}
    except Exception as e:
        # Boundary handler: callers expect a status dict, never an exception.
        return {"status": "error", "message": f"Unexpected error: {e}"}
    finally:
        # Clean up the temporary WAV file
        if os.path.exists(wav_file_path):
            os.remove(wav_file_path)
            print(f"Temporary WAV file deleted: {wav_file_path}")
178
 
179
 
180
  async def query_gemini_api(transcription):