GoodML committed
Commit e0b3b4f · verified · 1 Parent(s): bfd0ee5

Update app.py

Files changed (1)
app.py +36 -49
app.py CHANGED
@@ -1,16 +1,14 @@
import os
import whisper
import requests
- import asyncio
- import aiohttp # For making async HTTP requests
- from quart import Quart, request, jsonify, render_template
+ from flask import Flask, request, jsonify, render_template
from dotenv import load_dotenv
from deepgram import DeepgramClient, PrerecordedOptions

import warnings
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")

- app = Quart(__name__)
+ app = Flask(__name__)
print("APP IS RUNNING, ANIKET")

# Load the .env file
@@ -26,11 +24,9 @@ DEEPGRAM_API_KEY = os.getenv("SECOND_API_KEY")
if not API_KEY:
    raise ValueError("API Key not found. Make sure it is set in the .env file.")

- # Ensure the API key is loaded correctly
if not DEEPGRAM_API_KEY:
    raise ValueError("DEEPGRAM_API_KEY not found. Make sure it is set in the .env file.")

-
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
GEMINI_API_KEY = API_KEY

@@ -41,17 +37,17 @@ GEMINI_API_KEY = API_KEY


@app.route("/", methods=["GET"])
- async def health_check():
+ def health_check():
    return jsonify({"status": "success", "message": "API is running successfully!"}), 200


@app.route("/mbsa")
- async def mbsa():
-     return await render_template("mbsa.html")
+ def mbsa():
+     return render_template("mbsa.html")


@app.route('/process-audio', methods=['POST'])
- async def process_audio():
+ def process_audio():
    print("GOT THE PROCESS AUDIO REQUEST, ANIKET")

    if 'audio' not in request.files:
@@ -63,8 +59,14 @@ async def process_audio():
    try:
        print("STARTING TRANSCRIPTION, ANIKET")

-         # Step 1: Transcribe the uploaded audio file asynchronously
-         transcription = await transcribe_audio(audio_file)
+         # Step 1: Save the audio file temporarily
+         # Save the audio file to a temporary location for processing
+         temp_audio_path = "/path/to/save/audio.wav" # Adjust this as needed
+         with open(temp_audio_path, 'wb') as f:
+             f.write(audio_file.read())
+
+         # Step 2: Transcribe the uploaded audio file synchronously
+         transcription = transcribe_audio(temp_audio_path)

        print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)

@@ -73,21 +75,21 @@ async def process_audio():

        print("GOT THE transcription")

+         # Step 3: Generate structured recipe information using Gemini API synchronously
        print("Starting the GEMINI REQUEST TO STRUCTURE IT")
-         # Step 2: Generate structured recipe information using Gemini API asynchronously
-         structured_data = await query_gemini_api(transcription)
+         structured_data = query_gemini_api(transcription)

        print("GOT THE STRUCTURED DATA", structured_data)
-         # Step 3: Return the structured data
+         # Step 4: Return the structured data
        return jsonify(structured_data)

    except Exception as e:
        return jsonify({"error": str(e)}), 500


- async def transcribe_audio(wav_file_path):
+ def transcribe_audio(wav_file_path):
    """
-     Transcribe audio from a video file using Whisper AI (async function).
+     Transcribe audio from a video file using Deepgram API synchronously.

    Args:
        wav_file_path (str): Path to save the converted WAV file.
@@ -100,14 +102,6 @@ async def transcribe_audio(wav_file_path):
        # Initialize Deepgram client
        deepgram = DeepgramClient(DEEPGRAM_API_KEY)

-         # # Convert video to audio in WAV format using FFmpeg
-         # print("Converting video to audio (WAV format)...")
-         # ffmpeg_command = [
-         #     "ffmpeg", "-i", video_file_path, "-q:a", "0", "-map", "a", wav_file_path
-         # ]
-         # subprocess.run(ffmpeg_command, check=True)
-         # print(f"Conversion successful! WAV file saved at: {wav_file_path}")
-
        # Open the converted WAV file
        with open(wav_file_path, 'rb') as buffer_data:
            payload = {'buffer': buffer_data}
@@ -142,22 +136,13 @@ async def transcribe_audio(wav_file_path):
                except KeyError as e:
                    return {"status": "error", "message": f"Error extracting transcript: {e}"}

-                 # Path to the text file
-                 # output_text_file = "deepGramNovaTranscript.txt"
-
-                 # Write the transcript to the text file
-                 # with open(output_text_file, "w", encoding="utf-8") as file:
-                 #     file.write(transcript)
-
-                 print(f"Transcript saved to: {output_text_file}")
+                 print(f"Transcript obtained: {transcript}")
                return transcript
            else:
                return {"status": "error", "message": "Invalid response from Deepgram."}

    except FileNotFoundError:
-         return {"status": "error", "message": f"Video file not found: {video_file_path}"}
-     except subprocess.CalledProcessError as e:
-         return {"status": "error", "message": f"Error during audio conversion: {e}"}
+         return {"status": "error", "message": f"Video file not found: {wav_file_path}"}
    except Exception as e:
        return {"status": "error", "message": f"Unexpected error: {e}"}
    finally:
@@ -167,9 +152,9 @@ async def transcribe_audio(wav_file_path):
            print(f"Temporary WAV file deleted: {wav_file_path}")


- async def query_gemini_api(transcription):
+ def query_gemini_api(transcription):
    """
-     Send transcription text to Gemini API and fetch structured recipe information (async function).
+     Send transcription text to Gemini API and fetch structured recipe information synchronously.
    """
    try:
        # Define the structured prompt
@@ -199,20 +184,22 @@ async def query_gemini_api(transcription):
        }
        headers = {"Content-Type": "application/json"}

-         # Send request to Gemini API asynchronously
-         async with aiohttp.ClientSession() as session:
-             async with session.post(
-                 f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
-                 json=payload,
-                 headers=headers,
-                 timeout=60 # 60 seconds timeout for the request
-             ) as response:
-                 response.raise_for_status() # Raise error if response code is not 200
-                 data = await response.json()
+         # Send request to Gemini API synchronously
+         response = requests.post(
+             f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
+             json=payload,
+             headers=headers,
+             timeout=60 # 60 seconds timeout for the request
+         )
+
+         # Raise error if response code is not 200
+         response.raise_for_status()
+
+         data = response.json()

        return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")

-     except aiohttp.ClientError as e:
+     except requests.exceptions.RequestException as e:
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}
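For reference, a minimal client sketch for exercising the converted synchronous /process-audio route. It assumes the Flask app is running locally at http://localhost:5000 and that a local file named sample.wav exists; neither the host/port nor the file name is part of this commit.

import requests

# Hypothetical client for the /process-audio endpoint; the base URL and
# sample.wav are assumptions, not part of the committed code.
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/process-audio",
        files={"audio": f},  # the route reads the upload from request.files['audio']
        timeout=120,
    )

resp.raise_for_status()
print(resp.json())  # structured recipe data produced by query_gemini_api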