GoodML committed on
Commit
a0fefdf
·
verified ·
1 Parent(s): 22596d6
Files changed (1) hide show
  1. app.py +25 -6
app.py CHANGED
@@ -160,12 +160,14 @@ from flask import Flask, request, jsonify, render_template
160
  import tempfile
161
 
162
  app = Flask(__name__)
163
-
164
  # Gemini API settings
165
  from dotenv import load_dotenv
166
  # Load the .env file
167
  load_dotenv()
168
 
 
 
169
  # Fetch the API key from the .env file
170
  API_KEY = os.getenv("FIRST_API_KEY")
171
 
@@ -176,10 +178,11 @@ if not API_KEY:
176
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
177
  GEMINI_API_KEY = API_KEY
178
 
 
179
  # Load Whisper AI model at startup
180
- print("Loading Whisper AI model...")
181
  whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
182
- print("Whisper AI model loaded successfully.")
183
 
184
 
185
  # Define the "/" endpoint for health check
@@ -193,38 +196,50 @@ def mbsa():
193
 
194
  @app.route('/process-video', methods=['POST'])
195
  def process_video():
 
196
  """
197
  Flask endpoint to process video:
198
  1. Extract audio and transcribe using Whisper AI.
199
  2. Send transcription to Gemini API for recipe information extraction.
200
  3. Return structured data in the response.
201
  """
 
202
  if 'video' not in request.files:
203
  return jsonify({"error": "No video file provided"}), 400
204
 
205
  video_file = request.files['video']
206
-
207
  try:
 
208
  # Step 1: Save video to a temporary file
209
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
210
  video_file.save(temp_video_file.name)
211
  print(f"Video file saved: {temp_video_file.name}")
212
 
 
213
  # Step 2: Extract audio from video using ffmpeg (waiting for completion)
214
  audio_path = extract_audio(temp_video_file.name)
215
-
 
216
  if not audio_path:
217
  return jsonify({"error": "Audio extraction failed"}), 500
218
 
 
219
  # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
220
  transcription = transcribe_audio(audio_path)
 
 
221
 
222
  if not transcription:
223
  return jsonify({"error": "Audio transcription failed"}), 500
 
 
224
 
 
225
  # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
226
  structured_data = query_gemini_api(transcription)
227
-
 
228
  # Step 5: Return the structured data
229
  return jsonify(structured_data)
230
 
@@ -266,10 +281,14 @@ def transcribe_audio(audio_path):
266
  """
267
  Transcribe audio using Whisper AI.
268
  """
 
269
  try:
 
270
  # Transcribe audio using Whisper AI
271
  print("Transcribing audio...")
272
  result = whisper_model.transcribe(audio_path)
 
 
273
  return result.get("text", "").strip()
274
 
275
  except Exception as e:
 
160
  import tempfile
161
 
162
  app = Flask(__name__)
163
+ print("APP IS RUNNING, ANIKET")
164
  # Gemini API settings
165
  from dotenv import load_dotenv
166
  # Load the .env file
167
  load_dotenv()
168
 
169
+ print("ENV LOADED, ANIKET")
170
+
171
  # Fetch the API key from the .env file
172
  API_KEY = os.getenv("FIRST_API_KEY")
173
 
 
178
  GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
179
  GEMINI_API_KEY = API_KEY
180
 
181
+
182
  # Load Whisper AI model at startup
183
+ print("Loading Whisper AI model..., ANIKET")
184
  whisper_model = whisper.load_model("base") # Choose model size: tiny, base, small, medium, large
185
+ print("Whisper AI model loaded successfully, ANIKET")
186
 
187
 
188
  # Define the "/" endpoint for health check
 
196
 
197
  @app.route('/process-video', methods=['POST'])
198
  def process_video():
199
+ print("GOT THE PROCESS VIDEO REQUEST, ANIKET")
200
  """
201
  Flask endpoint to process video:
202
  1. Extract audio and transcribe using Whisper AI.
203
  2. Send transcription to Gemini API for recipe information extraction.
204
  3. Return structured data in the response.
205
  """
206
+
207
  if 'video' not in request.files:
208
  return jsonify({"error": "No video file provided"}), 400
209
 
210
  video_file = request.files['video']
211
+ print("VIDEO FILE NAME: ", video_file)
212
  try:
213
+ print("SAVING THE FILE TEMPO, ANIKET")
214
  # Step 1: Save video to a temporary file
215
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
216
  video_file.save(temp_video_file.name)
217
  print(f"Video file saved: {temp_video_file.name}")
218
 
219
+
220
  # Step 2: Extract audio from video using ffmpeg (waiting for completion)
221
  audio_path = extract_audio(temp_video_file.name)
222
+ print("AUDIO PATH FROM LINE 221, ANIKET", audio_path)
223
+
224
  if not audio_path:
225
  return jsonify({"error": "Audio extraction failed"}), 500
226
 
227
+ print("STARTING TRANSCRIPTION, GOT THE .WAV AUDIO PATH THAT WAS STORED TEMPO, ANIKET")
228
  # Step 3: Transcribe the audio using Whisper AI (waiting for completion)
229
  transcription = transcribe_audio(audio_path)
230
+ )
231
+ print("BEFORE THE transcription FAILED ERROR, CHECKING IF I GOT THE TRANSCRIPTION", transcription)
232
 
233
  if not transcription:
234
  return jsonify({"error": "Audio transcription failed"}), 500
235
+
236
+ print("GOT THE transcription")
237
 
238
+ print("Starting the GEMINI REQUEST TO STRUCTURE IT")
239
  # Step 4: Generate structured recipe information using Gemini API (waiting for completion)
240
  structured_data = query_gemini_api(transcription)
241
+
242
+ print("GOT THE STRUCTURED DATA", structured_data)
243
  # Step 5: Return the structured data
244
  return jsonify(structured_data)
245
 
 
281
  """
282
  Transcribe audio using Whisper AI.
283
  """
284
+ print("CAME IN THE transcribe audio folder")
285
  try:
286
+
287
  # Transcribe audio using Whisper AI
288
  print("Transcribing audio...")
289
  result = whisper_model.transcribe(audio_path)
290
+ print("THE RESULTS ARE", result)
291
+
292
  return result.get("text", "").strip()
293
 
294
  except Exception as e: