GoodML committed on
Commit
43c5057
·
verified ·
1 Parent(s): 985ead6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -288
app.py CHANGED
@@ -1,286 +1,3 @@
1
- # import os
2
- # import requests
3
- # import cv2
4
- # import re
5
- # from flask import Flask, request, jsonify, render_template
6
- # from deepgram import DeepgramClient, PrerecordedOptions
7
- # from dotenv import load_dotenv
8
- # import tempfile
9
- # import json
10
- # import subprocess
11
-
12
-
13
- # import warnings
14
- # warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
15
-
16
- # app = Flask(__name__)
17
- # print("APP IS RUNNING, ANIKET")
18
-
19
- # # Load the .env file
20
- # load_dotenv()
21
-
22
- # print("ENV LOADED, ANIKET")
23
-
24
- # # Fetch the API key from the .env file
25
- # API_KEY = os.getenv("FIRST_API_KEY")
26
- # DEEPGRAM_API_KEY = os.getenv("SECOND_API_KEY")
27
-
28
- # # Ensure the API key is loaded correctly
29
- # if not API_KEY:
30
- # raise ValueError("API Key not found. Make sure it is set in the .env file.")
31
-
32
- # if not DEEPGRAM_API_KEY:
33
- # raise ValueError("DEEPGRAM_API_KEY not found. Make sure it is set in the .env file.")
34
-
35
- # GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
36
- # GEMINI_API_KEY = API_KEY
37
-
38
- # @app.route("/", methods=["GET"])
39
- # def health_check():
40
- # return jsonify({"status": "success", "message": "API is running successfully!"}), 200
41
-
42
-
43
- # def transcribe_audio(wav_file_path):
44
- # """
45
- # Transcribe audio from a video file using Deepgram API synchronously.
46
-
47
- # Args:
48
- # wav_file_path (str): Path to save the converted WAV file.
49
- # Returns:
50
- # dict: A dictionary containing status, transcript, or error message.
51
- # """
52
- # print("Entered the transcribe_audio function")
53
- # try:
54
- # # Initialize Deepgram client
55
- # deepgram = DeepgramClient(DEEPGRAM_API_KEY)
56
-
57
- # # Open the converted WAV file
58
- # with open(wav_file_path, 'rb') as buffer_data:
59
- # payload = {'buffer': buffer_data}
60
-
61
- # # Configure transcription options
62
- # options = PrerecordedOptions(
63
- # smart_format=True, model="nova-2", language="en-US"
64
- # )
65
-
66
- # # Transcribe the audio
67
- # response = deepgram.listen.prerecorded.v('1').transcribe_file(payload, options)
68
-
69
- # # Check if the response is valid
70
- # if response:
71
- # # print("Request successful! Processing response.")
72
-
73
- # # Convert response to JSON string
74
- # try:
75
- # data_str = response.to_json(indent=4)
76
- # except AttributeError as e:
77
- # return {"status": "error", "message": f"Error converting response to JSON: {e}"}
78
-
79
- # # Parse the JSON string to a Python dictionary
80
- # try:
81
- # data = json.loads(data_str)
82
- # except json.JSONDecodeError as e:
83
- # return {"status": "error", "message": f"Error parsing JSON string: {e}"}
84
-
85
- # # Extract the transcript
86
- # try:
87
- # transcript = data["results"]["channels"][0]["alternatives"][0]["transcript"]
88
- # except KeyError as e:
89
- # return {"status": "error", "message": f"Error extracting transcript: {e}"}
90
-
91
- # print(f"Transcript obtained: {transcript}")
92
- # # Step: Save the transcript to a text file
93
- # transcript_file_path = "transcript_from_transcribe_audio.txt"
94
- # with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
95
- # transcript_file.write(transcript)
96
- # # print(f"Transcript saved to file: {transcript_file_path}")
97
-
98
- # return transcript
99
- # else:
100
- # return {"status": "error", "message": "Invalid response from Deepgram."}
101
-
102
- # except FileNotFoundError:
103
- # return {"status": "error", "message": f"Video file not found: {wav_file_path}"}
104
- # except Exception as e:
105
- # return {"status": "error", "message": f"Unexpected error: {e}"}
106
- # finally:
107
- # # Clean up the temporary WAV file
108
- # if os.path.exists(wav_file_path):
109
- # os.remove(wav_file_path)
110
- # print(f"Temporary WAV file deleted: {wav_file_path}")
111
-
112
-
113
-
114
- # def download_video(url, temp_video_path):
115
- # """Download video (MP4 format) from the given URL and save it to temp_video_path."""
116
- # response = requests.get(url, stream=True)
117
- # if response.status_code == 200:
118
- # with open(temp_video_path, 'wb') as f:
119
- # for chunk in response.iter_content(chunk_size=1024):
120
- # f.write(chunk)
121
- # print(f"Audio downloaded successfully to {temp_video_path}")
122
- # else:
123
- # raise Exception(f"Failed to download audio, status code: {response.status_code}")
124
-
125
-
126
- # def preprocess_frame(frame):
127
- # """Preprocess the frame for better OCR accuracy."""
128
- # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
129
- # denoised = cv2.medianBlur(gray, 3)
130
- # _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
131
- # return thresh
132
-
133
- # def clean_ocr_text(text):
134
- # """Clean the OCR output by removing noise and unwanted characters."""
135
- # cleaned_text = re.sub(r'[^A-Za-z0-9\s,.!?-]', '', text)
136
- # cleaned_text = '\n'.join([line.strip() for line in cleaned_text.splitlines() if len(line.strip()) > 2])
137
- # return cleaned_text
138
-
139
- # def get_information_from_video_using_OCR(video_path, interval=1):
140
- # """Extract text from video frames using OCR and return the combined text content."""
141
- # cap = cv2.VideoCapture(video_path)
142
- # fps = int(cap.get(cv2.CAP_PROP_FPS))
143
- # frame_interval = interval * fps
144
- # frame_count = 0
145
- # extracted_text = ""
146
-
147
- # print("Starting text extraction from video...")
148
-
149
- # while cap.isOpened():
150
- # ret, frame = cap.read()
151
- # if not ret:
152
- # break
153
-
154
- # if frame_count % frame_interval == 0:
155
- # preprocessed_frame = preprocess_frame(frame)
156
- # text = pytesseract.image_to_string(preprocessed_frame, lang='eng', config='--psm 6 --oem 3')
157
- # cleaned_text = clean_ocr_text(text)
158
- # if cleaned_text:
159
- # extracted_text += cleaned_text + "\n\n"
160
- # print(f"Text found at frame {frame_count}: {cleaned_text[:50]}...")
161
-
162
- # frame_count += 1
163
-
164
- # cap.release()
165
- # print("Text extraction completed.")
166
- # return extracted_text
167
-
168
-
169
-
170
-
171
- # @app.route('/process-video', methods=['POST'])
172
- # def process_video():
173
- # if 'videoUrl' not in request.json:
174
- # return jsonify({"error": "No video URL provided"}), 400
175
-
176
- # video_url = request.json['videoUrl']
177
- # temp_video_path = None
178
-
179
- # try:
180
- # # Step 1: Download the WAV file from the provided URL
181
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
182
- # temp_video_path = temp_video_file.name
183
- # download_video(video_url, temp_video_path)
184
- # interval = 1
185
- # # Step 2: get the information from the downloaded MP4 file synchronously
186
- # video_info = get_information_from_video_using_OCR(temp_video_path, interval)
187
-
188
- # if not video_info:
189
- # video_info = ""
190
-
191
-
192
-
193
- # # Step 2: Convert the MP4 to WAV
194
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
195
- # temp_wav_path = temp_wav_file.name
196
- # convert_mp4_to_wav(temp_video_path, temp_wav_path)
197
-
198
- # audio_info = transcribe_audio(temp_wav_path)
199
-
200
- # # If no transcription present, use an empty string
201
- # if not audio_info:
202
- # audio_info = ""
203
-
204
-
205
-
206
- # # Step 3: Generate structured recipe information using Gemini API synchronously
207
- # structured_data = query_gemini_api(video_info, audio_info)
208
-
209
- # return jsonify(structured_data)
210
-
211
- # except Exception as e:
212
- # return jsonify({"error": str(e)}), 500
213
-
214
- # finally:
215
- # # Clean up temporary audio file
216
- # if temp_video_path and os.path.exists(temp_video_path):
217
- # os.remove(temp_video_path)
218
- # print(f"Temporary audio file deleted: {temp_video_path}")
219
-
220
-
221
-
222
-
223
-
224
-
225
- # def query_gemini_api(video_transcription, audio_transcription):
226
- # """
227
- # Send transcription text to Gemini API and fetch structured recipe information synchronously.
228
- # """
229
- # try:
230
- # # Define the structured prompt
231
- # prompt = (
232
- # "Analyze the provided cooking video and audio transcription combined and based on the combined information extract the following structured information:\n"
233
- # "1. Recipe Name: Identify the name of the dish being prepared.\n"
234
- # "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
235
- # "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
236
- # "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
237
- # "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
238
- # "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
239
- # "7. Serving size: In count of people or portion size.\n"
240
- # "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
241
- # "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
242
- # "Also, make sure not to provide anything else or any other information or warning or text apart from the above things mentioned."
243
- # f"Text: {audio_transcription}\n"
244
- # f"Text: {video_transcription}\n"
245
-
246
- # )
247
-
248
- # # Prepare the payload and headers
249
- # payload = {
250
- # "contents": [
251
- # {
252
- # "parts": [
253
- # {"text": prompt}
254
- # ]
255
- # }
256
- # ]
257
- # }
258
- # headers = {"Content-Type": "application/json"}
259
-
260
- # # Send request to Gemini API synchronously
261
- # response = requests.post(
262
- # f"{GEMINI_API_ENDPOINT}?key={GEMINI_API_KEY}",
263
- # json=payload,
264
- # headers=headers,
265
- # )
266
-
267
- # # Raise error if response code is not 200
268
- # response.raise_for_status()
269
-
270
- # data = response.json()
271
-
272
- # return data.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "No result found")
273
-
274
- # except requests.exceptions.RequestException as e:
275
- # print(f"Error querying Gemini API: {e}")
276
- # return {"error": str(e)}
277
-
278
-
279
- # if __name__ == '__main__':
280
- # app.run(debug=True)
281
-
282
-
283
-
284
  import os
285
  import requests
286
  import cv2
@@ -292,9 +9,8 @@ from dotenv import load_dotenv
292
  import tempfile
293
  import json
294
  import subprocess
295
-
296
-
297
  import warnings
 
298
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
299
 
300
  app = Flask(__name__)
@@ -388,7 +104,6 @@ def transcribe_audio(wav_file_path):
388
  if os.path.exists(wav_file_path):
389
  os.remove(wav_file_path)
390
  print(f"Temporary WAV file deleted: {wav_file_path}")
391
-
392
 
393
 
394
  def download_video(url, temp_video_path):
@@ -411,7 +126,7 @@ def preprocess_frame(frame):
411
  return thresh
412
 
413
  def clean_ocr_text(text):
414
- """Clean the OCR output by removing noise and unwanted characters."""
415
  cleaned_text = re.sub(r'[^A-Za-z0-9\s,.!?-]', '', text)
416
  cleaned_text = '\n'.join([line.strip() for line in cleaned_text.splitlines() if len(line.strip()) > 2])
417
  return cleaned_text
@@ -494,11 +209,15 @@ def process_video():
494
  return jsonify({"error": str(e)}), 500
495
 
496
  finally:
497
- # Clean up temporary video file
498
  if temp_video_path and os.path.exists(temp_video_path):
499
  os.remove(temp_video_path)
500
  print(f"Temporary video file deleted: {temp_video_path}")
501
 
 
 
 
 
502
 
503
  def query_gemini_api(video_transcription, audio_transcription):
504
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import requests
3
  import cv2
 
9
  import tempfile
10
  import json
11
  import subprocess
 
 
12
  import warnings
13
+
14
  warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
15
 
16
  app = Flask(__name__)
 
104
  if os.path.exists(wav_file_path):
105
  os.remove(wav_file_path)
106
  print(f"Temporary WAV file deleted: {wav_file_path}")
 
107
 
108
 
109
  def download_video(url, temp_video_path):
 
126
  return thresh
127
 
128
  def clean_ocr_text(text):
129
+ """Clean the OCR output by removing noise and unwanted characters."""
130
  cleaned_text = re.sub(r'[^A-Za-z0-9\s,.!?-]', '', text)
131
  cleaned_text = '\n'.join([line.strip() for line in cleaned_text.splitlines() if len(line.strip()) > 2])
132
  return cleaned_text
 
209
  return jsonify({"error": str(e)}), 500
210
 
211
  finally:
212
+ # Clean up temporary video file and WAV file
213
  if temp_video_path and os.path.exists(temp_video_path):
214
  os.remove(temp_video_path)
215
  print(f"Temporary video file deleted: {temp_video_path}")
216
 
217
+ if temp_wav_path and os.path.exists(temp_wav_path):
218
+ os.remove(temp_wav_path)
219
+ print(f"Temporary WAV file deleted: {temp_wav_path}")
220
+
221
 
222
  def query_gemini_api(video_transcription, audio_transcription):
223
  """