hprasath committed on
Commit
894070c
·
verified ·
1 Parent(s): 0633b03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -112
app.py CHANGED
@@ -20,7 +20,7 @@ from utils.objectDetection.index import detect_objects
20
 
21
 
22
 
23
- app = Flask(__name__)
24
  cache = Cache(app, config={'CACHE_TYPE': 'simple'}) # You can choose a caching type based on your requirements
25
  CORS(app)
26
  import moviepy.editor as mp
@@ -49,101 +49,106 @@ def get_face_locations(binary_data):
49
  print(3)
50
  return face_locations
51
 
52
- def separate_image_text_from_pdf(pdf_url):
53
  # List to store page information
54
- pages_info = []
55
-
56
- # Fetch the PDF from the URL
57
- response = requests.get(pdf_url)
58
-
59
- if response.status_code == 200:
60
- # Create a temporary directory to store the PDF data
61
- temp_dir = tempfile.mkdtemp()
62
-
63
- # Define the temporary file path for the PDF
64
- temp_pdf_path = os.path.join(temp_dir, "temp.pdf")
65
-
66
- # Write the PDF data to the temporary file
67
- with open(temp_pdf_path, "wb") as tmp_file:
68
- tmp_file.write(response.content)
69
-
70
- # Open the PDF
71
- pdf = fitz.open(temp_pdf_path)
72
-
73
- # Iterate through each page
74
- for page_num in range(len(pdf)):
75
- page = pdf.load_page(page_num)
76
-
77
- # Extract text
78
- text = page.get_text()
79
-
80
- # Count images
81
- image_list = page.get_images(full=True)
82
-
83
- # Convert images to BytesIO and store in a list
84
- images_bytes = []
85
- for img_index, img_info in enumerate(image_list):
86
- xref = img_info[0]
87
- base_image = pdf.extract_image(xref)
88
- image_bytes = base_image["image"]
89
- images_bytes.append(image_bytes)
90
-
91
- # Store page information in a dictionary
92
- page_info = {
93
- "pgno": page_num + 1,
94
- "images": images_bytes,
95
- "text": text
96
- }
97
-
98
- # Append page information to the list
99
- pages_info.append(page_info)
100
-
101
- # Close the PDF
102
- pdf.close()
103
-
104
- # Clean up the temporary files
105
- os.unlink(temp_pdf_path)
106
- os.rmdir(temp_dir)
107
- else:
108
- print("Failed to fetch the PDF from the URL.")
109
 
110
  return pages_info
111
 
112
  def pdf_image_text_embedding_and_text_embedding(pages_info):
 
113
  # List to store page embeddings
114
- page_embeddings = []
115
-
116
- # Iterate through each page
117
- for page in pages_info:
118
- # Extract text from the page
119
- text = page["text"]
120
 
121
- # Extract images from the page
122
- images = page["images"]
123
-
124
- # List to store image embeddings
125
- image_embeddings = []
126
-
127
- # Iterate through each image
128
- for image in images:
129
- # Get the image embedding
130
- image_embedding = get_image_embedding(image)
131
- extracted_text = extract_text(image)
132
- # Append the image embedding to the list
133
- image_embeddings.append({"image_embedding": image_embedding.tolist() ,"extracted_text":extracted_text})
134
-
135
- # Get the text embedding
136
-
137
- # Store the page embeddings in a dictionary
138
- page_embedding = {
139
- "images": image_embeddings,
140
- "text": text,
141
- }
 
 
 
 
 
142
 
143
- # Append the page embedding to the list
144
- page_embeddings.append(page_embedding)
145
 
146
- return page_embeddings
 
 
 
 
147
 
148
  def separate_audio_from_video(video_url):
149
  try:
@@ -153,26 +158,25 @@ def separate_audio_from_video(video_url):
153
  # Extract audio
154
  audio = video.audio
155
 
156
- # Create a temporary directory to store temporary files
157
- temp_dir = tempfile.mkdtemp()
158
-
159
- # Define the temporary file path for the audio
160
- temp_audio_filename = os.path.join(temp_dir, "audio.wav")
161
 
162
- # Write the audio data to the temporary file
163
- audio.write_audiofile(temp_audio_filename)
164
 
165
- # Read the audio data from the temporary file as bytes
166
- with open(temp_audio_filename, "rb") as f:
167
- audio_bytes = f.read()
 
 
168
 
169
  return audio_bytes
170
 
171
  except Exception as e:
172
  print("An error occurred:", e)
173
-
174
-
175
-
176
 
177
  @cache.cached(timeout=300)
178
  @app.route('/get_text_embedding', methods=['POST'])
@@ -232,13 +236,25 @@ def get_image_embedding_route():
232
  def get_video_embedding_route():
233
  try:
234
  video_url = request.json.get("videoUrl")
235
- audio_data = separate_audio_from_video(video_url)
236
- audio_embedding = extract_audio_embeddings(audio_data)
 
 
 
 
 
 
237
  audio_embedding_list = audio_embedding
238
- audio_file = io.BytesIO(audio_data)
239
- r = sr.Recognizer()
240
- with sr.AudioFile(audio_file) as source:
241
- audio_data = r.record(source)
 
 
 
 
 
 
242
  extracted_text = ""
243
  try:
244
  text = r.recognize_google(audio_data)
@@ -258,12 +274,22 @@ def extract_pdf_text_and_embedding():
258
  try:
259
  pdf_url = request.json.get("pdfUrl")
260
  print(1)
261
- pages_info = seperate_image_text_from_pdf(pdf_url)
 
 
 
 
 
 
 
262
  content = pdf_image_text_embedding_and_text_embedding(pages_info)
 
 
263
  print(content)
264
  return jsonify({"content": content}), 200
265
 
266
  except Exception as e:
 
267
  return jsonify({"error": str(e)}), 500
268
 
269
  # Route to get text description embeddings
@@ -330,8 +356,3 @@ def get_similarity_score_route():
330
 
331
  except Exception as e:
332
  return jsonify({"error": str(e)}), 500
333
-
334
- @app.route('/')
335
- def hello():
336
- return 'Hello, World!'
337
-
 
20
 
21
 
22
 
23
# Application setup.
# Flask's first argument is the import name of the application module, which is
# the dunder ``__name__``. The single-underscore ``_name_`` is an undefined
# name and raises NameError as soon as this module is imported.
app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple'})  # You can choose a caching type based on your requirements
CORS(app)
import moviepy.editor as mp
 
49
  print(3)
50
  return face_locations
51
 
52
def seperate_image_text_from_pdf(pdf_url):
    """Download a PDF and collect the text and embedded images of every page.

    The misspelled function name is kept on purpose: callers in this file
    refer to it by this name.

    Args:
        pdf_url: URL the PDF is fetched from with an HTTP GET.

    Returns:
        A list with one dict per page, each of the form
        ``{"pgno": <1-based page number>, "images": [<raw image bytes>, ...],
        "text": <page text>}``.
        An empty list when the server answers with a status other than 200.
        The string ``"Error"`` when any exception is raised while fetching
        or parsing; callers compare the result against this sentinel.
    """
    import os  # function-scope import, as in the original block

    pages_info = []
    tmp_file_path = None
    try:
        # A timeout keeps a stalled server from blocking the request forever;
        # a timeout error falls into the generic handler below.
        response = requests.get(pdf_url, timeout=30)

        if response.status_code != 200:
            print("Failed to fetch the PDF from the URL.")
            return pages_info

        # delete=False because the file is re-opened by path below; it is
        # removed in the ``finally`` clause instead. The ".pdf" suffix lets
        # the PDF library recognise the file type from its name.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
            # Record the path before writing so a failed write is cleaned up too.
            tmp_file_path = tmp_file.name
            tmp_file.write(response.content)

        pdf = fitz.open(tmp_file_path)
        try:
            for page_num in range(len(pdf)):
                page = pdf.load_page(page_num)

                text = page.get_text()

                # The first field of each image entry is the xref used to
                # pull the raw image bytes out of the document.
                images_bytes = [
                    pdf.extract_image(img_info[0])["image"]
                    for img_info in page.get_images(full=True)
                ]

                pages_info.append({
                    "pgno": page_num + 1,
                    "images": images_bytes,
                    "text": text,
                })
        finally:
            # Close the document even when a page fails to parse.
            pdf.close()
    except Exception as e:
        print("An error occurred:", e)
        return "Error"
    finally:
        # Remove the temporary file on every path, not only on success.
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError as cleanup_error:
                print("Could not remove temporary file:", cleanup_error)

    return pages_info
111
 
112
  def pdf_image_text_embedding_and_text_embedding(pages_info):
113
+ try:
114
  # List to store page embeddings
115
+ page_embeddings = []
 
 
 
 
 
116
 
117
+ # Iterate through each page
118
+ for page in pages_info:
119
+ # Extract text from the page
120
+ text = page["text"]
121
+
122
+ # Extract images from the page
123
+ images = page["images"]
124
+
125
+ # List to store image embeddings
126
+ image_embeddings = []
127
+
128
+ # Iterate through each image
129
+ for image in images:
130
+ # Get the image embedding
131
+ image_embedding = get_image_embedding(image)
132
+ extracted_text = extract_text(image)
133
+ # Append the image embedding to the list
134
+ image_embeddings.append({"image_embedding": image_embedding.tolist() ,"extracted_text":extracted_text})
135
+
136
+ # Get the text embedding
137
+
138
+ # Store the page embeddings in a dictionary
139
+ page_embedding = {
140
+ "images": image_embeddings,
141
+ "text": text,
142
+ }
143
 
144
+ # Append the page embedding to the list
145
+ page_embeddings.append(page_embedding)
146
 
147
+ return page_embeddings
148
+ except Exception as e:
149
+ print("An error occurred:", e)
150
+ return "Error"
151
+
152
 
153
  def separate_audio_from_video(video_url):
154
  try:
 
158
  # Extract audio
159
  audio = video.audio
160
 
161
+ # Create a temporary file to write the audio data
162
+ try :
163
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
164
+ temp_audio_filename = temp_audio_file.name
 
165
 
166
+ # Write the audio data to the temporary file
167
+ audio.write_audiofile(temp_audio_filename)
168
 
169
+ # Read the audio data from the temporary file as bytes
170
+ with open(temp_audio_filename, "rb") as f:
171
+ audio_bytes = f.read()
172
+ except Exception as e:
173
+ return "Error"
174
 
175
  return audio_bytes
176
 
177
  except Exception as e:
178
  print("An error occurred:", e)
179
+ return "Error"
 
 
180
 
181
  @cache.cached(timeout=300)
182
  @app.route('/get_text_embedding', methods=['POST'])
 
236
  def get_video_embedding_route():
237
  try:
238
  video_url = request.json.get("videoUrl")
239
+ try:
240
+ audio_data = separate_audio_from_video(video_url)
241
+ except Exception as e:
242
+ return jsonify({"error": "Failed to extract audio from video 1"}), 500
243
+ try:
244
+ audio_embedding = extract_audio_embeddings(audio_data)
245
+ except Exception as e:
246
+ return jsonify({"error": "Failed to extract audio embeddings 2"}), 500
247
  audio_embedding_list = audio_embedding
248
+ try :
249
+ audio_file = io.BytesIO(audio_data)
250
+ except Exception as e:
251
+ return jsonify({"error": "Failed to extract audio embeddings 3"}), 500
252
+ try :
253
+ r = sr.Recognizer()
254
+ with sr.AudioFile(audio_file) as source:
255
+ audio_data = r.record(source)
256
+ except Exception as e:
257
+ return jsonify({"error": "Failed to extract audio embeddings 4"}), 500
258
  extracted_text = ""
259
  try:
260
  text = r.recognize_google(audio_data)
 
274
  try:
275
  pdf_url = request.json.get("pdfUrl")
276
  print(1)
277
+ pages_info
278
+ try :
279
+ pages_info = seperate_image_text_from_pdf(pdf_url)
280
+ except Exception as e:
281
+ print(e)
282
+ return jsonify({"error": "Failed to fetch the PDF from the URL"}), 500
283
+ if(pages_info == "Error"):
284
+ return jsonify({"error": "Failed to fetch the PDF from the URL seperate_image_text_from_pdf "}), 500
285
  content = pdf_image_text_embedding_and_text_embedding(pages_info)
286
+ if content == "Error":
287
+ return jsonify({"error": "An error occurred while processing the PDF"}), 500
288
  print(content)
289
  return jsonify({"content": content}), 200
290
 
291
  except Exception as e:
292
+ print(e)
293
  return jsonify({"error": str(e)}), 500
294
 
295
  # Route to get text description embeddings
 
356
 
357
  except Exception as e:
358
  return jsonify({"error": str(e)}), 500