Spaces:

hprasath
/

image-processing

Sleeping

App Files Files Community

hprasath committed on Apr 14, 2024

Commit

894070c

verified ·

1 Parent(s): 0633b03

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -112

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ from utils.objectDetection.index import detect_objects
-app = Flask(__name__)
 cache = Cache(app, config={'CACHE_TYPE': 'simple'})  # You can choose a caching type based on your requirements
 CORS(app)
 import moviepy.editor as mp
@@ -49,101 +49,106 @@ def get_face_locations(binary_data):
     print(3)
     return face_locations
-def separate_image_text_from_pdf(pdf_url):
     # List to store page information
-    pages_info = []
-    # Fetch the PDF from the URL
-    response = requests.get(pdf_url)
-    if response.status_code == 200:
-        # Create a temporary directory to store the PDF data
-        temp_dir = tempfile.mkdtemp()
-        # Define the temporary file path for the PDF
-        temp_pdf_path = os.path.join(temp_dir, "temp.pdf")
-        # Write the PDF data to the temporary file
-        with open(temp_pdf_path, "wb") as tmp_file:
-            tmp_file.write(response.content)
-        # Open the PDF
-        pdf = fitz.open(temp_pdf_path)
-        # Iterate through each page
-        for page_num in range(len(pdf)):
-            page = pdf.load_page(page_num)
-            # Extract text
-            text = page.get_text()
-            # Count images
-            image_list = page.get_images(full=True)
-            # Convert images to BytesIO and store in a list
-            images_bytes = []
-            for img_index, img_info in enumerate(image_list):
-                xref = img_info[0]
-                base_image = pdf.extract_image(xref)
-                image_bytes = base_image["image"]
-                images_bytes.append(image_bytes)
-            # Store page information in a dictionary
-            page_info = {
-                "pgno": page_num + 1,
-                "images": images_bytes,
-                "text": text
-            }
-            # Append page information to the list
-            pages_info.append(page_info)
-        # Close the PDF
-        pdf.close()
-        # Clean up the temporary files
-        os.unlink(temp_pdf_path)
-        os.rmdir(temp_dir)
-    else:
-        print("Failed to fetch the PDF from the URL.")
     return pages_info
 def pdf_image_text_embedding_and_text_embedding(pages_info):
     # List to store page embeddings
-    page_embeddings = []
-    # Iterate through each page
-    for page in pages_info:
-        # Extract text from the page
-        text = page["text"]
-        # Extract images from the page
-        images = page["images"]
-        # List to store image embeddings
-        image_embeddings = []
-        # Iterate through each image
-        for image in images:
-            # Get the image embedding
-            image_embedding = get_image_embedding(image)
-            extracted_text = extract_text(image)
-            # Append the image embedding to the list
-            image_embeddings.append({"image_embedding": image_embedding.tolist() ,"extracted_text":extracted_text})
-        # Get the text embedding
-        # Store the page embeddings in a dictionary
-        page_embedding = {
-            "images": image_embeddings,
-            "text": text,
-        }
-        # Append the page embedding to the list
-        page_embeddings.append(page_embedding)
-    return page_embeddings
 def separate_audio_from_video(video_url):
     try:
@@ -153,26 +158,25 @@ def separate_audio_from_video(video_url):
         # Extract audio
         audio = video.audio
-        # Create a temporary directory to store temporary files
-        temp_dir = tempfile.mkdtemp()
-        # Define the temporary file path for the audio
-        temp_audio_filename = os.path.join(temp_dir, "audio.wav")
-        # Write the audio data to the temporary file
-        audio.write_audiofile(temp_audio_filename)
-        # Read the audio data from the temporary file as bytes
-        with open(temp_audio_filename, "rb") as f:
-            audio_bytes = f.read()
         return audio_bytes
     except Exception as e:
         print("An error occurred:", e)
 @cache.cached(timeout=300)
 @app.route('/get_text_embedding', methods=['POST'])
@@ -232,13 +236,25 @@ def get_image_embedding_route():
 def get_video_embedding_route():
     try:
         video_url = request.json.get("videoUrl")
-        audio_data = separate_audio_from_video(video_url)
-        audio_embedding = extract_audio_embeddings(audio_data)
         audio_embedding_list = audio_embedding
-        audio_file = io.BytesIO(audio_data)
-        r = sr.Recognizer()
-        with sr.AudioFile(audio_file) as source:
-            audio_data = r.record(source)
         extracted_text = ""
         try:
             text = r.recognize_google(audio_data)
@@ -258,12 +274,22 @@ def extract_pdf_text_and_embedding():
     try:
         pdf_url = request.json.get("pdfUrl")
         print(1)
-        pages_info = seperate_image_text_from_pdf(pdf_url)
         content = pdf_image_text_embedding_and_text_embedding(pages_info)
         print(content)
         return jsonify({"content": content}), 200
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 # Route to get text description embeddings
@@ -330,8 +356,3 @@ def get_similarity_score_route():
     except Exception as e:
         return jsonify({"error": str(e)}), 500
-@app.route('/')
-def hello():
-    return 'Hello, World!'

# Flask must be given the module's import name, which Python exposes as the
# dunder ``__name__``. A single-underscore ``_name_`` is an undefined name
# and raises NameError the moment this module is imported.
app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple'})  # You can choose a caching type based on your requirements
CORS(app)
 import moviepy.editor as mp
     print(3)
     return face_locations
def seperate_image_text_from_pdf(pdf_url):
    """Download a PDF and collect the text and embedded images of each page.

    The (misspelled) function name is kept as-is because the PDF route calls
    it under exactly this name.

    Args:
        pdf_url: URL the PDF is fetched from with an HTTP GET.

    Returns:
        A list with one dict per page, holding ``"pgno"`` (1-based page
        number), ``"images"`` (list of the raw bytes of each embedded image)
        and ``"text"`` (the page text). The list is empty when the server
        does not answer with status 200. The string ``"Error"`` is returned
        instead when any exception is raised along the way.
    """
    import os

    try:
        pages_info = []
        # Fetch the PDF from the URL
        response = requests.get(pdf_url)
        if response.status_code != 200:
            print("Failed to fetch the PDF from the URL.")
            return pages_info

        # delete=False: the file has to outlive its handle so that PyMuPDF
        # can reopen it by path; it is removed explicitly in the finally
        # clause below, even when parsing fails.
        tmp_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            with tmp_file:
                tmp_file.write(response.content)

            pdf = fitz.open(tmp_file.name)
            try:
                for page_num in range(len(pdf)):
                    page = pdf.load_page(page_num)
                    text = page.get_text()
                    # The first field of every get_images() entry is the
                    # xref that extract_image() expects.
                    images_bytes = [
                        pdf.extract_image(img_info[0])["image"]
                        for img_info in page.get_images(full=True)
                    ]
                    pages_info.append({
                        "pgno": page_num + 1,
                        "images": images_bytes,
                        "text": text,
                    })
            finally:
                # Close the document before the file is unlinked.
                pdf.close()
        finally:
            os.unlink(tmp_file.name)
    except Exception as e:
        print("An error occurred:", e)
        return "Error"
    return pages_info
 def pdf_image_text_embedding_and_text_embedding(pages_info):
+    try:
     # List to store page embeddings
+        page_embeddings = []
+        # Iterate through each page
+        for page in pages_info:
+            # Extract text from the page
+            text = page["text"]
+            # Extract images from the page
+            images = page["images"]
+            # List to store image embeddings
+            image_embeddings = []
+            # Iterate through each image
+            for image in images:
+                # Get the image embedding
+                image_embedding = get_image_embedding(image)
+                extracted_text = extract_text(image)
+                # Append the image embedding to the list
+                image_embeddings.append({"image_embedding": image_embedding.tolist() ,"extracted_text":extracted_text})
+            # Get the text embedding
+            # Store the page embeddings in a dictionary
+            page_embedding = {
+                "images": image_embeddings,
+                "text": text,
+            }
+            # Append the page embedding to the list
+            page_embeddings.append(page_embedding)
+        return page_embeddings
+    except Exception as e:
+        print("An error occurred:", e)
+        return "Error"
 def separate_audio_from_video(video_url):
     try:
         # Extract audio
         audio = video.audio
+        # Create a temporary file to write the audio data
+        try :
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
+                temp_audio_filename = temp_audio_file.name
+                # Write the audio data to the temporary file
+                audio.write_audiofile(temp_audio_filename)
+                # Read the audio data from the temporary file as bytes
+                with open(temp_audio_filename, "rb") as f:
+                    audio_bytes = f.read()
+        except Exception as e:
+            return "Error"
         return audio_bytes
     except Exception as e:
         print("An error occurred:", e)
+        return "Error"
 @cache.cached(timeout=300)
 @app.route('/get_text_embedding', methods=['POST'])
 def get_video_embedding_route():
     try:
         video_url = request.json.get("videoUrl")
+        try:
+            audio_data = separate_audio_from_video(video_url)
+        except Exception as e:
+            return jsonify({"error": "Failed to extract audio from video 1"}), 500
+        try:
+            audio_embedding = extract_audio_embeddings(audio_data)
+        except Exception as e:
+            return jsonify({"error": "Failed to extract audio embeddings 2"}), 500
         audio_embedding_list = audio_embedding
+        try :
+            audio_file = io.BytesIO(audio_data)
+        except Exception as e:
+            return jsonify({"error": "Failed to extract audio embeddings 3"}), 500
+        try :
+            r = sr.Recognizer()
+            with sr.AudioFile(audio_file) as source:
+                audio_data = r.record(source)
+        except Exception as e:
+            return jsonify({"error": "Failed to extract audio embeddings 4"}), 500
         extracted_text = ""
         try:
             text = r.recognize_google(audio_data)
     try:
         pdf_url = request.json.get("pdfUrl")
         print(1)
+        pages_info
+        try :
+            pages_info = seperate_image_text_from_pdf(pdf_url)
+        except Exception as e:
+            print(e)
+            return jsonify({"error": "Failed to fetch the PDF from the URL"}), 500
+        if(pages_info == "Error"):
+            return jsonify({"error": "Failed to fetch the PDF from the URL seperate_image_text_from_pdf "}), 500
         content = pdf_image_text_embedding_and_text_embedding(pages_info)
+        if content == "Error":
+            return jsonify({"error": "An error occurred while processing the PDF"}), 500
         print(content)
         return jsonify({"content": content}), 200
     except Exception as e:
+        print(e)
         return jsonify({"error": str(e)}), 500
 # Route to get text description embeddings
     except Exception as e:
         return jsonify({"error": str(e)}), 500