Spaces:

NEXAS
/

Titan

Sleeping

App Files Community

NEXAS committed on Jan 7

Commit

4ed9090

verified ·

1 Parent(s): 2ab65e0

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -30

app.py CHANGED Viewed

@@ -183,49 +183,129 @@ def format_prompt_inputs(image_collection, text_collection, video_collection, us
     return inputs
def page_1():
    """Render page 1: upload a single PDF, process it, and cache the results.

    The uploaded PDF is written under ``/tmp`` and handed to ``process_pdf``.
    The three collections it returns are stored on ``st.session_state`` as
    ``image_collection``, ``text_collection`` and ``video_collection``.
    Any exception raised while processing resets the progress widgets and is
    shown with ``st.error``.

    Returns:
        None. All output goes to the Streamlit page and session state.
    """
    # Local import so this function does not depend on a module-level
    # ``import os`` being present.
    import os

    st.title("Page 1: Upload and Process PDF")

    # File uploader for PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    # Button to trigger processing (only rendered once a file is present)
    if uploaded_file and st.button("Process PDF"):
        # basename() drops directory components from the client-supplied
        # name so the write cannot land outside /tmp.
        pdf_path = os.path.join("/tmp", os.path.basename(uploaded_file.name))
        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Progress bar
        progress_bar = st.progress(0)
        status_text = st.empty()

        try:
            progress_bar.progress(10)
            status_text.text("Initializing processing...")

            # Cosmetic only: steps through 10%, 40%, 70% before the real
            # work starts; it does not reflect process_pdf's progress.
            for progress in range(10, 100, 30):
                time.sleep(0.5)  # Simulate processing delay
                progress_bar.progress(progress)
                status_text.text(f"Processing... {progress}%")

            # Process the PDF and save collections to session state
            image_collection, text_collection, video_collection = process_pdf(pdf_path)
            st.session_state.image_collection = image_collection
            st.session_state.text_collection = text_collection
            st.session_state.video_collection = video_collection

            progress_bar.progress(100)
            status_text.text("Processing completed successfully!")
            st.success("PDF processed successfully! Collections saved to session state.")
        except Exception as e:
            # UI boundary: report the failure on the page instead of crashing.
            progress_bar.progress(0)
            status_text.text("")
            st.error(f"Error processing PDF: {e}")
 def page_2():
     st.title("Page 2: Query and Use Processed Collections")

     return inputs
+import streamlit as st
+import zipfile
+import os
+import time
def unzip_file(zip_path, extract_to):
    """
    Unzips a zip file to the specified directory.

    Never raises: any failure is printed and reported through the return
    value.

    Args:
        zip_path (str): Path to the zip file.
        extract_to (str): Directory where the contents should be extracted.
            Created (including parents) if it does not exist.

    Returns:
        bool: True if every member was extracted, False if anything failed.
    """
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Create the destination only after the archive opened
            # successfully, so a missing or corrupt zip does not leave an
            # empty directory behind.
            os.makedirs(extract_to, exist_ok=True)
            zip_ref.extractall(extract_to)
    except Exception as e:
        # Deliberate best-effort: callers branch on the boolean result.
        print(f"An error occurred: {e}")
        return False
    return True
+import streamlit as st
+import zipfile
+import os
+import time
def unzip_file(zip_path, extract_to):
    """
    Unzips a zip file to the specified directory.

    NOTE: this is a repeat definition. An `unzip_file` with the same
    signature appears earlier in this file; being later, this one is the
    binding in effect.

    Never raises: any failure is printed and reported through the return
    value.

    Args:
        zip_path (str): Path to the zip file.
        extract_to (str): Directory where the contents should be extracted.
            Created (including parents) if it does not exist.

    Returns:
        bool: True if every member was extracted, False if anything failed.
    """
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Create the destination only after the archive opened
            # successfully, so a missing or corrupt zip does not leave an
            # empty directory behind.
            os.makedirs(extract_to, exist_ok=True)
            zip_ref.extractall(extract_to)
    except Exception as e:
        # Deliberate best-effort: callers branch on the boolean result.
        print(f"An error occurred: {e}")
        return False
    return True
def process_pdf(pdf_path, extracted_dir=None, *, delay=2):
    """Placeholder PDF processor that returns dummy collection names.

    Stand-in until real PDF processing (extracting text, images, etc.) is
    written. It does not read either path argument.

    Args:
        pdf_path: Path of the PDF to process (currently unused).
        extracted_dir: Optional second positional argument (currently
            unused). Accepted because `page_1` calls this function as
            `process_pdf(pdf_path, temp_folder)`, which would otherwise
            raise TypeError.
        delay: Seconds to sleep to simulate processing time. Defaults to the
            original 2; pass 0 to skip the pause.

    Returns:
        tuple[str, str, str]: The literal placeholders "image_collection",
        "text_collection" and "video_collection", in that order.
    """
    if delay > 0:
        time.sleep(delay)  # Simulating processing delay
    # Replace with actual collections once real processing exists.
    return "image_collection", "text_collection", "video_collection"
def _save_upload(uploaded, dest_dir="/tmp"):
    """Write one Streamlit upload into *dest_dir* and return the path written."""
    # basename() drops directory components from the client-supplied name so
    # the write cannot land outside dest_dir.
    target = os.path.join(dest_dir, os.path.basename(uploaded.name))
    with open(target, "wb") as f:
        f.write(uploaded.getbuffer())
    return target


def _percent_done(done, total):
    """Return progress as an integer percentage clamped to 0..100."""
    if total <= 0:
        return 0
    return min(100, done * 100 // total)


def page_1():
    """Render page 1: upload video ZIPs and PDFs, extract and process them.

    Each ZIP is saved under ``/tmp`` and extracted with ``unzip_file`` into
    ``/tmp/extracted_files/<zip name without extension>``. Each PDF is saved
    under ``/tmp`` and passed to ``process_pdf``; the three collections it
    returns are stored on ``st.session_state``. Archives that fail to extract
    are listed in a warning instead of being reported as a success. Any
    exception resets the progress widgets and is shown with ``st.error``.

    Returns:
        None. All output goes to the Streamlit page and session state.
    """
    st.title("Page 1: Upload and Process Videos and PDFs")

    # File uploader for multiple zip files containing videos
    uploaded_video_zips = st.file_uploader(
        "Upload ZIP files containing videos", type=["zip"], accept_multiple_files=True
    )
    # File uploader for PDF files
    uploaded_pdf_files = st.file_uploader(
        "Upload PDF files", type=["pdf"], accept_multiple_files=True
    )

    # The button is only rendered once at least one file has been uploaded.
    if not ((uploaded_video_zips or uploaded_pdf_files) and st.button("Process Files")):
        return

    # Temporary folder to store extracted files
    temp_folder = "/tmp/extracted_files"
    os.makedirs(temp_folder, exist_ok=True)

    # Progress bar
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        total_files = len(uploaded_video_zips) + len(uploaded_pdf_files)
        files_processed = 0
        failed_archives = []

        # Process video zip files
        for uploaded_file in uploaded_video_zips:
            zip_path = _save_upload(uploaded_file)
            folder_name = os.path.splitext(os.path.basename(uploaded_file.name))[0]
            extract_to = os.path.join(temp_folder, folder_name)
            if not unzip_file(zip_path, extract_to):
                failed_archives.append(uploaded_file.name)
            # Count the file either way so the bar can still reach 100%.
            files_processed += 1
            # st.progress takes an int in 0..100 or a float in 0.0..1.0; the
            # helper returns an int so a value such as 33.3 is never passed.
            progress_bar.progress(_percent_done(files_processed, total_files))
            status_text.text(f"Extracting: {uploaded_file.name} ({files_processed}/{total_files})")

        # Process PDF files
        for uploaded_pdf in uploaded_pdf_files:
            pdf_path = _save_upload(uploaded_pdf)
            status_text.text(
                f"Processing PDF: {uploaded_pdf.name} ({files_processed + 1}/{total_files})"
            )
            # Called with one argument to match the process_pdf(pdf_path)
            # signature defined in this file.
            image_collection, text_collection, video_collection = process_pdf(pdf_path)
            # NOTE(review): each PDF overwrites the previous values, so only
            # the last PDF's collections remain; confirm that is intended.
            st.session_state.image_collection = image_collection
            st.session_state.text_collection = text_collection
            st.session_state.video_collection = video_collection
            files_processed += 1
            progress_bar.progress(_percent_done(files_processed, total_files))

        # Update status after extraction and processing
        if failed_archives:
            status_text.text("Processing finished with errors.")
            st.warning(f"Could not extract: {', '.join(failed_archives)}")
        else:
            status_text.text("Extraction and processing completed successfully!")
            st.success("Videos and PDFs processed successfully! Collections saved to session state.")
    except Exception as e:
        # UI boundary: report the failure on the page instead of crashing.
        progress_bar.progress(0)
        status_text.text("")
        st.error(f"Error processing files: {e}")
 def page_2():
     st.title("Page 2: Query and Use Processed Collections")