radarbackend11262024v11

Runtime error

App Files Files Community

Pijush2023 committed on Oct 14, 2024

Commit

5e64098

verified ·

1 Parent(s): 92489c1

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -52

app.py CHANGED Viewed

@@ -32,15 +32,12 @@ vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
 # Create a global list to store uploaded document records
 uploaded_documents = []
-from datetime import datetime
-from langchain_core.documents import Document
 # Function to process PDF, extract text, split it into chunks, and upload to the vector DB
-def process_pdf(pdf_file,uploaded_documents):
     if pdf_file is None:
         return uploaded_documents, "No PDF file uploaded."
-    with pdfplumber.open(pdf_file) as pdf:
         all_text = ""
         for page in pdf.pages:
             all_text += page.extract_text()
@@ -65,63 +62,31 @@ def process_pdf(pdf_file,uploaded_documents):
     }
     # Add the record to the global list
-    uploaded_documents.append(document_record)
     # Convert the list of dictionaries into a list of lists for the dataframe
     table_data = [[doc["Document Name"], doc["Upload Time"], doc["Chunks"], doc["Pinecone Index"]] for doc in uploaded_documents]
     return table_data, f"Uploaded {len(chunks)} chunks to the vector database."
-# Gradio Blocks app with PDF uploader and table for logs
-def process_pdf(file):
-    # Extract text from PDF using pdfplumber
-    with pdfplumber.open(file.name) as pdf:
-        text = ""
-        for page in pdf.pages:
-            text += page.extract_text()
-    # Split text using RecursiveCharacterTextSplitter
-    documents = [Document(page_content=text)]
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
-    docs = text_splitter.split_documents(documents)
-    # Add documents to Pinecone Vector Store
-    vectorstore = PineconeVectorStore(index_name, embeddings)
-    vectorstore.add_documents(docs)
-    # Prepare log data
-    log_data = {
-        "File Name": [file.name],
-        "File Size (KB)": [os.path.getsize(file.name) / 1024],
-        "Number of Chunks": [len(docs)],
-        "Timestamp": [time.strftime("%Y-%m-%d %H:%M:%S")]
-    }
-    # Create a DataFrame for logs
-    df_logs = pd.DataFrame(log_data)
-    return "PDF processed successfully!", df_logs
 # Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("# PDF Uploader to Pinecone with Logs")
-# File upload component
     with gr.Column():
-                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-                # Button to trigger processing
-                process_button = gr.Button("Process PDF and Upload")
-                # Dataframe to display uploaded document records
-                document_table = gr.Dataframe(headers=["Document Name", "Upload Time", "Chunks", "Pinecone Index"], interactive=False)
-                # Output textbox for results
-                output_textbox = gr.Textbox(label="Result")
-                # Define button click action
-                # process_button.click(fn=process_pdf, inputs=file_input, outputs=output_textbox)
-                process_button.click(fn=process_pdf, inputs=[file_input, gr.State([])], outputs=[document_table, output_textbox])
 demo.queue()
 demo.launch(show_error=True)

 # Create a global list to store uploaded document records
 uploaded_documents = []
 # Function to process PDF, extract text, split it into chunks, and upload to the vector DB
+def process_pdf(pdf_file, uploaded_documents):
     if pdf_file is None:
         return uploaded_documents, "No PDF file uploaded."
+    with pdfplumber.open(pdf_file.name) as pdf:
         all_text = ""
         for page in pdf.pages:
             all_text += page.extract_text()
     }
     # Add the record to the global list
+    uploaded_documents.append(document_record)
     # Convert the list of dictionaries into a list of lists for the dataframe
     table_data = [[doc["Document Name"], doc["Upload Time"], doc["Chunks"], doc["Pinecone Index"]] for doc in uploaded_documents]
     return table_data, f"Uploaded {len(chunks)} chunks to the vector database."
 # Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("# PDF Uploader to Pinecone with Logs")
+    # File upload component
     with gr.Column():
+        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+        # Button to trigger processing
+        process_button = gr.Button("Process PDF and Upload")
+        # Dataframe to display uploaded document records
+        document_table = gr.Dataframe(headers=["Document Name", "Upload Time", "Chunks", "Pinecone Index"], interactive=False)
+        # Output textbox for results
+        output_textbox = gr.Textbox(label="Result")
+        # Define button click action
+        process_button.click(fn=process_pdf, inputs=[file_input, gr.State([])], outputs=[document_table, output_textbox])
 demo.queue()
 demo.launch(show_error=True)