RAG-Vereine

Sleeping

App Files Community

mgokg committed on Nov 10, 2024

Commit

96ff633

verified ·

1 Parent(s): 65ab811

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -0

app.py CHANGED Viewed

@@ -6,6 +6,50 @@ import time
 client = chromadb.PersistentClient(path="./")
 collection = client.get_or_create_collection(name="code")
 # Function to extract text from PDF file
 def extract_text_from_pdf(file_path):
     try:

 client = chromadb.PersistentClient(path="./")
 collection = client.get_or_create_collection(name="code")
+# Funktion zum Extrahieren von Text aus einer PDF-Datei
+def extract_text_from_pdf(file_path):
+    try:
+        doc = fitz.open(file_path)
+        text = ""
+        for page in doc:
+            text += page.get_text()
+        return text
+    except Exception as e:
+        return f"Fehler beim Lesen der PDF-Datei: {e}"
+def process_pdf(uploaded_file, prompt):
+    if uploaded_file is not None:
+        # Text aus der hochgeladenen PDF-Datei extrahieren
+        pdf_text = extract_text_from_pdf(uploaded_file.name)
+        timestamp = time.time()
+        if pdf_text:
+            collection.add(
+                documents=[pdf_text],
+                ids=[timestamp]
+            )
+            print(pdf_text)
+            return pdf_text
+def main():
+    gr.Interface(
+        fn=process_pdf,
+        inputs=[gr.File(type="filepath", label="PDF-Datei hochladen"),
+                gr.Textbox(lines=2, placeholder="Stellen Sie eine Frage")],
+        outputs="text",
+        title="PDF-Chatbot",
+        description="Laden Sie eine PDF-Datei hoch und stellen Sie Fragen zu ihrem Inhalt."
+    ).launch()
+# Start the interface only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+import gradio as gr
+import chromadb
+import fitz  # PyMuPDF
+import time
+# Chroma PersistentClient rooted at the path "./", and the collection named
+# "code" obtained from it via get_or_create_collection.
+client = chromadb.PersistentClient(path="./")
+collection = client.get_or_create_collection(name="code")
 # Function to extract text from PDF file
 def extract_text_from_pdf(file_path):
     try: