RAG-Vereine

Sleeping

File size: 1,390 Bytes

c29df11
3b67edb
c29df11
3b67edb
6bc3e62
43cd7fa
3b67edb
d189514
96ff633
 
 
 
 
 
 
 
 
 
 
 
 
0eeaa38
96ff633
 
0eeaa38
96ff633
 
 
 
 
 
6d1a446
96ff633
 
f0be5df
723e467
1245333
 
723e467
f0be5df
657d295
 
 
 
 
 
 
6d1a446
96ff633
657d295

import gradio as gr
import chromadb
import fitz  # PyMuPDF
import time

# Open a Chroma client through PersistentClient using the relative path "./",
# i.e. resolved against the process's current working directory.
client = chromadb.PersistentClient(path="./")
# Look up the collection named "code", creating it if it does not exist yet.
# process_pdf references this collection when storing extracted PDF text.
collection = client.get_or_create_collection(name="code") 

# Extract the text of a PDF file
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of all pages joined in page order. If opening or reading
        fails for any reason, no exception is raised; instead a message
        starting with "Fehler beim Lesen der PDF-Datei: " followed by the
        error text is returned.
    """
    try:
        # The context manager closes the document (and its file handle) even
        # when a page fails to parse part-way through.
        with fitz.open(file_path) as doc:
            # join() avoids repeatedly re-copying the growing string.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Callers display the returned string directly, so failures are
        # reported as text rather than propagated.
        return f"Fehler beim Lesen der PDF-Datei: {e}"

def process_pdf(uploaded_file, prompt):
    """Extract the text of an uploaded PDF, store it in the collection, return it.

    Args:
        uploaded_file: The upload coming from the UI, or None when nothing
            was uploaded. May be a file path string or an object whose
            ``name`` attribute holds the path.
        prompt: Currently unused; kept so the signature stays stable.

    Returns:
        The extracted text (or the error message produced by
        ``extract_text_from_pdf``), or None when no file was provided.
    """
    if uploaded_file is None:
        return None

    # Accept both a plain path string and a file-like object exposing ``.name``.
    file_path = getattr(uploaded_file, "name", uploaded_file)
    pdf_text = extract_text_from_pdf(file_path)

    # extract_text_from_pdf reports failures as a message with this prefix
    # instead of raising; such a message must not be indexed as a document.
    read_failed = pdf_text.startswith("Fehler beim Lesen der PDF-Datei:")

    # Previously an early return made this storage step unreachable.
    if pdf_text and not read_failed:
        collection.add(
            documents=[pdf_text],
            # Chroma ids must be strings; a nanosecond timestamp keeps the
            # original "timestamp as id" scheme while avoiding float ids.
            ids=[str(time.time_ns())],
        )
    return pdf_text


def suchen(inputs):
    """Placeholder search handler: ignore *inputs* and return a fixed string."""
    return "hallo"
  

# Build the UI: a prompt textbox, an output textbox and a PDF upload, plus a
# button that runs process_pdf on the uploaded file and the prompt.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Row():
        inp = gr.Textbox(placeholder="What is your name?")
        out = gr.Textbox()
        # No trailing comma here: it would turn file_inp into a one-element
        # tuple instead of a component usable as an event input.
        file_inp = gr.File(type="filepath", label="PDF-Datei hochladen")
    btn = gr.Button("Run")
    # process_pdf takes (uploaded_file, prompt), so both components are
    # passed in that order.
    btn.click(fn=process_pdf, inputs=[file_inp, inp], outputs=out)

demo.launch()