RAG-Vereine / app.py
mgokg's picture
Update app.py
0eeaa38 verified
raw
history blame
1.39 kB
import gradio as gr
import chromadb
import fitz # PyMuPDF
import time
# Open (or create) a Chroma database persisted on disk under the relative
# path "./".
client = chromadb.PersistentClient(path="./")
# Fetch the collection named "code", creating it if it does not exist yet.
collection = client.get_or_create_collection(name="code")
def extract_text_from_pdf(file_path):
    """Extract the plain text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The concatenated text of all pages. If the file cannot be opened or
        read, a message starting with "Fehler beim Lesen der PDF-Datei:"
        followed by the error text is returned instead of raising.
    """
    try:
        # The context manager closes the document even when reading a page
        # fails, so the file handle is never leaked.
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Best-effort contract: callers display the returned string directly,
        # so failures are reported as text rather than propagated.
        return f"Fehler beim Lesen der PDF-Datei: {e}"
def process_pdf(uploaded_file, prompt=None):
    """Extract the text of an uploaded PDF and store it in the collection.

    Args:
        uploaded_file: The uploaded file, given either as a path string or as
            an object that exposes the path as ``.name``. ``None`` means that
            nothing was uploaded.
        prompt: Text entered by the user. It is accepted for the UI wiring
            but not used by this function.

    Returns:
        The string returned by ``extract_text_from_pdf`` (the PDF text, or
        its error message), or an empty string when no file was given.
    """
    if uploaded_file is None:
        # Nothing to process; avoid touching an unbound result variable.
        return ""

    # A file component configured with type="filepath" passes a plain path
    # string; file-like wrappers carry the path in `.name`.
    if isinstance(uploaded_file, str):
        file_path = uploaded_file
    else:
        file_path = uploaded_file.name

    pdf_text = extract_text_from_pdf(file_path)

    # Skip empty results and the extractor's error message so that neither
    # ends up indexed as document content.
    is_error = pdf_text.startswith("Fehler beim Lesen der PDF-Datei:")
    if pdf_text and not is_error:
        collection.add(
            documents=[pdf_text],
            # Collection ids must be strings; a nanosecond timestamp keeps
            # the original "time of upload" id scheme.
            ids=[str(time.time_ns())],
        )
    return pdf_text
def suchen(inputs):
    """Placeholder search handler.

    The ``inputs`` argument is ignored; the fixed string "hallo" is always
    returned.
    """
    return "hallo"
# Build the UI: a prompt textbox and an output textbox side by side, a PDF
# upload field, and a button that runs process_pdf.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Row():
        inp = gr.Textbox(placeholder="What is your name?")
        out = gr.Textbox()
    # No trailing comma here: with one, `file_inp` would be a one-element
    # tuple instead of the component and could not be used as an input.
    file_inp = gr.File(type="filepath", label="PDF-Datei hochladen")
    btn = gr.Button("Run")
    # process_pdf takes (uploaded_file, prompt), so the file component must
    # come first, followed by the textbox.
    btn.click(fn=process_pdf, inputs=[file_inp, inp], outputs=out)

demo.launch()