Spaces:
Sleeping
Sleeping
import gradio as gr | |
import chromadb | |
import fitz # PyMuPDF | |
import time | |
# Open (or create) the on-disk Chroma database in the current working
# directory.
client = chromadb.PersistentClient(path="./")
# Collection that process_pdf adds extracted PDF text to; it is created on
# first use if it does not exist yet.
collection = client.get_or_create_collection(name="code")
def extract_text_from_pdf(file_path):
    """Extract the plain text of every page in a PDF file.

    Args:
        file_path: Path of the PDF to read; passed straight to ``fitz.open``.

    Returns:
        The concatenated text of all pages. If the file cannot be opened or
        read, a message starting with ``"Fehler beim Lesen der PDF-Datei: "``
        is returned instead; no exception propagates to the caller.
    """
    try:
        # The context manager closes the document even when a page fails to
        # parse, so the file handle is never leaked.
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:  # Boundary for the UI: report the failure as text.
        return f"Fehler beim Lesen der PDF-Datei: {e}"
def process_pdf(uploaded_file, prompt=None):
    """Extract the text of an uploaded PDF, store it in Chroma and return it.

    Args:
        uploaded_file: The uploaded PDF, either a path string or an object
            whose ``name`` attribute holds the path. ``None`` means that
            nothing was uploaded.
        prompt: Accompanying prompt text. Currently unused, hence optional.

    Returns:
        The text returned by ``extract_text_from_pdf`` (which is an error
        message if the PDF could not be read), or ``None`` when no file was
        uploaded.
    """
    if uploaded_file is None:
        return None

    # Accept both a plain path string and a file wrapper exposing ``.name``.
    file_path = getattr(uploaded_file, "name", uploaded_file)
    pdf_text = extract_text_from_pdf(file_path)

    # extract_text_from_pdf reports failures as text with this prefix. Such a
    # message is still returned to the caller but must not be indexed.
    read_failed = pdf_text.startswith("Fehler beim Lesen der PDF-Datei:")
    if pdf_text and not read_failed:
        collection.add(
            documents=[pdf_text],
            # Chroma expects string ids, so the timestamp is stringified.
            ids=[str(time.time())],
        )
    return pdf_text
def suchen(inputs):
    """Placeholder search handler: ignore ``inputs`` and return "hallo"."""
    return "hallo"
# Build the UI: a prompt box, an output box and a PDF upload in one row,
# plus a button that runs process_pdf on the uploaded file.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Row():
        inp = gr.Textbox(placeholder="What is your name?")
        out = gr.Textbox()
        # No trailing comma here: with one, file_inp would be a one-element
        # tuple instead of the File component and could not be used as input.
        file_inp = gr.File(type="filepath", label="PDF-Datei hochladen")
    btn = gr.Button("Run")
    # process_pdf takes (uploaded_file, prompt), so the file component must
    # come first and the prompt textbox second.
    btn.click(fn=process_pdf, inputs=[file_inp, inp], outputs=out)

demo.launch()