Spaces:
Sleeping
Sleeping
File size: 1,390 Bytes
c29df11 3b67edb c29df11 3b67edb 6bc3e62 43cd7fa 3b67edb d189514 96ff633 0eeaa38 96ff633 0eeaa38 96ff633 6d1a446 96ff633 f0be5df 723e467 1245333 723e467 f0be5df 657d295 6d1a446 96ff633 657d295 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import gradio as gr
import chromadb
import fitz # PyMuPDF
import time
# Chroma client opened with "./" as its storage path, i.e. relative to the
# current working directory of the process.
client = chromadb.PersistentClient(path="./")
# Collection named "code"; fetched if it already exists, created otherwise.
# Referenced by process_pdf below.
collection = client.get_or_create_collection(name="code")
def extract_text_from_pdf(file_path):
    """Extract the plain text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of all pages concatenated in page order. If the file
        cannot be opened or read, no exception is raised; instead a message
        starting with "Fehler beim Lesen der PDF-Datei:" is returned.
    """
    try:
        # The context manager closes the document even when reading a page
        # fails, so no file handle is leaked.
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberately broad: the return value is shown to the user as-is,
        # so every failure is reported as text rather than raised.
        return f"Fehler beim Lesen der PDF-Datei: {e}"
def process_pdf(uploaded_file, prompt):
    """Extract the text of an uploaded PDF, store it in Chroma and return it.

    Args:
        uploaded_file: The uploaded file, either a path string or an object
            whose ``name`` attribute holds the path. ``None`` means that
            nothing was uploaded.
        prompt: Text entered alongside the upload; currently unused.

    Returns:
        The text returned by ``extract_text_from_pdf`` (which may be its
        error message), or an empty string when no file was given.
    """
    if uploaded_file is None:
        return ""

    # Accept both a plain path string and a file wrapper exposing ``.name``.
    file_path = getattr(uploaded_file, "name", uploaded_file)
    pdf_text = extract_text_from_pdf(file_path)

    if pdf_text:
        # NOTE(review): extract_text_from_pdf reports failures as text, so an
        # error message would be stored like a document - confirm whether
        # that is wanted.
        # Chroma ids must be strings; a nanosecond timestamp keeps them
        # distinct across uploads.
        collection.add(
            documents=[pdf_text],
            ids=[str(time.time_ns())],
        )
    return pdf_text
def suchen(inputs):
    """Placeholder handler: ignores ``inputs`` and always returns "hallo"."""
    return "hallo"
# UI layout: a prompt box, an output box and a PDF upload, wired to process_pdf.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Row():
        inp = gr.Textbox(placeholder="What is your name?")
        out = gr.Textbox()
        # No trailing comma here: it would turn file_inp into a 1-tuple
        # instead of a component that can be used as an event input.
        file_inp = gr.File(type="filepath", label="PDF-Datei hochladen")
    btn = gr.Button("Run")
    # Input order matches process_pdf(uploaded_file, prompt).
    btn.click(fn=process_pdf, inputs=[file_inp, inp], outputs=out)
demo.launch()
|