RAG-Vereine

Sleeping

RAG-Vereine / app.py

Update app.py

0eeaa38 verified 8 months ago

1.39 kB

	import gradio as gr
	import chromadb
	import fitz # PyMuPDF
	import time

	# Persistent Chroma client whose storage path is the current directory.
	client = chromadb.PersistentClient("./")
	# Collection named "code"; pdf text is added to it elsewhere in this file.
	collection = client.get_or_create_collection("code")

	def extract_text_from_pdf(file_path):
	    """Return the concatenated text of every page of a PDF file.

	    Args:
	        file_path: Path of the PDF, passed unchanged to ``fitz.open``.

	    Returns:
	        The text of all pages joined in page order. If opening or reading
	        fails, no exception is raised; instead a string starting with
	        ``"Fehler beim Lesen der PDF-Datei: "`` followed by the exception
	        message is returned.
	    """
	    try:
	        # The context manager closes the document even when a page fails.
	        with fitz.open(file_path) as doc:
	            return "".join(page.get_text() for page in doc)
	    except Exception as e:
	        # Deliberately broad: failures are reported as text, not raised.
	        return f"Fehler beim Lesen der PDF-Datei: {e}"

	def process_pdf(uploaded_file, prompt=None):
	    """Extract the text of an uploaded PDF, index it, and return the text.

	    Args:
	        uploaded_file: The upload to read. Its ``.name`` attribute is used
	            as the path when present; otherwise the value itself is used
	            (e.g. a plain path string). ``None`` means nothing was uploaded.
	        prompt: Currently unused. Optional so the function can also be
	            called with the file alone.

	    Returns:
	        The extracted text, or the error message produced by
	        ``extract_text_from_pdf``; ``None`` when no file was given.
	    """
	    if uploaded_file is None:
	        return None

	    # Extract the text from the uploaded PDF file.
	    pdf_path = getattr(uploaded_file, "name", uploaded_file)
	    pdf_text = extract_text_from_pdf(pdf_path)

	    # Extraction failures come back as a message with this prefix. They are
	    # still returned to the caller but must not be stored as documents.
	    extraction_failed = pdf_text.startswith("Fehler beim Lesen der PDF-Datei:")
	    if pdf_text and not extraction_failed:
	        collection.add(
	            documents=[pdf_text],
	            # Ids must be strings; a nanosecond timestamp keeps them distinct.
	            ids=[str(time.time_ns())],
	        )
	    return pdf_text


	def suchen(inputs):
	    """Placeholder search handler: ignores ``inputs`` and returns "hallo"."""
	    return "hallo"


	# UI: a prompt textbox, an output textbox, a PDF upload and a Run button.
	with gr.Blocks() as demo:
	    gr.Markdown("Start typing below and then click Run to see the output.")
	    with gr.Row():
	        inp = gr.Textbox(placeholder="What is your name?")
	        out = gr.Textbox()
	    # No trailing comma here: it would turn file_inp into a one-element
	    # tuple instead of a component that can be used as an event input.
	    file_inp = gr.File(type="filepath", label="PDF-Datei hochladen")
	    btn = gr.Button("Run")
	    # process_pdf takes (uploaded_file, prompt), so pass both in that order.
	    btn.click(fn=process_pdf, inputs=[file_inp, inp], outputs=out)

	demo.launch()