RAG-Vereine

Sleeping

App Files Files Community

RAG-Vereine / app.py

mgokg

Update app.py

bfed16d verified 8 months ago

raw

history blame

3.71 kB

	import gradio as gr
	import chromadb
	from chromadb.utils import embedding_functions
	from PyPDF2 import PdfReader
	from gradio_client import Client


	# Embedding function bundled with ChromaDB; the functions below call it
	# directly to embed document text and search prompts.
	embedding_function = embedding_functions.DefaultEmbeddingFunction()

	# ChromaDB client backed by the ./tmp directory, and the one collection
	# that holds every uploaded PDF.
	collection_name = "pdf_collection"
	client_chroma = chromadb.PersistentClient(path="./tmp", settings=None)
	collection = client_chroma.get_or_create_collection(name=collection_name)

	# Gradio client for the hosted Qwen chat Space used to answer questions.
	client = Client("Qwen/Qwen2.5-72B-Instruct")
	def ask_llm(llm_prompt_input):
	    """Send a question to the hosted Qwen model and return its reply.

	    Before the model is called, the stored documents most similar to the
	    question are looked up and printed to stdout as a debugging aid.

	    Args:
	        llm_prompt_input: Question text entered by the user.

	    Returns:
	        Whatever ``client.predict`` returns for the ``/model_chat`` endpoint.
	    """
	    # The lookup is the same embed/query/format sequence as the similarity
	    # search, so reuse that function instead of keeping a second copy.
	    context = search_similar_documents(llm_prompt_input)

	    # Log the retrieved text. A bare, undefined ``join`` used to be called
	    # here, which raised NameError on every request.
	    print(context)

	    # NOTE(review): the retrieved context is only logged; the model still
	    # receives the bare question. Confirm whether it should be sent along.
	    result = client.predict(
	        query=llm_prompt_input,
	        history=[],
	        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
	        api_name="/model_chat",
	    )

	    return result

	def process_pdf(file):
	    """Extract the text of an uploaded PDF and store it in the collection.

	    Args:
	        file: The upload handed over by the ``gr.File`` input. With
	            ``type="filepath"`` this is a plain path string; an object
	            exposing the path as ``.name`` is accepted as well. ``None``
	            means no file was selected.

	    Returns:
	        A German status message for the "Upload Status" textbox.
	    """
	    # Clicking "upload" without choosing a file delivers None.
	    if file is None:
	        return "Bitte zuerst eine PDF-Datei auswählen."

	    # A path string has no ``.name`` attribute, so only dereference it on
	    # file-like objects.
	    path = file if isinstance(file, str) else file.name

	    # Concatenate the text of all pages; a page without extractable text
	    # contributes an empty string instead of breaking the concatenation.
	    pdf_reader = PdfReader(path)
	    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

	    # Embed the whole document once and hand that vector to the collection,
	    # so the stored embedding comes from the same function used for queries.
	    embedding = embedding_function([text])[0]

	    collection.add(
	        documents=[text],
	        embeddings=[embedding],
	        metadatas=[{"filename": path}],
	        ids=[path],  # the file path doubles as the document id
	    )

	    return f"PDF {path} wurde erfolgreich in ChromaDB gespeichert."

	def search_similar_documents(prompt, n_results=3):
	    """Return the stored documents most similar to ``prompt``.

	    Args:
	        prompt: Free-text search query.
	        n_results: Number of documents to request from the collection.
	            Defaults to 3, the value that was previously hard-coded.

	    Returns:
	        The matching document texts, each followed by a newline and joined
	        with a further newline. An empty string when there are no matches.
	    """
	    # Embed the prompt with the module-level embedding function.
	    query_embedding = embedding_function([prompt])[0]

	    results = collection.query(
	        query_embeddings=[query_embedding],
	        n_results=n_results,
	    )

	    # One query embedding was sent, so the hits are in the first entry.
	    # The per-document metadata is not part of the output and is not read.
	    hits = results["documents"][0]
	    return "\n".join(f"{doc}\n" for doc in hits)

	# Build the Gradio interface: PDF upload, similarity search and LLM question.
	with gr.Blocks() as demo:
	    gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")

	    # --- PDF upload -------------------------------------------------------
	    with gr.Row():
	        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
	        upload_output = gr.Textbox(label="Upload Status")
	    with gr.Row():
	        submit_button = gr.Button("upload")

	    # --- Similarity search ------------------------------------------------
	    with gr.Row():
	        prompt_input = gr.Textbox(
	            label="Suche nach ähnlichen Dokumenten",
	            placeholder="Gib einen Suchbegriff ein",
	        )
	        search_output = gr.Textbox(label="Ähnliche Dokumente")
	    with gr.Row():
	        search_button = gr.Button("Suchen")

	    # --- Question to the LLM ----------------------------------------------
	    with gr.Row():
	        llm_prompt_input = gr.Textbox(
	            label="Frage an das LLM",
	            placeholder="Gib eine Frage ein",
	        )
	        llm_output = gr.Textbox(label="LLM Antwort")
	    with gr.Row():
	        llm_submit_button = gr.Button("send")

	    # Connect each button to its handler and result textbox.
	    submit_button.click(fn=process_pdf, inputs=file_input, outputs=upload_output)
	    search_button.click(fn=search_similar_documents, inputs=prompt_input, outputs=search_output)
	    llm_submit_button.click(fn=ask_llm, inputs=llm_prompt_input, outputs=llm_output)

	# Start the Gradio application.
	demo.launch()