# RAG-Vereine / app.py
import gradio as gr
import chromadb
from chromadb.utils import embedding_functions
from PyPDF2 import PdfReader
from gradio_client import Client
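
# Minimal RAG demo: uploaded PDFs are embedded and stored in ChromaDB,
# similar passages can be retrieved, and user questions are answered by a remote Qwen model.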
# Initialize a persistent ChromaDB client (data is stored under ./tmp)
# client_chroma = chromadb.Client()  # in-memory alternative
client_chroma = chromadb.PersistentClient(path="./tmp")
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)
# Use ChromaDB's built-in default embedding function
embedding_function = embedding_functions.DefaultEmbeddingFunction()
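
# Connect to the hosted Qwen2.5-72B-Instruct Space via gradio_client;
# answers are generated remotely through its /model_chat endpoint.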
client = Client("Qwen/Qwen2.5-72B-Instruct")


def ask_llm(llm_prompt_input):
    # Embed the question
    query_embedding = embedding_function([llm_prompt_input])[0]
    # Run the similarity search against the stored PDFs
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3
    )
    # Collect the retrieved passages (currently only logged, not added to the prompt)
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"{doc}\n")
    print("\n".join(formatted_results))
    # Forward the question to the Qwen Space
    result = client.predict(
        query=llm_prompt_input,
        history=[],
        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        api_name="/model_chat"
    )
    return result


def process_pdf(file):
    # Read the PDF content
    pdf_reader = PdfReader(file.name)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text() or ""
    # Create an embedding for the full document text
    embedding = embedding_function([text])[0]
    # Store the text, its embedding and the filename in ChromaDB
    collection.add(
        documents=[text],
        embeddings=[embedding],
        metadatas=[{"filename": file.name}],
        ids=[file.name]  # use the file name as ID
    )
    return f"PDF {file.name} wurde erfolgreich in ChromaDB gespeichert."


def search_similar_documents(prompt):
    # Embed the search prompt
    query_embedding = embedding_function([prompt])[0]
    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3
    )
    # Format the results
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"{doc}\n")
    return "\n".join(formatted_results)


# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")
    with gr.Row():
        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")
    with gr.Row():
        llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
        llm_output = gr.Textbox(label="LLM Antwort")
    with gr.Row():
        llm_submit_button = gr.Button("send")

    # Wire the buttons to their handlers
    submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
    search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
    llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)

# Launch the Gradio app
demo.launch()