Spaces:
Sleeping
Sleeping
import gradio as gr | |
import chromadb | |
from chromadb.utils import embedding_functions | |
from PyPDF2 import PdfReader | |
from gradio_client import Client | |
# Open (or create) the on-disk ChromaDB store under ./tmp.
# `settings` is deliberately not passed: an explicit None is redundant where
# the parameter is optional and is rejected by chromadb releases that expect
# a Settings instance.
client_chroma = chromadb.PersistentClient(path="./tmp")
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# ChromaDB's built-in default embedding function, called explicitly by the
# functions below to embed both stored PDF text and search prompts.
embedding_function = embedding_functions.DefaultEmbeddingFunction()

# Gradio client for the hosted "Qwen/Qwen2.5-72B-Instruct" Space.
client = Client("Qwen/Qwen2.5-72B-Instruct")
def ask_llm(llm_prompt_input):
    """Send a question to the hosted Qwen model and return its response.

    Before the model is called, the stored documents most similar to the
    question are looked up and printed to stdout.

    NOTE(review): the retrieved documents are only printed; they are not
    included in the request sent to the model. Confirm whether they were
    meant to be passed along as context.

    Args:
        llm_prompt_input: The user's question as plain text.

    Returns:
        Whatever ``client.predict`` returns for the ``/model_chat`` endpoint.
    """
    # Reuse the search helper instead of repeating the query/format code.
    # The previous version called a bare, undefined ``join(...)`` here, which
    # raised NameError on every invocation.
    retrieved_context = search_similar_documents(llm_prompt_input)
    print(retrieved_context)

    return client.predict(
        query=llm_prompt_input,
        history=[],
        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        api_name="/model_chat",
    )
def process_pdf(file):
    """Extract the text of an uploaded PDF and store it in the collection.

    The whole document is stored as a single ChromaDB record whose id and
    ``filename`` metadata are the upload's path.

    NOTE(review): embedding an entire PDF as one record may lose detail for
    long documents — consider chunking; confirm against the embedding model.

    Args:
        file: The upload as delivered by the ``gr.File`` input: either a path
            string or an object whose ``name`` attribute is the path. ``None``
            when the button is pressed without an upload.

    Returns:
        A status message for the "Upload Status" textbox.
    """
    # Clicking "upload" without selecting a file delivers None.
    if file is None:
        return "Bitte zuerst eine PDF-Datei hochladen."

    # Accept a plain path string as well as an object exposing ``.name``.
    pdf_path = getattr(file, "name", file)

    pdf_reader = PdfReader(pdf_path)
    # Guard against pages for which no text is returned (e.g. image-only
    # pages) and build the string in one pass instead of repeated ``+=``.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Embed once here and hand the vector to ChromaDB, so the stored vector
    # comes from the same function the query side uses.
    embedding = embedding_function([text])[0]

    collection.add(
        documents=[text],
        embeddings=[embedding],
        metadatas=[{"filename": pdf_path}],
        ids=[pdf_path],  # the path doubles as the record id
    )
    return f"PDF {pdf_path} wurde erfolgreich in ChromaDB gespeichert."
def search_similar_documents(prompt):
    """Return the text of the stored documents most similar to ``prompt``.

    Args:
        prompt: Free-text search query.

    Returns:
        The text of up to three matching documents, each followed by a
        newline and separated from the next by a blank line. An empty string
        when the query yields no documents.
    """
    query_embedding = embedding_function([prompt])[0]
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3,
    )
    # One query embedding was sent, so the hits are in the first result list.
    # The per-hit metadata lookup was removed: its value was never used and
    # it raised for records stored without a "filename" entry.
    matched_documents = results["documents"][0]
    return "\n".join(f"{doc}\n" for doc in matched_documents)
# Build the Gradio interface: PDF upload, similarity search, and LLM question.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")

    # --- PDF upload ---
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")

    # --- Similarity search over the stored documents ---
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Suche nach ähnlichen Dokumenten",
            placeholder="Gib einen Suchbegriff ein",
        )
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")

    # --- Question to the LLM ---
    with gr.Row():
        llm_prompt_input = gr.Textbox(
            label="Frage an das LLM",
            placeholder="Gib eine Frage ein",
        )
        llm_output = gr.Textbox(label="LLM Antwort")
    with gr.Row():
        llm_submit_button = gr.Button("send")

    # Wire each button to its handler; listeners are registered while the
    # Blocks context is still open.
    submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
    search_button.click(
        search_similar_documents,
        inputs=prompt_input,
        outputs=search_output,
    )
    llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)

# Start the Gradio application.
demo.launch()