Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,15 +2,19 @@ import gradio as gr
|
|
2 |
import chromadb
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
|
|
5 |
|
6 |
# Initialisiere ChromaDB
|
7 |
-
|
8 |
collection_name = "pdf_collection"
|
9 |
-
collection =
|
10 |
|
11 |
# Verwende die integrierten Embeddings von ChromaDB
|
12 |
embedding_function = embedding_functions.DefaultEmbeddingFunction()
|
13 |
|
|
|
|
|
|
|
14 |
def process_pdf(file):
|
15 |
# Lese den PDF-Inhalt
|
16 |
pdf_reader = PdfReader(file.name)
|
@@ -30,14 +34,54 @@ def process_pdf(file):
|
|
30 |
|
31 |
return f"PDF {file.name} wurde erfolgreich in ChromaDB gespeichert."
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# Erstelle die Gradio-Schnittstelle
|
34 |
with gr.Blocks() as demo:
|
35 |
-
gr.Markdown("# PDF Upload
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
# Starte die Gradio-Anwendung
|
43 |
demo.launch()
|
|
|
2 |
import chromadb
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
5 |
+
from gradio_client import Client
|
6 |
|
7 |
# Initialise ChromaDB: create a client and fetch (or create) the collection
# that search_similar_documents() queries.
client_chroma = chromadb.Client()
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# Use ChromaDB's built-in default embedding function; search_similar_documents()
# calls it directly to embed the user's prompt.
embedding_function = embedding_functions.DefaultEmbeddingFunction()

# Initialise the LLM client (gradio_client.Client) used by ask_llm().
# NOTE(review): presumably this contacts the referenced Space at import time,
# so start-up may fail while that Space is unavailable — confirm.
client_llm = Client("Mxytyu/meta-llama-Llama-3.2-3B")
|
17 |
+
|
18 |
def process_pdf(file):
|
19 |
# Lese den PDF-Inhalt
|
20 |
pdf_reader = PdfReader(file.name)
|
|
|
34 |
|
35 |
return f"PDF {file.name} wurde erfolgreich in ChromaDB gespeichert."
|
36 |
|
37 |
+
def search_similar_documents(prompt):
    """Return the stored text chunks most similar to *prompt*.

    The prompt is embedded with the module-level ``embedding_function`` and
    the three nearest entries of the module-level ChromaDB ``collection`` are
    rendered as ``"Datei: <filename>\\nText: <document>\\n"`` blocks joined by
    newlines.

    Args:
        prompt: Search text entered by the user; may be empty or ``None``.

    Returns:
        The formatted hits, or a short German hint when the prompt is blank
        or the query produced no hits.
    """
    # A blank query has nothing meaningful to embed; answer right away
    # instead of running a similarity search on an empty string.
    if not prompt or not prompt.strip():
        return "Bitte gib einen Suchbegriff ein."

    # Create the embedding for the prompt.
    query_embedding = embedding_function([prompt])[0]

    # Run the similarity search.
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3,
    )

    # The result holds one inner list per query embedding; exactly one was sent.
    documents = (results.get("documents") or [[]])[0]
    metadatas = (results.get("metadatas") or [[]])[0]

    # Format the hits.
    formatted_results = []
    for position, doc in enumerate(documents):
        # An entry stored without metadata, or without a "filename" key, must
        # not abort the whole search with a KeyError/TypeError.
        metadata = metadatas[position] if position < len(metadatas) else None
        filename = (metadata or {}).get("filename", "Unbekannt")
        formatted_results.append(f"Datei: {filename}\nText: {doc}\n")

    if not formatted_results:
        return "Keine ähnlichen Dokumente gefunden."
    return "\n".join(formatted_results)
|
55 |
+
|
56 |
+
def ask_llm(prompt):
    """Send *prompt* to the remote LLM and return its answer.

    Args:
        prompt: Question entered by the user; may be empty or ``None``.

    Returns:
        Whatever the ``/predict`` endpoint of the module-level ``client_llm``
        returns, or a short German hint when the prompt is blank.
    """
    # Skip the remote round trip when there is nothing to ask.
    if not prompt or not prompt.strip():
        return "Bitte gib eine Frage ein."

    return client_llm.predict(
        param_0=prompt,
        api_name="/predict",
    )
|
62 |
+
|
63 |
# Build the Gradio interface: three rows (upload, similarity search, LLM
# question), each with an input, a trigger button and an output textbox.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")

    # Row 1: PDF upload.
    with gr.Row():
        file_input = gr.File(
            label="Wähle eine PDF-Datei aus",
            type="filepath",
        )
        submit_button = gr.Button("Hochladen")
        upload_output = gr.Textbox(label="Upload Status")

    # Row 2: similarity search over the stored documents.
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Suche nach ähnlichen Dokumenten",
            placeholder="Gib einen Suchbegriff ein",
        )
        search_button = gr.Button("Suchen")
        search_output = gr.Textbox(label="Ähnliche Dokumente")

    # Row 3: free-form question to the LLM.
    with gr.Row():
        llm_prompt_input = gr.Textbox(
            label="Frage an das LLM",
            placeholder="Gib eine Frage ein",
        )
        llm_submit_button = gr.Button("Fragen")
        llm_output = gr.Textbox(label="LLM Antwort")

    # Wire each button to its handler; registration order is kept as-is.
    submit_button.click(
        fn=process_pdf,
        inputs=file_input,
        outputs=upload_output,
    )
    search_button.click(
        fn=search_similar_documents,
        inputs=prompt_input,
        outputs=search_output,
    )
    llm_submit_button.click(
        fn=ask_llm,
        inputs=llm_prompt_input,
        outputs=llm_output,
    )

# Start the Gradio application.
demo.launch()
|