Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,92 @@ import chromadb
|
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
5 |
from gradio_client import Client
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Initialisiere ChromaDB
|
8 |
client_chroma = chromadb.Client()
|
|
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
5 |
from gradio_client import Client
|
6 |
+
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
|
7 |
+
|
8 |
+
# Initialize ChromaDB with an on-disk store located at ./chroma.
# The client must be bound to `client_chroma`, the name used below to open the
# collection. `settings` is not passed so that the library's own default
# settings apply.
client_chroma = chromadb.PersistentClient(
    path="./chroma",
    tenant=DEFAULT_TENANT,
    database=DEFAULT_DATABASE,
)

collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# Use ChromaDB's built-in default embedding function.
embedding_function = embedding_functions.DefaultEmbeddingFunction()

# Gradio client for the hosted "Qwen/Qwen2.5-72B-Instruct" Space.
client = Client("Qwen/Qwen2.5-72B-Instruct")
|
19 |
+
def ask_llm(llm_prompt_input):
    """Send a prompt to the remote chat endpoint and return its raw result.

    Args:
        llm_prompt_input: The user's question; it is formatted into a string
            before being sent.

    Returns:
        Whatever ``client.predict`` returns for the ``/model_chat`` endpoint.
        NOTE(review): the exact shape of that value is not visible here --
        confirm it is suitable for a single Textbox output.
    """
    request = {
        "query": f"{llm_prompt_input}",
        "history": [],  # every call starts a fresh conversation
        "system": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        "api_name": "/model_chat",
    }
    reply = client.predict(**request)
    print(reply)
    return reply
|
28 |
+
|
29 |
+
def process_pdf(file):
    """Extract the text of an uploaded PDF and store it in the ChromaDB collection.

    Args:
        file: The value delivered by the Gradio file input. Either a plain
            filesystem path string (the file input is declared with
            ``type="filepath"``) or an object exposing the path as ``.name``.
            ``None`` means no file was uploaded.

    Returns:
        A status message for the UI.
    """
    if file is None:
        # Button pressed without an upload: report it instead of crashing.
        return "Bitte zuerst eine PDF-Datei hochladen."

    # Accept both a path string and a file-like wrapper carrying ``.name``.
    pdf_path = file if isinstance(file, str) else file.name

    # Read the PDF content. A page may yield no text at all, so fall back to
    # an empty string rather than concatenating a non-string value.
    pdf_reader = PdfReader(pdf_path)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Store the PDF in ChromaDB. No precomputed embedding is passed; only the
    # document text, its metadata and its ID are handed to the collection.
    collection.add(
        documents=[text],
        metadatas=[{"filename": pdf_path}],
        ids=[pdf_path],  # use the file name as the ID
    )

    return f"PDF {pdf_path} wurde erfolgreich in ChromaDB gespeichert."
|
47 |
+
|
48 |
+
def search_similar_documents(prompt):
    """Return the stored documents most similar to ``prompt``.

    Args:
        prompt: Search text entered by the user.

    Returns:
        The text of the matching documents (three results are requested),
        each followed by a newline and joined with a further newline. An
        empty string if the query returned no documents.
    """
    # Create the embedding for the prompt.
    query_embedding = embedding_function([prompt])[0]

    # Run the similarity search.
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3,
    )

    # Format the results. Only the document text is emitted, so the metadata
    # is not read here; a record without a "filename" entry cannot break the
    # search.
    return "\n".join(f"{doc}\n" for doc in results["documents"][0])
|
66 |
+
|
67 |
+
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")

    # PDF upload: file picker, status box and trigger button.
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")

    # Similarity search: query box, result box and trigger button.
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Suche nach ähnlichen Dokumenten",
            placeholder="Gib einen Suchbegriff ein",
        )
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")

    # LLM question: prompt box, answer box and trigger button.
    with gr.Row():
        llm_prompt_input = gr.Textbox(
            label="Frage an das LLM",
            placeholder="Gib eine Frage ein",
        )
        llm_output = gr.Textbox(label="LLM Antwort")
    with gr.Row():
        llm_submit_button = gr.Button("send")

    # Wire each button to its handler.
    submit_button.click(fn=process_pdf, inputs=file_input, outputs=upload_output)
    search_button.click(
        fn=search_similar_documents,
        inputs=prompt_input,
        outputs=search_output,
    )
    llm_submit_button.click(fn=ask_llm, inputs=llm_prompt_input, outputs=llm_output)

# Start the Gradio application.
demo.launch()
|
92 |
|
93 |
# Initialisiere ChromaDB
|
94 |
client_chroma = chromadb.Client()
|