Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,91 +6,6 @@ from gradio_client import Client
|
|
6 |
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
|
7 |
|
8 |
|
9 |
-
# Initialisiere ChromaDB
|
10 |
-
#client_chroma = chromadb.Client()
|
11 |
-
client_chroma = chromadb.PersistentClient(path="./chroma", settings=None,tenant=DEFAULT_TENANT,database=DEFAULT_DATABASE)
|
12 |
-
|
13 |
-
collection_name = "pdf_collection"
|
14 |
-
collection = client_chroma.get_or_create_collection(name=collection_name)
|
15 |
-
|
16 |
-
# Verwende die integrierten Embeddings von ChromaDB
|
17 |
-
embedding_function = embedding_functions.DefaultEmbeddingFunction()
|
18 |
-
|
19 |
-
client = Client("Qwen/Qwen2.5-72B-Instruct")
|
20 |
-
def ask_llm(llm_prompt_input):
|
21 |
-
result = client.predict(
|
22 |
-
query=f"{llm_prompt_input}",
|
23 |
-
history=[],
|
24 |
-
system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
|
25 |
-
api_name="/model_chat"
|
26 |
-
)
|
27 |
-
print(result)
|
28 |
-
return result
|
29 |
-
|
30 |
-
def process_pdf(file):
|
31 |
-
# Lese den PDF-Inhalt
|
32 |
-
pdf_reader = PdfReader(file.name)
|
33 |
-
text = ""
|
34 |
-
for page in pdf_reader.pages:
|
35 |
-
text += page.extract_text()
|
36 |
-
|
37 |
-
# Erstelle Embedding
|
38 |
-
embedding = embedding_function([text])[0]
|
39 |
-
|
40 |
-
# Speichere das PDF in ChromaDB
|
41 |
-
collection.add(
|
42 |
-
documents=[text],
|
43 |
-
metadatas=[{"filename": file.name}],
|
44 |
-
ids=[file.name] # Verwende den Dateinamen als ID
|
45 |
-
)
|
46 |
-
|
47 |
-
return f"PDF {file.name} wurde erfolgreich in ChromaDB gespeichert."
|
48 |
-
|
49 |
-
def search_similar_documents(prompt):
|
50 |
-
# Erstelle Embedding für den Prompt
|
51 |
-
query_embedding = embedding_function([prompt])[0]
|
52 |
-
|
53 |
-
# Führe die Ähnlichkeitssuche durch
|
54 |
-
results = collection.query(
|
55 |
-
query_embeddings=[query_embedding],
|
56 |
-
n_results=3
|
57 |
-
)
|
58 |
-
|
59 |
-
# Formatiere die Ergebnisse
|
60 |
-
formatted_results = []
|
61 |
-
for i, doc in enumerate(results["documents"][0]):
|
62 |
-
metadata = results["metadatas"][0][i]
|
63 |
-
filename = metadata["filename"]
|
64 |
-
formatted_results.append(f"{doc}\n")
|
65 |
-
|
66 |
-
return "\n".join(formatted_results)
|
67 |
-
|
68 |
-
# Erstelle die Gradio-Schnittstelle
|
69 |
-
with gr.Blocks() as demo:
|
70 |
-
gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")
|
71 |
-
with gr.Row():
|
72 |
-
file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
|
73 |
-
upload_output = gr.Textbox(label="Upload Status")
|
74 |
-
with gr.Row():
|
75 |
-
submit_button = gr.Button("upload")
|
76 |
-
with gr.Row():
|
77 |
-
prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
|
78 |
-
search_output = gr.Textbox(label="Ähnliche Dokumente")
|
79 |
-
with gr.Row():
|
80 |
-
search_button = gr.Button("Suchen")
|
81 |
-
with gr.Row():
|
82 |
-
llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
|
83 |
-
llm_output = gr.Textbox(label="LLM Antwort")
|
84 |
-
with gr.Row():
|
85 |
-
llm_submit_button = gr.Button("send")
|
86 |
-
|
87 |
-
submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
|
88 |
-
search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
|
89 |
-
llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
|
90 |
-
|
91 |
-
# Starte die Gradio-Anwendung
|
92 |
-
demo.launch()
|
93 |
-
|
94 |
# Initialisiere ChromaDB
|
95 |
client_chroma = chromadb.Client()
|
96 |
#client_croma = chromadb.PersistentClient(path="/")
|
@@ -128,7 +43,7 @@ def process_pdf(file):
|
|
128 |
ids=[file.name] # Verwende den Dateinamen als ID
|
129 |
)
|
130 |
|
131 |
-
return f"PDF
|
132 |
|
133 |
def search_similar_documents(prompt):
|
134 |
# Erstelle Embedding für den Prompt
|
@@ -172,8 +87,6 @@ with gr.Blocks() as upload:
|
|
172 |
submit_button = gr.Button("upload")
|
173 |
submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
|
174 |
|
175 |
-
|
176 |
-
|
177 |
# Erstelle die Gradio-Schnittstelle
|
178 |
with gr.Blocks() as demo:
|
179 |
gr.TabbedInterface(
|
|
|
6 |
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
|
7 |
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# Initialisiere ChromaDB
|
10 |
client_chroma = chromadb.Client()
|
11 |
#client_croma = chromadb.PersistentClient(path="/")
|
|
|
43 |
ids=[file.name] # Verwende den Dateinamen als ID
|
44 |
)
|
45 |
|
46 |
+
return f"PDF wurde erfolgreich in ChromaDB gespeichert."
|
47 |
|
48 |
def search_similar_documents(prompt):
|
49 |
# Erstelle Embedding für den Prompt
|
|
|
87 |
submit_button = gr.Button("upload")
|
88 |
submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
|
89 |
|
|
|
|
|
90 |
# Erstelle die Gradio-Schnittstelle
|
91 |
with gr.Blocks() as demo:
|
92 |
gr.TabbedInterface(
|