mgokg committed on
Commit
c0c400e
·
verified ·
1 Parent(s): de4f158

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py CHANGED
@@ -3,6 +3,92 @@ import chromadb
3
  from chromadb.utils import embedding_functions
4
  from PyPDF2 import PdfReader
5
  from gradio_client import Client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Initialisiere ChromaDB
8
  client_chroma = chromadb.Client()
 
3
  from chromadb.utils import embedding_functions
4
  from PyPDF2 import PdfReader
5
  from gradio_client import Client
6
+ from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
7
+
8
# Initialize ChromaDB with a persistent client that stores its data under ./chroma.
# NOTE: the name must be `client_chroma`; the collection lookup below uses it.
client_chroma = chromadb.PersistentClient(
    path="./chroma",
    settings=None,
    tenant=DEFAULT_TENANT,
    database=DEFAULT_DATABASE,
)

collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# Use ChromaDB's built-in default embedding function.
embedding_function = embedding_functions.DefaultEmbeddingFunction()

# Gradio client for the hosted "Qwen/Qwen2.5-72B-Instruct" app.
client = Client("Qwen/Qwen2.5-72B-Instruct")
19
def ask_llm(llm_prompt_input):
    """Send a prompt to the ``/model_chat`` endpoint and return its response.

    Args:
        llm_prompt_input: The question to ask; it is formatted into a string
            before being sent.

    Returns:
        Whatever ``client.predict`` returns for the ``/model_chat`` endpoint.
    """
    request = {
        "query": f"{llm_prompt_input}",
        "history": [],
        "system": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
        "api_name": "/model_chat",
    }
    reply = client.predict(**request)
    # Echo the raw response to stdout.
    print(reply)
    return reply
28
+
29
def process_pdf(file):
    """Extract the text of an uploaded PDF and store it in the ChromaDB collection.

    Args:
        file: Either a filesystem path string (what a ``gr.File`` component
            configured with ``type="filepath"`` hands to its callback) or an
            object exposing the path as ``.name``. ``None`` means that no
            file was uploaded.

    Returns:
        A status message for the UI.
    """
    if file is None:
        return "Bitte zuerst eine PDF-Datei hochladen."

    # Accept both a plain path string and a file wrapper carrying ``.name``.
    pdf_path = file if isinstance(file, str) else file.name

    # Read the PDF content page by page. extract_text() may yield nothing for
    # a page (e.g. image-only pages); treat that as an empty string.
    pdf_reader = PdfReader(pdf_path)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Embed with the same function the search uses, and store that embedding
    # explicitly instead of computing it and throwing it away.
    embedding = embedding_function([text])[0]

    # Store the PDF text in ChromaDB, using the file path as the document ID.
    collection.add(
        documents=[text],
        embeddings=[embedding],
        metadatas=[{"filename": pdf_path}],
        ids=[pdf_path],
    )

    return f"PDF {pdf_path} wurde erfolgreich in ChromaDB gespeichert."
47
+
48
def search_similar_documents(prompt, n_results=3):
    """Return the stored documents most similar to ``prompt``.

    Args:
        prompt: Free-text search query.
        n_results: Maximum number of documents to request from the
            collection. Defaults to 3.

    Returns:
        The matching document texts, each followed by a newline and joined
        with newlines. An empty string is returned for a missing or blank
        prompt, without querying the collection.
    """
    if not prompt or not prompt.strip():
        return ""

    # Embed the prompt with the module-level embedding function.
    query_embedding = embedding_function([prompt])[0]

    # Run the similarity search.
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
    )

    # Only the document texts are shown; results for the single query sit at
    # index 0 of the "documents" list.
    return "\n".join(f"{doc}\n" for doc in results["documents"][0])
66
+
67
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")

    # PDF upload: file picker, status box and trigger button.
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")

    # Similarity search: query box, result box and trigger button.
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Suche nach ähnlichen Dokumenten",
            placeholder="Gib einen Suchbegriff ein",
        )
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")

    # LLM question: prompt box, answer box and trigger button.
    with gr.Row():
        llm_prompt_input = gr.Textbox(
            label="Frage an das LLM",
            placeholder="Gib eine Frage ein",
        )
        llm_output = gr.Textbox(label="LLM Antwort")
    with gr.Row():
        llm_submit_button = gr.Button("send")

    # Wire each button to its handler.
    submit_button.click(
        fn=process_pdf,
        inputs=file_input,
        outputs=upload_output,
    )
    search_button.click(
        fn=search_similar_documents,
        inputs=prompt_input,
        outputs=search_output,
    )
    llm_submit_button.click(
        fn=ask_llm,
        inputs=llm_prompt_input,
        outputs=llm_output,
    )

# Start the Gradio application.
demo.launch()
92
 
93
  # Initialisiere ChromaDB
94
  client_chroma = chromadb.Client()