mgokg committed on
Commit
78ca2ea
·
verified ·
1 Parent(s): b144175

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -88
app.py CHANGED
@@ -6,91 +6,6 @@ from gradio_client import Client
6
  from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
7
 
8
 
9
- # Initialisiere ChromaDB
10
- #client_chroma = chromadb.Client()
11
- client_chroma = chromadb.PersistentClient(path="./chroma", settings=None,tenant=DEFAULT_TENANT,database=DEFAULT_DATABASE)
12
-
13
- collection_name = "pdf_collection"
14
- collection = client_chroma.get_or_create_collection(name=collection_name)
15
-
16
- # Verwende die integrierten Embeddings von ChromaDB
17
- embedding_function = embedding_functions.DefaultEmbeddingFunction()
18
-
19
- client = Client("Qwen/Qwen2.5-72B-Instruct")
20
- def ask_llm(llm_prompt_input):
21
- result = client.predict(
22
- query=f"{llm_prompt_input}",
23
- history=[],
24
- system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
25
- api_name="/model_chat"
26
- )
27
- print(result)
28
- return result
29
-
30
- def process_pdf(file):
31
- # Lese den PDF-Inhalt
32
- pdf_reader = PdfReader(file.name)
33
- text = ""
34
- for page in pdf_reader.pages:
35
- text += page.extract_text()
36
-
37
- # Erstelle Embedding
38
- embedding = embedding_function([text])[0]
39
-
40
- # Speichere das PDF in ChromaDB
41
- collection.add(
42
- documents=[text],
43
- metadatas=[{"filename": file.name}],
44
- ids=[file.name] # Verwende den Dateinamen als ID
45
- )
46
-
47
- return f"PDF {file.name} wurde erfolgreich in ChromaDB gespeichert."
48
-
49
- def search_similar_documents(prompt):
50
- # Erstelle Embedding für den Prompt
51
- query_embedding = embedding_function([prompt])[0]
52
-
53
- # Führe die Ähnlichkeitssuche durch
54
- results = collection.query(
55
- query_embeddings=[query_embedding],
56
- n_results=3
57
- )
58
-
59
- # Formatiere die Ergebnisse
60
- formatted_results = []
61
- for i, doc in enumerate(results["documents"][0]):
62
- metadata = results["metadatas"][0][i]
63
- filename = metadata["filename"]
64
- formatted_results.append(f"{doc}\n")
65
-
66
- return "\n".join(formatted_results)
67
-
68
- # Erstelle die Gradio-Schnittstelle
69
- with gr.Blocks() as demo:
70
- gr.Markdown("# PDF Upload and Similarity Search with ChromaDB and LLM")
71
- with gr.Row():
72
- file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
73
- upload_output = gr.Textbox(label="Upload Status")
74
- with gr.Row():
75
- submit_button = gr.Button("upload")
76
- with gr.Row():
77
- prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
78
- search_output = gr.Textbox(label="Ähnliche Dokumente")
79
- with gr.Row():
80
- search_button = gr.Button("Suchen")
81
- with gr.Row():
82
- llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
83
- llm_output = gr.Textbox(label="LLM Antwort")
84
- with gr.Row():
85
- llm_submit_button = gr.Button("send")
86
-
87
- submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
88
- search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
89
- llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
90
-
91
- # Starte die Gradio-Anwendung
92
- demo.launch()
93
-
94
  # Initialisiere ChromaDB
95
  client_chroma = chromadb.Client()
96
  #client_croma = chromadb.PersistentClient(path="/")
@@ -128,7 +43,7 @@ def process_pdf(file):
128
  ids=[file.name] # Verwende den Dateinamen als ID
129
  )
130
 
131
- return f"PDF {file.name} wurde erfolgreich in ChromaDB gespeichert."
132
 
133
  def search_similar_documents(prompt):
134
  # Erstelle Embedding für den Prompt
@@ -172,8 +87,6 @@ with gr.Blocks() as upload:
172
  submit_button = gr.Button("upload")
173
  submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
174
 
175
-
176
-
177
  # Erstelle die Gradio-Schnittstelle
178
  with gr.Blocks() as demo:
179
  gr.TabbedInterface(
 
6
  from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
7
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Initialisiere ChromaDB
10
  client_chroma = chromadb.Client()
11
  #client_croma = chromadb.PersistentClient(path="/")
 
43
  ids=[file.name] # Verwende den Dateinamen als ID
44
  )
45
 
46
+ return f"PDF wurde erfolgreich in ChromaDB gespeichert."
47
 
48
  def search_similar_documents(prompt):
49
  # Erstelle Embedding für den Prompt
 
87
  submit_button = gr.Button("upload")
88
  submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
89
 
 
 
90
  # Erstelle die Gradio-Schnittstelle
91
  with gr.Blocks() as demo:
92
  gr.TabbedInterface(