Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,6 @@ import chromadb
|
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
5 |
from gradio_client import Client
|
6 |
-
#from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT #is needed for persistent client
|
7 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
import speech_recognition as sr
|
9 |
import groq
|
10 |
import os
|
@@ -80,23 +78,15 @@ def process_pdf(file):
|
|
80 |
for page in pdf_reader.pages:
|
81 |
text += page.extract_text()
|
82 |
|
83 |
-
#
|
84 |
-
|
85 |
-
chunk_size=300, # Adjust the chunk size as needed
|
86 |
-
chunk_overlap=10 # Adjust the overlap as needed
|
87 |
-
)
|
88 |
-
chunks = text_splitter.split_text(text)
|
89 |
|
90 |
-
#
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
documents=[chunk],
|
97 |
-
metadatas=[{"filename": file.name, "chunk_id": i}],
|
98 |
-
ids=[f"{file.name}_{i}"] # Use a unique ID for each chunk
|
99 |
-
)
|
100 |
return f"PDF wurde erfolgreich in ChromaDB gespeichert."
|
101 |
|
102 |
def search_similar_documents(prompt):
|
@@ -114,55 +104,6 @@ def search_similar_documents(prompt):
|
|
114 |
for i, doc in enumerate(results["documents"][0]):
|
115 |
metadata = results["metadatas"][0][i]
|
116 |
filename = metadata["filename"]
|
117 |
-
formatted_results.append(f"{doc}\n")
|
118 |
-
|
119 |
-
|
120 |
-
ergebnis = gr.Markdown(ergebnis)
|
121 |
-
return ergebnis
|
122 |
-
|
123 |
-
with gr.Blocks() as chat:
|
124 |
-
gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
|
125 |
-
with gr.Row():
|
126 |
-
llm_output = gr.Textbox(label="LLM Answer")
|
127 |
-
with gr.Row():
|
128 |
-
llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
|
129 |
-
llm_submit_button = gr.Button("send")
|
130 |
-
llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
|
131 |
-
|
132 |
-
with gr.Blocks() as upload:
|
133 |
-
gr.Markdown("### File upload", elem_classes="tab-header")
|
134 |
-
with gr.Row():
|
135 |
-
file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
|
136 |
-
upload_output = gr.Textbox(label="Upload Status")
|
137 |
-
with gr.Row():
|
138 |
-
submit_button = gr.Button("upload")
|
139 |
-
submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
|
140 |
-
|
141 |
-
with gr.Blocks() as suche:
|
142 |
-
gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
|
143 |
-
with gr.Row():
|
144 |
-
prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
|
145 |
-
with gr.Row():
|
146 |
-
search_output = gr.Textbox(label="Ähnliche Dokumente")
|
147 |
-
with gr.Row():
|
148 |
-
search_button = gr.Button("Suchen")
|
149 |
-
search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
|
150 |
-
|
151 |
-
#optional, Spracheingabe
|
152 |
-
with gr.Blocks() as speech:
|
153 |
-
gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
|
154 |
-
with gr.Row():
|
155 |
-
sr_outputs = gr.Textbox(label="Antwort")
|
156 |
-
with gr.Row():
|
157 |
-
sr_inputs = gr.Microphone(type="filepath")
|
158 |
-
sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
|
159 |
-
|
160 |
-
# Erstelle die Gradio-Schnittstelle
|
161 |
-
with gr.Blocks() as demo:
|
162 |
-
gr.TabbedInterface(
|
163 |
-
[chat, upload, suche],
|
164 |
-
["Chat", "Upload", "Suche"]
|
165 |
-
)
|
166 |
-
|
167 |
-
# Starte die Gradio-Anwendung
|
168 |
-
demo.launch()
|
|
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
5 |
from gradio_client import Client
|
|
|
|
|
6 |
import speech_recognition as sr
|
7 |
import groq
|
8 |
import os
|
|
|
78 |
for page in pdf_reader.pages:
|
79 |
text += page.extract_text()
|
80 |
|
81 |
+
# Create embedding for the entire text
|
82 |
+
embedding = embedding_function([text])[0]
|
|
|
|
|
|
|
|
|
83 |
|
84 |
+
# Store the entire text in ChromaDB
|
85 |
+
collection.add(
|
86 |
+
documents=[text],
|
87 |
+
metadatas=[{"filename": file.name}],
|
88 |
+
ids=[file.name] # Use the filename as the ID
|
89 |
+
)
|
|
|
|
|
|
|
|
|
90 |
return f"PDF wurde erfolgreich in ChromaDB gespeichert."
|
91 |
|
92 |
def search_similar_documents(prompt):
|
|
|
104 |
for i, doc in enumerate(results["documents"][0]):
|
105 |
metadata = results["metadatas"][0][i]
|
106 |
filename = metadata["filename"]
|
107 |
+
formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")
|
108 |
+
|
109 |
+
return gr.Markdown(''.join(formatted_results))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|