Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,39 +3,32 @@ import chromadb
|
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
5 |
from gradio_client import Client
|
6 |
-
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
8 |
-
import os
|
9 |
import speech_recognition as sr
|
10 |
import groq
|
11 |
-
import
|
|
|
|
|
12 |
api_key = os.getenv('groq')
|
|
|
13 |
# Initialisiere ChromaDB
|
14 |
client_chroma = chromadb.Client()
|
15 |
collection_name = "pdf_collection"
|
16 |
collection = client_chroma.get_or_create_collection(name=collection_name)
|
17 |
-
|
18 |
-
custom_css = """
|
19 |
-
.gr-button {
|
20 |
-
width: 300px; /* Set the width of the button */
|
21 |
-
}
|
22 |
-
"""
|
23 |
-
|
24 |
# Verwende die integrierten Embeddings von ChromaDB
|
25 |
embedding_function = embedding_functions.DefaultEmbeddingFunction()
|
26 |
-
|
27 |
client = groq.Client(api_key=api_key)
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
try:
|
33 |
-
# Use Llama 3 70B powered by Groq for text generation
|
34 |
completion = client.chat.completions.create(
|
35 |
model="llama3-70b-8192",
|
36 |
messages=[
|
37 |
{"role": "system", "content": "You are a helpful assistant."},
|
38 |
-
{"role": "user", "content": f"{message} antworte immer auf deutsch"}
|
39 |
],
|
40 |
)
|
41 |
return completion.choices[0].message.content
|
@@ -50,7 +43,7 @@ def transcribe_audio(audio):
|
|
50 |
try:
|
51 |
text = recognizer.recognize_google(audio_data, language="de-DE")
|
52 |
result = update(text)
|
53 |
-
|
54 |
return result
|
55 |
|
56 |
except sr.UnknownValueError:
|
@@ -61,7 +54,6 @@ def transcribe_audio(audio):
|
|
61 |
def ask_llm(llm_prompt_input):
|
62 |
# Erstelle Embedding für den Prompt
|
63 |
query_embedding = embedding_function([llm_prompt_input])[0]
|
64 |
-
|
65 |
# Führe die Ähnlichkeitssuche durch
|
66 |
results = collection.query(
|
67 |
query_embeddings=[query_embedding],
|
@@ -107,8 +99,6 @@ def process_pdf(file):
|
|
107 |
)
|
108 |
return f"PDF wurde erfolgreich in ChromaDB gespeichert."
|
109 |
|
110 |
-
# Example usage
|
111 |
-
# process_pdf(your_file_object)
|
112 |
def search_similar_documents(prompt):
|
113 |
# Erstelle Embedding für den Prompt
|
114 |
query_embedding = embedding_function([prompt])[0]
|
@@ -129,7 +119,6 @@ def search_similar_documents(prompt):
|
|
129 |
ergebnis = f"{''.join(formatted_results)}"
|
130 |
ergebnis = gr.Markdown(ergebnis)
|
131 |
return ergebnis
|
132 |
-
#return "\n".join(formatted_results)
|
133 |
|
134 |
with gr.Blocks() as chat:
|
135 |
gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
|
@@ -137,9 +126,7 @@ with gr.Blocks() as chat:
|
|
137 |
llm_output = gr.Textbox(label="LLM Answer")
|
138 |
with gr.Row():
|
139 |
llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
|
140 |
-
llm_submit_button = gr.Button("send")
|
141 |
-
|
142 |
-
#search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
|
143 |
llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
|
144 |
|
145 |
with gr.Blocks() as upload:
|
@@ -152,7 +139,7 @@ with gr.Blocks() as upload:
|
|
152 |
submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
|
153 |
|
154 |
with gr.Blocks() as suche:
|
155 |
-
gr.Markdown("###
|
156 |
with gr.Row():
|
157 |
prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
|
158 |
with gr.Row():
|
@@ -161,15 +148,13 @@ with gr.Blocks() as suche:
|
|
161 |
search_button = gr.Button("Suchen")
|
162 |
search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
|
163 |
|
164 |
-
|
165 |
with gr.Blocks() as speech:
|
166 |
gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
|
167 |
-
|
168 |
with gr.Row():
|
169 |
sr_outputs = gr.Textbox(label="Antwort")
|
170 |
with gr.Row():
|
171 |
-
sr_inputs = gr.Microphone(type="filepath")
|
172 |
-
|
173 |
sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
|
174 |
|
175 |
# Erstelle die Gradio-Schnittstelle
|
|
|
3 |
from chromadb.utils import embedding_functions
|
4 |
from PyPDF2 import PdfReader
|
5 |
from gradio_client import Client
|
6 |
+
# from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT  # needed for a persistent client
|
7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
8 |
import speech_recognition as sr
|
9 |
import groq
|
10 |
+
import os
|
11 |
+
|
12 |
+
# Get your API key at groq.com. It's free!
|
13 |
api_key = os.getenv('groq')
|
14 |
+
|
15 |
# Initialisiere ChromaDB
|
16 |
client_chroma = chromadb.Client()
|
17 |
collection_name = "pdf_collection"
|
18 |
collection = client_chroma.get_or_create_collection(name=collection_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# Verwende die integrierten Embeddings von ChromaDB
|
20 |
embedding_function = embedding_functions.DefaultEmbeddingFunction()
|
21 |
+
|
22 |
client = groq.Client(api_key=api_key)
|
23 |
|
24 |
+
# Use Llama 3 70B powered by Groq for answering
|
25 |
+
def update(message):
|
26 |
+
try:
|
|
|
|
|
27 |
completion = client.chat.completions.create(
|
28 |
model="llama3-70b-8192",
|
29 |
messages=[
|
30 |
{"role": "system", "content": "You are a helpful assistant."},
|
31 |
+
{"role": "user", "content": f"{message}. antworte immer auf deutsch"}
|
32 |
],
|
33 |
)
|
34 |
return completion.choices[0].message.content
|
|
|
43 |
try:
|
44 |
text = recognizer.recognize_google(audio_data, language="de-DE")
|
45 |
result = update(text)
|
46 |
+
result=gr.Markdown(result)
|
47 |
return result
|
48 |
|
49 |
except sr.UnknownValueError:
|
|
|
54 |
def ask_llm(llm_prompt_input):
|
55 |
# Erstelle Embedding für den Prompt
|
56 |
query_embedding = embedding_function([llm_prompt_input])[0]
|
|
|
57 |
# Führe die Ähnlichkeitssuche durch
|
58 |
results = collection.query(
|
59 |
query_embeddings=[query_embedding],
|
|
|
99 |
)
|
100 |
return f"PDF wurde erfolgreich in ChromaDB gespeichert."
|
101 |
|
|
|
|
|
102 |
def search_similar_documents(prompt):
|
103 |
# Erstelle Embedding für den Prompt
|
104 |
query_embedding = embedding_function([prompt])[0]
|
|
|
119 |
ergebnis = f"{''.join(formatted_results)}"
|
120 |
ergebnis = gr.Markdown(ergebnis)
|
121 |
return ergebnis
|
|
|
122 |
|
123 |
with gr.Blocks() as chat:
|
124 |
gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
|
|
|
126 |
llm_output = gr.Textbox(label="LLM Answer")
|
127 |
with gr.Row():
|
128 |
llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
|
129 |
+
llm_submit_button = gr.Button("send")
|
|
|
|
|
130 |
llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
|
131 |
|
132 |
with gr.Blocks() as upload:
|
|
|
139 |
submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
|
140 |
|
141 |
with gr.Blocks() as suche:
|
142 |
+
gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
|
143 |
with gr.Row():
|
144 |
prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
|
145 |
with gr.Row():
|
|
|
148 |
search_button = gr.Button("Suchen")
|
149 |
search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
|
150 |
|
151 |
+
# Optional: Spracheingabe (voice input)
|
152 |
with gr.Blocks() as speech:
|
153 |
gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
|
|
|
154 |
with gr.Row():
|
155 |
sr_outputs = gr.Textbox(label="Antwort")
|
156 |
with gr.Row():
|
157 |
+
sr_inputs = gr.Microphone(type="filepath")
|
|
|
158 |
sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
|
159 |
|
160 |
# Erstelle die Gradio-Schnittstelle
|