mgokg committed on
Commit
b0c20dc
·
verified ·
1 Parent(s): fdf8ecf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -30
app.py CHANGED
@@ -3,39 +3,32 @@ import chromadb
3
  from chromadb.utils import embedding_functions
4
  from PyPDF2 import PdfReader
5
  from gradio_client import Client
6
- from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- import os
9
  import speech_recognition as sr
10
  import groq
11
- import pyttsx3
 
 
12
  api_key = os.getenv('groq')
 
13
  # Initialisiere ChromaDB
14
  client_chroma = chromadb.Client()
15
  collection_name = "pdf_collection"
16
  collection = client_chroma.get_or_create_collection(name=collection_name)
17
-
18
- custom_css = """
19
- .gr-button {
20
- width: 300px; /* Set the width of the button */
21
- }
22
- """
23
-
24
  # Verwende die integrierten Embeddings von ChromaDB
25
  embedding_function = embedding_functions.DefaultEmbeddingFunction()
26
- #client = Client("Qwen/Qwen2.5-72B-Instruct")
27
  client = groq.Client(api_key=api_key)
28
 
29
- def update(message):
30
- #client = groq.Client(api_key=api_key)
31
-
32
- try:
33
- # Use Llama 3 70B powered by Groq for text generation
34
  completion = client.chat.completions.create(
35
  model="llama3-70b-8192",
36
  messages=[
37
  {"role": "system", "content": "You are a helpful assistant."},
38
- {"role": "user", "content": f"{message} antworte immer auf deutsch"}
39
  ],
40
  )
41
  return completion.choices[0].message.content
@@ -50,7 +43,7 @@ def transcribe_audio(audio):
50
  try:
51
  text = recognizer.recognize_google(audio_data, language="de-DE")
52
  result = update(text)
53
- #result=gr.Markdown(result)
54
  return result
55
 
56
  except sr.UnknownValueError:
@@ -61,7 +54,6 @@ def transcribe_audio(audio):
61
  def ask_llm(llm_prompt_input):
62
  # Erstelle Embedding für den Prompt
63
  query_embedding = embedding_function([llm_prompt_input])[0]
64
-
65
  # Führe die Ähnlichkeitssuche durch
66
  results = collection.query(
67
  query_embeddings=[query_embedding],
@@ -107,8 +99,6 @@ def process_pdf(file):
107
  )
108
  return f"PDF wurde erfolgreich in ChromaDB gespeichert."
109
 
110
- # Example usage
111
- # process_pdf(your_file_object)
112
  def search_similar_documents(prompt):
113
  # Erstelle Embedding für den Prompt
114
  query_embedding = embedding_function([prompt])[0]
@@ -129,7 +119,6 @@ def search_similar_documents(prompt):
129
  ergebnis = f"{''.join(formatted_results)}"
130
  ergebnis = gr.Markdown(ergebnis)
131
  return ergebnis
132
- #return "\n".join(formatted_results)
133
 
134
  with gr.Blocks() as chat:
135
  gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
@@ -137,9 +126,7 @@ with gr.Blocks() as chat:
137
  llm_output = gr.Textbox(label="LLM Answer")
138
  with gr.Row():
139
  llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
140
- llm_submit_button = gr.Button("send")
141
-
142
- #search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
143
  llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
144
 
145
  with gr.Blocks() as upload:
@@ -152,7 +139,7 @@ with gr.Blocks() as upload:
152
  submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
153
 
154
  with gr.Blocks() as suche:
155
- gr.Markdown("### suche", elem_classes="tab-header")
156
  with gr.Row():
157
  prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
158
  with gr.Row():
@@ -161,15 +148,13 @@ with gr.Blocks() as suche:
161
  search_button = gr.Button("Suchen")
162
  search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
163
 
164
-
165
  with gr.Blocks() as speech:
166
  gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
167
-
168
  with gr.Row():
169
  sr_outputs = gr.Textbox(label="Antwort")
170
  with gr.Row():
171
- sr_inputs = gr.Microphone(type="filepath")
172
-
173
  sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
174
 
175
  # Erstelle die Gradio-Schnittstelle
 
3
  from chromadb.utils import embedding_functions
4
  from PyPDF2 import PdfReader
5
  from gradio_client import Client
6
+ # from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT  # only needed when using a persistent client
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
8
  import speech_recognition as sr
9
  import groq
10
+ import os
11
+
12
+ # Get your API key at groq.com. It's free!
13
  api_key = os.getenv('groq')
14
+
15
  # Initialisiere ChromaDB
16
  client_chroma = chromadb.Client()
17
  collection_name = "pdf_collection"
18
  collection = client_chroma.get_or_create_collection(name=collection_name)
 
 
 
 
 
 
 
19
  # Verwende die integrierten Embeddings von ChromaDB
20
  embedding_function = embedding_functions.DefaultEmbeddingFunction()
21
+
22
  client = groq.Client(api_key=api_key)
23
 
24
+ # Use Llama 3 70B powered by Groq for answering
25
+ def update(message):
26
+ try:
 
 
27
  completion = client.chat.completions.create(
28
  model="llama3-70b-8192",
29
  messages=[
30
  {"role": "system", "content": "You are a helpful assistant."},
31
+ {"role": "user", "content": f"{message}. antworte immer auf deutsch"}
32
  ],
33
  )
34
  return completion.choices[0].message.content
 
43
  try:
44
  text = recognizer.recognize_google(audio_data, language="de-DE")
45
  result = update(text)
46
+ result=gr.Markdown(result)
47
  return result
48
 
49
  except sr.UnknownValueError:
 
54
  def ask_llm(llm_prompt_input):
55
  # Erstelle Embedding für den Prompt
56
  query_embedding = embedding_function([llm_prompt_input])[0]
 
57
  # Führe die Ähnlichkeitssuche durch
58
  results = collection.query(
59
  query_embeddings=[query_embedding],
 
99
  )
100
  return f"PDF wurde erfolgreich in ChromaDB gespeichert."
101
 
 
 
102
  def search_similar_documents(prompt):
103
  # Erstelle Embedding für den Prompt
104
  query_embedding = embedding_function([prompt])[0]
 
119
  ergebnis = f"{''.join(formatted_results)}"
120
  ergebnis = gr.Markdown(ergebnis)
121
  return ergebnis
 
122
 
123
  with gr.Blocks() as chat:
124
  gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
 
126
  llm_output = gr.Textbox(label="LLM Answer")
127
  with gr.Row():
128
  llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
129
+ llm_submit_button = gr.Button("send")
 
 
130
  llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
131
 
132
  with gr.Blocks() as upload:
 
139
  submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
140
 
141
  with gr.Blocks() as suche:
142
+ gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
143
  with gr.Row():
144
  prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
145
  with gr.Row():
 
148
  search_button = gr.Button("Suchen")
149
  search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
150
 
151
+ # Optional: speech input
152
  with gr.Blocks() as speech:
153
  gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
 
154
  with gr.Row():
155
  sr_outputs = gr.Textbox(label="Antwort")
156
  with gr.Row():
157
+ sr_inputs = gr.Microphone(type="filepath")
 
158
  sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
159
 
160
  # Erstelle die Gradio-Schnittstelle