RAG-Vereine

Sleeping

App Files Files Community

mgokg committed on Nov 29, 2024

Commit

b0c20dc

verified ·

1 Parent(s): fdf8ecf

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -30

app.py CHANGED Viewed

@@ -3,39 +3,32 @@ import chromadb
 from chromadb.utils import embedding_functions
 from PyPDF2 import PdfReader
 from gradio_client import Client
-from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-import os
 import speech_recognition as sr
 import groq
-import pyttsx3
 api_key = os.getenv('groq')
 # Initialisiere ChromaDB
 client_chroma = chromadb.Client()
 collection_name = "pdf_collection"
 collection = client_chroma.get_or_create_collection(name=collection_name)
-custom_css = """
-.gr-button {
-    width: 300px;  /* Set the width of the button */
-}
-"""
 # Verwende die integrierten Embeddings von ChromaDB
 embedding_function = embedding_functions.DefaultEmbeddingFunction()
-#client = Client("Qwen/Qwen2.5-72B-Instruct")
 client = groq.Client(api_key=api_key)
-def update(message):
-    #client = groq.Client(api_key=api_key)
-    try:
-        # Use Llama 3 70B powered by Groq for text generation
         completion = client.chat.completions.create(
             model="llama3-70b-8192",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant."},
-                {"role": "user", "content": f"{message} antworte immer auf deutsch"}
             ],
         )
         return completion.choices[0].message.content
@@ -50,7 +43,7 @@ def transcribe_audio(audio):
         try:
             text = recognizer.recognize_google(audio_data, language="de-DE")
             result = update(text)
-            #result=gr.Markdown(result)
             return result
         except sr.UnknownValueError:
@@ -61,7 +54,6 @@ def transcribe_audio(audio):
 def ask_llm(llm_prompt_input):
     # Erstelle Embedding für den Prompt
     query_embedding = embedding_function([llm_prompt_input])[0]
     # Führe die Ähnlichkeitssuche durch
     results = collection.query(
         query_embeddings=[query_embedding],
@@ -107,8 +99,6 @@ def process_pdf(file):
         )
     return f"PDF wurde erfolgreich in ChromaDB gespeichert."
-# Example usage
-# process_pdf(your_file_object)
 def search_similar_documents(prompt):
     # Erstelle Embedding für den Prompt
     query_embedding = embedding_function([prompt])[0]
@@ -129,7 +119,6 @@ def search_similar_documents(prompt):
     ergebnis = f"{''.join(formatted_results)}"
     ergebnis = gr.Markdown(ergebnis)
     return ergebnis
-    #return "\n".join(formatted_results)
 with gr.Blocks() as chat:
     gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
@@ -137,9 +126,7 @@ with gr.Blocks() as chat:
         llm_output = gr.Textbox(label="LLM Answer")
     with gr.Row():
         llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
-        llm_submit_button = gr.Button("send")
-    #search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
     llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
 with gr.Blocks() as upload:
@@ -152,7 +139,7 @@ with gr.Blocks() as upload:
     submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
 with gr.Blocks() as suche:
-    gr.Markdown("### suche", elem_classes="tab-header")
     with gr.Row():
         prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
     with gr.Row():
@@ -161,15 +148,13 @@ with gr.Blocks() as suche:
         search_button = gr.Button("Suchen")
     search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
 with gr.Blocks() as speech:
     gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
     with gr.Row():
         sr_outputs = gr.Textbox(label="Antwort")
     with gr.Row():
-        sr_inputs = gr.Microphone(type="filepath")
     sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
 # Erstelle die Gradio-Schnittstelle

 from chromadb.utils import embedding_functions
 from PyPDF2 import PdfReader
 from gradio_client import Client
+#from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT  # needed for a persistent client
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import speech_recognition as sr
 import groq
+import os
+# Get your API key at groq.com (it's free); it is read from the 'groq' environment variable.
 api_key = os.getenv('groq')
 # Initialisiere ChromaDB
 client_chroma = chromadb.Client()
 collection_name = "pdf_collection"
 collection = client_chroma.get_or_create_collection(name=collection_name)
 # Verwende die integrierten Embeddings von ChromaDB
 embedding_function = embedding_functions.DefaultEmbeddingFunction()
 client = groq.Client(api_key=api_key)
+# Use Llama 3 70B powered by Groq for answering
+def update(message):
+    try:
         completion = client.chat.completions.create(
             model="llama3-70b-8192",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": f"{message}. antworte immer auf deutsch"}
             ],
         )
         return completion.choices[0].message.content
         try:
             text = recognizer.recognize_google(audio_data, language="de-DE")
             result = update(text)
+            result=gr.Markdown(result)
             return result
         except sr.UnknownValueError:
 def ask_llm(llm_prompt_input):
     # Erstelle Embedding für den Prompt
     query_embedding = embedding_function([llm_prompt_input])[0]
     # Führe die Ähnlichkeitssuche durch
     results = collection.query(
         query_embeddings=[query_embedding],
         )
     return f"PDF wurde erfolgreich in ChromaDB gespeichert."
 def search_similar_documents(prompt):
     # Erstelle Embedding für den Prompt
     query_embedding = embedding_function([prompt])[0]
     ergebnis = f"{''.join(formatted_results)}"
     ergebnis = gr.Markdown(ergebnis)
     return ergebnis
 with gr.Blocks() as chat:
     gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
         llm_output = gr.Textbox(label="LLM Answer")
     with gr.Row():
         llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
+        llm_submit_button = gr.Button("send")
     llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
 with gr.Blocks() as upload:
     submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)
 with gr.Blocks() as suche:
+    gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
     with gr.Row():
         prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
     with gr.Row():
         search_button = gr.Button("Suchen")
     search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
+# Optional: voice input
 with gr.Blocks() as speech:
     gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
     with gr.Row():
         sr_outputs = gr.Textbox(label="Antwort")
     with gr.Row():
+        sr_inputs = gr.Microphone(type="filepath")
     sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
 # Erstelle die Gradio-Schnittstelle