RAG-Vereine

Sleeping

App Files Files Community

mgokg committed on Nov 23, 2024

Commit

3bff9be

verified ·

1 Parent(s): bdc45a6

Update app.py

Browse files

Files changed (1) hide show

app.py +144 -10

app.py CHANGED Viewed

@@ -7,20 +7,42 @@ from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import os
 import speech_recognition as sr
-import requests
-import json
 # Initialisiere ChromaDB
 client_chroma = chromadb.Client()
 collection_name = "pdf_collection"
 collection = client_chroma.get_or_create_collection(name=collection_name)
 # Verwende die integrierten Embeddings von ChromaDB
 embedding_function = embedding_functions.DefaultEmbeddingFunction()
 def update(message):
-    # Your update function implementation
-    pass
 client = Client("Qwen/Qwen2.5-72B-Instruct")
@@ -31,22 +53,121 @@ def transcribe_audio(audio):
         audio_data = recognizer.record(source)
         try:
             text = recognizer.recognize_google(audio_data, language="de-DE")
-            # Process the transcribed text as needed
-            return text
         except sr.UnknownValueError:
             return "Speech recognition could not understand the audio."
         except sr.RequestError as e:
             return f"Could not request results from Google Speech Recognition service; {e}"
-# Other functions (ask_llm, process_pdf, search_similar_documents) remain unchanged
 with gr.Blocks() as chat:
     gr.Markdown("### Chat", elem_classes="tab-header")
-    with gr.Row():
         llm_output = gr.Textbox(label="LLM Antwort")
     with gr.Row():
         llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
         llm_submit_button = gr.Button("send")
     llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
 with gr.Blocks() as upload:
@@ -61,21 +182,34 @@ with gr.Blocks() as upload:
 with gr.Blocks() as suche:
     gr.Markdown("### suche", elem_classes="tab-header")
     with gr.Row():
-        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
     with gr.Row():
         search_output = gr.Textbox(label="Ähnliche Dokumente")
     with gr.Row():
         search_button = gr.Button("Suchen")
     search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
 with gr.Blocks() as speech:
     gr.Markdown("### audio", elem_classes="tab-header")
     with gr.Row():
         sr_inputs = gr.Microphone(type="filepath")
         sr_outputs = gr.Textbox(label="Transcribed Text")
     sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
 with gr.Blocks() as demo:
-    gr.TabbedInterface([chat, upload, suche, speech])
 demo.launch()

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import os
 import speech_recognition as sr
 # Initialize ChromaDB
 client_chroma = chromadb.Client()
+#client_croma = chromadb.PersistentClient(path="/")
 collection_name = "pdf_collection"
 collection = client_chroma.get_or_create_collection(name=collection_name)
+# CSS snippet that sets a fixed width for elements with the ``gr-button`` class.
+# NOTE(review): no visible gr.Blocks(...) call passes this string, so it appears unused — confirm.
+custom_css = """
+.gr-button {
+    width: 300px;  /* Set the width of the button */
+}
+"""
 # Use ChromaDB's built-in embeddings
 embedding_function = embedding_functions.DefaultEmbeddingFunction()
 def update(message):
+    url = "https://api.groq.com/openai/v1/chat/completions"
+    headers = {
+        "Authorization": groq,
+        "Content-Type": "application/json"
+    }
+    data = {
+        "messages": [
+            {
+                "role": "user",
+                "content": message
+            }
+        ],
+        "model": "mixtral-8x7b-32768",
+        "temperature": 0.2
+    }
+    response = requests.post(url, headers=headers, data=json.dumps(data))
+    return response.json()['choices'][0]['message']['content']
+# Remote client for "Qwen/Qwen2.5-72B-Instruct"; the LLM helpers in this file call
+# its predict() with api_name="/model_chat".
 client = Client("Qwen/Qwen2.5-72B-Instruct")
         audio_data = recognizer.record(source)
         try:
             text = recognizer.recognize_google(audio_data, language="de-DE")
+            result = client.predict(
+                query=text,
+                history=[],
+                system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
+            api_name="/model_chat"
+    )
+            result = result[1]
+            result=gr.Markdown(result)
+            return result
+            #text = update(text)
+            #return text
         except sr.UnknownValueError:
             return "Speech recognition could not understand the audio."
         except sr.RequestError as e:
             return f"Could not request results from Google Speech Recognition service; {e}"
+def ask_llm(llm_prompt_input):
+    # Erstelle Embedding für den Prompt
+    query_embedding = embedding_function([llm_prompt_input])[0]
+    # Führe die Ähnlichkeitssuche durch
+    results = collection.query(
+        query_embeddings=[query_embedding],
+        n_results=3
+    )
+    # Formatiere die Ergebnisse
+    formatted_results = []
+    for i, doc in enumerate(results["documents"][0]):
+        metadata = results["metadatas"][0][i]
+        filename = metadata["filename"]
+        formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")
+    # Füge die formatierten Ergebnisse zum Prompt hinzu
+    enriched_prompt = f"{llm_prompt_input}\n\n### Verwandte Informationen:\n{''.join(formatted_results)}"
+    #print(enriched_prompt)
+    # Führe die Abfrage des LLM durch
+    result = client.predict(
+        query=enriched_prompt,
+        history=[],
+        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
+        api_name="/model_chat"
+    )
+    result = result[1]
+    result=gr.Markdown(result)
+    return result
+def process_pdf(file):
+    # Read the PDF content
+    pdf_reader = PdfReader(file.name)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    # Split the text into smaller chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,  # Adjust the chunk size as needed
+        chunk_overlap=100  # Adjust the overlap as needed
+    )
+    chunks = text_splitter.split_text(text)
+    # Create embeddings for each chunk
+    embeddings = embedding_function(chunks)
+    # Store each chunk in ChromaDB
+    for i, chunk in enumerate(chunks):
+        collection.add(
+            documents=[chunk],
+            metadatas=[{"filename": file.name, "chunk_id": i}],
+            ids=[f"{file.name}_{i}"]  # Use a unique ID for each chunk
+        )
+    return f"PDF wurde erfolgreich in ChromaDB gespeichert."
+# Example usage
+# process_pdf(your_file_object)
+def search_similar_documents(prompt):
+    # Erstelle Embedding für den Prompt
+    query_embedding = embedding_function([prompt])[0]
+    # Führe die Ähnlichkeitssuche durch
+    results = collection.query(
+        query_embeddings=[query_embedding],
+        n_results=3
+    )
+    # Formatiere die Ergebnisse
+    formatted_results = []
+    for i, doc in enumerate(results["documents"][0]):
+        metadata = results["metadatas"][0][i]
+        filename = metadata["filename"]
+        formatted_results.append(f"{doc}\n")
+    ergebnis = f"{''.join(formatted_results)}"
+    ergebnis = gr.Markdown(ergebnis)
+    return ergebnis
+    #return "\n".join(formatted_results)
 with gr.Blocks() as chat:
+    # Chat tab: a question textbox and a "send" button; the button passes the
+    # question to ask_llm and the result is shown in the "LLM Antwort" textbox.
     gr.Markdown("### Chat", elem_classes="tab-header")
+    #with gr.Row():
+        #prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
+        #search_output = gr.Textbox(label="Ähnliche Dokumente")
+    #with gr.Row():
+        #search_button = gr.Button("Suchen")
+    with gr.Row():
         llm_output = gr.Textbox(label="LLM Antwort")
     with gr.Row():
         llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
         llm_submit_button = gr.Button("send")
+    #search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
     llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
 with gr.Blocks() as upload:
 with gr.Blocks() as suche:
+    # Search tab: the "Suchen" button passes the search text to
+    # search_similar_documents and shows the result in the output textbox.
     gr.Markdown("### suche", elem_classes="tab-header")
     with gr.Row():
+        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
     with gr.Row():
         search_output = gr.Textbox(label="Ähnliche Dokumente")
     with gr.Row():
         search_button = gr.Button("Suchen")
     search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
 with gr.Blocks() as speech:
+    # Audio tab: microphone input (delivered as a file path) and a textbox for the result.
     gr.Markdown("### audio", elem_classes="tab-header")
     with gr.Row():
         sr_inputs = gr.Microphone(type="filepath")
         sr_outputs = gr.Textbox(label="Transcribed Text")
+    # transcribe_audio is wired twice: on every change of the microphone value ...
     sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
+    with gr.Row():
+        submit_button = gr.Button("rec")
+    # ... and again when the "rec" button is clicked, using the same input and output.
+    submit_button.click(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
+# Build the Gradio interface: the four tab layouts combined in one tabbed interface
 with gr.Blocks() as demo:
+    gr.TabbedInterface(
+        [chat, upload, suche, speech]
+    )
+# Launch the Gradio application
 demo.launch()