mgokg committed on
Commit bdc45a6 · verified · 1 Parent(s): 4a37332

Update app.py

Files changed (1)
app.py +11 -144
app.py CHANGED
@@ -7,42 +7,20 @@ from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import os
 import speech_recognition as sr
-
+import requests
+import json
 
 # Initialize ChromaDB
 client_chroma = chromadb.Client()
-#client_croma = chromadb.PersistentClient(path="/")
 collection_name = "pdf_collection"
 collection = client_chroma.get_or_create_collection(name=collection_name)
 
-custom_css = """
-.gr-button {
-    width: 300px; /* Set the width of the button */
-}
-"""
-
 # Use ChromaDB's built-in embeddings
 embedding_function = embedding_functions.DefaultEmbeddingFunction()
 
 def update(message):
-    url = "https://api.groq.com/openai/v1/chat/completions"
-    headers = {
-        "Authorization": groq,
-        "Content-Type": "application/json"
-    }
-    data = {
-        "messages": [
-            {
-                "role": "user",
-                "content": message
-            }
-        ],
-        "model": "mixtral-8x7b-32768",
-        "temperature": 0.2
-    }
-
-    response = requests.post(url, headers=headers, data=json.dumps(data))
-    return response.json()['choices'][0]['message']['content']
+    # Your update function implementation
+    pass
 
 client = Client("Qwen/Qwen2.5-72B-Instruct")
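A note on the removed update() body: it posted to Groq's OpenAI-compatible chat-completions endpoint but set the Authorization header to a bare groq variable, where this API family expects a Bearer token. A minimal corrected sketch, assuming the key is read from a GROQ_API_KEY environment variable (that variable name and the groq_chat helper name are illustrative):

import json
import os
import requests

GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"

def groq_chat(message: str) -> str:
    # OpenAI-compatible endpoints expect "Bearer <key>"; the removed code sent
    # the raw `groq` variable. GROQ_API_KEY is an assumed env-var name.
    headers = {
        "Authorization": f"Bearer {os.environ['GROQ_API_KEY']}",
        "Content-Type": "application/json",
    }
    payload = {
        "messages": [{"role": "user", "content": message}],
        "model": "mixtral-8x7b-32768",
        "temperature": 0.2,
    }
    response = requests.post(GROQ_URL, headers=headers, data=json.dumps(payload))
    response.raise_for_status()  # surface HTTP errors before indexing the body
    return response.json()["choices"][0]["message"]["content"]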
@@ -53,121 +31,22 @@ def transcribe_audio(audio):
         audio_data = recognizer.record(source)
     try:
         text = recognizer.recognize_google(audio_data, language="de-DE")
-        result = client.predict(
-            query=text,
-            history=[],
-            system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
-            api_name="/model_chat"
-        )
-        result = result[1]
-        result = gr.Markdown(result)
-        return result
-        #text = update(text)
-        #return text
+        # Process the transcribed text as needed
+        return text
     except sr.UnknownValueError:
         return "Speech recognition could not understand the audio."
     except sr.RequestError as e:
         return f"Could not request results from Google Speech Recognition service; {e}"
 
+# Other functions (ask_llm, process_pdf, search_similar_documents) remain unchanged
 
-
-
-def ask_llm(llm_prompt_input):
-    # Create an embedding for the prompt
-    query_embedding = embedding_function([llm_prompt_input])[0]
-
-    # Run the similarity search
-    results = collection.query(
-        query_embeddings=[query_embedding],
-        n_results=3
-    )
-
-    # Format the results
-    formatted_results = []
-    for i, doc in enumerate(results["documents"][0]):
-        metadata = results["metadatas"][0][i]
-        filename = metadata["filename"]
-        formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")
-
-    # Append the formatted results to the prompt
-    enriched_prompt = f"{llm_prompt_input}\n\n### Verwandte Informationen:\n{''.join(formatted_results)}"
-    #print(enriched_prompt)
-    # Query the LLM
-    result = client.predict(
-        query=enriched_prompt,
-        history=[],
-        system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
-        api_name="/model_chat"
-    )
-    result = result[1]
-    result = gr.Markdown(result)
-    return result
-
-def process_pdf(file):
-    # Read the PDF content
-    pdf_reader = PdfReader(file.name)
-    text = ""
-    for page in pdf_reader.pages:
-        text += page.extract_text()
-
-    # Split the text into smaller chunks
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,  # Adjust the chunk size as needed
-        chunk_overlap=100  # Adjust the overlap as needed
-    )
-    chunks = text_splitter.split_text(text)
-
-    # Create embeddings for each chunk
-    embeddings = embedding_function(chunks)
-
-    # Store each chunk in ChromaDB
-    for i, chunk in enumerate(chunks):
-        collection.add(
-            documents=[chunk],
-            metadatas=[{"filename": file.name, "chunk_id": i}],
-            ids=[f"{file.name}_{i}"]  # Use a unique ID for each chunk
-        )
-    return f"PDF wurde erfolgreich in ChromaDB gespeichert."
-
-# Example usage
-# process_pdf(your_file_object)
-def search_similar_documents(prompt):
-    # Create an embedding for the prompt
-    query_embedding = embedding_function([prompt])[0]
-
-    # Run the similarity search
-    results = collection.query(
-        query_embeddings=[query_embedding],
-        n_results=3
-    )
-
-    # Format the results
-    formatted_results = []
-    for i, doc in enumerate(results["documents"][0]):
-        metadata = results["metadatas"][0][i]
-        filename = metadata["filename"]
-        formatted_results.append(f"{doc}\n")
-
-    ergebnis = f"{''.join(formatted_results)}"
-    ergebnis = gr.Markdown(ergebnis)
-    return ergebnis
-    #return "\n".join(formatted_results)
-
 with gr.Blocks() as chat:
     gr.Markdown("### Chat", elem_classes="tab-header")
-    #with gr.Row():
-        #prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
-        #search_output = gr.Textbox(label="Ähnliche Dokumente")
-    #with gr.Row():
-        #search_button = gr.Button("Suchen")
-    with gr.Row():
+    with gr.Row():
         llm_output = gr.Textbox(label="LLM Antwort")
     with gr.Row():
         llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
         llm_submit_button = gr.Button("send")
-
-    #search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
     llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)
 
 with gr.Blocks() as upload:
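For reference, the deleted ask_llm() and search_similar_documents() shared one retrieval pattern: embed the prompt with the module-level embedding_function, run a similarity query against the Chroma collection, and join the top hits. A condensed sketch of that removed logic (the retrieve_context name is hypothetical; the calls mirror the removed code):

def retrieve_context(prompt: str, k: int = 3) -> str:
    # Embed the prompt and search the stored PDF chunks, as the removed
    # ask_llm() did before enriching its LLM prompt with the hits.
    query_embedding = embedding_function([prompt])[0]
    results = collection.query(query_embeddings=[query_embedding], n_results=k)
    parts = []
    for i, doc in enumerate(results["documents"][0]):
        filename = results["metadatas"][0][i]["filename"]
        parts.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")
    return "".join(parts)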
@@ -182,33 +61,21 @@ with gr.Blocks() as upload:
 with gr.Blocks() as suche:
     gr.Markdown("### suche", elem_classes="tab-header")
     with gr.Row():
-        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
+        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
     with gr.Row():
         search_output = gr.Textbox(label="Ähnliche Dokumente")
     with gr.Row():
         search_button = gr.Button("Suchen")
     search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)
 
-
 with gr.Blocks() as speech:
     gr.Markdown("### audio", elem_classes="tab-header")
-
     with gr.Row():
         sr_inputs = gr.Microphone(type="filepath")
         sr_outputs = gr.Textbox(label="Transcribed Text")
-
-    with gr.Row():
-        submit_button = gr.Button("rec")
-
-    submit_button.click(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
-
+    sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
 
-# Build the Gradio interface
 with gr.Blocks() as demo:
-    gr.TabbedInterface(
-        [chat, upload, suche, speech]
-    )
-
+    gr.TabbedInterface([chat, upload, suche, speech])
 
-# Start the Gradio application
 demo.launch()
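With the "rec" button gone, the speech tab wires sr_inputs.change(...) directly, so transcription fires as soon as the microphone component produces its file. The hunks only show the middle of transcribe_audio; a plausible full version after this commit, where the Recognizer setup and the sr.AudioFile context manager are inferred from the visible context lines:

import speech_recognition as sr

def transcribe_audio(audio):
    # The Recognizer and AudioFile lines are inferred; only the record/try
    # block appears verbatim in the diff context above.
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data, language="de-DE")
        return text
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"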
 
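The collapsed gr.TabbedInterface([chat, upload, suche, speech]) call passes only the interface list, so the tabs fall back to default labels. Gradio's optional tab_names parameter would name them; a sketch mirroring the commit's structure, with illustrative labels:

with gr.Blocks() as demo:
    gr.TabbedInterface(
        [chat, upload, suche, speech],
        tab_names=["Chat", "Upload", "Suche", "Audio"],  # illustrative labels
    )

demo.launch()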