import os

import gradio as gr
import chromadb
from chromadb.utils import embedding_functions
from PyPDF2 import PdfReader
import speech_recognition as sr
import groq

# Get your API key at groq.com - it's free!
api_key = os.getenv('groq')

# Initialize ChromaDB
client_chroma = chromadb.Client()
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# Use ChromaDB's built-in embeddings
embedding_function = embedding_functions.DefaultEmbeddingFunction()

client = groq.Client(api_key=api_key)

# Use Mixtral 8x7B powered by Groq for answering
def update(message):
    try:
        completion = client.chat.completions.create(
            model="Mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{message}. antworte immer auf deutsch"}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"

# Transcribe recorded audio to text and answer with the LLM
def transcribe_audio(audio):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data, language="de-DE")
        return update(text)
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"

def ask_llm(llm_prompt_input):
    # Create an embedding for the prompt
    query_embedding = embedding_function([llm_prompt_input])[0]

    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3
    )

    # Format the results
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")

    # Add the formatted results to the prompt
    enriched_prompt = f"{llm_prompt_input}\n\n### Verwandte Informationen:\n{''.join(formatted_results)}"
    return update(enriched_prompt)

def process_pdf(file):
    # Read the PDF content
    pdf_reader = PdfReader(file.name)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()

    # Embed the text and store it in ChromaDB
    embeddings = embedding_function([text])
    collection.add(
        documents=[text],
        embeddings=embeddings,
        metadatas=[{"filename": file.name}],
        ids=[file.name]  # Use the filename as the unique ID
    )
    return "PDF wurde erfolgreich in ChromaDB gespeichert."
def search_similar_documents(prompt):
    # Create an embedding for the prompt
    query_embedding = embedding_function([prompt])[0]

    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=1
    )

    # Format the results
    formatted_results = []
    for doc in results["documents"][0]:
        formatted_results.append(f"{doc}\n")

    return "".join(formatted_results)

with gr.Blocks() as chat:
    gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
    with gr.Row():
        llm_output = gr.Textbox(label="LLM Answer")
    with gr.Row():
        llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
        llm_submit_button = gr.Button("send")
    llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)

with gr.Blocks() as upload:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")
    submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)

with gr.Blocks() as suche:
    gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
    with gr.Row():
        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
    with gr.Row():
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")
    search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)

# Optional: voice input
with gr.Blocks() as speech:
    gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
    with gr.Row():
        sr_outputs = gr.Textbox(label="Antwort")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath")
    sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [chat, upload, suche],
        ["Chat", "Upload", "Suche"]
    )

# Launch the Gradio application
demo.launch()
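
# Usage note (a sketch; the script filename "app.py" is an assumption, not part of the
# original): the Groq API key is read from the "groq" environment variable above, so
# set it before launching, e.g.
#   export groq="<your-groq-api-key>"
#   python app.py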