import os

import gradio as gr
import chromadb
from chromadb.utils import embedding_functions
from PyPDF2 import PdfReader
import speech_recognition as sr
import groq

# Get your API key at groq.com - it's free!
api_key = os.getenv('groq')

# Initialize ChromaDB
client_chroma = chromadb.Client()
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# Use ChromaDB's built-in embeddings
embedding_function = embedding_functions.DefaultEmbeddingFunction()

client = groq.Client(api_key=api_key)

# Use Mixtral 8x7B powered by Groq for answering
def update(message):
    try:
        completion = client.chat.completions.create(
            model="Mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{message}. antworte immer auf deutsch"}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"

# Transcribe recorded audio to text and answer with the LLM
def transcribe_audio(audio):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data, language="de-DE")
        return update(text)
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"

def ask_llm(llm_prompt_input):
    # Create an embedding for the prompt
    query_embedding = embedding_function([llm_prompt_input])[0]

    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3
    )

    # Format the results
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")

    # Add the formatted results to the prompt
    enriched_prompt = f"{llm_prompt_input}\n\n### Verwandte Informationen:\n{''.join(formatted_results)}"
    return update(enriched_prompt)

def process_pdf(file):
    # Read the PDF content
    pdf_reader = PdfReader(file.name)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()

    # Embed the text and store it in ChromaDB
    embeddings = embedding_function([text])
    collection.add(
        documents=[text],
        embeddings=embeddings,
        metadatas=[{"filename": file.name}],
        ids=[file.name]  # Use the filename as the unique ID
    )
    return "PDF wurde erfolgreich in ChromaDB gespeichert."
def search_similar_documents(prompt):
    # Create an embedding for the prompt
    query_embedding = embedding_function([prompt])[0]

    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=1
    )

    # Format the results
    formatted_results = []
    for doc in results["documents"][0]:
        formatted_results.append(f"{doc}\n")

    return "".join(formatted_results)

with gr.Blocks() as chat:
    gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
    with gr.Row():
        llm_output = gr.Textbox(label="LLM Answer")
    with gr.Row():
        llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
        llm_submit_button = gr.Button("send")
    llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)

with gr.Blocks() as upload:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")
    submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)

with gr.Blocks() as suche:
    gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
    with gr.Row():
        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
    with gr.Row():
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")
    search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)

# Optional: voice input
with gr.Blocks() as speech:
    gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
    with gr.Row():
        sr_outputs = gr.Textbox(label="Antwort")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath")
    sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [chat, upload, suche],
        ["Chat", "Upload", "Suche"]
    )

# Launch the Gradio application
demo.launch()
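
# Usage note (a sketch; the script filename "app.py" is an assumption, not part of the
# original): the Groq API key is read from the "groq" environment variable above, so
# set it before launching, e.g.
#   export groq="<your-groq-api-key>"
#   python app.py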