import gradio as gr
import chromadb
from chromadb.utils import embedding_functions
from PyPDF2 import PdfReader
from gradio_client import Client, file
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
#from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT #is needed for persistent client
#import speech_recognition as sr

# Initialize ChromaDB (in-memory client; data is lost on restart —
# switch to chromadb.PersistentClient for a durable store)
client_chroma = chromadb.Client()
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# Use ChromaDB's built-in default embedding function
embedding_function = embedding_functions.DefaultEmbeddingFunction()


def groq(message):
    """Send *message* to the hosted Groq Llama3 Space and return its text reply.

    The remote Space performs the actual LLM call; the prompt is suffixed
    with a German-language instruction (runtime string, kept as-is).
    """
    client = Client("mgokg/GroqApi")
    result = client.predict(
        model="llama3-8b-8192",
        input_text=f"{message}. antworte auf deutsch",
        api_name="/predict"
    )
    return result


def gemini(message):
    """Send *message* to the hosted Gemini Space and return its text reply."""
    client = Client("mgokg/webapi")
    result = client.predict(
        input_text=f"{message}. antworte auf deutsch",
        api_name="/generate"
    )
    return result


"""
# Function to transcribe audio data to text
def transcribe_audio(audio):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data, language="de-DE")
            result = update(text)
            result = gr.Markdown(result)
            return result
        except sr.UnknownValueError:
            return "Speech recognition could not understand the audio."
        except sr.RequestError as e:
            return f"Could not request results from Google Speech Recognition service; {e}"
"""


def ask_llm(llm_prompt_input):
    """RAG-style answer: retrieve the 5 chunks most similar to the prompt
    from ChromaDB, append them as context, and ask the Groq LLM.

    Returns a (Markdown answer, textbox reset value) tuple matching the two
    Gradio outputs wired to this handler.
    """
    # Create an embedding for the prompt
    query_embedding = embedding_function([llm_prompt_input])[0]
    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=5
    )
    # Format the results.
    # BUG FIX: the filename was extracted from metadata but the literal
    # "(unknown)" was interpolated instead of the variable.
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")
    # Enrich the prompt with the retrieved context
    enriched_prompt = f"{llm_prompt_input}\n\n### Verwandte Informationen:\n{''.join(formatted_results)}"
    result = groq(enriched_prompt)
    #result = gemini(enriched_prompt)
    result = gr.Markdown(result)
    # Second return value clears the prompt textbox
    return result, " "


def process_pdf(file):
    """Extract text from an uploaded PDF, chunk it, embed it, store it in ChromaDB.

    *file* is the value of a ``gr.File(type="filepath")`` input; depending on
    the Gradio version this is either a plain path string or a tempfile-like
    object with a ``.name`` attribute — both are handled.
    """
    # Resolve the path for both Gradio file-value conventions
    path = file if isinstance(file, str) else file.name
    # Read the PDF content.
    # BUG FIX: extract_text() may return None for pages without a text
    # layer (e.g. scanned pages); substitute "" to avoid a TypeError.
    pdf_reader = PdfReader(path)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text() or ""
    # Split the text into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,   # Adjust the chunk size as needed
        chunk_overlap=100  # Adjust the overlap as needed
    )
    chunks = text_splitter.split_text(text)
    # Create embeddings for all chunks in one batch
    embeddings = embedding_function(chunks)
    # Store each chunk in ChromaDB, passing the precomputed embedding so the
    # collection does not re-embed the document on insert
    for i, chunk in enumerate(chunks):
        collection.add(
            documents=[chunk],
            embeddings=[embeddings[i]],
            metadatas=[{"filename": path, "chunk_id": i}],
            ids=[f"{path}_{i}"]  # Use a unique ID for each chunk
        )
    return f"PDF wurde erfolgreich in ChromaDB gespeichert."
def search_similar_documents(prompt):
    """Return the 5 stored chunks most similar to *prompt* as one plain string.

    BUG FIX: the result used to be wrapped in ``gr.Markdown(...)``, but the
    output component bound to this handler is a ``gr.Textbox``, which expects
    a plain string value — return the joined text directly. The unused
    ``metadata``/``filename`` locals were removed.
    """
    # Create an embedding for the prompt
    query_embedding = embedding_function([prompt])[0]
    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=5
    )
    # Concatenate the matching chunks, one per line
    formatted_results = [f"{doc}\n" for doc in results["documents"][0]]
    return "".join(formatted_results)


# --- Tab: ask questions about the stored documents ---
with gr.Blocks() as chat:
    gr.Markdown("### Dokument Befragen", elem_classes="tab-header")
    with gr.Row():
        llm_output = gr.Markdown(label="Antwort")
    with gr.Row():
        llm_prompt_input = gr.Textbox(label="ask anything...", placeholder="Gib eine Frage ein")
    with gr.Row():
        llm_submit_button = gr.Button("send")
    llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=[llm_output, llm_prompt_input])

# --- Tab: upload a PDF into the vector store ---
with gr.Blocks() as upload:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
    with gr.Row():
        submit_button = gr.Button("upload")
    submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)

# --- Tab: raw similarity search over the database ---
with gr.Blocks() as suche:
    gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
    with gr.Row():
        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
    with gr.Row():
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")
    search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)

# Optional voice input tab (disabled; requires speech_recognition)
"""
with gr.Blocks() as speech:
    gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
    with gr.Row():
        sr_outputs = gr.Textbox(label="Antwort")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath")
    sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
"""

# Build the tabbed Gradio interface
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [chat, upload, suche],
        ["Chat", "Upload", "Suche"]
    )

# Start the Gradio application
demo.launch()