import os
import gradio as gr
from groq import Groq
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import PyPDF2
import re
from collections import Counter
from sklearn.metrics.pairwise import cosine_similarity
import logging

# Setup logging
logging.basicConfig(filename='query_logs.log', level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')

# Groq API key (read from an environment variable instead of hardcoding it in the source)
groq_api_key = os.environ.get("GROQ_API_KEY")

# Initialize the Groq API client
client = Groq(api_key=groq_api_key)

# Path to the already uploaded book
book_path = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'

# Cache system to store previous responses
cache = {}

# Check if the file exists
if os.path.exists(book_path):
    print(f"Book found at: {book_path}")
else:
    print("Book not found!")

# Function to read the PDF file
def read_pdf(file_path):
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        number_of_pages = len(reader.pages)
        text = ""
        for page_num in range(number_of_pages):
            page = reader.pages[page_num]
            text += page.extract_text()
    return text

# Read the PDF content
book_text = read_pdf(book_path)
print(book_text[:1000])  # Print the first 1000 characters of the book for verification

# Vectorization of the extracted PDF content
def vectorize_text(text):
    try:
        # Use a Sentence Transformer to create embeddings
        model = SentenceTransformer('all-MiniLM-L6-v2')
        sentences = text.split('\n')  # Split the text into lines, treated as "sentences" for vectorization
        embeddings = model.encode(sentences, show_progress_bar=True)

        # Create a FAISS index for similarity search
        index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance index
        index.add(np.array(embeddings))  # Add embeddings to the index
        print(f"Added {len(sentences)} sentences to the vector store.")
        return index, sentences
    except Exception as e:
        print(f"Error during vectorization: {str(e)}")
        return None, None

# Vectorize the extracted PDF text
vector_index, sentences = vectorize_text(book_text)

# Check if the vectorization was successful
if vector_index is not None:
    print("Vectorization complete.")
else:
    print("Vectorization failed.")

# Function to generate embeddings for the query using the SentenceTransformer
def generate_query_embedding(query, sentence_transformer_model):
    return sentence_transformer_model.encode([query])

# Function to check relevancy and handle out-of-bounds queries
# (defined for threshold-based filtering; not invoked in the main flow below)
def check_relevancy(D, threshold=0.4):
    if D[0][0] > threshold:
        return False
    return True

# Function to generate diverse responses from the LLM with varied parameters
def generate_diverse_responses(client, prompt, n=3):
    responses = []
    for i in range(n):
        temperature = 0.5 + (i * 0.2)  # Vary temperature from 0.5 up to 0.9
        top_p = 0.9 - (i * 0.2)        # Vary top_p from 0.9 down to 0.5
        try:
            chat_completion = client.chat.completions.create(
                messages=[{
                    "role": "user",
                    "content": prompt,
                }],
                model="llama3-8b-8192",
                temperature=temperature,
                top_p=top_p
            )
            responses.append(chat_completion.choices[0].message.content)
        except Exception as e:
            logging.error(f"Error generating response: {str(e)}")
            responses.append("Sorry, an error occurred while generating this response.")
    return responses
# Function to aggregate responses using a voting mechanism plus semantic similarity
def aggregate_responses(responses):
    # Simple voting mechanism: if any response is repeated, return the most common one
    response_counter = Counter(responses)
    most_common_response, count = response_counter.most_common(1)[0]
    if count > 1:
        return most_common_response

    # Otherwise rank responses by semantic similarity to the first response,
    # masking out the first response itself (it would trivially be the most similar)
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(responses)
    first_embedding = embeddings[0].reshape(1, -1)
    similarities = cosine_similarity(first_embedding, embeddings)[0]
    similarities[0] = -1.0  # exclude self-similarity
    top_response_index = np.argmax(similarities)

    # Return the response most similar to the first response
    return responses[top_response_index]

# Function to generate answers using the Groq API with a Llama model
def generate_answer_with_groq(query, vector_index, sentences, sentence_transformer_model):
    # Check the cache for previous queries
    if query in cache:
        logging.info(f"Cache hit for query: {query}")
        return cache[query]
    try:
        # Get the query embedding using the sentence transformer
        query_embedding = generate_query_embedding(query, sentence_transformer_model)

        # Perform a similarity search on the vector store (find the top 5 similar sentences)
        D, I = vector_index.search(np.array(query_embedding), k=5)

        # If no relevant content is found, generate a fallback response
        if len(I[0]) == 0 or D[0][0] > 1.0:
            fallback_response = (
                f"I couldn't find anything relevant in the document, "
                f"but here's a general answer to your query: {query}"
            )
            chat_completion = client.chat.completions.create(
                messages=[{
                    "role": "user",
                    "content": fallback_response,
                }],
                model="llama3-8b-8192",
            )
            cache[query] = chat_completion.choices[0].message.content
            return cache[query]

        # Retrieve the most relevant sentences
        relevant_sentences = [sentences[i] for i in I[0]]

        # Combine the relevant sentences for the final prompt
        combined_text = " ".join(relevant_sentences)

        # Create a prompt with the relevant content
        final_prompt = f"**Relevant Information:**\n\n '{combined_text}'\n\n **Answer:** {query}"

        # Generate diverse responses using the Groq API
        responses = generate_diverse_responses(client, final_prompt)

        # Aggregate the responses to balance stability and variety
        final_response = aggregate_responses(responses)

        # Cache the response for future queries
        cache[query] = final_response
        return final_response
    except Exception as e:
        logging.error(f"Error during answer generation with the Groq API: {str(e)}")
        return f"Error during answer generation: {str(e)}"

# Gradio app function
def gradio_interface(query):
    global vector_index, sentences

    # Initialize the sentence transformer model (re-created on each call; could be cached globally)
    sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')

    if vector_index is None or sentences is None:
        return "Vector index or sentences not initialized properly."

    # Generate the answer using the Groq API and the Llama model with varied responses
    answer = generate_answer_with_groq(query, vector_index, sentences, sentence_transformer_model)

    # Log the query and answer for monitoring
    logging.info(f"Query: {query}, Answer: {answer}")

    return f"### Here's your response:\n\n{answer}"

# Create the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Enter your query"),
    outputs="markdown",  # Use Markdown output for better formatting
    title="Generative_AI_Foundations_in_Python PDF-based Query Answering",
    description="Ask any question about the content in the uploaded PDF and receive diverse, reliable answers."
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
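
# Usage sketch (assumptions: the script is saved as app.py and the Groq key is exported
# as GROQ_API_KEY; both names are illustrative, not mandated by the code above):
#
#   export GROQ_API_KEY="your-groq-api-key"
#   python app.py
#
# Gradio then serves the interface on a local URL (http://127.0.0.1:7860 by default),
# where questions about the indexed PDF can be entered in the textbox.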