import logging
import os
from collections import Counter

import faiss
import gradio as gr
import numpy as np
import PyPDF2
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# --------------------- Setup ---------------------
logging.basicConfig(
    filename='query_logs.log',
    level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s'
)

# Read the API key from the environment rather than hardcoding it;
# committing a live key to source control leaks the credential.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

PDF_PATH = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'
sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')

# Simple in-memory cache: query string -> generated answer.
cache = {}

# --------------------- Vectorization Function ---------------------
def vectorize_text(sentences_with_pages):
    """Vectorize sentences using SentenceTransformer and create a FAISS index."""
    try:
        sentences = [item['sentence'] for item in sentences_with_pages]
        embeddings = sentence_transformer_model.encode(sentences, show_progress_bar=True)
        # IndexFlatL2 does exact (brute-force) nearest-neighbour search on
        # L2 distance, which is fine for a single book's worth of text.
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings))
        logging.info(f"Added {len(sentences)} sentences to the vector store.")
        return index, sentences_with_pages
    except Exception as e:
        logging.error(f"Error during vectorization: {str(e)}")
        return None, None

# --------------------- PDF Processing ---------------------
def read_pdf(file_path):
    """Extract text line by line, tagging each line with its 1-based page number."""
    if not os.path.exists(file_path):
        logging.error(f"PDF file not found at: {file_path}")
        return []

    sentences_with_pages = []
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        for page_num, page in enumerate(reader.pages):
            text = page.extract_text()
            if text:
                # Note: splitting on '\n' yields layout lines rather than true
                # sentences, but it is a workable chunking unit here.
                sentences = [line.strip() for line in text.split('\n') if line.strip()]
                for sentence in sentences:
                    sentences_with_pages.append({'sentence': sentence, 'page_number': page_num + 1})
    return sentences_with_pages

# Read and vectorize the PDF content once at startup.
sentences_with_pages = read_pdf(PDF_PATH)
vector_index, sentences_with_pages = vectorize_text(sentences_with_pages)

# --------------------- Query Handling ---------------------
def generate_query_embedding(query):
    return sentence_transformer_model.encode([query])

def is_query_relevant(distances, threshold=1.0):
    """Treat the query as on-topic if the closest match is within the L2 threshold."""
    return distances[0][0] <= threshold

def generate_diverse_responses(prompt, n=3):
    """Sample n completions with staggered temperature/top_p for diversity."""
    responses = []
    for i in range(n):
        temperature = 0.7 + (i * 0.1)
        top_p = 0.9 - (i * 0.1)
        try:
            chat_completion = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model="llama3-8b-8192",
                temperature=temperature,
                top_p=top_p
            )
            responses.append(chat_completion.choices[0].message.content.strip())
        except Exception as e:
            logging.error(f"Error generating response: {str(e)}")
            responses.append("Error generating this response.")
    return responses

def aggregate_responses(responses):
    """Pick the majority response; break ties with the one closest to the centroid."""
    response_counter = Counter(responses)
    most_common_response, count = response_counter.most_common(1)[0]
    if count > 1:
        return most_common_response
    # No exact duplicates: embed all candidates and return the one most
    # similar to the mean embedding (a rough consensus pick).
    embeddings = sentence_transformer_model.encode(responses)
    avg_embedding = np.mean(embeddings, axis=0)
    similarities = cosine_similarity([avg_embedding], embeddings)[0]
    return responses[np.argmax(similarities)]

def generate_answer(query):
    if query in cache:
        logging.info(f"Cache hit for query: {query}")
        return cache[query]

    # Guard against a failed index build (vectorize_text returns None on error).
    if vector_index is None:
        logging.error("Vector index unavailable; cannot answer query.")
        return "Sorry, the document index failed to load."

    try:
        query_embedding = generate_query_embedding(query)
        distances, indices = vector_index.search(np.array(query_embedding), k=5)

        if is_query_relevant(distances):
            relevant_items = [sentences_with_pages[i] for i in indices[0]]
            combined_text = " ".join([item['sentence'] for item in relevant_items])
            page_numbers = sorted(set(item['page_number'] for item in relevant_items))
            page_numbers_str = ', '.join(map(str, page_numbers))

            # Construct the primary prompt.
            prompt = f"""
Use the following context from "Generative AI Foundations" to answer the question.
If additional explanation is needed, provide an example.

**Context (Pages {page_numbers_str}):**
{combined_text}

**User's question:**
{query}

**Remember to indicate the specific page numbers.**
"""
            primary_responses = generate_diverse_responses(prompt)
            primary_answer = aggregate_responses(primary_responses)

            # Construct a follow-up prompt for an explanation or example.
            explanation_prompt = f"""
The user has a question about a complex topic. Could you provide an explanation
or example for better understanding?

**User's question:**
{query}

**Primary answer:**
{primary_answer}
"""
            explanation_responses = generate_diverse_responses(explanation_prompt)
            explanation_answer = aggregate_responses(explanation_responses)

            # Combine the primary answer, the explanation, and a citation line.
            full_response = (
                f"{primary_answer}\n\n{explanation_answer}\n\n"
                f"_From 'Generative AI Foundations,' pages {page_numbers_str}_"
            )
            cache[query] = full_response
            logging.info(f"Generated response for query: {query}")
            return full_response

        # General-knowledge fallback for questions outside the book.
        prompt = f"""
The user asked a question that is not covered in "Generative AI Foundations."
Please provide a helpful answer using general knowledge.

**User's question:**
{query}
"""
        fallback_responses = generate_diverse_responses(prompt)
        fallback_answer = aggregate_responses(fallback_responses)
        cache[query] = fallback_answer
        return fallback_answer

    except Exception as e:
        logging.error(f"Error generating answer: {str(e)}")
        return "Sorry, an error occurred while generating the answer."

# --------------------- Gradio Interface ---------------------
def gradio_interface(user_query, history):
    """Single-call handler (kept as an alternative to the two-step flow below;
    not wired into the Blocks UI)."""
    response = generate_answer(user_query)
    history = history or []
    history.append({"role": "user", "content": user_query})
    history.append({"role": "assistant", "content": response})
    return history, history

# Create the Gradio interface.
with gr.Blocks(css=".gradio-container {background-color: #f0f0f0}") as iface:
    gr.Markdown("""
    # **Generative AI Foundations Assistant**
    *Explore insights and get explanations with real-life examples from "Generative AI Foundations in Python".*
    """)

    chatbot = gr.Chatbot(height=500, type='messages')
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter",
            container=False
        )
        submit_btn = gr.Button("Send")

    def submit_message(user_query, history):
        # Step 1: record the user turn and clear the textbox.
        history = history or []
        history.append({"role": "user", "content": user_query})
        return "", history

    def bot_response(history):
        # Step 2: answer the most recent user turn.
        user_query = history[-1]['content']
        response = generate_answer(user_query)
        history.append({"role": "assistant", "content": response})
        return history

    txt.submit(submit_message, [txt, state], [txt, state], queue=False).then(
        bot_response, state, chatbot
    )
    submit_btn.click(submit_message, [txt, state], [txt, state], queue=False).then(
        bot_response, state, chatbot
    )

    reset_btn = gr.Button("Reset Chat")
    reset_btn.click(lambda: ([], []), outputs=[chatbot, state], queue=False)

# Launch the Gradio app.
if __name__ == "__main__":
    iface.launch()
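
# --------------------- Usage Notes ---------------------
# A minimal sketch of how to run this app locally, assuming the script is
# saved as app.py (an illustrative name, not fixed by the code) and the PDF
# named in PDF_PATH sits in the same directory:
#
#   pip install gradio groq sentence-transformers faiss-cpu numpy PyPDF2 scikit-learn
#   export GROQ_API_KEY="your-key-here"
#   python app.py
#
# Gradio serves the chat UI at http://127.0.0.1:7860 by default.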