"""RAG pipeline: FAISS similarity search with metadata filtering, recency
re-ranking, and answer generation via the OpenAI chat API."""

import ast
import os

import faiss
import numpy as np
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")


def search_with_metadata_and_reranking(query, index, chunked_documents, model, filters=None, top_k=5):
    """Embed *query*, retrieve the top_k nearest chunks from *index*, apply
    optional metadata filters, and return the survivors newest-first.

    Args:
        query: Natural-language query string.
        index: FAISS index aligned positionally with *chunked_documents*.
        chunked_documents: Sequence of document chunks; each chunk exposes a
            ``metadata`` dict (assumed to include ``publish_date`` and,
            optionally, a stringified list under ``categories``).
        model: Sentence-embedding model exposing ``encode(list[str])``.
        filters: Optional dict of metadata key -> required value. The special
            key ``"categories"`` is treated as membership in the stored list.
        top_k: Number of nearest neighbours to retrieve before filtering.

    Returns:
        List of matching chunks sorted by ``publish_date`` descending.
    """
    query_embedding = model.encode([query])
    # Distances are not needed for filtering/re-ranking, only the indices.
    _distances, indices = index.search(query_embedding, top_k)
    # FAISS pads with -1 when the index holds fewer than top_k vectors;
    # without this guard, chunked_documents[-1] would silently return the
    # last chunk instead of "no result".
    results = [chunked_documents[i] for i in indices[0] if i != -1]

    if filters:
        filtered_results = []
        for result in results:
            match = True
            for key, value in filters.items():
                if key == "categories":
                    # SECURITY FIX: the original used eval() on stored
                    # metadata, which executes arbitrary code. literal_eval
                    # parses the same '["a", "b"]' literals safely and raises
                    # on anything that is not a plain Python literal.
                    categories = ast.literal_eval(result.metadata.get("categories", "[]"))
                    if value not in categories:
                        match = False
                        break
                elif result.metadata.get(key) != value:
                    match = False
                    break
            if match:
                filtered_results.append(result)
        results = filtered_results

    # Re-rank newest-first. .get with a sentinel avoids a KeyError on chunks
    # missing publish_date; such chunks sort last under reverse=True.
    results.sort(key=lambda x: x.metadata.get('publish_date', ''), reverse=True)
    return results


def rag_based_generation(query, index, chunked_documents, model, filters=None, top_k=5):
    """Answer *query* with retrieval-augmented generation.

    Retrieves and re-ranks chunks via search_with_metadata_and_reranking,
    concatenates their page_content into a context prompt, and asks the
    OpenAI chat API (legacy ``ChatCompletion`` interface, openai<1.0) to
    answer from that context.

    Args:
        query: Natural-language question.
        index, chunked_documents, model, filters, top_k: Passed through to
            search_with_metadata_and_reranking unchanged.

    Returns:
        The generated answer string, or a fixed fallback message when no
        relevant chunks survive retrieval/filtering.
    """
    results = search_with_metadata_and_reranking(query, index, chunked_documents, model, filters, top_k)
    print("Results before filtering:", results)
    if not results:
        print("No relevant chunks found for the query.")
        return "No relevant information found."

    context = " ".join([result.page_content for result in results if result is not None])
    prompt = f"Based on the following information:\n{context}\n\nAnswer the question: {query}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    # NOTE(review): legacy openai<1.0 interface; openai>=1.0 renamed this to
    # client.chat.completions.create — left unchanged to match the pinned SDK.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,
        messages=messages
    )
    generated_answer = response.choices[0].message['content'].strip()
    return generated_answer