|
import ast
import os

import faiss
import numpy as np
import openai
|
|
|
|
|
openai.api_key = os.getenv("OPENAI_API_KEY") |
|
|
|
def search_with_metadata_and_reranking(query, index, chunked_documents, model, filters=None, top_k=5):
    """Retrieve the top_k nearest chunks for *query*, filter by metadata, and
    re-rank the survivors by publish date (newest first).

    Args:
        query: Natural-language query string.
        index: Vector index exposing ``search(embeddings, k)`` (FAISS-style),
            returning ``(distances, indices)``.
        chunked_documents: Sequence of chunk objects with ``metadata`` (dict)
            and ``page_content``, aligned with the index's vector ids.
        model: Embedding model exposing ``encode(list_of_texts)``.
        filters: Optional mapping of metadata key -> required value. The
            special key ``"categories"`` matches when the value appears in the
            chunk's stringified category list; every other key requires exact
            equality with ``metadata[key]``.
        top_k: Number of nearest neighbours to retrieve *before* filtering, so
            fewer than top_k results may be returned.

    Returns:
        List of matching chunk objects, sorted newest publish_date first.
    """
    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, top_k)

    # FAISS pads with -1 when the index holds fewer than top_k vectors; a raw
    # -1 would silently index the *last* document, so drop negative ids.
    results = [chunked_documents[i] for i in indices[0] if i >= 0]

    if filters:
        filtered_results = []
        for result in results:
            match = True
            for key, value in filters.items():
                if key == "categories":
                    # Categories are stored as a stringified Python list.
                    # literal_eval parses it safely; eval() would execute
                    # arbitrary code embedded in document metadata.
                    categories = ast.literal_eval(result.metadata.get("categories", "[]"))
                    if value not in categories:
                        match = False
                        break
                else:
                    if result.metadata.get(key) != value:
                        match = False
                        break
            if match:
                filtered_results.append(result)
        results = filtered_results

    # Newest first; chunks missing publish_date sort last instead of raising
    # KeyError as the unguarded x.metadata['publish_date'] lookup did.
    results.sort(key=lambda x: x.metadata.get('publish_date', ''), reverse=True)
    return results
|
|
|
def rag_based_generation(query, index, chunked_documents, model, filters=None, top_k=5):
    """Answer *query* with a chat model, grounded on retrieved chunks (RAG).

    Retrieves, filters, and re-ranks chunks via
    ``search_with_metadata_and_reranking``, concatenates their text into a
    prompt, and asks the OpenAI chat API for an answer.

    Args:
        query: Natural-language question.
        index / chunked_documents / model / filters / top_k: Passed straight
            through to ``search_with_metadata_and_reranking``.

    Returns:
        The model's answer string, or "No relevant information found." when
        retrieval produced no matching chunks.
    """
    results = search_with_metadata_and_reranking(query, index, chunked_documents, model, filters, top_k)
    # The retrieval step has already applied filtering and re-ranking, so the
    # old "Results before filtering" label was misleading.
    print("Retrieved results:", results)

    if not results:
        print("No relevant chunks found for the query.")
        return "No relevant information found."

    # Retrieval never yields None entries, so no per-item guard is needed.
    context = " ".join(result.page_content for result in results)

    prompt = f"Based on the following information:\n{context}\n\nAnswer the question: {query}"

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    # NOTE(review): this is the legacy openai<1.0 ChatCompletion interface;
    # openai>=1.0 requires OpenAI().chat.completions.create — confirm the
    # pinned SDK version before upgrading.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,
        messages=messages,
    )

    generated_answer = response.choices[0].message['content'].strip()
    return generated_answer
|
|
|
|