midrees2806 committed on
Commit
6dd717f
·
verified ·
1 Parent(s): ee7c176

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +12 -14
rag.py CHANGED
@@ -1,16 +1,9 @@
1
  import json
2
  from sentence_transformers import SentenceTransformer, util
3
  from groq import Groq
4
- import datetime
5
- import requests
6
- from io import BytesIO
7
- from PIL import Image, ImageDraw, ImageFont
8
- import numpy as np
9
- from dotenv import load_dotenv
10
  import os
11
- import pandas as pd
12
  import csv
13
- import os
14
 
15
  # Load environment variables
16
  load_dotenv()
@@ -18,11 +11,11 @@ load_dotenv()
18
  # Initialize Groq client
19
  groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
20
 
21
- # Load models and dataset
22
  similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
23
 
24
  # Load dataset
25
- with open('dataset.json', 'r') as f:
26
  dataset = json.load(f)
27
 
28
  # Precompute embeddings
@@ -30,6 +23,11 @@ dataset_questions = [item.get("input", "").lower().strip() for item in dataset]
30
  dataset_answers = [item.get("response", "") for item in dataset]
31
  dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)
32
 
 
 
 
 
 
33
  def query_groq_llm(prompt, model_name="llama3-70b-8192"):
34
  try:
35
  chat_completion = groq_client.chat.completions.create(
@@ -63,12 +61,11 @@ def get_best_answer(user_input):
63
  best_match_idx = similarities.argmax().item()
64
  best_score = similarities[best_match_idx].item()
65
 
66
- # ✏️ If not matched well, log to CSV
67
  if best_score < 0.65:
68
- file_path = "unmatched_queries.csv"
69
  print(f"[DEBUG] Similarity score too low: {best_score}. Logging query to: {file_path}")
70
 
71
- # Check if file exists
72
  if not os.path.exists(file_path):
73
  print(f"[DEBUG] File {file_path} does not exist. Creating file with header.")
74
  try:
@@ -79,6 +76,7 @@ def get_best_answer(user_input):
79
  except Exception as e:
80
  print(f"[ERROR] Failed to create file: {e}")
81
 
 
82
  try:
83
  with open(file_path, mode="a", newline="", encoding="utf-8") as file:
84
  writer = csv.writer(file)
@@ -87,7 +85,7 @@ def get_best_answer(user_input):
87
  except Exception as e:
88
  print(f"[ERROR] Failed to write query to CSV: {e}")
89
 
90
- # 🧠 Prompt construction
91
  if best_score >= 0.65:
92
  original_answer = dataset_answers[best_match_idx]
93
  prompt = f"""As an official assistant for University of Education Lahore, provide a clear response:
 
1
  import json
2
  from sentence_transformers import SentenceTransformer, util
3
  from groq import Groq
 
 
 
 
 
 
4
  import os
 
5
  import csv
6
+ from dotenv import load_dotenv
7
 
8
  # Load environment variables
9
  load_dotenv()
 
11
  # Initialize Groq client
12
  groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
13
 
14
+ # Load similarity model
15
  similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
16
 
17
  # Load dataset
18
+ with open('dataset.json', 'r', encoding='utf-8') as f:
19
  dataset = json.load(f)
20
 
21
  # Precompute embeddings
 
23
  dataset_answers = [item.get("response", "") for item in dataset]
24
  dataset_embeddings = similarity_model.encode(dataset_questions, convert_to_tensor=True)
25
 
26
+ # Use absolute path for unmatched_queries.csv
27
+ base_dir = os.path.dirname(os.path.abspath(__file__))
28
+ file_path = os.path.join(base_dir, "unmatched_queries.csv")
29
+ print(f"[DEBUG] Writing to absolute path: {file_path}")
30
+
31
  def query_groq_llm(prompt, model_name="llama3-70b-8192"):
32
  try:
33
  chat_completion = groq_client.chat.completions.create(
 
61
  best_match_idx = similarities.argmax().item()
62
  best_score = similarities[best_match_idx].item()
63
 
64
+ # ✏️ Log to CSV if similarity is low
65
  if best_score < 0.65:
 
66
  print(f"[DEBUG] Similarity score too low: {best_score}. Logging query to: {file_path}")
67
 
68
+ # Create CSV with header if it doesn't exist
69
  if not os.path.exists(file_path):
70
  print(f"[DEBUG] File {file_path} does not exist. Creating file with header.")
71
  try:
 
76
  except Exception as e:
77
  print(f"[ERROR] Failed to create file: {e}")
78
 
79
+ # Append unmatched query
80
  try:
81
  with open(file_path, mode="a", newline="", encoding="utf-8") as file:
82
  writer = csv.writer(file)
 
85
  except Exception as e:
86
  print(f"[ERROR] Failed to write query to CSV: {e}")
87
 
88
+ # 🧠 Construct prompt
89
  if best_score >= 0.65:
90
  original_answer = dataset_answers[best_match_idx]
91
  prompt = f"""As an official assistant for University of Education Lahore, provide a clear response: