midrees2806 committed on
Commit
acefd67
·
verified ·
1 Parent(s): 575340c

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +10 -4
rag.py CHANGED
@@ -1,7 +1,7 @@
1
  import json
2
  from sentence_transformers import SentenceTransformer, util
3
  from groq import Groq
4
- import datetime
5
  import requests
6
  from io import BytesIO
7
  from PIL import Image, ImageDraw, ImageFont
@@ -23,9 +23,15 @@ similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
23
  HF_DATASET_REPO = "midrees2806/unmatched_queries" # Your dataset repo
24
  HF_TOKEN = os.getenv("HF_TOKEN") # From Space secrets
25
 
26
- # Load dataset (automatically using the path)
27
- with open('dataset.json', 'r') as f:
28
- dataset = json.load(f)
 
 
 
 
 
 
29
 
30
  # Precompute embeddings
31
  dataset_questions = [item.get("input", "").lower().strip() for item in dataset]
 
1
  import json
2
  from sentence_transformers import SentenceTransformer, util
3
  from groq import Groq
4
+ from datetime import datetime
5
  import requests
6
  from io import BytesIO
7
  from PIL import Image, ImageDraw, ImageFont
 
23
  HF_DATASET_REPO = "midrees2806/unmatched_queries" # Your dataset repo
24
  HF_TOKEN = os.getenv("HF_TOKEN") # From Space secrets
25
 
26
+ # --- Dataset Loading ---
27
+ try:
28
+ with open('dataset.json', 'r') as f:
29
+ dataset = json.load(f)
30
+ if not all(isinstance(item, dict) and 'input' in item and 'response' in item for item in dataset):
31
+ raise ValueError("Invalid dataset structure")
32
+ except Exception as e:
33
+ print(f"Error loading dataset: {e}")
34
+ dataset = []
35
 
36
  # Precompute embeddings
37
  dataset_questions = [item.get("input", "").lower().strip() for item in dataset]