Spaces:
Running
Running
Update rag.py
Browse files
rag.py
CHANGED
@@ -7,6 +7,7 @@ import pandas as pd
|
|
7 |
from datasets import load_dataset, Dataset
|
8 |
from dotenv import load_dotenv
|
9 |
import random
|
|
|
10 |
|
11 |
# Load environment variables
|
12 |
load_dotenv()
|
@@ -36,15 +37,23 @@ UNMATCHED_RESPONSES = [
|
|
36 |
"We appreciate your question. It has been forwarded for further processing. Until it’s available here, feel free to visit the official UE website or use the contact options:\n\n📞 +92-42-99262231-33\n✉️ [email protected]\n🌐 https://ue.edu.pk"
|
37 |
]
|
38 |
|
39 |
-
# Load
|
|
|
40 |
try:
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
except Exception as e:
|
46 |
-
print(f"Error loading
|
47 |
-
dataset = []
|
48 |
|
49 |
# Precompute embeddings
|
50 |
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
|
|
|
7 |
from datasets import load_dataset, Dataset
|
8 |
from dotenv import load_dotenv
|
9 |
import random
|
10 |
+
import glob
|
11 |
|
12 |
# Load environment variables
|
13 |
load_dotenv()
|
|
|
37 |
"We appreciate your question. It has been forwarded for further processing. Until it’s available here, feel free to visit the official UE website or use the contact options:\n\n📞 +92-42-99262231-33\n✉️ [email protected]\n🌐 https://ue.edu.pk"
|
38 |
]
|
39 |
|
40 |
+
# Load multiple JSON datasets
def load_json_datasets(pattern='datasets/*.json'):
    """Collect question/answer records from every JSON file matching *pattern*.

    Each matching file is expected to contain a JSON list. Only list items
    that are dicts holding both a 'Question' and an 'Answer' key are kept;
    anything else is reported with ``print`` and skipped.

    Args:
        pattern: Glob pattern selecting the JSON files to read.

    Returns:
        A list of the accepted record dicts, in the order the files were
        returned by ``glob.glob`` and the items appeared in each file.
    """
    records = []
    for file_path in glob.glob(pattern):
        # Guard each file on its own so that one unreadable or malformed
        # file does not prevent the remaining files from being loaded.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading {file_path}: {e}")
            continue

        if not isinstance(data, list):
            print(f"File {file_path} does not contain a list.")
            continue

        for item in data:
            if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
                records.append(item)
            else:
                print(f"Invalid entry in {file_path}: {item}")
    return records


dataset = load_json_datasets()
|
|
|
57 |
|
58 |
# Precompute embeddings
|
59 |
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
|