Spaces:
Sleeping
Sleeping
Update data_service.py
Browse files- data_service.py +31 -4
data_service.py
CHANGED
@@ -5,10 +5,15 @@ from gpt_analyzer import GPTAnalyzer
|
|
5 |
|
6 |
class DataAssessmentService:
|
7 |
def __init__(self, api_key: str):
|
|
|
|
|
8 |
self.analyzer = GPTAnalyzer(api_key)
|
9 |
self.web_data = WEB_DATA_REPORTS
|
|
|
10 |
|
11 |
def check_data_availability(self, analysis_result: dict) -> dict:
|
|
|
|
|
12 |
availability = {
|
13 |
"available_in_webdata": [],
|
14 |
"needs_datalake": [],
|
@@ -16,16 +21,30 @@ class DataAssessmentService:
|
|
16 |
"access_instructions": []
|
17 |
}
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
for required in analysis_result.get("required_reports", []):
|
20 |
-
|
21 |
-
|
|
|
22 |
|
|
|
23 |
if category in self.web_data:
|
24 |
matching_reports = []
|
25 |
for report_name, report_details in self.web_data[category].items():
|
|
|
26 |
required_fields = set(required.get("fields_needed", []))
|
27 |
available_fields = set(report_details["fields"].keys())
|
28 |
|
|
|
|
|
|
|
29 |
if required_fields.issubset(available_fields):
|
30 |
matching_reports.append({
|
31 |
"report_name": report_name,
|
@@ -53,23 +72,30 @@ class DataAssessmentService:
|
|
53 |
"reason": "Category not found in Web Data"
|
54 |
})
|
55 |
|
|
|
56 |
return availability
|
57 |
|
58 |
def assess_request(self, request_text: str) -> Dict[str, Any]:
|
|
|
|
|
59 |
# 1. Analyze the request
|
60 |
analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
|
|
|
|
|
61 |
if "error" in analysis:
|
|
|
62 |
return {"status": "error", "message": analysis["error"]}
|
63 |
|
64 |
# 2. Check data availability
|
65 |
availability = self.check_data_availability(analysis)
|
|
|
66 |
|
67 |
# 3. Generate response
|
68 |
response = {
|
69 |
"status": "success",
|
70 |
"request_analysis": {
|
71 |
-
"interpretation": analysis
|
72 |
-
"confidence": analysis
|
73 |
},
|
74 |
"data_availability": {
|
75 |
"available_reports": availability["available_in_webdata"],
|
@@ -87,4 +113,5 @@ class DataAssessmentService:
|
|
87 |
if availability["not_available"]:
|
88 |
response["unavailable_data"] = availability["not_available"]
|
89 |
|
|
|
90 |
return response
|
|
|
5 |
|
6 |
class DataAssessmentService:
|
7 |
def __init__(self, api_key: str):
|
8 |
+
print("Initializing DataAssessmentService")
|
9 |
+
print(f"API Key available: {bool(api_key)}")
|
10 |
self.analyzer = GPTAnalyzer(api_key)
|
11 |
self.web_data = WEB_DATA_REPORTS
|
12 |
+
print(f"Available categories: {list(self.web_data.keys())}")
|
13 |
|
14 |
def check_data_availability(self, analysis_result: dict) -> dict:
|
15 |
+
print(f"Checking availability for analysis: {analysis_result}")
|
16 |
+
|
17 |
availability = {
|
18 |
"available_in_webdata": [],
|
19 |
"needs_datalake": [],
|
|
|
21 |
"access_instructions": []
|
22 |
}
|
23 |
|
24 |
+
if "error" in analysis_result:
|
25 |
+
print(f"Error in analysis result: {analysis_result['error']}")
|
26 |
+
return availability
|
27 |
+
|
28 |
+
if not analysis_result.get("required_reports"):
|
29 |
+
print("No required_reports in analysis result")
|
30 |
+
return availability
|
31 |
+
|
32 |
for required in analysis_result.get("required_reports", []):
|
33 |
+
print(f"Processing required report: {required}")
|
34 |
+
category = required.get("category")
|
35 |
+
report_type = required.get("report_type")
|
36 |
|
37 |
+
print(f"Checking category: {category}")
|
38 |
if category in self.web_data:
|
39 |
matching_reports = []
|
40 |
for report_name, report_details in self.web_data[category].items():
|
41 |
+
print(f"Checking report: {report_name}")
|
42 |
required_fields = set(required.get("fields_needed", []))
|
43 |
available_fields = set(report_details["fields"].keys())
|
44 |
|
45 |
+
print(f"Required fields: {required_fields}")
|
46 |
+
print(f"Available fields: {available_fields}")
|
47 |
+
|
48 |
if required_fields.issubset(available_fields):
|
49 |
matching_reports.append({
|
50 |
"report_name": report_name,
|
|
|
72 |
"reason": "Category not found in Web Data"
|
73 |
})
|
74 |
|
75 |
+
print(f"Final availability result: {availability}")
|
76 |
return availability
|
77 |
|
78 |
def assess_request(self, request_text: str) -> Dict[str, Any]:
|
79 |
+
print(f"Assessing request: {request_text}")
|
80 |
+
|
81 |
# 1. Analyze the request
|
82 |
analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
|
83 |
+
print(f"Analysis result: {analysis}")
|
84 |
+
|
85 |
if "error" in analysis:
|
86 |
+
print(f"Error in analysis: {analysis['error']}")
|
87 |
return {"status": "error", "message": analysis["error"]}
|
88 |
|
89 |
# 2. Check data availability
|
90 |
availability = self.check_data_availability(analysis)
|
91 |
+
print(f"Availability check result: {availability}")
|
92 |
|
93 |
# 3. Generate response
|
94 |
response = {
|
95 |
"status": "success",
|
96 |
"request_analysis": {
|
97 |
+
"interpretation": analysis.get("interpretation", ""),
|
98 |
+
"confidence": analysis.get("confidence_score", "LOW")
|
99 |
},
|
100 |
"data_availability": {
|
101 |
"available_reports": availability["available_in_webdata"],
|
|
|
113 |
if availability["not_available"]:
|
114 |
response["unavailable_data"] = availability["not_available"]
|
115 |
|
116 |
+
print(f"Final response: {response}")
|
117 |
return response
|