Spaces:
Sleeping
Sleeping
Update data_service.py
Browse files- data_service.py +96 -82
data_service.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# data_service.py
|
|
|
2 |
from typing import Dict, List, Any
|
3 |
from data_structures import WEB_DATA_REPORTS
|
4 |
from gpt_analyzer import GPTAnalyzer
|
@@ -10,9 +11,11 @@ class DataAssessmentService:
|
|
10 |
self.analyzer = GPTAnalyzer(api_key)
|
11 |
self.web_data = WEB_DATA_REPORTS
|
12 |
print(f"Available categories: {list(self.web_data.keys())}")
|
|
|
13 |
|
14 |
def check_data_availability(self, analysis_result: dict) -> dict:
|
15 |
-
print(f"Checking availability for analysis
|
|
|
16 |
|
17 |
availability = {
|
18 |
"available_in_webdata": [],
|
@@ -21,97 +24,108 @@ class DataAssessmentService:
|
|
21 |
"access_instructions": []
|
22 |
}
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
"
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
})
|
55 |
-
|
56 |
-
if matching_reports:
|
57 |
-
availability["available_in_webdata"].extend(matching_reports)
|
58 |
-
availability["access_instructions"].append(
|
59 |
-
f"Access {category} data through Web Data portal using filters: "
|
60 |
-
f"{', '.join(matching_reports[0]['filters'])}"
|
61 |
-
)
|
62 |
else:
|
63 |
-
availability["
|
64 |
"category": category,
|
65 |
"report_type": report_type,
|
66 |
-
"reason": "
|
67 |
})
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
})
|
74 |
|
75 |
print(f"Final availability result: {availability}")
|
76 |
return availability
|
77 |
|
78 |
def assess_request(self, request_text: str) -> Dict[str, Any]:
|
79 |
-
print(f"
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
103 |
}
|
104 |
-
}
|
105 |
-
|
106 |
-
if availability["needs_datalake"]:
|
107 |
-
response["data_lake_requirements"] = {
|
108 |
-
"reports_needed": availability["needs_datalake"],
|
109 |
-
"estimated_processing_time": "2-3 business days",
|
110 |
-
"requires_it_support": True
|
111 |
-
}
|
112 |
-
|
113 |
-
if availability["not_available"]:
|
114 |
-
response["unavailable_data"] = availability["not_available"]
|
115 |
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# data_service.py
|
2 |
+
import traceback
|
3 |
from typing import Dict, List, Any
|
4 |
from data_structures import WEB_DATA_REPORTS
|
5 |
from gpt_analyzer import GPTAnalyzer
|
|
|
11 |
self.analyzer = GPTAnalyzer(api_key)
|
12 |
self.web_data = WEB_DATA_REPORTS
|
13 |
print(f"Available categories: {list(self.web_data.keys())}")
|
14 |
+
print(f"Web data structure loaded: {bool(self.web_data)}")
|
15 |
|
16 |
def check_data_availability(self, analysis_result: dict) -> dict:
    """Classify each required report by where its data can be obtained.

    Every entry of ``analysis_result["required_reports"]`` is compared
    against ``self.web_data``:

    * category present and at least one report exposes every requested
      field -> the matching reports are added to ``"available_in_webdata"``
      and one line is added to ``"access_instructions"`` (built from the
      filters of the first match);
    * category present but no report covers the fields ->
      ``"needs_datalake"``;
    * category absent -> ``"not_available"``.

    Entries that are not dicts, or that lack ``category`` or
    ``report_type``, are skipped. A missing or ``None``
    ``required_reports`` / ``fields_needed`` is treated as empty.

    Args:
        analysis_result: Mapping read via ``.get("required_reports")``.

    Returns:
        The availability dict. If an exception occurs while processing,
        whatever was collected so far is returned with an extra ``"error"``
        key holding the exception text.
    """
    print("Checking data availability for analysis result")
    print(f"Analysis result: {analysis_result}")

    # NOTE(review): the diff view this block was recovered from elides the
    # two middle entries of this literal; "needs_datalake" and
    # "not_available" are reconstructed from how the dict is used below.
    # Confirm against the real file.
    availability = {
        "available_in_webdata": [],
        "needs_datalake": [],
        "not_available": [],
        "access_instructions": [],
    }

    try:
        # `or []` also covers an explicit null for "required_reports".
        for required in analysis_result.get("required_reports") or []:
            print(f"Processing required report: {required}")
            if not isinstance(required, dict):
                # One malformed entry must not abort the remaining reports.
                print("Skipping required report that is not a dict")
                continue

            category = required.get("category")
            report_type = required.get("report_type")
            if not category or not report_type:
                print("Missing category or report_type in required report")
                continue

            print(f"Checking category: {category} for report type: {report_type}")
            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data",
                })
                continue

            # Loop-invariant for this request, so build it once; a null
            # "fields_needed" means "no specific fields required".
            required_fields = set(required.get("fields_needed") or [])

            matching_reports = []
            for report_name, report_details in self.web_data[category].items():
                available_fields = set(report_details["fields"].keys())

                print(f"Checking report {report_name}")
                print(f"Required fields: {required_fields}")
                print(f"Available fields: {available_fields}")

                if required_fields.issubset(available_fields):
                    matching_reports.append({
                        "name": report_details["name"],
                        "description": report_details["description"],
                        "access_path": f"/web-data/{category.lower()}/{report_name}",
                        "filters": report_details["filters"],
                    })
                    print(f"Found matching report: {report_name}")

            if matching_reports:
                availability["available_in_webdata"].extend(matching_reports)
                filters_list = ', '.join(matching_reports[0]['filters'])
                availability["access_instructions"].append(
                    f"Access {category} data through Web Data portal using filters: {filters_list}"
                )
            else:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data",
                })

    except Exception as e:
        # Best-effort boundary: keep partial results and record the failure.
        print(f"Error in check_data_availability: {str(e)}")
        print(traceback.format_exc())
        availability["error"] = str(e)

    print(f"Final availability result: {availability}")
    return availability
|
83 |
|
84 |
def assess_request(self, request_text: str) -> Dict[str, Any]:
    """Analyze a free-text data request and report how it can be fulfilled.

    Steps:
      1. ``self.analyzer.analyze_request`` is called with the request text
         and the list of ``self.web_data`` category names.
      2. The analysis is passed to ``self.check_data_availability``.
      3. A response dict is assembled from both results.

    Args:
        request_text: The request to assess.

    Returns:
        On success, a dict with ``"status": "success"``,
        ``"request_analysis"`` and ``"data_availability"``. It also carries
        ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when the
        availability check produced such entries. If the availability check
        recorded an ``"error"``, it is copied to
        ``response["data_availability"]["error"]`` so callers can tell the
        result is partial.
        On failure, ``{"status": "error", "message": ...}``. This is
        returned when the analysis contains an ``"error"`` key or when any
        exception is raised while processing.
    """
    print(f"Starting assessment of request: {request_text}")

    try:
        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
        print(f"Analysis completed: {analysis}")

        if "error" in analysis:
            return {"status": "error", "message": analysis["error"]}

        # 2. Check data availability
        availability = self.check_data_availability(analysis)
        print(f"Availability check completed: {availability}")

        # 3. Generate response
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis.get("interpretation", ""),
                "confidence": analysis.get("confidence_score", "LOW"),
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }

        # The availability check swallows its own exceptions and only
        # records them under "error"; surface that here instead of
        # silently presenting partial data as a clean success.
        if "error" in availability:
            response["data_availability"]["error"] = availability["error"]

        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }

        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        print(f"Final response prepared: {response}")
        return response

    except Exception as e:
        # Top-level boundary: report the failure to the caller, never raise.
        print(f"Error in assess_request: {str(e)}")
        print(traceback.format_exc())
        return {
            "status": "error",
            "message": f"Error processing request: {str(e)}"
        }
|