Spaces:
Sleeping
Sleeping
Create data_service.py
Browse files- data_service.py +90 -0
data_service.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# data_service.py
|
2 |
+
from typing import Dict, Any
|
3 |
+
from data_structures import WEB_DATA_REPORTS
|
4 |
+
from gpt_analyzer import GPTAnalyzer
|
5 |
+
|
6 |
+
class DataAssessmentService:
    """Assess free-text data requests and map them onto available data sources.

    The service asks a GPT analyzer to interpret a request, then classifies
    every required report as: available in the Web Data catalogue, requiring
    a data-lake extract, or not available at all.

    Attributes:
        analyzer: GPTAnalyzer used to interpret free-text requests.
        web_data: Web Data report catalogue, keyed by category then report
            name. Each report entry is assumed to carry "description",
            "fields" (mapping) and "filters" (list) — TODO confirm against
            data_structures.WEB_DATA_REPORTS.
    """

    def __init__(self, api_key: str):
        """Create the service with a GPT analyzer bound to *api_key*."""
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS

    def _matching_webdata_reports(self, category: str, required_fields: set) -> list:
        """Return the Web Data reports in *category* whose fields cover
        *required_fields*.

        Note: an empty *required_fields* set is a subset of everything, so it
        matches every report in the category (original behavior, preserved).
        """
        matches = []
        for report_name, report_details in self.web_data[category].items():
            available_fields = set(report_details["fields"].keys())
            if required_fields.issubset(available_fields):
                matches.append({
                    "report_name": report_name,
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"],
                })
        return matches

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify each required report from *analysis_result*.

        Args:
            analysis_result: Analyzer output. Each entry of its
                "required_reports" list must carry "category" and
                "report_type", and may carry "fields_needed".

        Returns:
            Dict with keys "available_in_webdata", "needs_datalake",
            "not_available" and "access_instructions".

        Raises:
            KeyError: if a required-report entry lacks "category" or
                "report_type" (unchanged from the original contract).
        """
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }

        for required in analysis_result.get("required_reports", []):
            category = required["category"]
            report_type = required["report_type"]

            # Guard clause: unknown category cannot be served at all.
            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data",
                })
                continue

            # Hoisted out of the per-report loop: the required field set
            # depends only on the request entry, not on the candidate report.
            required_fields = set(required.get("fields_needed", []))
            matching_reports = self._matching_webdata_reports(category, required_fields)

            if matching_reports:
                availability["available_in_webdata"].extend(matching_reports)
                # Instructions quote the first match's filters, mirroring the
                # original behavior.
                availability["access_instructions"].append(
                    f"Access {category} data through Web Data portal using filters: "
                    f"{', '.join(matching_reports[0]['filters'])}"
                )
            else:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data",
                })

        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze *request_text* and report where the needed data lives.

        Returns:
            On analyzer failure (an "error" key in the analysis):
            {"status": "error", "message": ...}. Otherwise a "success"
            payload containing the request interpretation, the reports
            available in Web Data, plus "data_lake_requirements" and/or
            "unavailable_data" sections when applicable.
        """
        # 1. Interpret the free-text request against the known categories.
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
        if "error" in analysis:
            return {"status": "error", "message": analysis["error"]}

        # 2. Map required reports onto Web Data / data lake.
        availability = self.check_data_availability(analysis)

        # 3. Build the caller-facing response.
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis["interpretation"],
                "confidence": analysis["confidence_score"],
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }

        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }

        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        return response
|