Rathapoom committed on
Commit
df69a6f
·
verified ·
1 Parent(s): 824f395

Create data_service.py

Browse files
Files changed (1) hide show
  1. data_service.py +90 -0
data_service.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data_service.py
2
+ from typing import Dict, Any
3
+ from data_structures import WEB_DATA_REPORTS
4
+ from gpt_analyzer import GPTAnalyzer
5
+
6
class DataAssessmentService:
    """Assess whether a data request can be served from Web Data reports.

    The service asks ``self.analyzer`` to turn free-text into a structured
    analysis, then compares the reports that analysis requires against the
    ``self.web_data`` catalogue (category -> report name -> report details).
    """

    def __init__(self, api_key: str):
        """Create the analyzer and bind the Web Data report catalogue.

        Args:
            api_key: Forwarded unchanged to ``GPTAnalyzer``.
        """
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS

    def _find_matching_reports(self, category, fields_needed) -> list:
        """Return descriptors of reports in *category* offering every needed field.

        Args:
            category: A key of ``self.web_data``; it is lower-cased to build
                each report's ``access_path``.
            fields_needed: Iterable of field names the request requires. An
                empty iterable matches every report in the category.

        Returns:
            A list of dicts with ``report_name``, ``description``,
            ``access_path`` and ``filters`` for each matching report.
        """
        # Built once per required entry rather than once per catalogue report.
        required_fields = set(fields_needed)
        return [
            {
                "report_name": report_name,
                "description": report_details["description"],
                "access_path": f"/web-data/{category.lower()}/{report_name}",
                "filters": report_details["filters"],
            }
            for report_name, report_details in self.web_data[category].items()
            if required_fields.issubset(report_details["fields"].keys())
        ]

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Sort every required report into an availability bucket.

        Args:
            analysis_result: Analysis mapping. Its optional
                ``"required_reports"`` entry is an iterable of mappings that
                may carry ``"category"``, ``"report_type"`` and
                ``"fields_needed"``. Missing keys are tolerated: a missing
                category is reported as not available, and missing or ``None``
                ``fields_needed`` means "no specific fields".

        Returns:
            A dict with four lists:
            ``available_in_webdata`` (matching report descriptors),
            ``needs_datalake`` (category exists but no report has all fields),
            ``not_available`` (category absent from the catalogue) and
            ``access_instructions`` (one sentence per satisfied entry).
        """
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }

        # `or []` also covers an explicit None coming back from the analyzer.
        for required in analysis_result.get("required_reports") or []:
            category = required.get("category")
            report_type = required.get("report_type")

            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data",
                })
                continue

            matching_reports = self._find_matching_reports(
                category, required.get("fields_needed") or []
            )
            if not matching_reports:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data",
                })
                continue

            availability["available_in_webdata"].extend(matching_reports)
            # Only the first matching report's filters are quoted in the
            # instruction, even when several reports match.
            filter_text = ", ".join(matching_reports[0]["filters"])
            availability["access_instructions"].append(
                f"Access {category} data through Web Data portal using filters: "
                f"{filter_text}"
            )

        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze a request and report where its data can be obtained.

        Args:
            request_text: The request, passed to the analyzer together with
                the list of known Web Data categories.

        Returns:
            ``{"status": "error", "message": ...}`` when the analysis contains
            an ``"error"`` key. Otherwise a ``"success"`` dict with
            ``request_analysis`` and ``data_availability``, plus
            ``data_lake_requirements`` and/or ``unavailable_data`` only when
            the corresponding bucket is non-empty.

        Raises:
            KeyError: If a non-error analysis lacks ``"interpretation"`` or
                ``"confidence_score"``.
        """
        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
        if "error" in analysis:
            return {"status": "error", "message": analysis["error"]}

        # 2. Check data availability
        availability = self.check_data_availability(analysis)

        # 3. Generate response
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis["interpretation"],
                "confidence": analysis["confidence_score"],
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }

        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }

        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        return response