Rathapoom committed on
Commit
996fcd5
·
verified ·
1 Parent(s): 95c7c2d

Update data_service.py

Browse files
Files changed (1) hide show
  1. data_service.py +96 -82
data_service.py CHANGED
@@ -1,4 +1,5 @@
1
  # data_service.py
 
2
  from typing import Dict, List, Any
3
  from data_structures import WEB_DATA_REPORTS
4
  from gpt_analyzer import GPTAnalyzer
@@ -10,9 +11,11 @@ class DataAssessmentService:
10
  self.analyzer = GPTAnalyzer(api_key)
11
  self.web_data = WEB_DATA_REPORTS
12
  print(f"Available categories: {list(self.web_data.keys())}")
 
13
 
14
  def check_data_availability(self, analysis_result: dict) -> dict:
15
- print(f"Checking availability for analysis: {analysis_result}")
 
16
 
17
  availability = {
18
  "available_in_webdata": [],
@@ -21,97 +24,108 @@ class DataAssessmentService:
21
  "access_instructions": []
22
  }
23
 
24
- if "error" in analysis_result:
25
- print(f"Error in analysis result: {analysis_result['error']}")
26
- return availability
27
-
28
- if not analysis_result.get("required_reports"):
29
- print("No required_reports in analysis result")
30
- return availability
31
-
32
- for required in analysis_result.get("required_reports", []):
33
- print(f"Processing required report: {required}")
34
- category = required.get("category")
35
- report_type = required.get("report_type")
36
-
37
- print(f"Checking category: {category}")
38
- if category in self.web_data:
39
- matching_reports = []
40
- for report_name, report_details in self.web_data[category].items():
41
- print(f"Checking report: {report_name}")
42
- required_fields = set(required.get("fields_needed", []))
43
- available_fields = set(report_details["fields"].keys())
44
-
45
- print(f"Required fields: {required_fields}")
46
- print(f"Available fields: {available_fields}")
47
-
48
- if required_fields.issubset(available_fields):
49
- matching_reports.append({
50
- "report_name": report_name,
51
- "description": report_details["description"],
52
- "access_path": f"/web-data/{category.lower()}/{report_name}",
53
- "filters": report_details["filters"]
 
 
 
 
 
 
 
 
 
 
 
54
  })
55
-
56
- if matching_reports:
57
- availability["available_in_webdata"].extend(matching_reports)
58
- availability["access_instructions"].append(
59
- f"Access {category} data through Web Data portal using filters: "
60
- f"{', '.join(matching_reports[0]['filters'])}"
61
- )
62
  else:
63
- availability["needs_datalake"].append({
64
  "category": category,
65
  "report_type": report_type,
66
- "reason": "Required fields not available in Web Data"
67
  })
68
- else:
69
- availability["not_available"].append({
70
- "category": category,
71
- "report_type": report_type,
72
- "reason": "Category not found in Web Data"
73
- })
74
 
75
  print(f"Final availability result: {availability}")
76
  return availability
77
 
78
  def assess_request(self, request_text: str) -> Dict[str, Any]:
79
- print(f"Assessing request: {request_text}")
80
 
81
- # 1. Analyze the request
82
- analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
83
- print(f"Analysis result: {analysis}")
84
-
85
- if "error" in analysis:
86
- print(f"Error in analysis: {analysis['error']}")
87
- return {"status": "error", "message": analysis["error"]}
88
-
89
- # 2. Check data availability
90
- availability = self.check_data_availability(analysis)
91
- print(f"Availability check result: {availability}")
92
-
93
- # 3. Generate response
94
- response = {
95
- "status": "success",
96
- "request_analysis": {
97
- "interpretation": analysis.get("interpretation", ""),
98
- "confidence": analysis.get("confidence_score", "LOW")
99
- },
100
- "data_availability": {
101
- "available_reports": availability["available_in_webdata"],
102
- "access_instructions": availability["access_instructions"]
 
103
  }
104
- }
105
-
106
- if availability["needs_datalake"]:
107
- response["data_lake_requirements"] = {
108
- "reports_needed": availability["needs_datalake"],
109
- "estimated_processing_time": "2-3 business days",
110
- "requires_it_support": True
111
- }
112
-
113
- if availability["not_available"]:
114
- response["unavailable_data"] = availability["not_available"]
115
 
116
- print(f"Final response: {response}")
117
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # data_service.py
2
+ import traceback
3
  from typing import Dict, List, Any
4
  from data_structures import WEB_DATA_REPORTS
5
  from gpt_analyzer import GPTAnalyzer
 
11
  self.analyzer = GPTAnalyzer(api_key)
12
  self.web_data = WEB_DATA_REPORTS
13
  print(f"Available categories: {list(self.web_data.keys())}")
14
+ print(f"Web data structure loaded: {bool(self.web_data)}")
15
 
16
  def check_data_availability(self, analysis_result: dict) -> dict:
17
+ print(f"Checking data availability for analysis result")
18
+ print(f"Analysis result: {analysis_result}")
19
 
20
  availability = {
21
  "available_in_webdata": [],
 
24
  "access_instructions": []
25
  }
26
 
27
+ try:
28
+ for required in analysis_result.get("required_reports", []):
29
+ print(f"Processing required report: {required}")
30
+ category = required.get("category")
31
+ report_type = required.get("report_type")
32
+
33
+ if not category or not report_type:
34
+ print(f"Missing category or report_type in required report")
35
+ continue
36
+
37
+ print(f"Checking category: {category} for report type: {report_type}")
38
+ if category in self.web_data:
39
+ matching_reports = []
40
+ for report_name, report_details in self.web_data[category].items():
41
+ required_fields = set(required.get("fields_needed", []))
42
+ available_fields = set(report_details["fields"].keys())
43
+
44
+ print(f"Checking report {report_name}")
45
+ print(f"Required fields: {required_fields}")
46
+ print(f"Available fields: {available_fields}")
47
+
48
+ if required_fields.issubset(available_fields):
49
+ matching_reports.append({
50
+ "name": report_details["name"],
51
+ "description": report_details["description"],
52
+ "access_path": f"/web-data/{category.lower()}/{report_name}",
53
+ "filters": report_details["filters"]
54
+ })
55
+ print(f"Found matching report: {report_name}")
56
+
57
+ if matching_reports:
58
+ availability["available_in_webdata"].extend(matching_reports)
59
+ filters_list = ', '.join(matching_reports[0]['filters'])
60
+ availability["access_instructions"].append(
61
+ f"Access {category} data through Web Data portal using filters: {filters_list}"
62
+ )
63
+ else:
64
+ availability["needs_datalake"].append({
65
+ "category": category,
66
+ "report_type": report_type,
67
+ "reason": "Required fields not available in Web Data"
68
  })
 
 
 
 
 
 
 
69
  else:
70
+ availability["not_available"].append({
71
  "category": category,
72
  "report_type": report_type,
73
+ "reason": "Category not found in Web Data"
74
  })
75
+
76
+ except Exception as e:
77
+ print(f"Error in check_data_availability: {str(e)}")
78
+ print(traceback.format_exc())
79
+ availability["error"] = str(e)
 
80
 
81
  print(f"Final availability result: {availability}")
82
  return availability
83
 
84
  def assess_request(self, request_text: str) -> Dict[str, Any]:
85
+ print(f"Starting assessment of request: {request_text}")
86
 
87
+ try:
88
+ # 1. Analyze the request
89
+ analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
90
+ print(f"Analysis completed: {analysis}")
91
+
92
+ if "error" in analysis:
93
+ return {"status": "error", "message": analysis["error"]}
94
+
95
+ # 2. Check data availability
96
+ availability = self.check_data_availability(analysis)
97
+ print(f"Availability check completed: {availability}")
98
+
99
+ # 3. Generate response
100
+ response = {
101
+ "status": "success",
102
+ "request_analysis": {
103
+ "interpretation": analysis.get("interpretation", ""),
104
+ "confidence": analysis.get("confidence_score", "LOW")
105
+ },
106
+ "data_availability": {
107
+ "available_reports": availability["available_in_webdata"],
108
+ "access_instructions": availability["access_instructions"]
109
+ }
110
  }
 
 
 
 
 
 
 
 
 
 
 
111
 
112
+ if availability["needs_datalake"]:
113
+ response["data_lake_requirements"] = {
114
+ "reports_needed": availability["needs_datalake"],
115
+ "estimated_processing_time": "2-3 business days",
116
+ "requires_it_support": True
117
+ }
118
+
119
+ if availability["not_available"]:
120
+ response["unavailable_data"] = availability["not_available"]
121
+
122
+ print(f"Final response prepared: {response}")
123
+ return response
124
+
125
+ except Exception as e:
126
+ print(f"Error in assess_request: {str(e)}")
127
+ print(traceback.format_exc())
128
+ return {
129
+ "status": "error",
130
+ "message": f"Error processing request: {str(e)}"
131
+ }