File size: 5,748 Bytes
df69a6f
996fcd5
e4184fb
df69a6f
 
 
 
 
3297640
 
df69a6f
 
3297640
996fcd5
df69a6f
 
996fcd5
 
3297640
df69a6f
 
 
 
 
 
 
996fcd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df69a6f
 
996fcd5
df69a6f
 
996fcd5
df69a6f
996fcd5
 
 
 
 
df69a6f
3297640
df69a6f
 
1892580
 
 
996fcd5
 
 
1892580
 
 
 
 
996fcd5
1892580
996fcd5
 
 
 
 
 
 
 
 
 
 
 
 
 
df69a6f
 
996fcd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# data_service.py
import traceback
from typing import Dict, List, Any
from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer

class DataAssessmentService:
    """Assess free-text data requests against the Web Data report catalogue.

    The service asks a ``GPTAnalyzer`` to turn a request into a list of
    required reports, then checks each required report against the
    ``WEB_DATA_REPORTS`` catalogue held in ``self.web_data``.
    """

    def __init__(self, api_key: str):
        """Create the analyzer and load the Web Data catalogue.

        Args:
            api_key: Key handed to ``GPTAnalyzer``. Only its truthiness is
                printed, never the key itself.
        """
        print("Initializing DataAssessmentService")
        print(f"API Key available: {bool(api_key)}")
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS
        print(f"Available categories: {list(self.web_data.keys())}")
        print(f"Web data structure loaded: {bool(self.web_data)}")

    def _find_matching_reports(self, category: str, required_fields: set) -> list:
        """Return catalogue reports in *category* that offer every required field."""
        matching_reports = []
        for report_name, report_details in self.web_data[category].items():
            available_fields = set(report_details["fields"].keys())

            print(f"Checking report {report_name}")
            print(f"Required fields: {required_fields}")
            print(f"Available fields: {available_fields}")

            if required_fields.issubset(available_fields):
                matching_reports.append({
                    "name": report_details["name"],
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"]
                })
                print(f"Found matching report: {report_name}")
        return matching_reports

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify every required report by where its data can be obtained.

        Args:
            analysis_result: Analyzer output. Its ``"required_reports"`` list
                is read; each entry is expected to carry ``"category"``,
                ``"report_type"`` and optionally ``"fields_needed"``.

        Returns:
            A dict with the lists ``"available_in_webdata"``,
            ``"needs_datalake"``, ``"not_available"`` and
            ``"access_instructions"``. If processing fails part-way, the
            results gathered so far are returned together with an ``"error"``
            key holding the exception message.
        """
        print("Checking data availability for analysis result")
        print(f"Analysis result: {analysis_result}")

        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": []
        }

        try:
            for required in analysis_result.get("required_reports", []):
                print(f"Processing required report: {required}")
                category = required.get("category")
                report_type = required.get("report_type")

                if not category or not report_type:
                    print("Missing category or report_type in required report")
                    continue

                print(f"Checking category: {category} for report type: {report_type}")
                if category not in self.web_data:
                    availability["not_available"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Category not found in Web Data"
                    })
                    continue

                # Computed once per required report (it does not depend on the
                # catalogue entry); a missing or None list means "no specific
                # fields required".
                required_fields = set(required.get("fields_needed") or [])
                matching_reports = self._find_matching_reports(category, required_fields)

                if matching_reports:
                    availability["available_in_webdata"].extend(matching_reports)
                    # Instructions quote the filters of the first match only.
                    filters_list = ', '.join(matching_reports[0]['filters'])
                    availability["access_instructions"].append(
                        f"Access {category} data through Web Data portal using filters: {filters_list}"
                    )
                else:
                    availability["needs_datalake"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Required fields not available in Web Data"
                    })

        except Exception as e:
            # Best effort: keep whatever was classified before the failure and
            # record the problem for the caller.
            print(f"Error in check_data_availability: {str(e)}")
            print(traceback.format_exc())
            availability["error"] = str(e)

        print(f"Final availability result: {availability}")
        return availability

    def assess_request(self, request_text: str):
        """Analyze a request and report where the needed data is available.

        Runs three steps: analyze the request text, check the required
        reports against the catalogue, and assemble the response.

        Args:
            request_text: The free-text data request.

        Returns:
            On success, a dict with ``"status": "success"``,
            ``"request_analysis"`` and ``"data_availability"``, plus
            ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when
            applicable. On any failure, a dict with ``"status": "error"`` and
            a ``"message"``; exceptions are not propagated.
        """
        print("=== Debug: Calling GPTAnalyzer ===")
        print(f"Request text: {request_text}")
        try:
            # 1. Analyze the request
            analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
            print("GPT Response:", analysis)

            # 2. Check data availability
            availability = self.check_data_availability(analysis)
            print(f"Availability check completed: {availability}")

            # 3. Generate response
            response = {
                "status": "success",
                "request_analysis": {
                    "interpretation": analysis.get("interpretation", ""),
                    "confidence": analysis.get("confidence_score", "LOW")
                },
                "data_availability": {
                    "available_reports": availability["available_in_webdata"],
                    "access_instructions": availability["access_instructions"]
                }
            }

            # Surface a partial failure recorded by the availability check
            # instead of silently dropping it.
            if "error" in availability:
                response["data_availability"]["error"] = availability["error"]

            if availability["needs_datalake"]:
                response["data_lake_requirements"] = {
                    "reports_needed": availability["needs_datalake"],
                    "estimated_processing_time": "2-3 business days",
                    "requires_it_support": True
                }

            if availability["not_available"]:
                response["unavailable_data"] = availability["not_available"]

            print(f"Final response prepared: {response}")
            return response

        except Exception as e:
            print(f"Error in assess_request: {str(e)}")
            print(traceback.format_exc())
            return {
                "status": "error",
                "message": f"Error processing request: {str(e)}"
            }