File size: 4,884 Bytes
df69a6f
e4184fb
df69a6f
 
 
 
 
3297640
 
df69a6f
 
3297640
df69a6f
 
3297640
 
df69a6f
 
 
 
 
 
 
3297640
 
 
 
 
 
 
 
df69a6f
3297640
 
 
df69a6f
3297640
df69a6f
 
 
3297640
df69a6f
 
 
3297640
 
 
df69a6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3297640
df69a6f
 
 
3297640
 
df69a6f
 
3297640
 
df69a6f
3297640
df69a6f
 
 
 
3297640
df69a6f
 
 
 
 
3297640
 
df69a6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3297640
df69a6f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# data_service.py
from typing import Dict, List, Any
from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer

class DataAssessmentService:
    """Assess a free-text data request against the Web Data report catalogue.

    A ``GPTAnalyzer`` turns the request into a structured analysis; the
    reports that analysis asks for are then compared with the catalogue held
    in ``self.web_data``.

    As read by this class, ``self.web_data`` maps a category name to a
    mapping of report name -> report details, where each details mapping has
    a ``"fields"`` mapping, a ``"description"`` and an iterable of string
    ``"filters"``.
    """

    def __init__(self, api_key: str):
        """Create the analyzer and attach the report catalogue.

        Args:
            api_key: Key passed to ``GPTAnalyzer``. Only whether it is
                truthy is printed, never the key itself.
        """
        print("Initializing DataAssessmentService")
        print(f"API Key available: {bool(api_key)}")
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS
        print(f"Available categories: {list(self.web_data.keys())}")

    def _find_matching_reports(self, category: str, required_fields: set) -> List[Dict[str, Any]]:
        """Return the reports in *category* whose fields cover *required_fields*."""
        matching_reports = []
        for report_name, report_details in self.web_data[category].items():
            print(f"Checking report: {report_name}")
            available_fields = set(report_details["fields"].keys())

            print(f"Required fields: {required_fields}")
            print(f"Available fields: {available_fields}")

            if required_fields.issubset(available_fields):
                matching_reports.append({
                    "report_name": report_name,
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"]
                })
        return matching_reports

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Sort every required report into an availability bucket.

        Args:
            analysis_result: Analyzer output. An ``"error"`` key or an
                empty/missing ``"required_reports"`` list yields the empty
                result. Each required report is a dict read for
                ``"category"``, ``"report_type"`` and ``"fields_needed"``.

        Returns:
            A dict with four lists:

            * ``"available_in_webdata"`` - catalogue reports that provide all
              needed fields.
            * ``"needs_datalake"`` - requests whose category exists but no
              report in it provides all needed fields.
            * ``"not_available"`` - requests whose category is not in the
              catalogue.
            * ``"access_instructions"`` - one sentence per satisfied request,
              built from the filters of the first matching report.
        """
        print(f"Checking availability for analysis: {analysis_result}")

        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": []
        }

        if "error" in analysis_result:
            print(f"Error in analysis result: {analysis_result['error']}")
            return availability

        required_reports = analysis_result.get("required_reports")
        if not required_reports:
            print("No required_reports in analysis result")
            return availability

        for required in required_reports:
            print(f"Processing required report: {required}")
            category = required.get("category")
            report_type = required.get("report_type")

            print(f"Checking category: {category}")
            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data"
                })
                continue

            # The key may be present with an explicit None; `or []` treats
            # that the same as a missing key instead of raising TypeError.
            # An empty requirement set is a subset of every report's fields,
            # so such a request matches all reports in the category.
            required_fields = set(required.get("fields_needed") or [])
            matching_reports = self._find_matching_reports(category, required_fields)

            if matching_reports:
                availability["available_in_webdata"].extend(matching_reports)
                availability["access_instructions"].append(
                    f"Access {category} data through Web Data portal using filters: "
                    f"{', '.join(matching_reports[0]['filters'])}"
                )
            else:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data"
                })

        print(f"Final availability result: {availability}")
        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze *request_text* and report how the requested data can be obtained.

        Args:
            request_text: The free-text request passed to the analyzer
                together with the list of catalogue categories.

        Returns:
            ``{"status": "error", "message": ...}`` when the analysis
            contains an ``"error"`` key. Otherwise a dict with
            ``"status": "success"``, a ``"request_analysis"`` section
            (interpretation and confidence, defaulting to ``""`` and
            ``"LOW"``) and a ``"data_availability"`` section.
            ``"data_lake_requirements"`` and ``"unavailable_data"`` are added
            only when the corresponding availability lists are non-empty.
        """
        print(f"Assessing request: {request_text}")

        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
        print(f"Analysis result: {analysis}")

        if "error" in analysis:
            print(f"Error in analysis: {analysis['error']}")
            return {"status": "error", "message": analysis["error"]}

        # 2. Check data availability
        availability = self.check_data_availability(analysis)
        print(f"Availability check result: {availability}")

        # 3. Generate response
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis.get("interpretation", ""),
                "confidence": analysis.get("confidence_score", "LOW")
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"]
            }
        }

        if availability["needs_datalake"]:
            # Processing time and IT-support flag are fixed values, not
            # derived from the request.
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True
            }

        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        print(f"Final response: {response}")
        return response