File size: 3,657 Bytes
df69a6f
e4184fb
df69a6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# data_service.py
from typing import Dict, List, Any
from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer

class DataAssessmentService:
    """Assess a free-text data request against the Web Data report catalogue.

    The request is interpreted by a ``GPTAnalyzer``; the reports it asks for
    are then looked up in ``WEB_DATA_REPORTS``, which this class reads as
    ``{category: {report_name: {"fields", "description", "filters"}}}``.
    """

    def __init__(self, api_key: str):
        """Create the analyzer for *api_key* and bind the report catalogue."""
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS

    def _matching_reports(self, category: str, required_fields: set) -> List[Dict[str, Any]]:
        """Return one entry per report in *category* offering every required field."""
        return [
            {
                "report_name": report_name,
                "description": report_details["description"],
                "access_path": f"/web-data/{category.lower()}/{report_name}",
                "filters": report_details["filters"],
            }
            for report_name, report_details in self.web_data[category].items()
            # Iterating a mapping yields its keys, so this is the same check
            # as comparing against set(fields.keys()).
            if required_fields.issubset(report_details["fields"])
        ]

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Sort every requested report into where (if anywhere) it can be obtained.

        Args:
            analysis_result: Analyzer output. Only ``"required_reports"`` is
                read; each entry may carry ``"category"``, ``"report_type"``
                and ``"fields_needed"``. Missing or ``None`` values are
                tolerated because the analyzer output is not guaranteed to
                be complete.

        Returns:
            A dict with four lists:
            ``"available_in_webdata"`` (matching report entries),
            ``"needs_datalake"`` (category known, but no report has all fields),
            ``"not_available"`` (category unknown or missing) and
            ``"access_instructions"`` (one sentence per satisfied request).
        """
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }

        # `or []` also covers an explicit null from the analyzer.
        for required in analysis_result.get("required_reports") or []:
            category = required.get("category")
            report_type = required.get("report_type")

            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data",
                })
                continue

            # Built once per request rather than once per catalogue report.
            required_fields = set(required.get("fields_needed") or [])
            matching_reports = self._matching_reports(category, required_fields)

            if not matching_reports:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data",
                })
                continue

            availability["available_in_webdata"].extend(matching_reports)
            # Only the first matching report's filters are quoted; str() keeps
            # the join from failing on non-string filter values.
            availability["access_instructions"].append(
                f"Access {category} data through Web Data portal using filters: "
                f"{', '.join(map(str, matching_reports[0]['filters']))}"
            )

        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze *request_text* and report how the requested data can be obtained.

        Returns:
            ``{"status": "error", "message": ...}`` when the analyzer result
            contains an ``"error"`` key. Otherwise a ``"success"`` response
            with ``"request_analysis"`` and ``"data_availability"``, plus
            ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when
            those lists are non-empty. ``interpretation`` and ``confidence``
            are ``None`` if the analyzer did not supply them.
        """
        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
        if "error" in analysis:
            return {"status": "error", "message": analysis["error"]}

        # 2. Check data availability
        availability = self.check_data_availability(analysis)

        # 3. Generate response
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis.get("interpretation"),
                "confidence": analysis.get("confidence_score"),
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }

        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }

        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        return response