# data_service.py
from typing import Dict, List, Any

from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer


class DataAssessmentService:
    """Assess a free-text data request against the Web Data report catalog.

    The request is first interpreted by a ``GPTAnalyzer``; the reports it
    asks for are then matched against ``WEB_DATA_REPORTS`` to decide whether
    each one is served by Web Data, needs the data lake, or is unavailable.
    """

    def __init__(self, api_key: str):
        """Create the analyzer and bind the Web Data report catalog.

        Args:
            api_key: Key handed to ``GPTAnalyzer``.
        """
        self.analyzer = GPTAnalyzer(api_key)
        # Catalog is indexed as web_data[category][report_name] -> details,
        # where details carries "fields", "description" and "filters".
        self.web_data = WEB_DATA_REPORTS

    def check_data_availability(
        self, analysis_result: Dict[str, Any]
    ) -> Dict[str, List[Any]]:
        """Classify every required report by where its data can be obtained.

        Args:
            analysis_result: Analyzer output. Its optional
                ``"required_reports"`` list holds dicts with ``"category"``,
                ``"report_type"`` and, optionally, ``"fields_needed"``.

        Returns:
            A dict with four lists:

            * ``"available_in_webdata"`` - catalog reports whose fields cover
              every needed field.
            * ``"needs_datalake"`` - requirements whose category exists but
              no report in it covers the needed fields.
            * ``"not_available"`` - requirements whose category is not in the
              catalog.
            * ``"access_instructions"`` - one sentence per satisfied
              requirement.

        Raises:
            KeyError: If a required entry lacks ``"category"`` or
                ``"report_type"``, or a catalog entry lacks ``"fields"``,
                ``"description"`` or ``"filters"``.
        """
        availability: Dict[str, List[Any]] = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }

        for required in analysis_result.get("required_reports", []):
            category = required["category"]
            report_type = required["report_type"]

            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data",
                })
                continue

            # Built once per requirement; it does not depend on the report
            # being inspected. An empty set matches every report.
            required_fields = set(required.get("fields_needed", []))

            # NOTE(review): "filters" is handed out by reference from the
            # shared catalog - confirm callers treat the result as read-only.
            matching_reports = [
                {
                    "report_name": report_name,
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"],
                }
                for report_name, report_details in self.web_data[category].items()
                if required_fields.issubset(report_details["fields"].keys())
            ]

            if matching_reports:
                availability["available_in_webdata"].extend(matching_reports)
                # Only the first matching report's filters are advertised.
                availability["access_instructions"].append(
                    f"Access {category} data through Web Data portal using filters: "
                    f"{', '.join(matching_reports[0]['filters'])}"
                )
            else:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data",
                })

        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze a request and report how its data needs can be met.

        Args:
            request_text: The user's request in free text.

        Returns:
            ``{"status": "error", "message": ...}`` when the analyzer reports
            an error or its result lacks ``"interpretation"`` or
            ``"confidence_score"``. Otherwise a ``"status": "success"`` dict
            with ``"request_analysis"`` and ``"data_availability"``, plus
            ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when
            those lists are non-empty.
        """
        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(
            request_text, list(self.web_data)
        )
        if "error" in analysis:
            return {"status": "error", "message": analysis["error"]}

        # An incomplete analysis is reported through the same error contract
        # instead of surfacing later as a KeyError.
        missing_keys = [
            key
            for key in ("interpretation", "confidence_score")
            if key not in analysis
        ]
        if missing_keys:
            return {
                "status": "error",
                "message": (
                    "Analyzer response is missing required keys: "
                    f"{', '.join(missing_keys)}"
                ),
            }

        # 2. Check data availability
        availability = self.check_data_availability(analysis)

        # 3. Generate response
        response: Dict[str, Any] = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis["interpretation"],
                "confidence": analysis["confidence_score"],
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }

        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }

        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        return response