# Spaces: Sleeping Sleeping  -- stray non-code text found ahead of the module; kept as a comment so the file parses.
# data_service.py
from typing import Dict, List, Any

from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer
class DataAssessmentService:
    """Assess a free-text data request against the Web Data report catalogue.

    The service asks a ``GPTAnalyzer`` to turn the request into a list of
    required reports, then checks each one against ``self.web_data``, which
    the methods below read as::

        {category: {report_name: {"description": ...,
                                  "fields": ...,
                                  "filters": ...}}}
    """

    # Estimate reported whenever a request has to be served from the data lake.
    DATALAKE_PROCESSING_TIME = "2-3 business days"

    def __init__(self, api_key: str) -> None:
        """Create the analyzer and bind the report catalogue.

        Args:
            api_key: Passed straight through to ``GPTAnalyzer``.
        """
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS

    def _find_matching_reports(
        self, category: str, required_fields: set
    ) -> List[Dict[str, Any]]:
        """Return the reports in ``category`` whose fields cover ``required_fields``."""
        return [
            {
                "report_name": report_name,
                "description": report_details["description"],
                "access_path": f"/web-data/{category.lower()}/{report_name}",
                "filters": report_details["filters"],
            }
            for report_name, report_details in self.web_data[category].items()
            if required_fields.issubset(set(report_details["fields"]))
        ]

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify every required report by where its data can be obtained.

        Args:
            analysis_result: Analyzer output. Only ``"required_reports"`` is
                read; each entry may carry ``"category"``, ``"report_type"``
                and ``"fields_needed"``.

        Returns:
            A dict with four lists:

            * ``"available_in_webdata"`` - catalogue reports whose fields
              cover everything the entry needs.
            * ``"access_instructions"`` - one sentence per satisfied entry,
              built from the filters of its first matching report.
            * ``"needs_datalake"`` - entries whose category exists but no
              single report has all the needed fields.
            * ``"not_available"`` - entries whose category is not in the
              catalogue.
        """
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }

        # `or []`: a key that is present but null would bypass a .get() default
        # and make the loop raise TypeError.
        for required in analysis_result.get("required_reports") or []:
            # .get(): an entry missing these keys is reported (with None)
            # instead of aborting the whole assessment with KeyError.
            category = required.get("category")
            report_type = required.get("report_type")

            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data",
                })
                continue

            # Built once per entry; it does not depend on the report being tested.
            required_fields = set(required.get("fields_needed") or [])
            matching_reports = self._find_matching_reports(category, required_fields)

            if not matching_reports:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data",
                })
                continue

            availability["available_in_webdata"].extend(matching_reports)
            availability["access_instructions"].append(
                f"Access {category} data through Web Data portal using filters: "
                f"{', '.join(matching_reports[0]['filters'])}"
            )

        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze a request and report where the data it needs can be found.

        Args:
            request_text: The user's request, forwarded to the analyzer
                together with the list of catalogue categories.

        Returns:
            ``{"status": "error", "message": ...}`` when the analysis contains
            an ``"error"`` key. Otherwise a ``"status": "success"`` dict with
            ``"request_analysis"`` and ``"data_availability"``, plus
            ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when
            those lists are non-empty.
        """
        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data))
        if "error" in analysis:
            return {"status": "error", "message": analysis["error"]}

        # 2. Check data availability
        availability = self.check_data_availability(analysis)

        # 3. Generate response
        response: Dict[str, Any] = {
            "status": "success",
            "request_analysis": {
                # .get(): a missing field becomes None rather than discarding
                # the availability result that was just computed.
                "interpretation": analysis.get("interpretation"),
                "confidence": analysis.get("confidence_score"),
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }

        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": self.DATALAKE_PROCESSING_TIME,
                "requires_it_support": True,
            }

        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        return response