Data_request / data_service.py
Rathapoom's picture
Update data_service.py
3297640 verified
raw
history blame
4.88 kB
# data_service.py
from typing import Dict, List, Any
from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer
class DataAssessmentService:
    """Assess a free-text data request against the Web Data report catalogue.

    The service asks its analyzer to turn the request text into a list of
    required reports, then checks each one against ``self.web_data``.

    ``self.web_data`` is read as a mapping
    ``category -> report_name -> report_details`` where ``report_details``
    provides the keys ``"fields"`` (a mapping whose keys are field names),
    ``"description"`` and ``"filters"`` (an iterable of strings).
    """

    def __init__(self, api_key: str):
        """Create the analyzer and load the Web Data catalogue.

        Args:
            api_key: Key handed to ``GPTAnalyzer``. Only its truthiness is
                printed, never the key itself.
        """
        print("Initializing DataAssessmentService")
        print(f"API Key available: {bool(api_key)}")
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS
        print(f"Available categories: {list(self.web_data.keys())}")

    @staticmethod
    def _normalize_fields(fields_needed: Any) -> set:
        """Return ``fields_needed`` as a set of field names.

        A missing/``None``/empty value yields an empty set, and a single
        string is treated as one field name rather than being split into
        characters (which is what ``set("name")`` would do).
        """
        if not fields_needed:
            return set()
        if isinstance(fields_needed, str):
            return {fields_needed}
        return set(fields_needed)

    def _find_matching_reports(self, category: str, required_fields: set) -> List[Dict[str, Any]]:
        """List the reports in ``category`` that contain every required field."""
        matching_reports = []
        for report_name, report_details in self.web_data[category].items():
            print(f"Checking report: {report_name}")
            available_fields = set(report_details["fields"].keys())
            print(f"Required fields: {required_fields}")
            print(f"Available fields: {available_fields}")
            if required_fields.issubset(available_fields):
                matching_reports.append({
                    "report_name": report_name,
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"],
                })
        return matching_reports

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify every required report by where its data can be obtained.

        Args:
            analysis_result: Analyzer output. If it contains ``"error"`` or
                has no ``"required_reports"``, an empty result is returned.
                Each required report is expected to be a dict with
                ``"category"``, ``"report_type"`` and ``"fields_needed"``.

        Returns:
            A dict with four lists:

            * ``"available_in_webdata"`` - matching report descriptors,
            * ``"needs_datalake"`` - category exists but no single report
              covers all required fields,
            * ``"not_available"`` - category is not in the catalogue,
            * ``"access_instructions"`` - one sentence per satisfied
              requirement, quoting the filters of the first matching report.
        """
        print(f"Checking availability for analysis: {analysis_result}")
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }

        if "error" in analysis_result:
            print(f"Error in analysis result: {analysis_result['error']}")
            return availability

        required_reports = analysis_result.get("required_reports")
        if not required_reports:
            print("No required_reports in analysis result")
            return availability

        for required in required_reports:
            print(f"Processing required report: {required}")
            # The analysis comes from an external analyzer; tolerate entries
            # that are not dicts instead of crashing on ``.get``.
            if not isinstance(required, dict):
                print(f"Skipping malformed required report: {required!r}")
                continue

            category = required.get("category")
            report_type = required.get("report_type")
            print(f"Checking category: {category}")

            # The isinstance check keeps an unhashable category (e.g. a list)
            # from raising inside the ``in`` lookup.
            if not (isinstance(category, str) and category in self.web_data):
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data",
                })
                continue

            # Built once per requirement; it does not depend on the report.
            required_fields = self._normalize_fields(required.get("fields_needed"))
            matching_reports = self._find_matching_reports(category, required_fields)

            if not matching_reports:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data",
                })
                continue

            availability["available_in_webdata"].extend(matching_reports)
            availability["access_instructions"].append(
                f"Access {category} data through Web Data portal using filters: "
                f"{', '.join(matching_reports[0]['filters'])}"
            )

        print(f"Final availability result: {availability}")
        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze a request and report which data can satisfy it.

        Args:
            request_text: The user's free-text data request.

        Returns:
            ``{"status": "error", "message": ...}`` when the analyzer reports
            an error. Otherwise a ``"status": "success"`` dict with
            ``"request_analysis"`` and ``"data_availability"``, plus
            ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when
            some required reports could not be served from Web Data.
        """
        print(f"Assessing request: {request_text}")

        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
        print(f"Analysis result: {analysis}")
        if "error" in analysis:
            print(f"Error in analysis: {analysis['error']}")
            return {"status": "error", "message": analysis["error"]}

        # 2. Check data availability
        availability = self.check_data_availability(analysis)
        print(f"Availability check result: {availability}")

        # 3. Generate response
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis.get("interpretation", ""),
                "confidence": analysis.get("confidence_score", "LOW"),
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }

        # Optional sections are only added when they have content.
        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }
        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]

        print(f"Final response: {response}")
        return response