Spaces:
Sleeping
Sleeping
File size: 4,884 Bytes
df69a6f e4184fb df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f 3297640 df69a6f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# data_service.py
from typing import Dict, List, Any
from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer
class DataAssessmentService:
    """Decide whether a free-text data request can be served from Web Data.

    The service asks a ``GPTAnalyzer`` to turn the request into a list of
    required reports, then compares each one against the ``WEB_DATA_REPORTS``
    catalogue. As used here, the catalogue is a mapping of
    ``category -> report_name -> details``, where ``details`` carries the
    keys ``"fields"`` (a mapping keyed by field name), ``"description"`` and
    ``"filters"`` (an iterable of strings).
    """

    def __init__(self, api_key: str):
        """Create the analyzer and bind the Web Data catalogue.

        Args:
            api_key: Key passed to ``GPTAnalyzer``. Only its truthiness is
                printed, never the key itself.
        """
        print("Initializing DataAssessmentService")
        print(f"API Key available: {bool(api_key)}")
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS
        print(f"Available categories: {list(self.web_data.keys())}")

    @staticmethod
    def _as_field_set(fields_needed) -> set:
        """Normalize a ``fields_needed`` value into a set of field names."""
        # The analysis comes from a language model, so the value may be
        # missing, an explicit null, or a single bare string instead of a list.
        if not fields_needed:
            return set()
        if isinstance(fields_needed, str):
            # set("revenue") would yield single characters, which can never
            # match a real field name.
            return {fields_needed}
        return set(fields_needed)

    def _find_matching_reports(self, category, required_fields: set) -> list:
        """Return descriptors for reports in *category* covering every required field."""
        matching_reports = []
        for report_name, report_details in self.web_data[category].items():
            print(f"Checking report: {report_name}")
            available_fields = set(report_details["fields"].keys())
            print(f"Required fields: {required_fields}")
            print(f"Available fields: {available_fields}")
            if required_fields.issubset(available_fields):
                matching_reports.append({
                    "report_name": report_name,
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"]
                })
        return matching_reports

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify every required report by where its data can be obtained.

        Args:
            analysis_result: Analyzer output. An ``"error"`` key, or a missing
                or empty ``"required_reports"`` list, yields an empty result.
                Each entry of ``"required_reports"`` is read for the keys
                ``"category"``, ``"report_type"`` and ``"fields_needed"``.

        Returns:
            A dict with four lists:

            * ``"available_in_webdata"``: descriptors of catalogue reports
              whose fields cover everything in ``fields_needed``.
            * ``"access_instructions"``: one sentence per satisfied entry,
              built from the filters of the first matching report.
            * ``"needs_datalake"``: entries whose category exists but has no
              report covering the required fields.
            * ``"not_available"``: entries whose category is not in the
              catalogue.
        """
        print(f"Checking availability for analysis: {analysis_result}")
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": []
        }
        if "error" in analysis_result:
            print(f"Error in analysis result: {analysis_result['error']}")
            return availability
        required_reports = analysis_result.get("required_reports")
        if not required_reports:
            print("No required_reports in analysis result")
            return availability
        for required in required_reports:
            print(f"Processing required report: {required}")
            category = required.get("category")
            report_type = required.get("report_type")
            print(f"Checking category: {category}")
            if category not in self.web_data:
                availability["not_available"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Category not found in Web Data"
                })
                continue
            # Built once per entry; it does not depend on the report examined.
            required_fields = self._as_field_set(required.get("fields_needed"))
            matching_reports = self._find_matching_reports(category, required_fields)
            if matching_reports:
                availability["available_in_webdata"].extend(matching_reports)
                availability["access_instructions"].append(
                    f"Access {category} data through Web Data portal using filters: "
                    f"{', '.join(matching_reports[0]['filters'])}"
                )
            else:
                availability["needs_datalake"].append({
                    "category": category,
                    "report_type": report_type,
                    "reason": "Required fields not available in Web Data"
                })
        print(f"Final availability result: {availability}")
        return availability

    def assess_request(self, request_text: str) -> Dict[str, Any]:
        """Analyze a request and report how its data can be obtained.

        Args:
            request_text: The free-text request, forwarded to the analyzer
                together with the list of catalogue categories.

        Returns:
            ``{"status": "error", "message": ...}`` when the analysis contains
            an ``"error"`` key. Otherwise a ``"status": "success"`` dict with
            ``"request_analysis"`` and ``"data_availability"`` sections, plus
            ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when
            some required reports cannot be served from Web Data.
        """
        print(f"Assessing request: {request_text}")
        # 1. Analyze the request
        analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
        print(f"Analysis result: {analysis}")
        if "error" in analysis:
            print(f"Error in analysis: {analysis['error']}")
            return {"status": "error", "message": analysis["error"]}
        # 2. Check data availability
        availability = self.check_data_availability(analysis)
        print(f"Availability check result: {availability}")
        # 3. Generate response
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis.get("interpretation", ""),
                "confidence": analysis.get("confidence_score", "LOW")
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"]
            }
        }
        # The optional sections are only attached when they have content.
        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True
            }
        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]
        print(f"Final response: {response}")
        return response