Spaces:
Sleeping
Sleeping
# data_service.py
import traceback | |
from typing import Dict, List, Any | |
from data_structures import WEB_DATA_REPORTS | |
from gpt_analyzer import GPTAnalyzer | |
class DataAssessmentService:
    """Assess free-text data requests against the Web Data report catalogue.

    The service combines a ``GPTAnalyzer`` (which interprets the request) with
    the ``WEB_DATA_REPORTS`` catalogue. The catalogue is read as a mapping of
    ``category -> report name -> report details``, where each details mapping
    provides the keys ``"name"``, ``"description"``, ``"fields"`` (a mapping)
    and ``"filters"``.
    """

    def __init__(self, api_key: str):
        """Create the analyzer and attach the Web Data catalogue.

        Args:
            api_key: Key handed to ``GPTAnalyzer``. Only its truthiness is
                printed, never the value itself.
        """
        print("Initializing DataAssessmentService")
        print(f"API Key available: {bool(api_key)}")
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS
        print(f"Available categories: {list(self.web_data.keys())}")
        print(f"Web data structure loaded: {bool(self.web_data)}")

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify every required report by where its data can be obtained.

        Args:
            analysis_result: Analysis mapping; its ``"required_reports"`` entry
                is a list of mappings with ``"category"``, ``"report_type"``
                and optionally ``"fields_needed"``. A missing or ``None``
                list is treated as empty.

        Returns:
            A dict with the lists ``"available_in_webdata"``,
            ``"needs_datalake"``, ``"not_available"`` and
            ``"access_instructions"``. If processing fails part-way, the
            results gathered so far are returned with an extra ``"error"``
            key holding the exception message.
        """
        print("Checking data availability for analysis result")
        print(f"Analysis result: {analysis_result}")
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }
        try:
            for required in analysis_result.get("required_reports") or []:
                print(f"Processing required report: {required}")
                category = required.get("category")
                report_type = required.get("report_type")
                if not category or not report_type:
                    print("Missing category or report_type in required report")
                    continue
                print(f"Checking category: {category} for report type: {report_type}")

                if category not in self.web_data:
                    availability["not_available"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Category not found in Web Data",
                    })
                    continue

                # Built once per requirement; it does not depend on which
                # catalogue report is being inspected.
                required_fields = set(required.get("fields_needed") or ())
                matching_reports = self._find_matching_reports(category, required_fields)

                if matching_reports:
                    availability["available_in_webdata"].extend(matching_reports)
                    # Instructions quote the filters of the first match only.
                    filters_list = ', '.join(matching_reports[0]['filters'])
                    availability["access_instructions"].append(
                        f"Access {category} data through Web Data portal using filters: {filters_list}"
                    )
                else:
                    availability["needs_datalake"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Required fields not available in Web Data",
                    })
        except Exception as e:
            # Best effort: keep whatever was classified before the failure
            # and report the problem alongside it.
            print(f"Error in check_data_availability: {str(e)}")
            print(traceback.format_exc())
            availability["error"] = str(e)
        print(f"Final availability result: {availability}")
        return availability

    def _find_matching_reports(self, category: str, required_fields: set) -> list:
        """Return the reports in *category* whose fields cover *required_fields*."""
        matches = []
        for report_name, report_details in self.web_data[category].items():
            available_fields = set(report_details["fields"].keys())
            print(f"Checking report {report_name}")
            print(f"Required fields: {required_fields}")
            print(f"Available fields: {available_fields}")
            if required_fields.issubset(available_fields):
                matches.append({
                    "name": report_details["name"],
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"],
                })
                print(f"Found matching report: {report_name}")
        return matches

    def assess_request(self, request_text: str, *, include_availability: bool = False):
        """Analyze a request and, optionally, report where its data lives.

        Previously the availability check and response assembly sat after an
        unconditional ``return``/``raise`` and could never run. They are now
        reachable through ``include_availability`` while the default path
        behaves exactly as before.

        Args:
            request_text: The user's free-text data request.
            include_availability: When ``False`` (default), return the
                analyzer's result unchanged and let analyzer errors
                propagate. When ``True``, also run
                ``check_data_availability`` and return a structured response.

        Returns:
            The raw analyzer result by default. With
            ``include_availability=True``, a dict with ``"status"``,
            ``"request_analysis"`` and ``"data_availability"`` (plus
            ``"data_lake_requirements"`` / ``"unavailable_data"`` when
            relevant), or ``{"status": "error", "message": ...}`` on failure.

        Raises:
            Exception: Whatever the analyzer raises, but only when
                ``include_availability`` is ``False``.
        """
        print("=== Debug: Calling GPTAnalyzer ===")
        print(f"Request text: {request_text}")

        if not include_availability:
            return self._analyze(request_text)

        try:
            # 1. Analyze the request
            analysis = self._analyze(request_text)
            # 2. Check data availability
            availability = self.check_data_availability(analysis)
            print(f"Availability check completed: {availability}")
            # 3. Generate response
            response = self._build_response(analysis, availability)
            print(f"Final response prepared: {response}")
            return response
        except Exception as e:
            print(f"Error in assess_request: {str(e)}")
            print(traceback.format_exc())
            return {
                "status": "error",
                "message": f"Error processing request: {str(e)}",
            }

    def _analyze(self, request_text: str):
        """Run the analyzer over *request_text*, logging and re-raising errors."""
        try:
            analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
            print("GPT Response:", analysis)
            return analysis
        except Exception as e:
            print(f"Error during GPT analysis: {str(e)}")
            raise

    def _build_response(self, analysis: dict, availability: dict) -> dict:
        """Assemble the success response from an analysis and its availability."""
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis.get("interpretation", ""),
                "confidence": analysis.get("confidence_score", "LOW"),
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }
        # Optional sections are added only when they have content.
        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }
        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]
        return response