# data_service.py
import traceback
from typing import Dict, List, Any

from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer


class DataAssessmentService:
    """Assess a free-text data request against the Web Data report catalogue.

    The service asks ``GPTAnalyzer`` to interpret the request, then checks
    which of the reports named in that analysis can be served from
    ``WEB_DATA_REPORTS``.

    As used below, ``WEB_DATA_REPORTS`` is a mapping
    ``category -> report_name -> details`` where ``details`` provides
    ``"name"``, ``"description"``, ``"fields"`` (a mapping keyed by field
    name) and ``"filters"`` (an iterable of strings).
    """

    def __init__(self, api_key: str):
        """Create the analyzer and bind the Web Data catalogue.

        Args:
            api_key: Key handed to ``GPTAnalyzer``. Only its truthiness is
                printed, never the key itself.
        """
        print("Initializing DataAssessmentService")
        print(f"API Key available: {bool(api_key)}")
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS
        print(f"Available categories: {list(self.web_data.keys())}")
        print(f"Web data structure loaded: {bool(self.web_data)}")

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify each required report by where its data can be obtained.

        Args:
            analysis_result: Analysis dict. Its ``"required_reports"`` list is
                read; each entry is expected to carry ``"category"``,
                ``"report_type"`` and optionally ``"fields_needed"``.

        Returns:
            A dict with the list-valued keys ``"available_in_webdata"``,
            ``"needs_datalake"``, ``"not_available"`` and
            ``"access_instructions"``. If an exception occurs while
            processing, an ``"error"`` key holding the message is added and
            whatever was collected up to that point is returned.
        """
        print("Checking data availability for analysis result")
        print(f"Analysis result: {analysis_result}")

        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }

        # The whole loop sits inside one try: a malformed entry stops
        # processing of the remaining entries and is reported via "error".
        try:
            for required in analysis_result.get("required_reports", []):
                print(f"Processing required report: {required}")

                category = required.get("category")
                report_type = required.get("report_type")

                if not category or not report_type:
                    print(f"Missing category or report_type in required report")
                    continue

                print(f"Checking category: {category} for report type: {report_type}")

                if category not in self.web_data:
                    availability["not_available"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Category not found in Web Data",
                    })
                    continue

                # Loop-invariant for this requirement, so build it once.
                required_fields = set(required.get("fields_needed", []))
                matching_reports = []

                for report_name, report_details in self.web_data[category].items():
                    available_fields = set(report_details["fields"].keys())

                    print(f"Checking report {report_name}")
                    print(f"Required fields: {required_fields}")
                    print(f"Available fields: {available_fields}")

                    # A report matches when it offers every required field;
                    # an empty requirement therefore matches every report.
                    if required_fields.issubset(available_fields):
                        matching_reports.append({
                            "name": report_details["name"],
                            "description": report_details["description"],
                            "access_path": f"/web-data/{category.lower()}/{report_name}",
                            "filters": report_details["filters"],
                        })
                        print(f"Found matching report: {report_name}")

                if matching_reports:
                    availability["available_in_webdata"].extend(matching_reports)
                    # Instructions quote the filters of the first match only.
                    filters_list = ', '.join(matching_reports[0]['filters'])
                    availability["access_instructions"].append(
                        f"Access {category} data through Web Data portal using filters: {filters_list}"
                    )
                else:
                    availability["needs_datalake"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Required fields not available in Web Data",
                    })

        except Exception as e:
            print(f"Error in check_data_availability: {str(e)}")
            print(traceback.format_exc())
            availability["error"] = str(e)

        print(f"Final availability result: {availability}")
        return availability

    def assess_request(self, request_text: str):
        """Analyze a request and report which data is available for it.

        Args:
            request_text: The free-text request passed to the analyzer.

        Returns:
            On success, a dict with ``"status": "success"``,
            ``"request_analysis"`` and ``"data_availability"``, plus
            ``"data_lake_requirements"`` and/or ``"unavailable_data"`` when
            the availability check produced such entries. If the
            availability check or response assembly fails, a dict with
            ``"status": "error"`` and a ``"message"``.

        Raises:
            Exception: Whatever ``self.analyzer.analyze_request`` raises is
                logged and re-raised unchanged.
        """
        print("=== Debug: Calling GPTAnalyzer ===")
        print(f"Request text: {request_text}")

        try:
            # 1. Analyze the request
            analysis = self.analyzer.analyze_request(
                request_text, list(self.web_data.keys())
            )
            print("GPT Response:", analysis)
        except Exception as e:
            print(f"Error during GPT analysis: {str(e)}")
            raise

        # Steps 2-3 were previously unreachable: a debug `return analysis`
        # ended the method right after step 1, and the code below sat after a
        # `raise` without a `try` of its own.
        try:
            # 2. Check data availability
            availability = self.check_data_availability(analysis)
            print(f"Availability check completed: {availability}")

            # 3. Generate response
            response = {
                "status": "success",
                "request_analysis": {
                    "interpretation": analysis.get("interpretation", ""),
                    "confidence": analysis.get("confidence_score", "LOW"),
                },
                "data_availability": {
                    "available_reports": availability["available_in_webdata"],
                    "access_instructions": availability["access_instructions"],
                },
            }

            if availability["needs_datalake"]:
                response["data_lake_requirements"] = {
                    "reports_needed": availability["needs_datalake"],
                    "estimated_processing_time": "2-3 business days",
                    "requires_it_support": True,
                }

            if availability["not_available"]:
                response["unavailable_data"] = availability["not_available"]

            print(f"Final response prepared: {response}")
            return response

        except Exception as e:
            print(f"Error in assess_request: {str(e)}")
            print(traceback.format_exc())
            return {
                "status": "error",
                "message": f"Error processing request: {str(e)}",
            }