Spaces:
Sleeping
Sleeping
File size: 5,748 Bytes
df69a6f 996fcd5 e4184fb df69a6f 3297640 df69a6f 3297640 996fcd5 df69a6f 996fcd5 3297640 df69a6f 996fcd5 df69a6f 996fcd5 df69a6f 996fcd5 df69a6f 996fcd5 df69a6f 3297640 df69a6f 1892580 996fcd5 1892580 996fcd5 1892580 996fcd5 df69a6f 996fcd5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# data_service.py
import traceback
from typing import Dict, List, Any
from data_structures import WEB_DATA_REPORTS
from gpt_analyzer import GPTAnalyzer
class DataAssessmentService:
    """Match an analysed data request against the Web Data report catalogue.

    The service asks a ``GPTAnalyzer`` to interpret a free-text request and can
    then check which of the reports named in that analysis are covered by
    ``WEB_DATA_REPORTS``.

    Catalogue layout as read by this class (verify against
    ``data_structures``): ``{category: {report_key: {"name", "description",
    "fields": {...}, "filters": [...]}}}``.
    """

    def __init__(self, api_key: str):
        """Create the analyzer and attach the report catalogue.

        Args:
            api_key: Key handed to ``GPTAnalyzer``. Only its truthiness is
                printed, never the value itself.
        """
        print("Initializing DataAssessmentService")
        print(f"API Key available: {bool(api_key)}")
        self.analyzer = GPTAnalyzer(api_key)
        self.web_data = WEB_DATA_REPORTS
        print(f"Available categories: {list(self.web_data.keys())}")
        print(f"Web data structure loaded: {bool(self.web_data)}")

    def _find_matching_reports(self, category: str, required_fields: set) -> List[Dict[str, Any]]:
        """Return catalogue reports in *category* that expose every required field."""
        matching_reports = []
        for report_name, report_details in self.web_data[category].items():
            available_fields = set(report_details["fields"].keys())
            print(f"Checking report {report_name}")
            print(f"Required fields: {required_fields}")
            print(f"Available fields: {available_fields}")
            if required_fields.issubset(available_fields):
                matching_reports.append({
                    "name": report_details["name"],
                    "description": report_details["description"],
                    "access_path": f"/web-data/{category.lower()}/{report_name}",
                    "filters": report_details["filters"],
                })
                print(f"Found matching report: {report_name}")
        return matching_reports

    def check_data_availability(self, analysis_result: dict) -> dict:
        """Classify each required report as available, data-lake, or missing.

        Args:
            analysis_result: Analyzer output. Only its ``"required_reports"``
                list is read; each entry is expected to be a dict with
                ``"category"``, ``"report_type"`` and optionally
                ``"fields_needed"``.

        Returns:
            A dict with the list-valued keys ``"available_in_webdata"``,
            ``"needs_datalake"``, ``"not_available"`` and
            ``"access_instructions"``. If processing fails part-way, the
            partial result is returned with an extra ``"error"`` key holding
            the exception text; this method does not raise.
        """
        print("Checking data availability for analysis result")
        print(f"Analysis result: {analysis_result}")
        availability = {
            "available_in_webdata": [],
            "needs_datalake": [],
            "not_available": [],
            "access_instructions": [],
        }
        try:
            # `or []` also covers an explicit null coming back from the analyzer.
            for required in analysis_result.get("required_reports") or []:
                print(f"Processing required report: {required}")
                if not isinstance(required, dict):
                    # One malformed entry must not abort the remaining entries.
                    print("Skipping required report that is not a mapping")
                    continue
                category = required.get("category")
                report_type = required.get("report_type")
                if not category or not report_type:
                    print("Missing category or report_type in required report")
                    continue
                print(f"Checking category: {category} for report type: {report_type}")
                if category not in self.web_data:
                    availability["not_available"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Category not found in Web Data",
                    })
                    continue
                # Computed once per request entry rather than once per report.
                # An empty set is a subset of anything, so an entry without
                # field requirements matches every report in the category.
                required_fields = set(required.get("fields_needed") or [])
                matching_reports = self._find_matching_reports(category, required_fields)
                if matching_reports:
                    availability["available_in_webdata"].extend(matching_reports)
                    # Instructions quote the filters of the first match only.
                    filters_list = ', '.join(map(str, matching_reports[0]['filters']))
                    availability["access_instructions"].append(
                        f"Access {category} data through Web Data portal using filters: {filters_list}"
                    )
                else:
                    availability["needs_datalake"].append({
                        "category": category,
                        "report_type": report_type,
                        "reason": "Required fields not available in Web Data",
                    })
        except Exception as e:
            print(f"Error in check_data_availability: {str(e)}")
            print(traceback.format_exc())
            availability["error"] = str(e)
        print(f"Final availability result: {availability}")
        return availability

    @staticmethod
    def _build_response(analysis: dict, availability: dict) -> Dict[str, Any]:
        """Assemble the success payload from an analysis and its availability check."""
        response = {
            "status": "success",
            "request_analysis": {
                "interpretation": analysis.get("interpretation", ""),
                "confidence": analysis.get("confidence_score", "LOW"),
            },
            "data_availability": {
                "available_reports": availability["available_in_webdata"],
                "access_instructions": availability["access_instructions"],
            },
        }
        if availability["needs_datalake"]:
            response["data_lake_requirements"] = {
                "reports_needed": availability["needs_datalake"],
                "estimated_processing_time": "2-3 business days",
                "requires_it_support": True,
            }
        if availability["not_available"]:
            response["unavailable_data"] = availability["not_available"]
        return response

    def assess_request(self, request_text: str, *, include_availability: bool = False):
        """Analyse a request and optionally report which data is available.

        Previously the availability check and response building sat after an
        unconditional ``return``/``raise`` and could never execute. The default
        still returns the raw analysis, so existing callers are unaffected;
        the full pipeline is now reachable on request.

        Args:
            request_text: Free-text request forwarded to the analyzer together
                with the list of catalogue categories.
            include_availability: When true, run ``check_data_availability`` on
                the analysis and return the structured response instead of the
                raw analysis.

        Returns:
            The analyzer's result as-is by default. With
            ``include_availability=True``, a dict with ``"status": "success"``
            plus ``"request_analysis"`` and ``"data_availability"`` (and
            ``"data_lake_requirements"`` / ``"unavailable_data"`` when
            non-empty), or ``{"status": "error", "message": ...}`` if building
            that response fails.

        Raises:
            Exception: Whatever the analyzer raises is logged and re-raised.
        """
        print("=== Debug: Calling GPTAnalyzer ===")
        print(f"Request text: {request_text}")
        try:
            # 1. Analyze the request
            analysis = self.analyzer.analyze_request(request_text, list(self.web_data.keys()))
            print("GPT Response:", analysis)
        except Exception as e:
            print(f"Error during GPT analysis: {str(e)}")
            raise
        if not include_availability:
            return analysis
        try:
            # 2. Check data availability
            availability = self.check_data_availability(analysis)
            print(f"Availability check completed: {availability}")
            # 3. Generate response
            response = self._build_response(analysis, availability)
            print(f"Final response prepared: {response}")
            return response
        except Exception as e:
            print(f"Error in assess_request: {str(e)}")
            print(traceback.format_exc())
            return {
                "status": "error",
                "message": f"Error processing request: {str(e)}",
            }