import json
import os

import pandas as pd
from fastapi import UploadFile

import tasks.data.dataAugmentation as da
import tasks.data.dataEngineering as de
import tasks.data.utility as util
import tasks.training.handle_train as trainingPipeline


def _ensureParentDir(savePath: str) -> None:
    """Create the directory that will contain ``savePath`` if it is missing."""
    parentDir = os.path.dirname(savePath)
    # A bare file name has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one.
    if parentDir:
        os.makedirs(parentDir, exist_ok=True)


def _successResponse(message: str, data=None) -> dict:
    """Build the success envelope returned by every function in this module."""
    return {"success": True, "message": message, "data": data}


def _failureResponse(message: str, error: Exception) -> dict:
    """Build the failure envelope; the error text is appended to ``message``."""
    return {"success": False, "message": f"{message} {error}", "data": None}


def augmentDataUsingVectorSpaceAlgorithm(file: UploadFile, savePath: str):
    """Augment an uploaded CSV and write the result to ``savePath`` as CSV.

    The upload is parsed with ``pd.read_csv`` and handed to
    ``da.augmentDataWithVectorSpaceAlgorithm``; the returned frame is saved
    without its index, UTF-8 encoded.

    Args:
        file: Upload whose underlying file object holds CSV data.
        savePath: Destination CSV path; its parent directory is created
            when needed.

    Returns:
        A dict with ``success``, ``message`` and ``data`` keys. On success
        ``data`` is ``df.head(5).to_dict()``; on any exception ``success`` is
        False, ``data`` is None and the error text is part of ``message``.
    """
    # Boundary handler: every failure is reported in the response, not raised.
    try:
        _ensureParentDir(savePath)
        data = pd.read_csv(file.file)
        df = da.augmentDataWithVectorSpaceAlgorithm(data)
        df.to_csv(savePath, index=False, encoding='utf-8')
        return _successResponse(
            "Training data augmented successfully",
            df.head(5).to_dict(),
        )
    except Exception as error:
        return _failureResponse("Training data augmentation failed.", error)


def getSymptomsCausesAndDiseaseNameFromJSON(file: UploadFile, savePath: str):
    """Extract a disease dictionary from an upload and save it as JSON.

    NOTE(review): despite the "FromJSON" name, the upload is parsed with
    ``pd.read_csv`` before being passed to
    ``util.getSymptomsCausesAndDiseaseNameFromJSON`` -- confirm the expected
    upload format with the callers.

    Args:
        file: Upload whose underlying file object is read by ``pd.read_csv``.
        savePath: Destination JSON path; its parent directory is created
            when needed.

    Returns:
        A dict with ``success``, ``message`` and ``data`` keys; ``data`` is
        always None. On any exception ``success`` is False and the error text
        is part of ``message``.
    """
    try:
        _ensureParentDir(savePath)
        data = pd.read_csv(file.file)
        diseaseDict = util.getSymptomsCausesAndDiseaseNameFromJSON(data)
        # Context manager guarantees the file is flushed and closed even if
        # serialization fails part-way through.
        with open(savePath, 'w', encoding='utf-8') as outFile:
            json.dump(diseaseDict, outFile, ensure_ascii=False)
        return _successResponse(
            "Symptoms, causes and disease name extracted successfully",
        )
    except Exception as error:
        return _failureResponse(
            "Symptoms, causes and disease name extraction failed.",
            error,
        )


def trainingDataFromUTagsJSON(file: UploadFile, savePath: str):
    """Build training data from an uploaded JSON file and save it as CSV.

    The upload is decoded with ``json.loads`` and handed to
    ``de.trainingDataFromUTagsJSON``; the returned frame is saved without its
    index, UTF-8 encoded.

    Args:
        file: Upload whose underlying file object holds a JSON document.
        savePath: Destination CSV path; its parent directory is created
            when needed.

    Returns:
        A dict with ``success``, ``message`` and ``data`` keys. On success
        ``data`` is ``df.head(5).to_dict()``; on any exception ``success`` is
        False, ``data`` is None and the error text is part of ``message``.
    """
    try:
        _ensureParentDir(savePath)
        data = json.loads(file.file.read())
        df = de.trainingDataFromUTagsJSON(data)
        df.to_csv(savePath, index=False, encoding='utf-8')
        return _successResponse(
            "Training data generated successfully",
            df.head(5).to_dict(),
        )
    except Exception as error:
        return _failureResponse("Training data generation failed.", error)


def trainingDataFromPromptsForBERT(file: UploadFile, savePath: str):
    """Build training data from an uploaded JSON file and save it as CSV.

    The upload is decoded with ``json.loads`` and handed to
    ``de.trainingDataFromPromptsForBERT``; the returned frame is saved without
    its index, UTF-8 encoded.

    Args:
        file: Upload whose underlying file object holds a JSON document.
        savePath: Destination CSV path; its parent directory is created
            when needed.

    Returns:
        A dict with ``success``, ``message`` and ``data`` keys. On success
        ``data`` is ``df.head(5).to_dict()``; on any exception ``success`` is
        False, ``data`` is None and the error text is part of ``message``.
    """
    try:
        _ensureParentDir(savePath)
        data = json.loads(file.file.read())
        df = de.trainingDataFromPromptsForBERT(data)
        df.to_csv(savePath, index=False, encoding='utf-8')
        return _successResponse(
            "Training data generated successfully",
            df.head(5).to_dict(),
        )
    except Exception as error:
        return _failureResponse("Training data generation failed.", error)


def trainModelOnSageMaker(trainDataPath: str, testDataPath: str, file: UploadFile | None = None):
    """Run the training pipeline on the given train and test data paths.

    Delegates to ``trainingPipeline.train`` (presumably a SageMaker job, per
    the function name -- not verifiable from this module).

    Args:
        trainDataPath: Passed through to ``trainingPipeline.train``.
        testDataPath: Passed through to ``trainingPipeline.train``.
        file: Optional upload containing a JSON document of hyperparameters.
            When omitted, ``None`` is passed as the hyperparameters.

    Returns:
        A dict with ``success``, ``message`` and ``data`` keys; ``data`` is
        always None. On any exception ``success`` is False and the error text
        is part of ``message``.
    """
    try:
        hyperparameters = None
        if file is not None:
            hyperparameters = json.loads(file.file.read())
        trainingPipeline.train(trainDataPath, testDataPath, hyperparameters)
        return _successResponse("Model trained successfully")
    except Exception as error:
        return _failureResponse("Model training failed.", error)