Spaces:
Runtime error
Runtime error
import pandas as pd | |
from sklearn.preprocessing import MultiLabelBinarizer | |
def _utag_field(record, name):
    """Read field ``name`` from a disease record given as a dict or as an object."""
    # Parsed JSON yields plain dicts, which have no attribute access; objects
    # exposing the fields as attributes keep working through getattr.
    if isinstance(record, dict):
        return record[name]
    return getattr(record, name)


def trainingDataFromUTagsJSON(data: dict) -> pd.DataFrame:
    """
    Get the training data from the UTags JSON file
    ==============================================
    Builds one row per entry of ``data['diseases']``. The ``disease`` column
    holds the first element of the record's ``disease_persian`` field. The
    ``symptom_eng`` and ``cause_eng`` fields are multi-label binarized into
    sparse indicator columns named ``symptoms_<label>`` and ``causes_<label>``.

    Parameters:
    -----------
    data:
        description: UTags JSON file; must contain a ``diseases`` entry whose
            records expose ``disease_persian``, ``symptom_eng`` and
            ``cause_eng`` either as dict keys or as attributes
        type: dict
    -----------
    Returns:
    --------
    data:
        description: Training data (``disease`` column plus one sparse
            indicator column per symptom/cause label)
        type: pd.DataFrame

    Notes:
    ------
    Encoding is best-effort: if a feature column cannot be encoded or joined,
    the error is printed and that column is left out of the result.
    """
    # Materialize once so the records can be traversed several times.
    diseases = list(data['diseases'])

    df = pd.DataFrame()
    df['disease'] = [_utag_field(record, 'disease_persian')[0] for record in diseases]  # disease[UTag]
    # NOTE(review): assumes symptom_eng / cause_eng are iterables of labels
    # (e.g. lists of strings) -- confirm against the UTags data.
    df['symptoms'] = [_utag_field(record, 'symptom_eng') for record in diseases]
    df['causes'] = [_utag_field(record, 'cause_eng') for record in diseases]
    # The `cause_persian` field is currently not used as a feature.

    mlb = MultiLabelBinarizer(sparse_output=True)

    # Snapshot the feature columns up front: the frame is mutated (pop) and
    # rebound (join) inside the loop, so iterating df.columns live is fragile.
    feature_columns = [col for col in df.columns if col != 'disease']
    for col in feature_columns:
        # The raw label column is always removed; only its encoding is kept.
        labels = df.pop(col)
        try:
            encoded = pd.DataFrame.sparse.from_spmatrix(
                mlb.fit_transform(labels),  # type: ignore
                index=df.index,
                # classes_ is read after fit_transform above has run, so it
                # matches the matrix columns. Explicit formatting works for
                # any label type, not only strings.
                columns=[f'{col}_{label}' for label in mlb.classes_],
            )
            df = df.join(encoded)
        except Exception as error:
            print(f'Error: {error} at column: {col}, skipping...')
    return df
def trainingDataFromPromptsForBERT(data: dict) -> pd.DataFrame:
    """
    Flatten the prompts JSON file into (sentence, disease) training rows
    ====================================================================
    Every sentence listed under a prompt becomes one row, paired with that
    prompt's disease. Rows keep the order of ``data['diseasesPrompts']`` and,
    within each prompt, the order of its ``sentences``.

    Parameters:
    -----------
    data:
        description: Prompts JSON file; ``data['diseasesPrompts']`` is a
            sequence of entries with ``sentences`` and ``disease`` keys
        type: dict
    -----------
    Returns:
    --------
    data:
        description: Training data with columns ``sentence`` and ``disease``
        type: pd.DataFrame
    """
    rows = [
        (text, entry['disease'])
        for entry in data['diseasesPrompts']
        for text in entry['sentences']
    ]
    return pd.DataFrame(rows, columns=['sentence', 'disease'])