Spaces:
Runtime error
Runtime error
import pandas as pd | |
def augmentDataWithVectorSpaceAlgorithm(data: pd.DataFrame) -> pd.DataFrame:
    """
    Generate "one flag removed" variants of every row in ``data``.

    Parameters
    ----------
    data : pd.DataFrame
        Frame to augment. The code skips the first column when looking for
        flags and excludes a column named ``'disease'`` from duplicate
        detection (presumably the first column is that label -- confirm
        against the caller). Column names must be strings.

    Returns
    -------
    pd.DataFrame
        A new frame with a fresh 0..n-1 index. ``data`` itself is not
        modified.

    Working
    -------
    - For every row, and for every examined column whose value equals 1,
      create a copy of the row with that single value set to 0.
    - Stack the original rows and the copies, and print the result.
    - Drop rows whose numeric columns sum to 0.
    - Stack the original rows on top of that frame once more and drop
      *every* row whose non-``'disease'`` values occur more than once
      (``keep=False``).
    - Reset the index, print the result and return it.

    NOTE(review): because the original rows enter the last stacking step
    twice, every original row with a non-zero numeric sum is removed by the
    ``keep=False`` de-duplication, and so is any generated row that matches
    another row. Earlier documentation of this function described the
    result as "original data plus new rows"; the behaviour of the code is
    preserved here unchanged -- confirm which of the two is intended.
    """
    symptom_count = sum(1 for col in data.columns if col.startswith('symptoms_'))
    symptoms = data.columns[1:symptom_count]
    causes = data.columns[symptom_count:]
    # NOTE(review): if the layout is [label, symptoms_*, causes...], the last
    # symptoms_ column falls into `causes`. Both groups are treated the same
    # way, so together they still cover every column after the first.
    flippable = [*symptoms, *causes]

    # Collect all variants first and build the frame once; DataFrame.append
    # no longer exists in pandas >= 2.0 and copied the frame on every call.
    variants = []
    for record in data.to_dict('records'):
        for column in flippable:
            if record[column] == 1:
                variants.append({**record, column: 0})

    if variants:
        df = pd.concat(
            [data, pd.DataFrame(variants, columns=data.columns)],
            ignore_index=True,
        )
    else:
        df = data
    print(f"data before drop_duplicates: {df}")

    # Discard rows in which no numeric value is left set.
    df = df[df.sum(axis=1, numeric_only=True) != 0]

    combined = pd.concat([data, df], ignore_index=True)
    # keep=False removes all members of a duplicate group, not just the extras.
    combined = combined.drop_duplicates(
        subset=df.columns.difference(['disease']), keep=False
    )
    combined = combined.reset_index(drop=True)
    print(f"final data: {combined}")
    return combined