Spaces:

Add1E
/

SpotifyHitPrediction

Sleeping

App Files Community

Add1E committed on Nov 16, 2023

Commit

36ab544

1 Parent(s): 7375b63

Update utils.py

Browse files

Files changed (1) hide show

utils.py +18 -6

utils.py CHANGED Viewed

@@ -17,22 +17,34 @@ localdir = "SpotifyHitPrediction"
 repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/SpotifyHitPrediction", token=token)
 def predict_popularity(features,trainset):
     predictions = [None] * 2
     predictions[0], predictions[1] = rf_model.predict([features]), model.predict([features])
-    addToCsvAndTrain(trainset)
     return predictions
-def addToCsvAndTrain(trainset):
     trainset = [
-        [trainset[0], trainset[1], trainset[2], trainset[3], trainset[4], trainset[5], trainset[6], trainset[7],
          trainset[8], trainset[9], trainset[10], trainset[11], trainset[12], trainset[13]
          ]
     ]
     neues_df = pd.DataFrame(trainset, columns= data.columns)
-    df = pd.concat([data, neues_df], ignore_index=True)
-    df.to_csv(f'{localdir}/top50.csv', index=False)
     repo.git_add(os.path.abspath(f'{localdir}/top50.csv'))
     repo.git_commit("Add top50.csv")
     repo.git_push()
@@ -40,7 +52,7 @@ def addToCsvAndTrain(trainset):
-data = pd.read_csv('top50.csv', encoding='ISO-8859-1')
 print(data.head())
 # Let's also describe the data to get a sense of the distributions

 repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/SpotifyHitPrediction", token=token)
+def remove_non_utf8_characters(text):
+    # Encode using UTF-8, ignore errors
+    encoded_string = text.encode('utf-8', 'remove')
+    # Decode back to string
+    return encoded_string.decode('utf-8')
 def predict_popularity(features,trainset):
     predictions = [None] * 2
     predictions[0], predictions[1] = rf_model.predict([features]), model.predict([features])
+    old_df = pd.read_csv('top50.csv')
+    addToCsvAndTrain(trainset, old_df)
+    st.write("Regression :")
+    st.code(f"MeanSquaredError: {mse}, rSqared: {r2}")
+    st.write("Random Forest :")
+    st.code(f"MeanSquaredError: {rf_mse}, rSqared: {rf_r2}")
     return predictions
+def addToCsvAndTrain(trainset, old_df):
     trainset = [
+        [trainset[0], remove_non_utf8_characters(trainset[1]), remove_non_utf8_characters(trainset[2]), trainset[3], trainset[4], trainset[5], trainset[6], trainset[7],
          trainset[8], trainset[9], trainset[10], trainset[11], trainset[12], trainset[13]
          ]
     ]
     neues_df = pd.DataFrame(trainset, columns= data.columns)
+    df = pd.concat([old_df, neues_df], ignore_index=True)
+    df.to_csv(f'{localdir}/top50.csv', index=False, encoding='utf-8')
     repo.git_add(os.path.abspath(f'{localdir}/top50.csv'))
     repo.git_commit("Add top50.csv")
     repo.git_push()
+data = pd.read_csv('top50.csv', encoding='utf-8')
 print(data.head())
 # Let's also describe the data to get a sense of the distributions