Add1E committed on
Commit
36ab544
·
1 Parent(s): 7375b63

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +18 -6
utils.py CHANGED
@@ -17,22 +17,34 @@ localdir = "SpotifyHitPrediction"
17
  repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/SpotifyHitPrediction", token=token)
18
 
19
 
 
 
 
 
 
 
 
20
  def predict_popularity(features,trainset):
21
  predictions = [None] * 2
22
  predictions[0], predictions[1] = rf_model.predict([features]), model.predict([features])
23
- addToCsvAndTrain(trainset)
 
 
 
 
 
24
  return predictions
25
 
26
 
27
- def addToCsvAndTrain(trainset):
28
  trainset = [
29
- [trainset[0], trainset[1], trainset[2], trainset[3], trainset[4], trainset[5], trainset[6], trainset[7],
30
  trainset[8], trainset[9], trainset[10], trainset[11], trainset[12], trainset[13]
31
  ]
32
  ]
33
  neues_df = pd.DataFrame(trainset, columns= data.columns)
34
- df = pd.concat([data, neues_df], ignore_index=True)
35
- df.to_csv(f'{localdir}/top50.csv', index=False)
36
  repo.git_add(os.path.abspath(f'{localdir}/top50.csv'))
37
  repo.git_commit("Add top50.csv")
38
  repo.git_push()
@@ -40,7 +52,7 @@ def addToCsvAndTrain(trainset):
40
 
41
 
42
 
43
- data = pd.read_csv('top50.csv', encoding='ISO-8859-1')
44
  print(data.head())
45
 
46
  # Let's also describe the data to get a sense of the distributions
 
17
  repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/SpotifyHitPrediction", token=token)
18
 
19
 
20
+ def remove_non_utf8_characters(text):
21
+ # Encode using UTF-8, ignore errors
22
+ encoded_string = text.encode('utf-8', 'remove')
23
+ # Decode back to string
24
+ return encoded_string.decode('utf-8')
25
+
26
+
27
  def predict_popularity(features,trainset):
28
  predictions = [None] * 2
29
  predictions[0], predictions[1] = rf_model.predict([features]), model.predict([features])
30
+ old_df = pd.read_csv('top50.csv')
31
+ addToCsvAndTrain(trainset, old_df)
32
+ st.write("Regression :")
33
+ st.code(f"MeanSquaredError: {mse}, rSqared: {r2}")
34
+ st.write("Random Forest :")
35
+ st.code(f"MeanSquaredError: {rf_mse}, rSqared: {rf_r2}")
36
  return predictions
37
 
38
 
39
+ def addToCsvAndTrain(trainset, old_df):
40
  trainset = [
41
+ [trainset[0], remove_non_utf8_characters(trainset[1]), remove_non_utf8_characters(trainset[2]), trainset[3], trainset[4], trainset[5], trainset[6], trainset[7],
42
  trainset[8], trainset[9], trainset[10], trainset[11], trainset[12], trainset[13]
43
  ]
44
  ]
45
  neues_df = pd.DataFrame(trainset, columns= data.columns)
46
+ df = pd.concat([old_df, neues_df], ignore_index=True)
47
+ df.to_csv(f'{localdir}/top50.csv', index=False, encoding='utf-8')
48
  repo.git_add(os.path.abspath(f'{localdir}/top50.csv'))
49
  repo.git_commit("Add top50.csv")
50
  repo.git_push()
 
52
 
53
 
54
 
55
+ data = pd.read_csv('top50.csv', encoding='utf-8')
56
  print(data.head())
57
 
58
  # Let's also describe the data to get a sense of the distributions