Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -17,22 +17,34 @@ localdir = "SpotifyHitPrediction"
|
|
17 |
repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/SpotifyHitPrediction", token=token)
|
18 |
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def predict_popularity(features,trainset):
|
21 |
predictions = [None] * 2
|
22 |
predictions[0], predictions[1] = rf_model.predict([features]), model.predict([features])
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
24 |
return predictions
|
25 |
|
26 |
|
27 |
-
def addToCsvAndTrain(trainset):
|
28 |
trainset = [
|
29 |
-
[trainset[0], trainset[1], trainset[2], trainset[3], trainset[4], trainset[5], trainset[6], trainset[7],
|
30 |
trainset[8], trainset[9], trainset[10], trainset[11], trainset[12], trainset[13]
|
31 |
]
|
32 |
]
|
33 |
neues_df = pd.DataFrame(trainset, columns= data.columns)
|
34 |
-
df = pd.concat([
|
35 |
-
df.to_csv(f'{localdir}/top50.csv', index=False)
|
36 |
repo.git_add(os.path.abspath(f'{localdir}/top50.csv'))
|
37 |
repo.git_commit("Add top50.csv")
|
38 |
repo.git_push()
|
@@ -40,7 +52,7 @@ def addToCsvAndTrain(trainset):
|
|
40 |
|
41 |
|
42 |
|
43 |
-
data = pd.read_csv('top50.csv', encoding='
|
44 |
print(data.head())
|
45 |
|
46 |
# Let's also describe the data to get a sense of the distributions
|
|
|
17 |
repo = Repository(local_dir=localdir, clone_from="https://huggingface.co/spaces/Add1E/SpotifyHitPrediction", token=token)
|
18 |
|
19 |
|
20 |
+
def remove_non_utf8_characters(text):
    """Return *text* with every character that cannot be encoded as UTF-8 dropped.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` in which unencodable characters (for example lone
        surrogate code points) have been removed; all other characters are
        kept unchanged.
    """
    # 'ignore' is the codec error handler that silently drops offending
    # characters.  The previous handler name, 'remove', is not registered, so
    # any input that actually needed cleaning raised LookupError instead.
    encoded_string = text.encode('utf-8', 'ignore')
    # Decode back to a str; the bytes are valid UTF-8 by construction.
    return encoded_string.decode('utf-8')
|
25 |
+
|
26 |
+
|
27 |
def predict_popularity(features,trainset):
    """Predict with both models, record the sample, and show model metrics.

    Args:
        features: A single feature vector; it is wrapped in a list before
            being handed to each model's ``predict``.
        trainset: The row forwarded to ``addToCsvAndTrain`` for persistence.

    Returns:
        A two-element list: ``[rf_model prediction, model prediction]``.
    """
    sample = [features]
    rf_prediction = rf_model.predict(sample)
    regression_prediction = model.predict(sample)

    # NOTE(review): this reads 'top50.csv' from the working directory, while
    # addToCsvAndTrain writes to f'{localdir}/top50.csv' -- confirm that both
    # paths are meant to refer to the same data.
    existing_rows = pd.read_csv('top50.csv')
    addToCsvAndTrain(trainset, existing_rows)

    # Report the module-level evaluation metrics for each model.
    for heading, metrics in (
        ("Regression :", f"MeanSquaredError: {mse}, rSqared: {r2}"),
        ("Random Forest :", f"MeanSquaredError: {rf_mse}, rSqared: {rf_r2}"),
    ):
        st.write(heading)
        st.code(metrics)

    return [rf_prediction, regression_prediction]
|
37 |
|
38 |
|
39 |
+
def addToCsvAndTrain(trainset, old_df):
    """Append one row to the dataset, write it to CSV, and push it to the repo.

    Args:
        trainset: An indexable holding at least 14 values; elements 1 and 2
            are passed through ``remove_non_utf8_characters`` before storage.
        old_df: The existing DataFrame the new row is appended to.

    The combined frame is written to ``{localdir}/top50.csv`` and then added,
    committed and pushed through ``repo``.
    """
    text_positions = (1, 2)
    # Build the row left to right so elements are accessed in index order.
    row = [
        remove_non_utf8_characters(trainset[position])
        if position in text_positions
        else trainset[position]
        for position in range(14)
    ]

    # The new row uses the column layout of the module-level `data` frame.
    new_row_df = pd.DataFrame([row], columns= data.columns)
    combined = pd.concat([old_df, new_row_df], ignore_index=True)

    csv_path = f'{localdir}/top50.csv'
    combined.to_csv(csv_path, index=False, encoding='utf-8')

    # Publish the updated CSV.
    repo.git_add(os.path.abspath(csv_path))
    repo.git_commit("Add top50.csv")
    repo.git_push()
|
|
|
52 |
|
53 |
|
54 |
|
55 |
+
data = pd.read_csv('top50.csv', encoding='utf-8')
|
56 |
print(data.head())
|
57 |
|
58 |
# Let's also describe the data to get a sense of the distributions
|