Commit 1ab13ba · nandovallec committed · 1 Parent(s): 4da8e4d

Add dataset

Files changed:
- app.py +18 -4
- recommender.py +19 -4
- requirements.txt +1 -0
app.py CHANGED

@@ -21,14 +21,25 @@ import pandas as pd
 import os
 from scipy.sparse import vstack
 from recommender import *
+import huggingface_hub
+from huggingface_hub import Repository

-
-
-
-# mode = "walking"
+HF_TOKEN = os.environ.get("HF_TOKEN")
+DATASET_REPO_URL_TRAIN = "https://huggingface.co/datasets/nandovallec/df_ps_train_extra"
+DATA_FILENAME_TRAIN = "df_ps_train_extra.hdf"
+DATA_FILE_TRAIN = os.path.join("data_train", DATA_FILENAME_TRAIN)

+DATASET_REPO_URL_MAT = "https://huggingface.co/datasets/nandovallec/giantMatrix_extra"
+DATA_FILENAME_MAT = "giantMatrix_extra.pickle"
+DATA_FILE_MAT = os.path.join("data_mat", DATA_FILENAME_MAT)

+repo_train = Repository(
+    local_dir="data_train", clone_from=DATASET_REPO_URL_TRAIN, use_auth_token=HF_TOKEN
+)

+repo_mat = Repository(
+    local_dir="data_mat", clone_from=DATASET_REPO_URL_MAT, use_auth_token=HF_TOKEN
+)

 def test(playlist_url, n_rec):
     n_rec = int(n_rec)

@@ -46,6 +57,9 @@ def test(playlist_url, n_rec):
     # if i % 5 == 0:
     #     time.sleep(1)
     uri_links = inference_from_uri(list_uri, MAX_tid=n_rec)
+    commit_url = repo_train.push_to_hub()
+    commit_url = repo_mat.push_to_hub()
+
     # uri_links = []
     frames = ""
     for uri_link in uri_links:
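The new app.py code follows the common Spaces pattern for persisting data to a Hub dataset repo: clone the dataset at startup with huggingface_hub's git-based Repository class, mutate the files in the local checkout, then push_to_hub() after each request. A minimal standalone sketch of that pattern, assuming the same repo URL as the diff and an HF_TOKEN write token stored as a Space secret (Repository is the older git-based API; recent huggingface_hub releases deprecate it in favor of HfApi):

    import os
    from huggingface_hub import Repository

    HF_TOKEN = os.environ.get("HF_TOKEN")  # write token, set as a Space secret

    # Clone the dataset repo into a local working directory (done once at startup).
    repo = Repository(
        local_dir="data_train",
        clone_from="https://huggingface.co/datasets/nandovallec/df_ps_train_extra",
        use_auth_token=HF_TOKEN,
    )

    # ... rewrite files inside data_train/ (e.g. the .hdf file) ...

    # Commit and push whatever changed; returns the URL of the new commit.
    commit_url = repo.push_to_hub(commit_message="Update playlist training data")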
recommender.py CHANGED

@@ -4,9 +4,21 @@ from scipy.sparse import csr_matrix
 import numpy as np
 import pandas as pd
 from scipy.sparse import vstack
+import dataset_url
+import huggingface_hub
+from huggingface_hub import Repository



+def save_train_repo(df):
+    repo = Repository(
+        local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
+    )
+    df.to_hdf(DATA_FILE, key='abc')
+    commit_url = repo.push_to_hub()
+    return
+
+
 def add_row_train(df, list_tid):
     new_pid_add = df.iloc[-1].name +1
     list_tid_add = list_tid

@@ -28,7 +40,7 @@ def inference_row(list_tid, ps_matrix):

 def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
     df_ps_train = pd.read_hdf('model/df_ps_train_new.hdf')
-    df_ps_train_extra = pd.read_hdf('
+    df_ps_train_extra = pd.read_hdf('data_train/df_ps_train_extra.hdf')
     df_ps_train = pd.concat([df_ps_train,df_ps_train_extra])

     sim_vector, sparse_row = inference_row(current_list, ps_matrix_row)

@@ -70,7 +82,10 @@ def get_best_tid(current_list, ps_matrix_row, K=50, MAX_tid=10):
         break

     df_ps_train_extra = add_row_train(df_ps_train_extra, current_list)
-
+
+
+    df_ps_train_extra.to_hdf('data_train/df_ps_train_extra.hdf', key='abc')
+
     return new_list, sparse_row



@@ -81,7 +96,7 @@ def inference_from_tid(list_tid, K=50, MAX_tid=10):
     with open(pickle_path, 'rb') as f:
         ps_matrix = pickle.load(f)

-    with open("
+    with open("data_mat/giantMatrix_extra.pickle",'rb') as f:
         ps_matrix_extra = pickle.load(f)

     ps_matrix = vstack((ps_matrix,ps_matrix_extra))

@@ -89,7 +104,7 @@ def inference_from_tid(list_tid, K=50, MAX_tid=10):
     result, sparse_row = get_best_tid(list_tid, ps_matrix.tocsr(), K, MAX_tid)
     ps_matrix_extra = vstack((ps_matrix_extra,sparse_row.todok()))

-    with open("
+    with open("data_mat/giantMatrix_extra.pickle", 'wb') as f:
        pickle.dump(ps_matrix_extra, f)

     return result
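The recommender.py changes keep a second, growing "extra" matrix beside the frozen base matrix: each request vstacks one new sparse playlist row onto it and re-pickles the result, while the matching DataFrame row is appended via add_row_train and rewritten with to_hdf (the new save_train_repo helper expects DATASET_REPO_URL, HF_TOKEN, and DATA_FILE to be supplied by the dataset_url import). A self-contained sketch of the append-and-persist step, using the same path as the diff but a hypothetical empty new row:

    import pickle
    from scipy.sparse import csr_matrix, vstack

    # Load the current "extra" playlist-song matrix.
    with open("data_mat/giantMatrix_extra.pickle", "rb") as f:
        ps_matrix_extra = pickle.load(f)

    # Hypothetical new playlist row; in the app this comes from inference_row().
    new_row = csr_matrix((1, ps_matrix_extra.shape[1]))

    # Append the row and persist the grown matrix for the next request.
    ps_matrix_extra = vstack((ps_matrix_extra, new_row))
    with open("data_mat/giantMatrix_extra.pickle", "wb") as f:
        pickle.dump(ps_matrix_extra, f)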
requirements.txt CHANGED

@@ -3,3 +3,4 @@ scikit-learn
 tables
 pandas
 numpy
+huggingface_hub
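huggingface_hub joins the existing stack; tables (PyTables) was already listed and is what backs the pd.to_hdf / pd.read_hdf calls above. A minimal round trip, with a hypothetical filename, using the same key='abc' the app passes:

    import pandas as pd

    # HDF5 I/O in pandas requires the `tables` (PyTables) package at runtime.
    df = pd.DataFrame({"tid": [101, 102, 103]})
    df.to_hdf("example.hdf", key="abc")
    restored = pd.read_hdf("example.hdf")  # key optional when the file has one key
    assert restored.equals(df)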