# Spaces:
# Runtime error
# Runtime error
import gzip | |
import hickle | |
import _pickle as cPickle | |
import itertools | |
import time | |
def get_num_neighbor(G, etype):
    """Print the edges of ``G`` for one edge type, for debugging.

    First prints whatever ``G.edges(etype=etype)`` returns as a whole, then
    queries the edges a second time and prints each element of that result
    on its own line.  Nothing is returned.

    Args:
        G: Object exposing ``edges(etype=...)``.
            NOTE(review): presumably a heterogeneous graph object -- confirm
            against the caller.
        etype: Edge type forwarded unchanged to ``G.edges``.
    """
    # Summary view: the full return value in a single print.
    print(G.edges(etype=etype))

    # Detailed view: one print per element of a fresh edge query.
    for element in G.edges(etype=etype):
        print(element)
def neighbormap(df, dic, user_dic, new_item_dic, col_user='user_id', col_item='item_id'):
    """Append the mapped item of every kept interaction to its user's list.

    Rows of ``df`` are visited in order.  For each row whose item is a key of
    ``new_item_dic``, the mapped item ``new_item_dic[item]`` is appended to
    ``dic[user_dic[user]]``.  Rows whose item is not in ``new_item_dic`` are
    skipped.  Elapsed wall-clock time is printed when the pass finishes.

    Args:
        df: DataFrame containing the ``col_user`` and ``col_item`` columns.
            Rows are read positionally, so the frame does not need a default
            ``0..n-1`` index (e.g. it may have been filtered without
            ``reset_index``).
        dic: Mapping from mapped user id to an appendable list.  It is
            mutated in place.
        user_dic: Mapping from raw user value to the key used in ``dic``.
        new_item_dic: Mapping from raw item value to the value stored in the
            neighbor lists; it also acts as the filter of items to keep.
        col_user: Name of the user column.
        col_item: Name of the item column.

    Returns:
        The same ``dic`` object that was passed in.

    Raises:
        KeyError: If a kept row's user is missing from ``user_dic``, or the
            mapped user is missing from ``dic`` (unless ``dic`` creates
            missing entries itself, e.g. a ``defaultdict``).
    """
    t = time.time()
    print('Start time')
    # Iterate the two columns in lockstep instead of label-based ``df.at[i]``
    # lookups: this is independent of the index labels and avoids a per-row
    # indexer call.
    for user, item in zip(df[col_user], df[col_item]):
        if item in new_item_dic:
            dic[user_dic[user]].append(new_item_dic[item])
    print('End time', time.time() - t)
    return dic
def split_char(str):
    """Split mixed text into alphanumeric runs and single other characters.

    ASCII letters and digits are accumulated into a run.  Characters listed
    in the skip set (space, brackets, ``*``, ``/``, ``-``, ``.``) are dropped
    without ending the current run, so ``'a b'`` yields ``['ab']``.  Any
    other character (e.g. a Chinese character) ends the current run and is
    emitted as its own token.

    Args:
        str: Text to split.  The parameter name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        List of tokens.  If the input cannot be iterated or compared as text
        (for example ``None`` or a float NaN), the input is printed and the
        tokens collected so far (usually an empty list) are returned.
    """
    lower_alnum = 'abcdefghijklmnopqrstuvwxyz0123456789'
    upper_alnum = lower_alnum.upper()  # hoisted: was recomputed per character
    skipped = ' ()*()【】/-.'
    output = []
    buffer = ''
    try:
        for s in str:
            if s in lower_alnum or s in upper_alnum:  # English or numeric
                buffer += s
            elif s in skipped:  # special symbol such as a space: skip it
                continue
            else:  # any other character, e.g. Chinese
                if buffer:
                    output.append(buffer)
                    buffer = ''
                output.append(s)
        if buffer:
            output.append(buffer)
    except TypeError:
        # Non-text input (not iterable, or elements that are not strings).
        # Best effort: report the offending value and return what we have,
        # without swallowing unrelated exceptions such as KeyboardInterrupt.
        print(str)
    return output
def filter_sample(threshold, dic):
    """Keep entries with enough distinct neighbors, truncated to a fixed size.

    For each ``key, value`` pair of ``dic``: when the number of distinct
    elements in ``value`` is below ``threshold`` the key is recorded as
    rejected; otherwise the first ``threshold`` elements of ``value`` (in
    their original order, duplicates included) are kept.

    Args:
        threshold: Minimum number of distinct elements and slice length.
        dic: Mapping from key to a sliceable sequence of hashable elements.

    Returns:
        A tuple ``(kept, rejected_keys)`` where ``kept`` is the list of
        truncated sequences and ``rejected_keys`` is the list of keys that
        did not reach the threshold, both in ``dic`` iteration order.
    """
    kept = []
    rejected_keys = []
    for key, neighbors in dic.items():
        distinct_count = len(set(neighbors))
        if distinct_count < threshold:
            rejected_keys.append(key)
            continue
        kept.append(neighbors[:threshold])
    return kept, rejected_keys
def combination(df, users, col_user='user_id', col_item='item_id', min_item_count=10):
    """Build de-duplicated user-user pairs from users sharing an item.

    Steps:
      1. Keep only rows whose user is in ``users``.
      2. Keep only items that occur in at least ``min_item_count`` of the
         remaining rows.
      3. For every remaining item, take the users of its rows (in row order)
         and generate 2-combinations of them, keeping at most
         ``max(10, number_of_users)`` pairs per item.
      4. De-duplicate the pairs across all items and transpose them.

    Args:
        df: DataFrame containing the ``col_user`` and ``col_item`` columns.
        users: Collection of user values that are allowed to appear in pairs.
        col_user: Name of the user column.
        col_item: Name of the item column.
        min_item_count: Minimum number of rows an item needs in order to be
            used.  Defaults to 10, the previously hard-coded value.

    Returns:
        ``[sources, targets]``: a list of two equal-length tuples such that
        ``(sources[k], targets[k])`` is one pair.  The order of pairs is
        arbitrary because they pass through a ``set``.  When no pair is
        produced an empty list is returned.
    """
    # Filtering: the user must be one of the users that meet the conditions.
    df = df[df[col_user].isin(users)].reset_index(drop=True)

    # Filtering: the item must have been clicked often enough.
    item_counts = df[col_item].value_counts()
    popular_items = item_counts[item_counts >= min_item_count].index
    df = df[df[col_item].isin(popular_items)].reset_index(drop=True)

    grouped = df.groupby(col_item)
    print(df.shape, grouped.ngroups)

    # Built once; the membership test below runs for every row.
    user_set = set(users)
    out = []
    for _, group in grouped:
        l = [x for x in group[col_user].tolist() if x in user_set]
        # NOTE(review): the cap is max(10, len(l)), i.e. it grows with the
        # group size; kept as in the original -- confirm this is intended.
        limit = max(10, len(l))
        # islice avoids materializing every combination before truncating.
        out.extend(itertools.islice(itertools.combinations(l, 2), limit))

    if not out:
        # Nothing survived the filters: report zero instead of failing on
        # out[0] of an empty transpose.
        print('Number of sides after de-duplication:', 0)
        return []

    out = list(zip(*set(out)))
    print('Number of sides after de-duplication:', len(out[0]))
    return out