File size: 8,123 Bytes
9f13819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                                      seq  next  len_seq  \\\n",
      "0       [3442, 3448, 3451, 97, 180, 78, 3300, 152, 340...  3462       10   \n",
      "1       [3448, 3451, 97, 180, 78, 3300, 152, 3401, 185...  3354       10   \n",
      "2       [3451, 97, 180, 78, 3300, 152, 3401, 185, 3462...   274       10   \n",
      "3       [97, 180, 78, 3300, 152, 3401, 185, 3462, 3354...  3276       10   \n",
      "4       [180, 78, 3300, 152, 3401, 185, 3462, 3354, 27...  3225       10   \n",
      "...                                                   ...   ...      ...   \n",
      "151051  [129, 2411, 1033, 204, 498, 3242, 3505, 3224, ...  1734       10   \n",
      "151052  [2411, 1033, 204, 498, 3242, 3505, 3224, 2524,...  1945       10   \n",
      "151053  [1033, 204, 498, 3242, 3505, 3224, 2524, 1099,...  1134       10   \n",
      "151054  [3431, 2808, 3214, 3284, 3001, 800, 2636, 3581...  2636        7   \n",
      "151055  [14, 58, 3510, 15, 68, 86, 3238, 2411, 1134, 2...  1073       10   \n",
      "\n",
      "                                                   review  \\\n",
      "0       [Maybe a niche thing, but it has solid gamepla...   \n",
      "1       [Borderlands at its core is spectacular: the g...   \n",
      "2       [You get to be the hero of the people WHILE bl...   \n",
      "3       [The game is far from perfect, I could point o...   \n",
      "4       [Very instructive', \"There's a ton of stuff wr...   \n",
      "...                                                   ...   \n",
      "151051  [This game is the best when I was bored playin...   \n",
      "151052  [I don't even know where to begin on how good ...   \n",
      "151053  [BrainBread 2 has really fun single player bec...   \n",
      "151054  [Starting the game, you will go into an area w...   \n",
      "151055  [Think the original Doom but with more awesome...   \n",
      "\n",
      "                                              next_review  \n",
      "0       I played to the end, I feel obligated to recom...  \n",
      "1       I was pretty shocked by how much I liked this ...  \n",
      "2       words cant explain how good this game is GET I...  \n",
      "3       best game i ever bought a rts with actual play...  \n",
      "4       Extremely punishing game, but every death is p...  \n",
      "...                                                   ...  \n",
      "151051            I had this game since it first came out  \n",
      "151052    give me a reason to actually hate white people.  \n",
      "151053  Heroes and Generals is a really fun game but r...  \n",
      "151054  The game is fun it's a cool concept, and I lik...  \n",
      "151055  It's Wasted Potential but still a pretty good ...  \n",
      "\n",
      "[151056 rows x 5 columns]\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the pickled training frame from the working directory.\n",
    "# NOTE(review): unpickling can execute arbitrary code, so only load files you trust.\n",
    "train_pickle_path = \"./train_data.df\"\n",
    "df = pd.read_pickle(train_pickle_path)\n",
    "\n",
    "# Print the frame as plain text.\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                                    seq  next  len_seq  \\\n",
      "0     [1221, 899, 490, 1985, 255, 3058, 3119, 2930, ...  1417       10   \n",
      "1     [3448, 855, 2710, 6, 2694, 3581, 3581, 3581, 3...  2694        5   \n",
      "2     [3255, 3399, 3330, 3500, 2729, 1335, 68, 1866,...   642       10   \n",
      "3     [273, 3462, 2656, 2033, 3333, 3284, 2110, 470,...   470        8   \n",
      "4     [3284, 1134, 2992, 727, 3442, 831, 6, 76, 2899...  2899        9   \n",
      "...                                                 ...   ...      ...   \n",
      "1189  [6, 2986, 2683, 495, 3580, 3171, 234, 164, 2, ...  2853       10   \n",
      "1190  [2262, 2694, 1358, 883, 2174, 6, 2683, 1604, 4...  1478       10   \n",
      "1191  [150, 1099, 3488, 3580, 2725, 3171, 535, 1622,...   206       10   \n",
      "1192  [206, 6, 52, 3272, 204, 926, 498, 2712, 2496, ...  3202       10   \n",
      "1193  [3251, 3300, 1822, 150, 2160, 2046, 6, 1038, 2...  1602       10   \n",
      "\n",
      "                                                 review  \\\n",
      "0     [Really nice game, Would recommend!, meh, This...   \n",
      "1     [finished it, its good but after i finished it...   \n",
      "2     [Paradox Interactive\\tdouble the price for alm...   \n",
      "3     [Nice, Combat system can be better, All the fu...   \n",
      "4     [9/10, 10/10, 9/10, 10/10, -99999/10, 9/10, id...   \n",
      "...                                                 ...   \n",
      "1189  [RDM simulator, 192 cubic tonnes of mud simula...   \n",
      "1190  [do I have a brain?, 18+, relationship of Park...   \n",
      "1191  [For DC, Любителям Аниме., 123, Ну такое., Hor...   \n",
      "1192  [Peed all over my dog then I started the game....   \n",
      "1193  [Great for a few hours until you beat it., Fun...   \n",
      "\n",
      "                                            next_review  \n",
      "0     HUGE improvements on graphics and gameplay and...  \n",
      "1                                                   ok.  \n",
      "2                                              Unstable  \n",
      "3     You've probably already heard that this game i...  \n",
      "4                                                 10/10  \n",
      "...                                                 ...  \n",
      "1189                      best game ever made simulator  \n",
      "1190                                new vision, new way  \n",
      "1191    Кратко: мужик живёт с мамой в маленьком фургоне  \n",
      "1192  Story mode kinda sucks but the rest of the gam...  \n",
      "1193  While being free-to-play, Robocraft seems very...  \n",
      "\n",
      "[1194 rows x 5 columns]\n"
     ]
    }
   ],
   "source": [
    "# Load the pickled test frame from the working directory.\n",
    "# NOTE(review): this rebinds `df`, so any cell run afterwards that reads `df`\n",
    "# works on this frame rather than the one read from ./train_data.df.\n",
    "test_pickle_path = \"./Test_data.df\"\n",
    "df = pd.read_pickle(test_pickle_path)\n",
    "\n",
    "# Print the frame as plain text.\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten each row of `df` into (user_id, item_id) pairs.\n",
    "# NOTE(review): `df` is whichever frame was loaded last; in this notebook the most\n",
    "# recent assignment reads ./Test_data.df, not ./train_data.df.\n",
    "#\n",
    "# The row index stands in for the user id; if the data has an explicit user id,\n",
    "# that id should be used here instead.\n",
    "#\n",
    "# Only the first `len_seq` entries of `seq` are kept: in the printed frames, rows\n",
    "# with len_seq < 10 end in a repeated id, which looks like right-padding to a fixed\n",
    "# length. Counting that filler as an interaction would inflate the density.\n",
    "# TODO confirm the padding convention against the data-preparation code.\n",
    "interaction_pairs = [\n",
    "    (user_id, item)\n",
    "    for user_id, seq, seq_len, next_item in zip(\n",
    "        df.index, df['seq'], df['len_seq'], df['next']\n",
    "    )\n",
    "    for item in list(seq[:seq_len]) + [next_item]\n",
    "]\n",
    "\n",
    "# Convert the pairs into a two-column DataFrame.\n",
    "interaction_df = pd.DataFrame(interaction_pairs, columns=['user_id', 'item_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Interaction Density: 0.0031\n"
     ]
    }
   ],
   "source": [
    "# Interaction density = distinct (user, item) pairs / (distinct users * distinct items),\n",
    "# i.e. the fraction of non-zero cells in the user-item matrix.\n",
    "# The counts are taken directly from the pair table instead of building the dense\n",
    "# matrix with pd.crosstab, which allocates users x items cells and does not scale\n",
    "# to large frames.\n",
    "unique_pairs = interaction_df[['user_id', 'item_id']].drop_duplicates()\n",
    "\n",
    "non_zero_count = len(unique_pairs)  # number of non-zero cells in the matrix\n",
    "total_elements = (\n",
    "    interaction_df['user_id'].nunique() * interaction_df['item_id'].nunique()\n",
    ")  # total number of cells in the matrix\n",
    "\n",
    "# An empty interaction table has no defined density; report NaN instead of dividing by zero.\n",
    "interaction_density = non_zero_count / total_elements if total_elements else float('nan')\n",
    "print(f\"Interaction Density: {interaction_density:.4f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "MOE4REC",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}