lcjln committed on
Commit
5b59009
·
verified ·
1 Parent(s): ff7cf0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -36
app.py CHANGED
@@ -16,7 +16,7 @@ st.set_page_config(layout="wide")
16
  st.title("VOD 채팅 크롤러")
17
  st.write("VOD URL을 입력하고 채팅 데이터를 크롤링합니다.")
18
 
19
- # URL 입력 받기 (기본값 없이)
20
  vod_url = st.text_input("VOD URL 입력")
21
 
22
  # 선택된 시간대를 저장할 공간
@@ -25,7 +25,7 @@ if 'selected_times' not in st.session_state:
25
 
26
  # 채팅 크롤링 함수
27
  def crawl_chats(vod_url):
28
- # URL 설정 (여기서 /chats를 붙임)
29
  url = vod_url + "/chats"
30
 
31
  # 요청 헤더 (필요시 쿠키나 기타 헤더 정보 추가)
@@ -47,11 +47,10 @@ def crawl_chats(vod_url):
47
  chat_counts = defaultdict(int)
48
  laugh_counts = defaultdict(int)
49
  total_chats_collected = 0 # 총 수집된 채팅 개수
 
50
 
51
  # 채팅 데이터를 순차적으로 요청하여 가져오기
52
  status_text = st.empty() # 상태 메시지 출력용
53
- start_time = time.time() # 크롤링 시작 시간
54
-
55
  while True:
56
  # API 요청 보내기
57
  response = requests.get(url, params=params, headers=headers)
@@ -105,9 +104,7 @@ def crawl_chats(vod_url):
105
  laugh_counts[minute_key] += 1
106
 
107
  total_chats_collected += len(chats)
108
-
109
- # 현재 시간과 경과 시간 표시
110
- elapsed_time = time.time() - start_time
111
  status_text.text(f"현재까지 수집된 채팅 메시지 개수: {total_chats_collected} | 경과 시간: {int(elapsed_time // 60)}분 {int(elapsed_time % 60)}초")
112
 
113
  # 다음 요청을 위해 playerMessageTime 파라미터 업데이트
@@ -118,6 +115,39 @@ def crawl_chats(vod_url):
118
 
119
  return "\n".join(chat_logs), chat_counts, laugh_counts
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  # 버튼을 눌렀을 때 채팅 크롤링 시작
122
  if 'chat_logs' not in st.session_state:
123
  st.session_state['chat_logs'] = None
@@ -186,6 +216,7 @@ if st.session_state['chat_logs']:
186
  title="분당 채팅 및 ㅋㅋㅋㅋ 채팅 개수",
187
  xaxis_title="시간",
188
  yaxis_title="채팅 개수",
 
189
  xaxis=dict(showticklabels=False), # x축 시간 레이블 숨김
190
  hovermode="x unified", # 마우스를 올렸을 때 해당 x축에서 툴팁 표시
191
  showlegend=True,
@@ -213,35 +244,7 @@ if st.session_state['chat_logs']:
213
  minute = 0
214
 
215
  # 선택된 시간대를 표시 및 삭제 기능
216
- if st.session_state['selected_times']:
217
- st.write("### 선택된 시간대")
218
- for time in st.session_state['selected_times']:
219
- col1, col2 = st.columns([9, 1])
220
- col1.write(f"{time}")
221
- if col2.button("X", key=f"remove_{time}"):
222
- st.session_state['selected_times'].remove(time)
223
-
224
- # yt-dlp를 사용하여 선택된 시간대의 영상을 다운로드
225
- def download_clips():
226
- if st.session_state['selected_times']:
227
- for idx, start_time in enumerate(st.session_state['selected_times']):
228
- start_time_obj = datetime.datetime.strptime(start_time, '%H:%M:%S')
229
- end_time_obj = start_time_obj + datetime.timedelta(minutes=1)
230
-
231
- # 시작 시간과 끝 시간을 HH:MM:SS 형식으로 변환
232
- start_time_str = start_time_obj.strftime('%H:%M:%S')
233
- end_time_str = end_time_obj.strftime('%H:%M:%S')
234
-
235
- # yt-dlp 다운로드 명령어 실행
236
- output_filename = f"clip_{idx + 1}.mp4"
237
- ydl_opts = {
238
- 'outtmpl': os.path.join(os.path.expanduser("~/Downloads"), output_filename),
239
- 'download_sections': [f"*{start_time_str}-{end_time_str}"]
240
- }
241
-
242
- st.write(f"다운로드 중: {start_time_str} ~ {end_time_str} | 파일명: {output_filename}")
243
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
244
- ydl.download([vod_url]) # 원래 URL 사용
245
 
246
  # 다운로드 버튼 표시
247
  if st.button("선택된 시간대의 영상 다운로드"):
 
16
  st.title("VOD 채팅 크롤러")
17
  st.write("VOD URL을 입력하고 채팅 데이터를 크롤링합니다.")
18
 
19
+ # URL 입력 받기
20
  vod_url = st.text_input("VOD URL 입력")
21
 
22
  # 선택된 시간대를 저장할 공간
 
25
 
26
  # 채팅 크롤링 함수
27
  def crawl_chats(vod_url):
28
+ # URL 설정
29
  url = vod_url + "/chats"
30
 
31
  # 요청 헤더 (필요시 쿠키나 기타 헤더 정보 추가)
 
47
  chat_counts = defaultdict(int)
48
  laugh_counts = defaultdict(int)
49
  total_chats_collected = 0 # 총 수집된 채팅 개수
50
+ start_time = time.time() # 크롤링 시작 시간
51
 
52
  # 채팅 데이터를 순차적으로 요청하여 가져오기
53
  status_text = st.empty() # 상태 메시지 출력용
 
 
54
  while True:
55
  # API 요청 보내기
56
  response = requests.get(url, params=params, headers=headers)
 
104
  laugh_counts[minute_key] += 1
105
 
106
  total_chats_collected += len(chats)
107
+ elapsed_time = time.time() - start_time # 경과 시간 계산
 
 
108
  status_text.text(f"현재까지 수집된 채팅 메시지 개수: {total_chats_collected} | 경과 시간: {int(elapsed_time // 60)}분 {int(elapsed_time % 60)}초")
109
 
110
  # 다음 요청을 위해 playerMessageTime 파라미터 업데이트
 
115
 
116
  return "\n".join(chat_logs), chat_counts, laugh_counts
117
 
118
+ # 선택된 시간대를 표시 및 삭제 기능
119
+ def display_selected_times():
120
+ if st.session_state['selected_times']:
121
+ st.write("### 선택된 시간대")
122
+ for time in st.session_state['selected_times']:
123
+ col1, col2 = st.columns([9, 1])
124
+ col1.write(f"{time}")
125
+ if col2.button("X", key=f"remove_{time}"):
126
+ st.session_state['selected_times'].remove(time)
127
+
128
+ # yt-dlp를 사용하여 선택된 시간대의 영상을 다운로드
129
+ def download_clips():
130
+ if st.session_state['selected_times']:
131
+ st.write("### 영상 다운로드")
132
+ for idx, start_time in enumerate(st.session_state['selected_times']):
133
+ start_time_obj = datetime.datetime.strptime(start_time, '%H:%M:%S')
134
+ end_time_obj = start_time_obj + datetime.timedelta(minutes=1)
135
+
136
+ # 시작 시간과 끝 시간을 HH:MM:SS 형식으로 변환
137
+ start_time_str = start_time_obj.strftime('%H:%M:%S')
138
+ end_time_str = end_time_obj.strftime('%H:%M:%S')
139
+
140
+ # yt-dlp 다운로드 명령어 실행
141
+ output_filename = f"clip_{idx + 1}.mp4"
142
+ ydl_opts = {
143
+ 'outtmpl': os.path.join(os.path.expanduser("~/Downloads"), output_filename), # Downloads 폴더에 저장
144
+ 'download_sections': [f"*{start_time_str}-{end_time_str}"]
145
+ }
146
+
147
+ st.write(f"다운로드 중: {start_time_str} ~ {end_time_str} | 파일명: {output_filename}")
148
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
149
+ ydl.download([vod_url]) # 원래 URL 사용
150
+
151
  # 버튼을 눌렀을 때 채팅 크롤링 시작
152
  if 'chat_logs' not in st.session_state:
153
  st.session_state['chat_logs'] = None
 
216
  title="분당 채팅 및 ㅋㅋㅋㅋ 채팅 개수",
217
  xaxis_title="시간",
218
  yaxis_title="채팅 개수",
219
+ height=600, # 그래프 높이 설정
220
  xaxis=dict(showticklabels=False), # x축 시간 레이블 숨김
221
  hovermode="x unified", # 마우스를 올렸을 때 해당 x축에서 툴팁 표시
222
  showlegend=True,
 
244
  minute = 0
245
 
246
  # 선택된 시간대를 표시 및 삭제 기능
247
+ display_selected_times()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
  # 다운로드 버튼 표시
250
  if st.button("선택된 시간대의 영상 다운로드"):