lcjln committed on
Commit
996dd27
·
verified ·
1 Parent(s): 406ed1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -23
app.py CHANGED
@@ -5,10 +5,8 @@ from collections import defaultdict
5
  import datetime
6
  import re
7
  import requests
8
- import time
9
-
10
- # 페이지 구성을 'wide'로 설정하여 여백 최소화
11
- st.set_page_config(layout="wide")
12
 
13
  # Streamlit 제목 및 설명
14
  st.title("VOD 채팅 크롤러")
@@ -17,6 +15,10 @@ st.write("VOD URL을 μž…λ ₯ν•˜κ³  μ±„νŒ… 데이터λ₯Ό ν¬λ‘€λ§ν•©λ‹ˆλ‹€.")
17
  # URL 입력 받기
18
  vod_url = st.text_input("VOD URL 입력")
19
 
 
 
 
 
20
  # 채팅 크롤링 함수
21
  def crawl_chats(vod_url):
22
  # URL 설정
@@ -41,7 +43,6 @@ def crawl_chats(vod_url):
41
  chat_counts = defaultdict(int)
42
  laugh_counts = defaultdict(int)
43
  total_chats_collected = 0 # 총 수집된 채팅 개수
44
- start_time = time.time() # 크둀링 μ‹œμž‘ μ‹œκ°„ 기둝
45
 
46
  # 채팅 데이터를 순차적으로 요청하여 가져오기
47
  status_text = st.empty() # 상태 메시지 출력용
@@ -98,14 +99,7 @@ def crawl_chats(vod_url):
98
  laugh_counts[minute_key] += 1
99
 
100
  total_chats_collected += len(chats)
101
-
102
- # 크둀링 κ²½κ³Ό μ‹œκ°„ 계산
103
- elapsed_time = time.time() - start_time
104
- elapsed_minutes = int(elapsed_time // 60)
105
- elapsed_seconds = int(elapsed_time % 60)
106
-
107
- # μƒνƒœ λ©”μ‹œμ§€ μ—…λ°μ΄νŠΈ (총 μ±„νŒ… κ°œμˆ˜μ™€ κ²½κ³Ό μ‹œκ°„ ν‘œμ‹œ)
108
- status_text.text(f"ν˜„μž¬κΉŒμ§€ μˆ˜μ§‘λœ μ±„νŒ… λ©”μ‹œμ§€ 개수: {total_chats_collected} | κ²½κ³Ό μ‹œκ°„: {elapsed_minutes}λΆ„ {elapsed_seconds}초")
109
 
110
  # λ‹€μŒ μš”μ²­μ„ μœ„ν•΄ playerMessageTime νŒŒλΌλ―Έν„° μ—…λ°μ΄νŠΈ
111
  next_time = data["content"].get("nextPlayerMessageTime")
@@ -115,6 +109,46 @@ def crawl_chats(vod_url):
115
 
116
  return "\n".join(chat_logs), chat_counts, laugh_counts
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  # λ²„νŠΌμ„ λˆŒλ €μ„ λ•Œ μ±„νŒ… 크둀링 μ‹œμž‘
119
  if 'chat_logs' not in st.session_state:
120
  st.session_state['chat_logs'] = None
@@ -162,7 +196,7 @@ if st.session_state['chat_logs']:
162
  fig.add_trace(go.Scatter(
163
  x=df['μ‹œκ°„'],
164
  y=df['전체 μ±„νŒ… 개수'],
165
- mode='lines', # 마컀 없이 μ„ λ§Œ ν‘œμ‹œ
166
  name='전체 μ±„νŒ… 개수',
167
  line=dict(color='blue'),
168
  hovertemplate='%{x} - 전체 μ±„νŒ… 개수: %{y}<extra></extra>'
@@ -172,7 +206,7 @@ if st.session_state['chat_logs']:
172
  fig.add_trace(go.Scatter(
173
  x=df['μ‹œκ°„'],
174
  y=df['γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수'],
175
- mode='lines', # 마컀 없이 μ„ λ§Œ ν‘œμ‹œ
176
  name='γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수',
177
  line=dict(color='red'),
178
  hovertemplate='%{x} - γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수: %{y}<extra></extra>'
@@ -181,16 +215,22 @@ if st.session_state['chat_logs']:
181
  # κ·Έλž˜ν”„ λ ˆμ΄μ•„μ›ƒ μ„€μ •
182
  fig.update_layout(
183
  title="λΆ„λ‹Ή μ±„νŒ… 및 γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수",
184
- height=600, # κ·Έλž˜ν”„μ˜ 높이λ₯Ό 킀움
185
  xaxis_title="μ‹œκ°„",
186
  yaxis_title="μ±„νŒ… 개수",
187
- xaxis=dict(
188
- showticklabels=False # xμΆ• μ‹œκ°„ λ ˆμ΄λΈ” μˆ¨κΉ€
189
- ),
190
  hovermode="x unified", # 마우슀λ₯Ό μ˜¬λ Έμ„ λ•Œ ν•΄λ‹Ή xμΆ•μ—μ„œ 툴팁 ν‘œμ‹œ
191
- showlegend=True, # λ²”λ‘€ ν‘œμ‹œ
192
- margin=dict(l=50, r=50, t=100, b=100) # κ·Έλž˜ν”„ μ•„λž˜μͺ½ 여백을 μ‘°μ •
193
  )
194
 
195
- # κ·Έλž˜ν”„ 좜λ ₯
196
- st.plotly_chart(fig, use_container_width=True) # ν™”λ©΄
 
 
 
 
 
 
 
 
 
 
5
  import datetime
6
  import re
7
  import requests
8
+ import yt_dlp
9
+ import os
 
 
10
 
11
  # Streamlit 제목 및 설명
12
  st.title("VOD 채팅 크롤러")
 
15
  # URL 입력 받기
16
  vod_url = st.text_input("VOD URL 입력")
17
 
18
+ # 선택된 시간대를 저장할 공간
19
+ if 'selected_times' not in st.session_state:
20
+ st.session_state['selected_times'] = []
21
+
22
  # 채팅 크롤링 함수
23
  def crawl_chats(vod_url):
24
  # URL 설정
 
43
  chat_counts = defaultdict(int)
44
  laugh_counts = defaultdict(int)
45
  total_chats_collected = 0 # 총 수집된 채팅 개수
 
46
 
47
  # 채팅 데이터를 순차적으로 요청하여 가져오기
48
  status_text = st.empty() # 상태 메시지 출력용
 
99
  laugh_counts[minute_key] += 1
100
 
101
  total_chats_collected += len(chats)
102
+ status_text.text(f"ν˜„μž¬κΉŒμ§€ μˆ˜μ§‘λœ μ±„νŒ… λ©”μ‹œμ§€ 개수: {total_chats_collected}")
 
 
 
 
 
 
 
103
 
104
  # λ‹€μŒ μš”μ²­μ„ μœ„ν•΄ playerMessageTime νŒŒλΌλ―Έν„° μ—…λ°μ΄νŠΈ
105
  next_time = data["content"].get("nextPlayerMessageTime")
 
109
 
110
  return "\n".join(chat_logs), chat_counts, laugh_counts
111
 
112
+ # ν΄λ¦­ν•œ μ‹œκ°„λŒ€λ₯Ό 선택 및 좜λ ₯
113
+ def handle_time_selection(click_data):
114
+ if click_data:
115
+ selected_time = click_data['points'][0]['x']
116
+ if selected_time not in st.session_state['selected_times']:
117
+ st.session_state['selected_times'].append(selected_time)
118
+
119
+ # μ„ νƒλœ μ‹œκ°„λŒ€λ₯Ό ν‘œμ‹œ 및 μ‚­μ œ κΈ°λŠ₯
120
+ def display_selected_times():
121
+ if st.session_state['selected_times']:
122
+ st.write("### μ„ νƒλœ μ‹œκ°„λŒ€")
123
+ for time in st.session_state['selected_times']:
124
+ col1, col2 = st.columns([9, 1])
125
+ col1.write(f"{time}")
126
+ if col2.button("X", key=f"remove_{time}"):
127
+ st.session_state['selected_times'].remove(time)
128
+
129
+ # yt-dlpλ₯Ό μ‚¬μš©ν•˜μ—¬ μ„ νƒλœ μ‹œκ°„λŒ€μ˜ μ˜μƒμ„ λ‹€μš΄λ‘œλ“œ
130
+ def download_clips():
131
+ if st.session_state['selected_times']:
132
+ st.write("### μ˜μƒ λ‹€μš΄λ‘œλ“œ")
133
+ for idx, start_time in enumerate(st.session_state['selected_times']):
134
+ start_time_obj = datetime.datetime.strptime(start_time, '%H:%M:%S')
135
+ end_time_obj = start_time_obj + datetime.timedelta(minutes=1)
136
+
137
+ # μ‹œμž‘ μ‹œκ°„κ³Ό 끝 μ‹œκ°„μ„ HH:MM:SS ν˜•μ‹μœΌλ‘œ λ³€ν™˜
138
+ start_time_str = start_time_obj.strftime('%H:%M:%S')
139
+ end_time_str = end_time_obj.strftime('%H:%M:%S')
140
+
141
+ # yt-dlp λ‹€μš΄λ‘œλ“œ λͺ…λ Ήμ–΄ μ‹€ν–‰
142
+ output_filename = f"clip_{idx + 1}.mp4"
143
+ ydl_opts = {
144
+ 'outtmpl': output_filename,
145
+ 'download_sections': [f"*{start_time_str}-{end_time_str}"]
146
+ }
147
+
148
+ st.write(f"λ‹€μš΄λ‘œλ“œ 쀑: {start_time_str} ~ {end_time_str} | 파일λͺ…: {output_filename}")
149
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
150
+ ydl.download([vod_url])
151
+
152
  # λ²„νŠΌμ„ λˆŒλ €μ„ λ•Œ μ±„νŒ… 크둀링 μ‹œμž‘
153
  if 'chat_logs' not in st.session_state:
154
  st.session_state['chat_logs'] = None
 
196
  fig.add_trace(go.Scatter(
197
  x=df['μ‹œκ°„'],
198
  y=df['전체 μ±„νŒ… 개수'],
199
+ mode='lines',
200
  name='전체 μ±„νŒ… 개수',
201
  line=dict(color='blue'),
202
  hovertemplate='%{x} - 전체 μ±„νŒ… 개수: %{y}<extra></extra>'
 
206
  fig.add_trace(go.Scatter(
207
  x=df['μ‹œκ°„'],
208
  y=df['γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수'],
209
+ mode='lines',
210
  name='γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수',
211
  line=dict(color='red'),
212
  hovertemplate='%{x} - γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수: %{y}<extra></extra>'
 
215
  # κ·Έλž˜ν”„ λ ˆμ΄μ•„μ›ƒ μ„€μ •
216
  fig.update_layout(
217
  title="λΆ„λ‹Ή μ±„νŒ… 및 γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수",
 
218
  xaxis_title="μ‹œκ°„",
219
  yaxis_title="μ±„νŒ… 개수",
220
+ xaxis=dict(showticklabels=False), # xμΆ• μ‹œκ°„ λ ˆμ΄λΈ” μˆ¨κΉ€
 
 
221
  hovermode="x unified", # 마우슀λ₯Ό μ˜¬λ Έμ„ λ•Œ ν•΄λ‹Ή xμΆ•μ—μ„œ 툴팁 ν‘œμ‹œ
222
+ showlegend=True,
223
+ margin=dict(l=50, r=50, t=100, b=100)
224
  )
225
 
226
+ # κ·Έλž˜ν”„ 좜λ ₯ 및 클릭 이벀트 처리
227
+ st.plotly_chart(fig, use_container_width=True)
228
+ click_data = st.plotly_chart(fig).get_current_click()
229
+ handle_time_selection(click_data)
230
+
231
+ # μ„ νƒλœ μ‹œκ°„λŒ€ ν‘œμ‹œ
232
+ display_selected_times()
233
+
234
+ # λ‹€μš΄λ‘œλ“œ λ²„νŠΌ ν‘œμ‹œ
235
+ if st.button("μ„ νƒλœ μ‹œκ°„λŒ€μ˜ μ˜μƒ λ‹€μš΄λ‘œλ“œ"):
236
+ download_clips()