|
import streamlit as st |
|
import pandas as pd |
|
import plotly.graph_objects as go |
|
from collections import defaultdict |
|
import datetime |
|
import re |
|
import requests |
|
import yt_dlp |
|
import os |
|
import time |
|
|
|
|
|
# Page setup: use the wide layout so the chart can span the browser width.
st.set_page_config(layout="wide")

# The UI strings were stored as mis-decoded UTF-8 (mojibake) with literals
# broken across lines; they are restored to the intended Korean text here.
st.title("VOD 채팅 크롤러")
st.write("VOD URL을 입력하고 채팅 데이터를 크롤링합니다.")

# URL of the VOD to crawl; also read as a module-level name by download_clips().
vod_url = st.text_input("VOD URL 입력")

# Timestamps the user has picked for clip download, kept in session state.
if 'selected_times' not in st.session_state:
    st.session_state['selected_times'] = []
|
|
|
|
|
# Messages containing an emoticon placeholder of the form "{:name:}" are skipped.
_EMOTICON_RE = re.compile(r'{:[^}]*:}')
# A message counts as "laughter" when it contains at least this many "ㅋ".
_LAUGH_CHAR = 'ㅋ'
_LAUGH_THRESHOLD = 4
# Seconds to wait for each API response before requests raises a timeout.
_REQUEST_TIMEOUT = 10
# datetime.strptime() fills in 1900-01-01 when no date is parsed; minute
# buckets are anchored to the same date so the keys keep their previous values.
_BUCKET_EPOCH = datetime.datetime(1900, 1, 1)


def _format_timestamp(total_seconds):
    """Return ``HH:MM:SS``, or ``MM:SS`` when the offset is under one hour."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours > 0:
        return f"{hours:02}:{minutes:02}:{seconds:02}"
    return f"{minutes:02}:{seconds:02}"


def crawl_chats(vod_url):
    """Page through the chat replay of a VOD and aggregate it per minute.

    Requests ``vod_url + "/chats"`` repeatedly, advancing the
    ``playerMessageTime`` query parameter to the ``nextPlayerMessageTime``
    value of each response, until a page has no chats, no next time is
    given, or the next time stops advancing. Progress is written to a
    Streamlit placeholder after every page.

    Args:
        vod_url: Base URL of the VOD API resource; ``"/chats"`` is appended.

    Returns:
        On success, a tuple ``(log_text, chat_counts, laugh_counts)``:
        ``log_text`` is one ``"<timestamp> - <message>"`` line per kept
        message, and the two ``defaultdict(int)`` objects map a
        ``datetime.datetime`` minute bucket (dated 1900-01-01 for offsets
        under 24 hours) to the number of messages / laughter messages.
        When a response status is not 200, returns
        ``(error_message, None, None)``.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    url = vod_url + "/chats"

    # NOTE(review): the Referer points at one specific video and the Cookie is
    # a placeholder value; confirm whether the API needs real values here.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Referer": "https://chzzk.naver.com/video/3646597",
        "Origin": "https://chzzk.naver.com",
        "Cookie": "your-cookie-string-here",
    }
    params = {
        "playerMessageTime": 0,
        "previousVideoChatSize": 50,
    }

    chat_logs = []
    chat_counts = defaultdict(int)
    laugh_counts = defaultdict(int)
    total_chats_collected = 0
    start_time = time.time()
    status_text = st.empty()

    while True:
        # A timeout keeps one stalled request from hanging the app forever.
        response = requests.get(
            url, params=params, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            return f"API 요청 실패: {response.status_code}", None, None

        # "content" may be missing or null; treat both as an empty page.
        content = response.json().get("content") or {}
        chats = content.get("videoChats") or []
        if not chats:
            break

        for chat in chats:
            chat_content = chat.get("content")
            message_time = chat.get("playerMessageTime")
            # Entries without text or a timestamp cannot be logged or bucketed.
            if not isinstance(chat_content, str) or message_time is None:
                continue
            if _EMOTICON_RE.search(chat_content):
                continue

            total_seconds = message_time // 1000  # playerMessageTime is divided by 1000 to get seconds
            chat_logs.append(f"{_format_timestamp(total_seconds)} - {chat_content}")

            # Build the minute bucket arithmetically instead of re-parsing the
            # formatted string, which failed for offsets of 24 hours or more.
            minute_key = _BUCKET_EPOCH + datetime.timedelta(minutes=total_seconds // 60)
            chat_counts[minute_key] += 1
            if chat_content.count(_LAUGH_CHAR) >= _LAUGH_THRESHOLD:
                laugh_counts[minute_key] += 1

        # The progress counter includes messages that were filtered out above.
        total_chats_collected += len(chats)
        elapsed_time = time.time() - start_time
        status_text.text(
            f"현재까지 수집된 채팅 메시지 개수: {total_chats_collected} | "
            f"경과 시간: {int(elapsed_time // 60)}분 {int(elapsed_time % 60)}초"
        )

        next_time = content.get("nextPlayerMessageTime")
        # Stop when there is no next page or the cursor does not move forward;
        # re-requesting the same position would loop forever.
        if next_time is None or next_time <= params["playerMessageTime"]:
            break
        params["playerMessageTime"] = next_time

    return "\n".join(chat_logs), chat_counts, laugh_counts
|
|
|
|
|
def add_selected_time(time):
    """Append ``time`` to the session's selected times unless already present.

    Args:
        time: The timestamp value to remember. The parameter name shadows the
            ``time`` module inside this function only.
    """
    chosen = st.session_state['selected_times']
    if time in chosen:
        return
    chosen.append(time)
|
|
|
|
|
def display_selected_times():
    """Render the selected timestamps, each with an "X" button to remove it.

    Does nothing when no timestamps are selected. Clicking a row's button
    removes that timestamp from ``st.session_state['selected_times']``.
    """
    selected = st.session_state['selected_times']
    if not selected:
        return

    st.write("### 선택된 시간대")
    # Iterate over a snapshot: removing from the list while iterating it
    # directly would make the loop skip the entry after the removed one.
    for selected_time in list(selected):
        col1, col2 = st.columns([9, 1])
        col1.write(f"{selected_time}")
        if col2.button("X", key=f"remove_{selected_time}"):
            selected.remove(selected_time)
|
|
|
|
|
# Length of each downloaded clip, measured from the selected timestamp.
_CLIP_LENGTH_SECONDS = 60


def _seconds_to_hms(total_seconds):
    """Return ``HH:MM:SS`` for an offset in seconds (hours may exceed 23)."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02}"


def download_clips():
    """Download a one-minute clip for every selected timestamp.

    Reads the ``HH:MM:SS`` strings in ``st.session_state['selected_times']``
    and the module-level ``vod_url``, and saves each clip as
    ``clip_<n>.mp4`` (numbered from 1) via yt-dlp. Does nothing when no
    timestamps are selected.

    Raises:
        ValueError: If a selected timestamp is not in ``HH:MM:SS`` form.
    """
    selected = st.session_state['selected_times']
    if not selected:
        return

    st.write("### 영상 다운로드")
    for idx, start_time in enumerate(selected, start=1):
        start_time_obj = datetime.datetime.strptime(start_time, '%H:%M:%S')
        # Work in seconds so the end of the clip cannot wrap past midnight.
        start_seconds = (
            start_time_obj.hour * 3600
            + start_time_obj.minute * 60
            + start_time_obj.second
        )
        end_seconds = start_seconds + _CLIP_LENGTH_SECONDS

        start_time_str = _seconds_to_hms(start_seconds)
        end_time_str = _seconds_to_hms(end_seconds)

        output_filename = f"clip_{idx}.mp4"
        ydl_opts = {
            'outtmpl': output_filename,
            # "download_sections" is only the command-line flag; the Python
            # API selects a time range through the "download_ranges" callback.
            'download_ranges': yt_dlp.utils.download_range_func(
                None, [(start_seconds, end_seconds)]
            ),
        }

        st.write(f"다운로드 중: {start_time_str} ~ {end_time_str} | 파일명: {output_filename}")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([vod_url])
|
|
|
|
|
# Make sure the crawl-result slots exist in session state; each starts as
# None until a crawl stores a value.
for _state_key in ('chat_logs', 'chat_counts', 'laugh_counts'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
|
|
|
# Start a crawl when the button is pressed and a URL has been entered.
if st.button("크롤링 시작"):
    if vod_url:
        chat_logs, chat_counts, laugh_counts = crawl_chats(vod_url)

        if chat_counts is None:
            # crawl_chats reports failure as (message, None, None); show the
            # message instead of storing it as if it were a chat log.
            st.error(chat_logs)
        else:
            st.session_state['chat_logs'] = chat_logs
            st.session_state['chat_counts'] = chat_counts
            st.session_state['laugh_counts'] = laugh_counts

            file_name = "chat_logs.txt"
            # Explicit UTF-8: the platform default encoding may be unable to
            # represent the chat text.
            with open(file_name, "w", encoding="utf-8") as file:
                file.write(chat_logs)

            st.download_button(
                label="채팅 로그 다운로드",
                data=chat_logs.encode("utf-8"),
                file_name=file_name,
                mime="text/plain",
            )
    else:
        st.warning("URL을 입력하세요.")
|
|
|
|
|
# Plot per-minute chat activity once a crawl result is available. The extra
# chat_counts check avoids calling .keys() on None when session state holds
# the (message, None, None) result of a failed crawl.
if st.session_state['chat_logs'] and st.session_state['chat_counts'] is not None:
    chat_counts = st.session_state['chat_counts']
    laugh_counts = st.session_state['laugh_counts']

    minute_keys = list(chat_counts)
    times = [minute.strftime('%H:%M:%S') for minute in minute_keys]
    chat_numbers = [chat_counts[minute] for minute in minute_keys]
    # Minutes with no laughter messages have no entry, so default to 0.
    laugh_numbers = [laugh_counts.get(minute, 0) for minute in minute_keys]

    df = pd.DataFrame({
        '시간': times,
        '전체 채팅 개수': chat_numbers,
        'ㅋㅋㅋㅋ 채팅 개수': laugh_numbers,
    })

    fig = go.Figure()

    # Blue line: all kept chat messages per minute.
    fig.add_trace(go.Scatter(
        x=df['시간'],
        y=df['전체 채팅 개수'],
        mode='lines',
        name='전체 채팅 개수',
        line=dict(color='blue'),
        hovertemplate='%{x} - 전체 채팅 개수: %{y}<extra></extra>',
    ))

    # Red line: laughter messages per minute.
    fig.add_trace(go.Scatter(
        x=df['시간'],
        y=df['ㅋㅋㅋㅋ 채팅 개수'],
        mode='lines',
        name='ㅋㅋㅋㅋ 채팅 개수',
        line=dict(color='red'),
        hovertemplate='%{x} - ㅋㅋㅋㅋ 채팅 개수: %{y}<extra></extra>',
    ))

    fig.update_layout(
        title="분당 채팅 및 ㅋㅋㅋㅋ 채팅 개수",
        xaxis_title="시간",
        yaxis_title="채팅 개수",
        height=600,
        xaxis=dict(showticklabels=False),
        hovermode="x unified",
        showlegend=True,
        margin=dict(l=50, r=50, t=100, b=100),
    )

    st.plotly_chart(fig, use_container_width=True)

    # NOTE(review): nothing in the visible code stores 'click_data' in session
    # state, so this branch only runs if something else sets that key.
    click_data = st.session_state.get('click_data')
    if click_data:
        time_selected = click_data['points'][0]['x']
        add_selected_time(time_selected)
|
|
|
|
|
# Show the current selection; the call renders nothing when it is empty.
display_selected_times()

# Download one clip per selected timestamp when the button is pressed.
if st.button("선택된 시간대의 영상 다운로드"):
    download_clips()