# File-viewer residue, not part of the program: "File size: 4,201 Bytes", "fe42b4a" (line-number gutter 1-116 dropped).
import streamlit as st
import requests
import re
# Streamlit page title and description.
# The literals below were mis-decoded (UTF-8 bytes read as ISO-8859-7, with
# 0x85 bytes turned into line breaks inside the strings); they are restored
# to the Korean text those bytes encode.
st.title("VOD 채팅 크롤러")
st.write("VOD URL을 입력하고 채팅 데이터를 크롤링합니다.")

# Read the VOD URL from the user; the button handler passes it to crawl_chats.
vod_url = st.text_input("VOD URL 입력")
# Chat crawling function and its helpers.

# Matches emoticon placeholders of the form "{:name:}"; compiled once so the
# per-chat loop does not look the pattern up again for every message.
_EMOTICON_PATTERN = re.compile(r"{:[^}]*:}")


def _format_player_time(milliseconds):
    """Format a millisecond offset as "MM:SS", or "HH:MM:SS" from one hour on."""
    hours, remainder = divmod(int(milliseconds) // 1000, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours > 0:
        return f"{hours:02}:{minutes:02}:{seconds:02}"
    return f"{minutes:02}:{seconds:02}"


def crawl_chats(vod_url, session=None, timeout=10):
    """Collect the chat log of a VOD by paging through its "/chats" endpoint.

    Requests start at player time 0 and continue with the
    ``nextPlayerMessageTime`` value of each response until a page is empty,
    a page repeats the previous one, or no next time is given.

    Args:
        vod_url: Base URL of the VOD resource; "/chats" is appended to it.
            Surrounding whitespace and trailing slashes are ignored.
        session: Optional object with a requests-compatible ``get`` method
            (for example a ``requests.Session``). Defaults to the
            ``requests`` module itself.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The collected chats as one string, one "<time> - <message>" line per
        chat. Messages containing an emoticon placeholder ("{:name:}") are
        left out. On a failed request the return value is instead an error
        message starting with "API 요청 실패:" and nothing collected so far
        is returned.
    """
    # A trailing slash on the input would otherwise yield ".../" + "/chats".
    url = vod_url.strip().rstrip("/") + "/chats"

    # Request headers (add cookies or other header values here if required).
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        # NOTE(review): the Referer is pinned to one specific video id
        # regardless of vod_url - confirm whether the API checks it.
        "Referer": "https://chzzk.naver.com/video/3646597",
        "Origin": "https://chzzk.naver.com",
        # Placeholder: after logging in, copy the Cookie value shown under
        # Request Headers in the browser developer tools.
        "Cookie": "your-cookie-string-here",
    }

    # Initial request parameters.
    params = {
        "playerMessageTime": 0,  # start time (0 = 0 seconds)
        "previousVideoChatSize": 50,  # number of chat messages to fetch
    }

    http = requests if session is None else session
    previous_page = None
    chat_logs = []

    while True:
        try:
            response = http.get(
                url, params=params, headers=headers, timeout=timeout
            )
        except requests.RequestException as exc:
            # Report transport errors the same way as a bad status code.
            return f"API 요청 실패: {exc}"

        if response.status_code != 200:
            return f"API 요청 실패: {response.status_code}"

        try:
            data = response.json()
        except ValueError as exc:
            return f"API 요청 실패: {exc}"

        # "content" may be missing or JSON null; treat both as an empty page.
        content = (data.get("content") if isinstance(data, dict) else None) or {}
        chats = content.get("videoChats") or []

        # Stop on an empty page, or when the same page comes back again
        # (which would otherwise loop forever).
        if not chats or chats == previous_page:
            break

        for chat in chats:
            text = chat.get("content")
            message_time = chat.get("playerMessageTime")
            # Skip malformed entries instead of raising on None.
            if not isinstance(text, str) or message_time is None:
                continue
            # Keep only messages without an emoticon placeholder.
            if _EMOTICON_PATTERN.search(text):
                continue
            chat_logs.append(f"{_format_player_time(message_time)} - {text}")

        previous_page = chats

        # Advance the cursor for the next request.
        next_time = content.get("nextPlayerMessageTime")
        if next_time is None:
            break
        params["playerMessageTime"] = next_time

    return "\n".join(chat_logs)
# Start crawling when the button is pressed.
# The Korean literals below are restored from mis-decoded text (UTF-8 bytes
# read as ISO-8859-7) that had also been split across lines.
if st.button("크롤링 시작"):
    if vod_url:
        # crawl_chats returns either the chat log or an error message string;
        # both are shown as-is in the text area.
        chat_logs = crawl_chats(vod_url)
        st.text_area("채팅 로그", value=chat_logs, height=400)

        # Save to a file. UTF-8 is set explicitly because the platform
        # default encoding may be unable to represent the chat text.
        file_name = "chat_logs.txt"
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(chat_logs)

        # Show the download button, serving the bytes just written.
        with open(file_name, "rb") as file:
            st.download_button(
                label="채팅 로그 다운로드",
                data=file,
                file_name=file_name,
                mime="text/plain",
            )
    else:
        st.warning("URL을 입력하세요.")