lcjln committed on
Commit
fe42b4a
·
verified ·
1 Parent(s): ad696a9

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +116 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import re
4
+
5
# Streamlit page title and short description shown at the top of the app.
st.title(body="VOD μ±„νŒ… 크둀러")
st.write("VOD URL을 μž…λ ₯ν•˜κ³  μ±„νŒ… 데이터λ₯Ό ν¬λ‘€λ§ν•©λ‹ˆλ‹€.")

# Text box for the VOD URL; the entered value is read by the button handler
# further down in the script.
vod_url = st.text_input(label="VOD URL μž…λ ₯")
11
+
12
+
13
+ # μ±„νŒ… 크둀링 ν•¨μˆ˜
14
# Matches chat text that contains an emoticon placeholder of the form "{:name:}".
_EMOTICON_PATTERN = re.compile(r'{:[^}]*:}')

# Seconds to wait for each API response; without a timeout a stalled
# connection would block the Streamlit script indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10


def _format_timestamp(message_time_ms):
    """Format a millisecond offset as "MM:SS", or "HH:MM:SS" from one hour on."""
    hours, remainder = divmod(int(message_time_ms) // 1000, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours > 0:
        return f"{hours:02}:{minutes:02}:{seconds:02}"
    return f"{minutes:02}:{seconds:02}"


def crawl_chats(vod_url):
    """Page through ``<vod_url>/chats`` and collect the text-only chat lines.

    Each page is requested with the current ``playerMessageTime`` cursor; the
    cursor is then advanced to the ``nextPlayerMessageTime`` value found in
    the response. Paging stops when a page has no chats, when a page is
    identical to the previous one, or when no next cursor is returned.

    Args:
        vod_url: Base URL of the VOD resource; "/chats" is appended to it.
            Surrounding whitespace and trailing slashes are ignored.

    Returns:
        A newline-joined string of "<time> - <message>" lines, or a failure
        message string when a request fails (non-200 status, network error,
        or a response body that is not valid JSON). Chats collected before a
        failure are discarded.
        NOTE(review): both outcomes are plain strings, so the caller cannot
        tell a failure from an ordinary log without inspecting the text.
    """
    # A trailing slash on the input would otherwise produce ".../<id>//chats".
    url = vod_url.strip().rstrip("/") + "/chats"

    # Request headers (add cookies or other header values as needed).
    # NOTE(review): Referer is pinned to one specific video and Cookie is a
    # placeholder; presumably both should be adjusted per deployment.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Referer": "https://chzzk.naver.com/video/3646597",
        "Origin": "https://chzzk.naver.com",
        # After logging in, copy the cookie value from the browser developer
        # tools' Request Headers tab.
        "Cookie": "your-cookie-string-here",
    }

    # Initial query parameters.
    params = {
        "playerMessageTime": 0,        # starting cursor (0 = start of the video)
        "previousVideoChatSize": 50,   # number of chat messages to request
    }

    last_collected_chats = None  # previous page, used to detect a repeated page
    chat_logs = []

    while True:
        try:
            response = requests.get(
                url,
                params=params,
                headers=headers,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            # Report network failures the same way as a bad status code
            # instead of crashing the app.
            return f"API μš”μ²­ μ‹€νŒ¨: {exc}"

        if response.status_code != 200:
            return f"API μš”μ²­ μ‹€νŒ¨: {response.status_code}"

        try:
            data = response.json()
        except ValueError as exc:
            # The body was not valid JSON (e.g. an HTML error page).
            return f"API μš”μ²­ μ‹€νŒ¨: {exc}"

        # "content" may be absent or null; treat both as an empty page.
        content = (data.get("content") if isinstance(data, dict) else None) or {}
        chats = content.get("videoChats") or []

        # Stop when there is nothing more to read.
        if not chats:
            break

        # Stop when the server keeps returning the same page.
        if last_collected_chats == chats:
            break

        for chat in chats:
            chat_content = chat.get("content")
            message_time = chat.get("playerMessageTime")

            # Entries without text or without a timestamp cannot be logged.
            if not isinstance(chat_content, str) or message_time is None:
                continue

            # Keep plain-text messages only; skip any containing an emoticon
            # placeholder.
            if _EMOTICON_PATTERN.search(chat_content):
                continue

            chat_logs.append(f"{_format_timestamp(message_time)} - {chat_content}")

        last_collected_chats = chats

        # Advance the cursor for the next request.
        next_time = content.get("nextPlayerMessageTime")
        if next_time is None:
            break
        params["playerMessageTime"] = next_time

    return "\n".join(chat_logs)
94
+
95
+
96
+ # λ²„νŠΌμ„ λˆŒλ €μ„ λ•Œ μ±„νŒ… 크둀링 μ‹œμž‘
97
# Run the crawl when the button is pressed.
if st.button("크둀링 μ‹œμž‘"):
    if vod_url:
        chat_logs = crawl_chats(vod_url)
        st.text_area("μ±„νŒ… 둜그", value=chat_logs, height=400)

        # Save the log to a file. The encoding is pinned to UTF-8 because the
        # platform default may be unable to encode Korean chat text.
        file_name = "chat_logs.txt"
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(chat_logs)

        # Offer the saved file for download.
        with open(file_name, "rb") as file:
            st.download_button(
                label="μ±„νŒ… 둜그 λ‹€μš΄λ‘œλ“œ",
                data=file,
                file_name=file_name,
                mime="text/plain",
            )
    else:
        # The button was pressed with an empty URL field.
        st.warning("URL을 μž…λ ₯ν•˜μ„Έμš”.")
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ streamlit
2
+ requests