lcjln committed on
Commit
e512cee
·
verified ·
1 Parent(s): 1f9f260

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -18
app.py CHANGED
@@ -1,6 +1,10 @@
1
  import streamlit as st
2
- import requests
 
 
 
3
  import re
 
4
 
5
  # Streamlit 제목 및 설명
6
  st.title("VOD μ±„νŒ… 크둀러")
@@ -29,10 +33,9 @@ def crawl_chats(vod_url):
29
  "previousVideoChatSize": 50 # κ°€μ Έμ˜¬ μ±„νŒ… λ©”μ‹œμ§€ 수
30
  }
31
 
32
- # 마지막 수집한 채팅 메시지 저장 변수
33
- last_collected_chats = None
34
- total_collected_chats = 0
35
  chat_logs = []
 
 
36
 
37
  # 채팅 데이터를 순차적으로 요청하여 가져오기
38
  while True:
@@ -41,7 +44,7 @@ def crawl_chats(vod_url):
41
 
42
  # 요청 결과가 성공적이지 않을 경우 종료
43
  if response.status_code != 200:
44
- return f"API μš”μ²­ μ‹€νŒ¨: {response.status_code}"
45
 
46
  # JSON 데이터로 변환
47
  data = response.json()
@@ -53,10 +56,6 @@ def crawl_chats(vod_url):
53
  if not chats:
54
  break
55
 
56
- # 현재 수집된 채팅이 이전에 수집된 채팅과 동일한지 확인
57
- if last_collected_chats == chats:
58
- break
59
-
60
  # 채팅 메시지(content)와 playerMessageTime(채팅 시간)만 수집
61
  for chat in chats:
62
  chat_content = chat.get("content") # μ±„νŒ… λ©”μ‹œμ§€ λ‚΄μš©
@@ -76,11 +75,14 @@ def crawl_chats(vod_url):
76
  # 채팅 로그에 추가
77
  chat_logs.append(f"{formatted_time} - {chat_content}")
78
 
79
- # 마지막으로 수집한 채팅 메시지 업데이트
80
- last_collected_chats = chats
 
 
81
 
82
- # 채팅 데이터 개수 업데이트 및 출력
83
- total_collected_chats += len(chats)
 
84
 
85
  # 다음 요청을 위해 playerMessageTime 파라미터 업데이트
86
  next_time = data["content"].get("nextPlayerMessageTime")
@@ -88,15 +90,13 @@ def crawl_chats(vod_url):
88
  break
89
  params["playerMessageTime"] = next_time
90
 
91
- # 결과 반환
92
- return "\n".join(chat_logs)
93
 
94
  # 버튼을 눌렀을 때 채팅 크롤링 시작
95
  if st.button("크둀링 μ‹œμž‘"):
96
  if vod_url:
97
- chat_logs = crawl_chats(vod_url)
98
- st.text_area("μ±„νŒ… 둜그", value=chat_logs, height=400)
99
-
100
  # 파일로 저장
101
  file_name = "chat_logs.txt"
102
  with open(file_name, "w") as file:
@@ -110,5 +110,53 @@ if st.button("크둀링 μ‹œμž‘"):
110
  file_name=file_name,
111
  mime="text/plain"
112
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  else:
114
  st.warning("URL을 μž…λ ₯ν•˜μ„Έμš”.")
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import plotly.graph_objects as go
4
+ from collections import defaultdict
5
+ import datetime
6
  import re
7
+ import requests
8
 
9
  # Streamlit 제목 및 설명
10
  st.title("VOD μ±„νŒ… 크둀러")
 
33
  "previousVideoChatSize": 50 # κ°€μ Έμ˜¬ μ±„νŒ… λ©”μ‹œμ§€ 수
34
  }
35
 
 
 
 
36
  chat_logs = []
37
+ chat_counts = defaultdict(int)
38
+ laugh_counts = defaultdict(int)
39
 
40
  # 채팅 데이터를 순차적으로 요청하여 가져오기
41
  while True:
 
44
 
45
  # 요청 결과가 성공적이지 않을 경우 종료
46
  if response.status_code != 200:
47
+ return f"API μš”μ²­ μ‹€νŒ¨: {response.status_code}", None, None
48
 
49
  # JSON 데이터로 변환
50
  data = response.json()
 
56
  if not chats:
57
  break
58
 
 
 
 
 
59
  # 채팅 메시지(content)와 playerMessageTime(채팅 시간)만 수집
60
  for chat in chats:
61
  chat_content = chat.get("content") # μ±„νŒ… λ©”μ‹œμ§€ λ‚΄μš©
 
75
  # 채팅 로그에 추가
76
  chat_logs.append(f"{formatted_time} - {chat_content}")
77
 
78
+ # 시간대별로 채팅 개수 계산
79
+ time_obj = datetime.datetime.strptime(formatted_time, '%H:%M:%S')
80
+ minute_key = time_obj.replace(second=0) # λΆ„ λ‹¨μœ„λ‘œ λ³€ν™˜ν•˜μ—¬ 집계
81
+ chat_counts[minute_key] += 1
82
 
83
+ # 'ㅋㅋㅋㅋ'가 포함된 채팅 개수 카운트
84
+ if len(re.findall(r'γ…‹', chat_content)) >= 4:
85
+ laugh_counts[minute_key] += 1
86
 
87
  # 다음 요청을 위해 playerMessageTime 파라미터 업데이트
88
  next_time = data["content"].get("nextPlayerMessageTime")
 
90
  break
91
  params["playerMessageTime"] = next_time
92
 
93
+ return "\n".join(chat_logs), chat_counts, laugh_counts
 
94
 
95
  # 버튼을 눌렀을 때 채팅 크롤링 시작
96
  if st.button("크둀링 μ‹œμž‘"):
97
  if vod_url:
98
+ chat_logs, chat_counts, laugh_counts = crawl_chats(vod_url)
99
+
 
100
  # 파일로 저장
101
  file_name = "chat_logs.txt"
102
  with open(file_name, "w") as file:
 
110
  file_name=file_name,
111
  mime="text/plain"
112
  )
113
+
114
+ # 그래프 출력
115
+ if chat_counts and laugh_counts:
116
+ # 데이터프레임 생성
117
+ times = [time.strftime('%H:%M:%S') for time in chat_counts.keys()]
118
+ chat_numbers = list(chat_counts.values())
119
+ laugh_numbers = [laugh_counts.get(time, 0) for time in chat_counts.keys()]
120
+ df = pd.DataFrame({'μ‹œκ°„': times, '전체 μ±„νŒ… 개수': chat_numbers, 'γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수': laugh_numbers})
121
+
122
+ # Plotly 선 그래프 그리기
123
+ fig = go.Figure()
124
+
125
+ # 전체 채팅 개수 선 그래프 추가
126
+ fig.add_trace(go.Scatter(
127
+ x=df['μ‹œκ°„'],
128
+ y=df['전체 μ±„νŒ… 개수'],
129
+ mode='lines', # 마컀 없이 μ„ λ§Œ ν‘œμ‹œ
130
+ name='전체 μ±„νŒ… 개수',
131
+ line=dict(color='blue'),
132
+ hovertemplate='%{x} - 전체 μ±„νŒ… 개수: %{y}<extra></extra>'
133
+ ))
134
+
135
+ # ㅋㅋㅋㅋ 채팅 개수 선 그래프 추가
136
+ fig.add_trace(go.Scatter(
137
+ x=df['μ‹œκ°„'],
138
+ y=df['γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수'],
139
+ mode='lines', # 마컀 없이 μ„ λ§Œ ν‘œμ‹œ
140
+ name='γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수',
141
+ line=dict(color='red'),
142
+ hovertemplate='%{x} - γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수: %{y}<extra></extra>'
143
+ ))
144
+
145
+ # 그래프 레이아웃 설정
146
+ fig.update_layout(
147
+ title="λΆ„λ‹Ή μ±„νŒ… 및 γ…‹γ…‹γ…‹γ…‹ μ±„νŒ… 개수",
148
+ xaxis_title="μ‹œκ°„",
149
+ yaxis_title="μ±„νŒ… 개수",
150
+ xaxis=dict(
151
+ showticklabels=True # xμΆ• μ‹œκ°„ λ ˆμ΄λΈ” ν‘œμ‹œ
152
+ ),
153
+ hovermode="x unified", # 마우슀λ₯Ό μ˜¬λ Έμ„ λ•Œ ν•΄λ‹Ή xμΆ•μ—μ„œ 툴팁 ν‘œμ‹œ
154
+ showlegend=True, # λ²”λ‘€ ν‘œμ‹œ
155
+ margin=dict(l=50, r=50, t=100, b=100) # κ·Έλž˜ν”„ μ•„λž˜μͺ½ 여백을 μ‘°μ •
156
+ )
157
+
158
+ # 그래프 출력
159
+ st.plotly_chart(fig)
160
+
161
  else:
162
  st.warning("URL을 μž…λ ₯ν•˜μ„Έμš”.")