Spaces:

lcjln
/

VTVTB

Sleeping

File size: 6,639 Bytes

fe42b4a
e512cee
 
 
 
fe42b4a
e512cee
fe42b4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e512cee
 
fe42b4a
 
 
 
 
 
 
 
e512cee
fe42b4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e512cee
 
 
 
fe42b4a
e512cee
 
 
fe42b4a
 
 
 
 
 
 
e512cee
fe42b4a
 
62a9a6a
 
 
 
 
 
 
fe42b4a
 
e512cee
 
62a9a6a
 
 
 
fe42b4a
 
 
 
 
 
 
 
 
 
 
 
 
 
62a9a6a

import streamlit as st
import pandas as pd
import plotly.graph_objects as go
from collections import defaultdict
import datetime
import re
import requests

# Streamlit page title and a one-line description of what the app does
st.title("VOD 채팅 크롤러")
st.write("VOD URL을 입력하고 채팅 데이터를 크롤링합니다.")

# Text box for the VOD URL; its value is bound to vod_url
vod_url = st.text_input("VOD URL 입력")

# Matches "{:name:}"-style emoticon placeholders; messages containing one are skipped.
_EMOTICON_PATTERN = re.compile(r'{:[^}]*:}')

# Upper bound (seconds) on how long a single API request may block.
_REQUEST_TIMEOUT_SECONDS = 10


def _format_offset(total_seconds):
    """Render a playback offset in seconds as "HH:MM:SS", or "MM:SS" under one hour."""
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours > 0:
        return f"{hours:02}:{minutes:02}:{seconds:02}"
    return f"{minutes:02}:{seconds:02}"


def _minute_bucket(total_seconds):
    """Return the datetime key (anchored at 1900-01-01) of the minute containing the offset."""
    # Same key that strptime(..., '%H:%M:%S').replace(second=0) yields, but built
    # arithmetically so that "MM:SS" offsets and offsets of 24h or more do not raise.
    return datetime.datetime(1900, 1, 1) + datetime.timedelta(minutes=total_seconds // 60)


def crawl_chats(vod_url):
    """Fetch the chat messages of a VOD page by page and aggregate them per minute.

    Requests ``<vod_url>/chats`` repeatedly, advancing the ``playerMessageTime``
    query parameter to the ``nextPlayerMessageTime`` value of each response,
    until a page has no chats, no next time is given, or the next time does not
    advance. Messages containing an emoticon placeholder (``{:...:}``) are
    skipped, as are entries without a text content or a numeric timestamp.

    Args:
        vod_url: Base URL of the VOD endpoint; surrounding whitespace and
            trailing slashes are ignored.

    Returns:
        A 3-tuple ``(log_text, chat_counts, laugh_counts)``:

        * ``log_text`` - newline-joined ``"<time> - <message>"`` lines.
        * ``chat_counts`` - ``defaultdict(int)`` mapping a minute (a
          ``datetime`` anchored at 1900-01-01) to the number of collected chats.
        * ``laugh_counts`` - same keys, counting chats that contain at least
          four ``ㅋ`` characters in total (not necessarily consecutive).

        On a failed request, a non-200 status, or a non-JSON body the function
        returns ``(error_message, None, None)`` instead.
    """
    url = vod_url.strip().rstrip("/") + "/chats"

    # Request headers. NOTE: the Cookie value is a placeholder and must be
    # replaced with a real cookie string if the endpoint requires a login.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Referer": "https://chzzk.naver.com/video/3646597",
        "Origin": "https://chzzk.naver.com",
        "Cookie": "your-cookie-string-here"
    }

    cursor = 0  # playback position (ms) the next page starts from
    params = {
        "playerMessageTime": cursor,
        "previousVideoChatSize": 50  # number of chat messages requested
    }

    chat_logs = []
    chat_counts = defaultdict(int)
    laugh_counts = defaultdict(int)

    while True:
        try:
            response = requests.get(
                url, params=params, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS
            )
        except requests.RequestException as exc:
            return f"API 요청 실패: {exc}", None, None

        if response.status_code != 200:
            return f"API 요청 실패: {response.status_code}", None, None

        try:
            data = response.json()
        except ValueError:
            # A 200 response with a non-JSON body (e.g. an HTML page).
            return "API 요청 실패: 응답이 JSON 형식이 아닙니다.", None, None

        # "content" may be missing or null; treat both as an empty page.
        content = (data.get("content") if isinstance(data, dict) else None) or {}
        chats = content.get("videoChats") or []
        if not chats:
            break

        for chat in chats:
            chat_content = chat.get("content")
            message_time = chat.get("playerMessageTime")

            # Skip entries that cannot be formatted or bucketed.
            if not isinstance(chat_content, str):
                continue
            if not isinstance(message_time, (int, float)):
                continue
            # Keep plain-text messages only.
            if _EMOTICON_PATTERN.search(chat_content):
                continue

            total_seconds = int(message_time) // 1000  # ms -> s
            chat_logs.append(f"{_format_offset(total_seconds)} - {chat_content}")

            minute_key = _minute_bucket(total_seconds)
            chat_counts[minute_key] += 1
            if chat_content.count('ㅋ') >= 4:
                laugh_counts[minute_key] += 1

        next_time = content.get("nextPlayerMessageTime")
        # Stop when there is no next page, or when the cursor does not advance
        # (which would otherwise request the same page forever).
        if next_time is None or next_time == cursor:
            break
        cursor = next_time
        params["playerMessageTime"] = cursor

    return "\n".join(chat_logs), chat_counts, laugh_counts

# Create the session-state slots once so later reads never raise KeyError.
for state_key in ('chat_logs', 'chat_counts', 'laugh_counts'):
    if state_key not in st.session_state:
        st.session_state[state_key] = None

# Start crawling when the button is pressed.
if st.button("크롤링 시작"):
    if vod_url:
        chat_logs, chat_counts, laugh_counts = crawl_chats(vod_url)

        if chat_counts is None:
            # crawl_chats signals failure as (error_message, None, None):
            # show the message instead of storing or saving it as chat logs.
            st.error(chat_logs)
        else:
            st.session_state['chat_logs'] = chat_logs
            st.session_state['chat_counts'] = chat_counts
            st.session_state['laugh_counts'] = laugh_counts

            # Save to a file; explicit UTF-8 so Korean text does not depend on
            # the server's default locale encoding.
            file_name = "chat_logs.txt"
            with open(file_name, "w", encoding="utf-8") as file:
                file.write(chat_logs)

            # Offer the same text for download.
            st.download_button(
                label="채팅 로그 다운로드",
                data=chat_logs.encode("utf-8"),
                file_name=file_name,
                mime="text/plain"
            )
    else:
        st.warning("URL을 입력하세요.")

# If a previous crawl left results in the session, draw the per-minute chart.
stored_chat_counts = st.session_state['chat_counts']
stored_laugh_counts = st.session_state['laugh_counts'] or {}

# chat_counts is None when the crawl failed, so it is checked as well as chat_logs.
if st.session_state['chat_logs'] and stored_chat_counts is not None:
    # Build the data frame with minutes in chronological order.
    minute_keys = sorted(stored_chat_counts)
    times = [minute.strftime('%H:%M:%S') for minute in minute_keys]
    chat_numbers = [stored_chat_counts[minute] for minute in minute_keys]
    laugh_numbers = [stored_laugh_counts.get(minute, 0) for minute in minute_keys]
    df = pd.DataFrame({'시간': times, '전체 채팅 개수': chat_numbers, 'ㅋㅋㅋㅋ 채팅 개수': laugh_numbers})

    fig = go.Figure()

    # Line for the total number of chats per minute
    fig.add_trace(go.Scatter(
        x=df['시간'],
        y=df['전체 채팅 개수'],
        mode='lines',  # lines only, no markers
        name='전체 채팅 개수',
        line=dict(color='blue'),
        hovertemplate='%{x} - 전체 채팅 개수: %{y}<extra></extra>'
    ))

    # Line for the number of "ㅋㅋㅋㅋ" chats per minute
    fig.add_trace(go.Scatter(
        x=df['시간'],
        y=df['ㅋㅋㅋㅋ 채팅 개수'],
        mode='lines',  # lines only, no markers
        name='ㅋㅋㅋㅋ 채팅 개수',
        line=dict(color='red'),
        hovertemplate='%{x} - ㅋㅋㅋㅋ 채팅 개수: %{y}<extra></extra>'
    ))

    # Chart layout
    fig.update_layout(
        title="분당 채팅 및 ㅋㅋㅋㅋ 채팅 개수",
        xaxis_title="시간",
        yaxis_title="채팅 개수",
        xaxis=dict(
            showticklabels=True  # show the time labels on the x axis
        ),
        hovermode="x unified",  # one tooltip for all traces at the hovered x value
        showlegend=True,
        margin=dict(l=50, r=50, t=100, b=100)  # extra room below the chart
    )

    # Render the chart
    st.plotly_chart(fig)