ASokirka committed on
Commit
03bc94b
·
verified ·
1 Parent(s): 9663a29

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -154
app.py DELETED
@@ -1,154 +0,0 @@
1
- import os
2
- import re
3
- import streamlit as st
4
- import googleapiclient.discovery
5
- import pandas as pd
6
- from transformers import pipeline
7
- import matplotlib.pyplot as plt
8
- import seaborn as sns
9
-
10
# Page heading shown at the top of the app.
PAGE_TITLE = 'Анализатор комментариев :red[YouTube] :sunglasses:'
st.title(PAGE_TITLE)
11
-
12
-
13
# Hugging Face model used to analyse the sentiment of comment text.
# NOTE: the Streamlit resource-cache decorator below is disabled, so the
# pipeline is rebuilt on every call instead of being shared across sessions.
#@st.cache_resource
def load_model():
    """
    Builds the sentiment-analysis pipeline used to score comments.
    No caching is applied here while the decorator above stays commented out.
    Returns: a transformers pipeline backed by the
    'blanchefort/rubert-base-cased-sentiment' checkpoint.
    """
    return pipeline(
        task="sentiment-analysis",
        model="blanchefort/rubert-base-cased-sentiment",
    )
25
-
26
-
27
def extract_video_id(url: str) -> str:
    """
    Extracts the video ID from a YouTube video URL.

    Recognised forms, tried in this order: a ``v=<id>`` query parameter,
    short ``youtu.be/<id>`` links, and ``/shorts/<id>``, ``/embed/<id>`` or
    ``/live/<id>`` paths.
    Args: url (str): The YouTube video URL.
    Returns: str: The extracted video ID,
    or an empty string if no ID can be found (including empty/None input).
    """
    if not url:
        return ""
    # No trailing word-boundary lookahead: an ID may legitimately end in '-',
    # and the greedy character class already stops at '&', '?', '#' or '/'.
    id_patterns = (
        r"(?<=v=)[\w-]+",
        r"(?<=youtu\.be/)[\w-]+",
        r"(?<=/shorts/)[\w-]+",
        r"(?<=/embed/)[\w-]+",
        r"(?<=/live/)[\w-]+",
    )
    for pattern in id_patterns:
        match = re.search(pattern, url)
        if match:
            return match.group()
    return ""
40
-
41
-
42
def download_comments(video_id: str, max_comments: int = 100) -> pd.DataFrame:
    """
    Downloads top-level comments from a YouTube video based on the provided
    video ID and returns them as a DataFrame.

    The API key is read from the ``API_KEY_YOUTUBE`` environment variable.
    Result pages are followed until ``max_comments`` rows are collected or
    the API reports no further page.
    Args: video_id (str): The video ID of the YouTube video.
          max_comments (int): Upper bound on the number of comments returned
          (default 100, i.e. a single full-size page).
    Returns: DataFrame: columns 'author', 'published_at', 'updated_at',
    'like_count', 'text' - one row per top-level comment.
    """
    columns = ['author',
               'published_at',
               'updated_at',
               'like_count',
               'text',]
    if max_comments < 1:
        return pd.DataFrame(columns=columns)

    DEV_KEY = os.getenv('API_KEY_YOUTUBE')
    youtube = googleapiclient.discovery.build("youtube",
                                              "v3",
                                              developerKey=DEV_KEY)
    threads = youtube.commentThreads()
    # The API accepts at most 100 results per page.
    request = threads.list(part="snippet",
                           videoId=video_id,
                           maxResults=min(max_comments, 100))
    comments = []
    while request is not None and len(comments) < max_comments:
        response = request.execute()
        # 'items' may be absent when a page carries no comments.
        for item in response.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']
            comments.append([comment['authorDisplayName'],
                             comment['publishedAt'],
                             comment['updatedAt'],
                             comment['likeCount'],
                             comment['textDisplay'],])
        # list_next() returns None once there is no next page token.
        request = threads.list_next(request, response)
    return pd.DataFrame(comments[:max_comments], columns=columns)
71
-
72
-
73
def analyze_emotions_in_comments(df: pd.DataFrame) -> tuple:
    """
    Takes a DataFrame with comments and scores the emotional sentiment
    of each comment with the model returned by load_model().

    Only the first 513 comments are sent to the model; any further rows are
    kept in the result with empty 'label'/'score' values.
    Args: df (pandas.DataFrame): DataFrame containing the columns
          'text', 'author' and 'published_at'.
    Returns: tuple: (DataFrame with the model's 'label' and 'score' columns
    placed before the comment columns, number of comments processed).
    """
    max_analyzed = 513  # cap on how many comments are scored per call
    selected_columns = ['text', 'author', 'published_at']
    # Fresh 0..n-1 index so the model output rows line up with the comment
    # rows when the two frames are concatenated side by side.
    df = df[selected_columns].reset_index(drop=True)
    texts = df['text'][:max_analyzed].to_list()
    if texts:
        model = load_model()
        # truncation=True clips over-long comments to the model's maximum
        # input length instead of letting the pipeline raise on them.
        res_list = model(texts, truncation=True)
    else:
        res_list = []
    # Explicit columns keep 'label'/'score' present even with no results.
    scores_df = pd.DataFrame(res_list, columns=['label', 'score'])
    full_df = pd.concat([scores_df, df], axis=1)
    return (full_df, len(res_list))
88
-
89
-
90
def plot_heatmap_from_dataframe(df: pd.DataFrame) -> plt:
    """
    Draws a heatmap of comment counts per hour of day (rows) and calendar
    date (columns) on a new 10x6 figure.

    The input DataFrame is not modified.
    Args: df (DataFrame): Must contain 'published_at' (parseable by
          pandas.to_datetime) and 'text' columns.
    Returns: plt: the matplotlib.pyplot module, whose current figure holds
    the heatmap.
    """
    published = pd.to_datetime(df['published_at'])
    # Derived columns live in a scratch frame so the caller's DataFrame
    # keeps its original columns and dtypes.
    scratch = pd.DataFrame({'Date': published.dt.date,
                            'Hour': published.dt.hour,
                            'text': df['text']})
    pivot_table = scratch.pivot_table(index='Hour',
                                      columns='Date',
                                      values='text',
                                      aggfunc='count')
    plt.figure(figsize=(10, 6))
    sns.heatmap(pivot_table,
                cmap='YlGnBu')
    plt.title('Количество комментариев по часам и датам')
    plt.xlabel('Дата')
    plt.ylabel('Час')
    return plt
110
-
111
-
112
def visualize_data(df: pd.DataFrame):
    """
    Draws a pie chart of how often each value occurs in the 'label' column.
    Args: df (DataFrame): The input DataFrame; must contain a 'label' column.
    Returns: fig: A matplotlib figure object holding the pie chart.
    """
    label_counts = df['label'].value_counts()
    fig, ax = plt.subplots()
    ax.set_title("Эмоциональная окраска комментариев на YouTube")
    ax.pie(label_counts, labels=label_counts.index, autopct='%1.1f%%')
    return fig
124
-
125
-
126
def change_url():
    """Resets the 'start' flag in the Streamlit session state to False."""
    st.session_state["start"] = False
128
-
129
-
130
# Make sure the 'start' flag exists before it is read below.
if "start" not in st.session_state:
    st.session_state.start = False

# Extract the video id from the URL; it is needed for the API request.
url = st.text_input(label="Enter URL from YouTube", on_change=change_url)
video_id = extract_video_id(url)
if video_id:
    if st.button('Загрузить комментарии'):
        st.session_state.start = True

if video_id and st.session_state.start:
    comments_df = download_comments(video_id)
    if comments_df.empty:
        # Nothing to analyse or plot: the charts below need at least one row.
        st.warning('Комментарии не найдены.')
    else:
        # Show the table with the results on the page.
        with st.spinner('Analyzing comments...'):
            full_df, num_comments = analyze_emotions_in_comments(comments_df)
        st.success(f'Готово! Обработано {num_comments} комментариев.')
        st.write(full_df)
        st.markdown('***')

        # Show the heatmap of comments by hour and date.
        st.pyplot(plot_heatmap_from_dataframe(full_df))
        st.markdown('***')

        # Show the pie chart.
        st.pyplot(visualize_data(full_df))