shunwellbeing committed on
Commit
add81e1
·
verified ·
1 Parent(s): a0882f2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -0
app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from googleapiclient.discovery import build
4
+ import plotly.express as px
5
+ import base64
6
+ import numpy as np
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.cluster import KMeans
9
+ import openai
10
+ from datetime import datetime, timedelta
11
+
12
def get_video_stats(api_key, video_id):
    """Look up one video and return its title, channel and engagement counters.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        video_id: ID of the video to query.

    Returns:
        A dict with the keys ``"Video ID"``, ``"Title"``, ``"publishedAt"``,
        ``"Channel ID"``, ``"View Count"``, ``"Like Count"`` and
        ``"Comment Count"``. A counter that is missing from the response's
        ``statistics`` section is reported as 0.

    Raises:
        IndexError: If the response's ``"items"`` list is empty.
    """
    client = build("youtube", "v3", developerKey=api_key)
    request = client.videos().list(part="snippet,statistics", id=video_id)
    item = request.execute()["items"][0]

    snippet = item["snippet"]
    video_title = snippet["title"]
    owner_channel = snippet["channelId"]
    published_at = snippet["publishedAt"]

    counters = item["statistics"]
    views, likes, comments = (
        int(counters.get(field, 0))
        for field in ("viewCount", "likeCount", "commentCount")
    )

    return {
        "Video ID": video_id,
        "Title": video_title,
        "publishedAt": published_at,
        "Channel ID": owner_channel,
        "View Count": views,
        "Like Count": likes,
        "Comment Count": comments,
    }
36
+
37
def get_channel_stats(api_key, channel_id):
    """Return a channel's subscriber count, or 0 when it is unavailable.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        channel_id: ID of the channel to query.

    Returns:
        The subscriber count as an int. 0 is returned when the response has
        no items for the channel or when the statistics carry no
        ``subscriberCount`` field.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    channel_response = youtube.channels().list(
        part="statistics",
        id=channel_id,
    ).execute()

    # Use .get so a response without an "items" key is treated like an empty
    # result instead of raising KeyError.
    items = channel_response.get("items") or []
    if not items:
        return 0

    # The count may be absent from the statistics (presumably for channels
    # that hide their subscriber numbers); fall back to 0 in that case.
    statistics = items[0].get("statistics", {})
    return int(statistics.get("subscriberCount", 0))
51
+
52
def get_video_data(api_key, query, max_results, published_after, published_before):
    """Search for videos and collect per-video and per-channel statistics.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        query: Search terms.
        max_results: Maximum number of videos to return. Coerced to int so a
            float coming from a UI slider can be used for slicing.
        published_after: Lower bound for the publish time, forwarded as
            ``publishedAfter``.
        published_before: Upper bound for the publish time, forwarded as
            ``publishedBefore``.

    Returns:
        A ``pandas.DataFrame`` with one row per video: the fields returned by
        ``get_video_stats`` plus a ``"Subscriber Count"`` column.
    """
    page_size = 50
    max_results = int(max_results)

    youtube = build("youtube", "v3", developerKey=api_key)
    video_ids = []
    next_page_token = None

    # Page through the search results (ordered by view count) until enough
    # IDs are collected or the API reports no further page.
    while len(video_ids) < max_results:
        search_response = youtube.search().list(
            q=query,
            type="video",
            part="id",
            maxResults=page_size,
            pageToken=next_page_token,
            order="viewCount",
            publishedAfter=published_after,
            publishedBefore=published_before,
        ).execute()

        video_ids.extend(
            item["id"]["videoId"] for item in search_response.get("items", [])
        )
        next_page_token = search_response.get("nextPageToken")

        if not next_page_token:
            break

    # The last page may overshoot the requested amount.
    video_ids = video_ids[:max_results]

    # Several videos often belong to the same channel; remember each
    # channel's subscriber count so it is requested only once per call.
    subscribers_by_channel = {}
    video_stats = []
    for video_id in video_ids:
        stats = get_video_stats(api_key, video_id)
        channel_id = stats["Channel ID"]
        if channel_id not in subscribers_by_channel:
            subscribers_by_channel[channel_id] = get_channel_stats(api_key, channel_id)
        stats["Subscriber Count"] = subscribers_by_channel[channel_id]
        video_stats.append(stats)

    return pd.DataFrame(video_stats)
87
+
88
def download_csv(df, filename):
    """Build an HTML download link that embeds *df* as a base64 CSV data URI.

    Args:
        df: Object exposing ``to_csv(index=False)`` that returns the CSV text
            (a ``pandas.DataFrame`` in this file).
        filename: Base name, without extension, used for the ``download``
            attribute and the link label.

    Returns:
        The ``<a>`` element as a string.
    """
    csv_text = df.to_csv(index=False)
    encoded_payload = base64.b64encode(csv_text.encode()).decode()
    return f'<a href="data:file/csv;base64,{encoded_payload}" download="{filename}.csv">Download {filename} CSV</a>'
93
+
94
def visualize_video_ranking(video_stats_df):
    """Add an ``Active_Index`` column and build the ranking chart and CSV link.

    ``Active_Index`` is the view count divided by the channel's subscriber
    count. The column is added to ``video_stats_df`` in place.

    Args:
        video_stats_df: DataFrame with at least the ``"Video ID"``,
            ``"View Count"`` and ``"Subscriber Count"`` columns.

    Returns:
        A tuple ``(video_stats_df, fig, csv_download_link)`` holding the
        augmented DataFrame, the Plotly bar chart and the HTML download link
        produced by ``download_csv``.
    """
    # A subscriber count of 0 would make the division produce inf, which then
    # ends up in the chart and the CSV. Mask zeros so those rows get NaN
    # instead.
    subscribers = video_stats_df["Subscriber Count"].replace(0, float("nan"))
    video_stats_df["Active_Index"] = video_stats_df["View Count"] / subscribers

    csv_download_link = download_csv(video_stats_df, "video_stats")

    fig = px.bar(
        video_stats_df,
        x="Video ID",
        y="Active_Index",
        color="View Count",
        labels={"Video ID": "Video ID", "Active_Index": "Active_Index"},
        title="Video Active Index",
    )
    fig.update_layout(height=500, width=500)

    return video_stats_df, fig, csv_download_link
105
+
106
def analyze_titles(video_stats_df, openai_key, n_clusters=5):
    """Cluster video titles with TF-IDF + KMeans and summarize each cluster.

    A ``"Cluster"`` column with the KMeans label of every row is added to
    ``video_stats_df`` in place.

    Args:
        video_stats_df: DataFrame with a ``"Title"`` column.
        openai_key: API key forwarded to ``summarize_cluster``.
        n_clusters: Requested number of clusters. It is coerced to int and
            clamped to the range ``1..number of titles``, because KMeans
            cannot form more clusters than there are samples.

    Returns:
        A DataFrame with the columns ``"Cluster"`` and ``"Summary"``, one row
        per cluster actually used. It is empty when there are no titles.
    """
    # Nothing to cluster: return an empty result with the usual columns
    # rather than failing on a missing "Title" column.
    if video_stats_df.empty:
        return pd.DataFrame({'Cluster': [], 'Summary': []})

    titles = video_stats_df['Title'].tolist()
    n_clusters = max(1, min(int(n_clusters), len(titles)))

    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(titles)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(tfidf_matrix)
    video_stats_df["Cluster"] = kmeans.labels_

    cluster_summaries = []
    for cluster_id in range(n_clusters):
        in_cluster = video_stats_df["Cluster"] == cluster_id
        cluster_titles = video_stats_df.loc[in_cluster, 'Title'].tolist()
        if not cluster_titles:
            # No titles landed in this cluster; skip the summary request.
            cluster_summaries.append("")
            continue
        cluster_text = ' '.join(cluster_titles)
        cluster_summaries.append(summarize_cluster(cluster_text, openai_key, cluster_id))

    return pd.DataFrame({'Cluster': range(n_clusters), 'Summary': cluster_summaries})
125
+
126
def summarize_cluster(cluster_text, openai_key, cluster_num):
    """Ask the OpenAI chat model for a Japanese summary of one title cluster.

    Args:
        cluster_text: The cluster's titles joined into a single string; it is
            appended to the user prompt.
        openai_key: API key; assigned to the module-level ``openai.api_key``.
        cluster_num: Index of the cluster. Not used by this function.

    Returns:
        The content of the first returned choice with surrounding whitespace
        stripped.
    """
    openai.api_key = openai_key

    system_message = {
        "role": "system",
        "content": "あなたは世界中の人気動画や大規模データを解析してきた天才AI・データサイエンティストです",
    }
    user_message = {
        "role": "user",
        "content": f"これらの動画を日本語で徹底解析して要約し、動画の特徴・人気要因を500文字以内で解説してください: {cluster_text}",
    }

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        max_tokens=500,
        n=1,
        stop=None,
        temperature=0.7,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()
142
+
143
def main(api_key, openai_key, query, max_results, period, page, n_clusters=5):
    """Gradio entry point: search YouTube and run the selected analysis page.

    Args:
        api_key: YouTube Data API key.
        openai_key: OpenAI API key (only used by the "Title Analysis" page).
        query: Search terms. An empty query produces no result.
        max_results: Maximum number of videos to fetch; coerced to int.
        period: One of "1週間", "1か月", "3か月". Any other value falls back
            to one month.
        page: "Video Ranking" or "Title Analysis". Any other value produces
            no result.
        n_clusters: Number of title clusters for "Title Analysis"; coerced
            to int.

    Returns:
        A 3-tuple matching the interface outputs (dataframe, plot, HTML):
        ``(video_stats_df, fig, csv_download_link)`` for "Video Ranking",
        ``(cluster_summary_df, None, None)`` for "Title Analysis",
        ``(video_stats_df, None, None)`` when the search finds nothing, and
        ``(None, None, None)`` when the query is empty or the page is unknown.
    """
    # Always hand back three values; a bare None does not match the three
    # declared outputs. Checking up front also avoids spending API quota on
    # a request whose result would be discarded.
    if not query or page not in ("Video Ranking", "Title Analysis"):
        return None, None, None

    # Period selection; unknown values default to one month.
    days_by_period = {"1週間": 7, "1か月": 30, "3か月": 90}
    lookback_days = days_by_period.get(period, 30)

    now = datetime.utcnow()
    published_before = now.isoformat("T") + "Z"
    published_after = (now - timedelta(days=lookback_days)).isoformat("T") + "Z"

    video_stats_df = get_video_data(
        api_key, query, int(max_results), published_after, published_before
    )

    # An empty search result has none of the columns the analysis pages need.
    if video_stats_df.empty:
        return video_stats_df, None, None

    if page == "Video Ranking":
        video_stats_df, fig, csv_download_link = visualize_video_ranking(video_stats_df)
        return video_stats_df, fig, csv_download_link

    cluster_summary_df = analyze_titles(video_stats_df, openai_key, int(n_clusters))
    return cluster_summary_df, None, None
166
+
167
# Gradio UI definition: seven inputs forwarded positionally to ``main`` and
# three outputs (dataframe, plot, HTML download link).
iface = gr.Interface(
    fn=main,
    inputs=[
        gr.components.Textbox(label="YouTube API Keyを入力してください", type="password"),
        gr.components.Textbox(label="OpenAI API Keyを入力してください", type="password"),
        gr.components.Textbox(label="Search query"),
        # step=1 keeps both sliders on whole numbers; without an explicit
        # step the slider may emit fractional values, which are not valid
        # as a result count or a cluster count.
        gr.components.Slider(minimum=1, maximum=1000, value=5, step=1, label="Max results"),
        # Default selections so ``main`` does not receive None for the
        # period or the page before the user touches the dropdowns.
        gr.components.Dropdown(["1週間", "1か月", "3か月"], value="1か月", label="Period"),
        gr.components.Dropdown(["Video Ranking", "Title Analysis"], value="Video Ranking", label="Page"),
        gr.components.Slider(minimum=2, maximum=10, value=5, step=1, label="Number of clusters"),
    ],
    outputs=[
        gr.components.Dataframe(label="Results"),
        gr.components.Plot(label="Plot"),
        gr.components.HTML(label="CSV Download Link"),
    ],
    live=False,
    title="YouTube Analysis Tool",
)

if __name__ == "__main__":
    iface.launch()