shunwellbeing committed on
Commit
a0882f2
·
verified ·
1 Parent(s): 5ebf1b4

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -188
app.py DELETED
@@ -1,188 +0,0 @@
1
- import gradio as gr
2
- import pandas as pd
3
- from googleapiclient.discovery import build
4
- import plotly.express as px
5
- import base64
6
- import numpy as np
7
- from sklearn.feature_extraction.text import TfidfVectorizer
8
- from sklearn.cluster import KMeans
9
- import openai
10
- from datetime import datetime, timedelta
11
-
12
def get_video_stats(api_key, video_id):
    """Fetch title, channel, publish time and engagement counts for one video.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        video_id: ID of the video to look up.

    Returns:
        A dict with the keys "Video ID", "Title", "publishedAt",
        "Channel ID", "View Count", "Like Count" and "Comment Count".
        Any count missing from the response is reported as 0.

    Raises:
        IndexError: If the response contains no item for ``video_id``.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    video_response = youtube.videos().list(
        part="snippet,statistics",
        id=video_id,
    ).execute()

    items = video_response.get("items") or []
    if not items:
        # Same exception type the bare ``[0]`` lookup raised, but with a
        # message that names the offending video.
        raise IndexError(f"No video found for id {video_id!r}")

    video = items[0]
    snippet = video["snippet"]
    statistics = video.get("statistics", {})

    return {
        "Video ID": video_id,
        "Title": snippet["title"],
        "publishedAt": snippet["publishedAt"],
        "Channel ID": snippet["channelId"],
        "View Count": int(statistics.get("viewCount", 0)),
        "Like Count": int(statistics.get("likeCount", 0)),
        "Comment Count": int(statistics.get("commentCount", 0)),
    }
36
-
37
def get_channel_stats(api_key, channel_id):
    """Return the subscriber count of a channel, or 0 when it is unavailable.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        channel_id: ID of the channel to look up.

    Returns:
        The subscriber count as an int. 0 is returned when the response has
        no items, or when the item carries no ``subscriberCount`` field.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    channel_response = youtube.channels().list(
        part="statistics",
        id=channel_id,
    ).execute()

    items = channel_response.get("items") or []
    if not items:
        return 0

    # NOTE(review): ``subscriberCount`` is presumably omitted for channels
    # that hide it — confirm against the API docs. A hard ``[...]`` lookup
    # raised KeyError in that case; fall back to 0 like the video counters.
    statistics = items[0].get("statistics", {})
    return int(statistics.get("subscriberCount", 0))
51
-
52
def get_video_data(api_key, query, max_results, published_after, published_before):
    """Search videos by view count and collect per-video and channel stats.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        query: Search terms.
        max_results: Maximum number of videos to return; coerced to int
            because UI sliders may deliver a float.
        published_after: Lower bound forwarded as ``publishedAfter``.
        published_before: Upper bound forwarded as ``publishedBefore``.

    Returns:
        A DataFrame with one row per video: the fields produced by
        ``get_video_stats`` plus a "Subscriber Count" column. The frame is
        empty (no columns) when the search yields nothing.
    """
    youtube = build("youtube", "v3", developerKey=api_key)
    max_results = int(max_results)
    video_ids = []
    next_page_token = None

    while len(video_ids) < max_results:
        # Ask only for what is still missing, capped at the 50 per page the
        # original code requested.
        page_size = min(50, max_results - len(video_ids))
        search_response = youtube.search().list(
            q=query,
            type="video",
            part="id",
            maxResults=page_size,
            pageToken=next_page_token,
            order="viewCount",
            publishedAfter=published_after,
            publishedBefore=published_before,
        ).execute()

        video_ids.extend(
            item["id"]["videoId"] for item in search_response.get("items", [])
        )
        next_page_token = search_response.get("nextPageToken")
        if not next_page_token:
            break

    # Several hits often belong to the same channel; look each channel up once.
    subscribers_by_channel = {}
    video_stats = []
    for video_id in video_ids[:max_results]:
        stats = get_video_stats(api_key, video_id)
        channel_id = stats["Channel ID"]
        if channel_id not in subscribers_by_channel:
            subscribers_by_channel[channel_id] = get_channel_stats(api_key, channel_id)
        stats["Subscriber Count"] = subscribers_by_channel[channel_id]
        video_stats.append(stats)

    return pd.DataFrame(video_stats)
87
-
88
def download_csv(df, filename):
    """Build an HTML link that embeds ``df`` as a base64-encoded CSV data URI.

    Args:
        df: DataFrame to serialise (written without the index).
        filename: Base name for the download; ".csv" is appended.

    Returns:
        An ``<a>`` element as a string. The filename is HTML-escaped before
        being placed in the ``download`` attribute and the link text.
    """
    from html import escape  # local import: only this helper needs it

    csv_text = df.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode("utf-8")).decode("ascii")
    safe_name = escape(str(filename), quote=True)
    # ``text/csv`` is the registered media type for CSV; the charset lets the
    # browser decode non-ASCII cell values correctly.
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        f'download="{safe_name}.csv">Download {safe_name} CSV</a>'
    )
93
-
94
def visualize_video_ranking(video_stats_df):
    """Add an "Active_Index" column and build the bar chart and CSV link.

    "Active_Index" is "View Count" divided by "Subscriber Count". Rows whose
    subscriber count is not positive get NaN instead of an infinite ratio.

    Args:
        video_stats_df: Frame with "Video ID", "View Count" and
            "Subscriber Count" columns. It is modified in place.

    Returns:
        A tuple ``(video_stats_df, fig, csv_download_link)``. For an empty
        frame the tuple is ``(video_stats_df, None, None)`` because there is
        nothing to plot or export.
    """
    if video_stats_df.empty:
        return video_stats_df, None, None

    subscribers = video_stats_df["Subscriber Count"]
    # ``where`` keeps positive counts and turns the rest into NaN, so the
    # division cannot produce inf for channels reported with 0 subscribers.
    video_stats_df["Active_Index"] = (
        video_stats_df["View Count"] / subscribers.where(subscribers > 0)
    )

    csv_download_link = download_csv(video_stats_df, "video_stats")

    fig = px.bar(
        video_stats_df,
        x="Video ID",
        y="Active_Index",
        color="View Count",
        labels={"Video ID": "Video ID", "Active_Index": "Active_Index"},
        title="Video Active Index",
    )
    fig.update_layout(height=500, width=500)

    return video_stats_df, fig, csv_download_link
105
-
106
def analyze_titles(video_stats_df, openai_key, n_clusters=5):
    """Cluster video titles with TF-IDF + KMeans and summarise each cluster.

    Args:
        video_stats_df: Frame with a "Title" column. A "Cluster" column with
            the KMeans label of each row is added in place.
        openai_key: API key forwarded to ``summarize_cluster``.
        n_clusters: Requested number of clusters. It is coerced to int and
            clamped to the range 1..number of titles.

    Returns:
        A DataFrame with columns "Cluster" and "Summary", one row per
        cluster actually used. Empty when ``video_stats_df`` is empty.
    """
    if video_stats_df.empty:
        return pd.DataFrame(columns=["Cluster", "Summary"])

    titles = video_stats_df["Title"].tolist()
    # UI sliders may deliver a float, and KMeans rejects more clusters than
    # samples, so normalise the requested count first.
    n_clusters = max(1, min(int(n_clusters), len(titles)))

    tfidf_matrix = TfidfVectorizer().fit_transform(titles)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(tfidf_matrix)
    video_stats_df["Cluster"] = kmeans.labels_

    cluster_summaries = []
    for cluster_id in range(n_clusters):
        in_cluster = video_stats_df["Cluster"] == cluster_id
        cluster_text = " ".join(video_stats_df[in_cluster]["Title"].tolist())
        cluster_summaries.append(
            summarize_cluster(cluster_text, openai_key, cluster_id)
        )

    return pd.DataFrame(
        {"Cluster": range(n_clusters), "Summary": cluster_summaries}
    )
125
-
126
def summarize_cluster(cluster_text, openai_key, cluster_num):
    """Request a summary of one cluster's titles from the OpenAI chat API.

    Args:
        cluster_text: The cluster's titles joined into a single string.
        openai_key: Key assigned to ``openai.api_key`` before the request.
        cluster_num: Index of the cluster; accepted but not used here.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    # The key is set on the module, so it applies to later calls as well.
    openai.api_key = openai_key

    system_message = {
        "role": "system",
        "content": "あなたは世界中の人気動画や大規模データを解析してきた天才AI・データサイエンティストです",
    }
    user_message = {
        "role": "user",
        "content": f"これらの動画を日本語で徹底解析して要約し、動画の特徴・人気要因を500文字以内で解説してください: {cluster_text}",
    }

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        max_tokens=500,
        n=1,
        stop=None,
        temperature=0.7,
    )

    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()
142
-
143
def main(api_key, openai_key, query, max_results, period, page, n_clusters=5):
    """Run the analysis selected in the UI and return its three outputs.

    Args:
        api_key: YouTube Data API key.
        openai_key: OpenAI API key (used for "Title Analysis").
        query: Search terms; an empty value produces empty outputs.
        max_results: Maximum number of videos to analyse.
        period: "1週間", "1か月" or "3か月"; anything else means one month.
        page: "Video Ranking" or "Title Analysis".
        n_clusters: Number of title clusters for "Title Analysis".

    Returns:
        Always a 3-tuple ``(dataframe, figure, html_link)``, one value per
        declared output. Slots that do not apply are ``None``.
    """
    # Return one value per output even when there is nothing to do, and skip
    # the API calls entirely in that case.
    if not query or page not in ("Video Ranking", "Title Analysis"):
        return None, None, None

    # Search window: from `days_back` days ago until now (UTC).
    period_days = {"1週間": 7, "1か月": 30, "3か月": 90}
    days_back = period_days.get(period, 30)  # default: one month
    now = datetime.utcnow()
    published_before = now.isoformat("T") + "Z"
    published_after = (now - timedelta(days=days_back)).isoformat("T") + "Z"

    video_stats_df = get_video_data(
        api_key, query, max_results, published_after, published_before
    )

    if page == "Video Ranking":
        video_stats_df, fig, csv_download_link = visualize_video_ranking(video_stats_df)
        return video_stats_df, fig, csv_download_link

    cluster_summary_df = analyze_titles(video_stats_df, openai_key, n_clusters)
    return cluster_summary_df, None, None
166
-
167
# Gradio front end: seven inputs feed `main`, which fills the table, plot and
# download-link outputs.
# NOTE(review): without an explicit `step` Gradio appears to derive one from
# the slider range, which can be fractional — confirm for the installed
# version. `step=1` keeps both sliders on whole numbers.
# The dropdowns get a default selection so `main` never receives None for them.
iface = gr.Interface(
    fn=main,
    inputs=[
        gr.components.Textbox(label="YouTube API Keyを入力してください", type="password"),
        gr.components.Textbox(label="OpenAI API Keyを入力してください", type="password"),
        gr.components.Textbox(label="Search query"),
        gr.components.Slider(minimum=1, maximum=1000, value=5, step=1, label="Max results"),
        gr.components.Dropdown(["1週間", "1か月", "3か月"], value="1か月", label="Period"),
        gr.components.Dropdown(["Video Ranking", "Title Analysis"], value="Video Ranking", label="Page"),
        gr.components.Slider(minimum=2, maximum=10, value=5, step=1, label="Number of clusters"),
    ],
    outputs=[
        gr.components.Dataframe(label="Results"),
        gr.components.Plot(label="Plot"),
        gr.components.HTML(label="CSV Download Link"),
    ],
    live=False,
    title="YouTube Analysis Tool",
)

if __name__ == "__main__":
    iface.launch()