Spaces:

shunwellbeing
/

youtube_analysis

Running

App Files Files Community

shunwellbeing committed on Jul 10, 2024

Commit

a0882f2

verified ·

1 Parent(s): 5ebf1b4

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -188

app.py DELETED Viewed

@@ -1,188 +0,0 @@
-import gradio as gr
-import pandas as pd
-from googleapiclient.discovery import build
-import plotly.express as px
-import base64
-import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.cluster import KMeans
-import openai
-from datetime import datetime, timedelta
-def get_video_stats(api_key, video_id):
-    """Look up one video and return its title, channel and engagement counts.
-    Args:
-        api_key: Key handed to ``build`` as ``developerKey``.
-        video_id: ID of the video to query via ``videos().list``.
-    Returns:
-        A dict with the keys "Video ID", "Title", "publishedAt", "Channel ID",
-        "View Count", "Like Count" and "Comment Count". A count that is absent
-        from the response's ``statistics`` is reported as 0.
-    Raises:
-        IndexError: If the response's "items" list is empty.
-    """
-    client = build("youtube", "v3", developerKey=api_key)
-    request = client.videos().list(part="snippet,statistics", id=video_id)
-    item = request.execute()["items"][0]
-    snippet = item["snippet"]
-    title, channel_id, published_at = (
-        snippet["title"],
-        snippet["channelId"],
-        snippet["publishedAt"],
-    )
-    statistics = item["statistics"]
-    result = {
-        "Video ID": video_id,
-        "Title": title,
-        "publishedAt": published_at,
-        "Channel ID": channel_id,
-    }
-    # Output column -> field name inside the response's ``statistics`` object.
-    count_fields = (
-        ("View Count", "viewCount"),
-        ("Like Count", "likeCount"),
-        ("Comment Count", "commentCount"),
-    )
-    for column, field in count_fields:
-        result[column] = int(statistics.get(field, 0))
-    return result
-def get_channel_stats(api_key, channel_id):
-    """Return a channel's subscriber count, or 0 when it cannot be determined.
-    Args:
-        api_key: Key handed to ``build`` as ``developerKey``.
-        channel_id: ID of the channel to query via ``channels().list``.
-    Returns:
-        The subscriber count as an int. 0 is returned when the response has no
-        items, or when the channel's statistics carry no "subscriberCount".
-    """
-    youtube = build("youtube", "v3", developerKey=api_key)
-    channel_response = youtube.channels().list(
-        part="statistics",
-        id=channel_id
-    ).execute()
-    # Use .get() throughout so a response without "items" takes the same
-    # "unknown -> 0" path as an empty list instead of raising KeyError.
-    items = channel_response.get("items") or []
-    if not items:
-        return 0
-    statistics = items[0].get("statistics", {})
-    # NOTE(review): channels that hide their subscriber count appear to omit
-    # "subscriberCount" from the response - confirm against the API reference.
-    return int(statistics.get("subscriberCount", 0))
-def get_video_data(api_key, query, max_results, published_after, published_before):
-    """Search for videos and collect per-video and per-channel statistics.
-    Args:
-        api_key: Key handed to ``build`` and to the per-video/per-channel helpers.
-        query: Search terms passed as ``q``.
-        max_results: Upper bound on the number of videos returned; coerced to int.
-        published_after: Lower bound passed as ``publishedAfter``.
-        published_before: Upper bound passed as ``publishedBefore``.
-    Returns:
-        A DataFrame with one row per video: the fields produced by
-        ``get_video_stats`` plus a "Subscriber Count" column. The frame is
-        empty (no columns) when the search yields nothing.
-    """
-    # UI sliders can deliver floats; slicing and the page-size maths need an int.
-    max_results = int(max_results)
-    youtube = build("youtube", "v3", developerKey=api_key)
-    video_ids = []
-    next_page_token = None
-    while len(video_ids) < max_results:
-        search_response = youtube.search().list(
-            q=query,
-            type="video",
-            part="id",
-            # Ask only for what is still missing; 50 is the page size used before.
-            maxResults=min(50, max_results - len(video_ids)),
-            pageToken=next_page_token,
-            order="viewCount",
-            publishedAfter=published_after,
-            publishedBefore=published_before
-        ).execute()
-        items = search_response.get("items", [])
-        video_ids.extend(item["id"]["videoId"] for item in items)
-        next_page_token = search_response.get("nextPageToken")
-        # Stop on the last page, and also on an empty page so the loop cannot spin.
-        if not items or not next_page_token:
-            break
-    # Several results often share a channel; look each channel up only once.
-    subscriber_counts = {}
-    video_stats = []
-    for video_id in video_ids[:max_results]:
-        stats = get_video_stats(api_key, video_id)
-        channel_id = stats["Channel ID"]
-        if channel_id not in subscriber_counts:
-            subscriber_counts[channel_id] = get_channel_stats(api_key, channel_id)
-        stats["Subscriber Count"] = subscriber_counts[channel_id]
-        video_stats.append(stats)
-    return pd.DataFrame(video_stats)
-def download_csv(df, filename):
-    """Render ``df`` as an HTML download link with the CSV embedded as base64.
-    Args:
-        df: DataFrame to serialise with ``to_csv(index=False)``.
-        filename: Base name (without extension) used for the download
-            attribute and the link text.
-    Returns:
-        An ``<a>`` element string whose ``href`` is a base64 data URI.
-    """
-    csv_bytes = df.to_csv(index=False).encode()
-    encoded = base64.b64encode(csv_bytes).decode()
-    return (
-        f'<a href="data:file/csv;base64,{encoded}" '
-        f'download="{filename}.csv">Download {filename} CSV</a>'
-    )
-def visualize_video_ranking(video_stats_df):
-    """Add an "Active_Index" column and build the ranking chart and CSV link.
-    "Active_Index" is "View Count" divided by "Subscriber Count". Rows whose
-    subscriber count is 0 get an index of 0.0, because the plain division
-    would put inf or NaN into the table, the chart and the CSV.
-    Args:
-        video_stats_df: DataFrame with "Video ID", "View Count" and
-            "Subscriber Count" columns. It is modified in place.
-    Returns:
-        A tuple ``(video_stats_df, fig, csv_download_link)``: the same frame
-        with the new column, a bar chart of the index per video, and the HTML
-        link produced by ``download_csv``.
-    """
-    subscribers = video_stats_df["Subscriber Count"]
-    # Mask zero denominators to NaN before dividing, then map those rows to 0.0.
-    nonzero_subscribers = subscribers.where(subscribers != 0)
-    video_stats_df["Active_Index"] = (
-        video_stats_df["View Count"] / nonzero_subscribers
-    ).fillna(0.0)
-    csv_download_link = download_csv(video_stats_df, "video_stats")
-    fig = px.bar(video_stats_df, x="Video ID", y="Active_Index", color="View Count",
-                 labels={"Video ID": "Video ID", "Active_Index": "Active_Index"},
-                 title="Video Active Index")
-    fig.update_layout(height=500, width=500)
-    return video_stats_df, fig, csv_download_link
-def analyze_titles(video_stats_df, openai_key, n_clusters=5):
-    """Cluster video titles with TF-IDF + KMeans and summarise each cluster.
-    Args:
-        video_stats_df: DataFrame with a "Title" column. A "Cluster" column
-            holding each row's label is added in place.
-        openai_key: Key forwarded to ``summarize_cluster``.
-        n_clusters: Requested number of clusters. It is coerced to int and
-            clamped to the range 1..number of titles.
-    Returns:
-        A DataFrame with "Cluster" and "Summary" columns, one row per cluster.
-        It has no rows when ``video_stats_df`` is empty.
-    """
-    if video_stats_df.empty:
-        # Nothing to cluster; the vectoriser and KMeans both need samples.
-        return pd.DataFrame(columns=['Cluster', 'Summary'])
-    titles = video_stats_df['Title'].tolist()
-    # The value may arrive as a float from a UI slider, and asking for more
-    # clusters than there are titles would make the KMeans fit fail.
-    n_clusters = max(1, min(int(n_clusters), len(titles)))
-    vectorizer = TfidfVectorizer()
-    tfidf_matrix = vectorizer.fit_transform(titles)
-    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
-    kmeans.fit(tfidf_matrix)
-    video_stats_df["Cluster"] = kmeans.labels_
-    cluster_summaries = []
-    for i in range(n_clusters):
-        cluster_titles = video_stats_df.loc[video_stats_df["Cluster"] == i, 'Title'].tolist()
-        cluster_text = ' '.join(cluster_titles)
-        cluster_summaries.append(summarize_cluster(cluster_text, openai_key, i))
-    return pd.DataFrame({'Cluster': range(n_clusters), 'Summary': cluster_summaries})
-def summarize_cluster(cluster_text, openai_key, cluster_num):
-    """Ask the chat model for a Japanese summary of one cluster's titles.
-    Args:
-        cluster_text: The cluster's titles joined into a single string.
-        openai_key: Key assigned to the module-level ``openai.api_key``.
-        cluster_num: Index of the cluster; not used by this function.
-    Returns:
-        The content of the first returned choice, stripped of surrounding
-        whitespace.
-    """
-    openai.api_key = openai_key
-    prompt = f"これらの動画を日本語で徹底解析して要約し、動画の特徴・人気要因を500文字以内で解説してください: {cluster_text}"
-    system_message = {
-        "role": "system",
-        "content": "あなたは世界中の人気動画や大規模データを解析してきた天才AI・データサイエンティストです",
-    }
-    user_message = {"role": "user", "content": prompt}
-    request_options = {
-        "model": "gpt-3.5-turbo",
-        "messages": [system_message, user_message],
-        "max_tokens": 500,
-        "n": 1,
-        "stop": None,
-        "temperature": 0.7,
-    }
-    completion = openai.ChatCompletion.create(**request_options)
-    first_choice = completion['choices'][0]
-    return first_choice['message']['content'].strip()
-def main(api_key, openai_key, query, max_results, period, page, n_clusters=5):
-    """Run a search and produce the three values the UI displays.
-    Args:
-        api_key: YouTube key forwarded to ``get_video_data``.
-        openai_key: OpenAI key forwarded to ``analyze_titles``.
-        query: Search terms. An empty value short-circuits to an empty result.
-        max_results: Maximum number of videos; coerced to int.
-        period: "1週間" (7 days), "1か月" (30 days) or "3か月" (90 days).
-            Any other value falls back to 30 days.
-        page: "Video Ranking" or "Title Analysis".
-        n_clusters: Cluster count for the title analysis; coerced to int.
-    Returns:
-        Always a 3-tuple ``(table, figure, html)``. For "Video Ranking" it is
-        the result of ``visualize_video_ranking``. For "Title Analysis" it is
-        ``(cluster_summary_df, None, None)``. For an empty query or an
-        unrecognised page it is ``(None, None, None)``. When the search finds
-        nothing it is ``(empty_frame, None, None)``.
-    """
-    # Local import: the file-level import brings in only datetime and timedelta.
-    from datetime import timezone
-    # The interface declares three outputs, so every path must return three values.
-    no_result = (None, None, None)
-    if not query:
-        return no_result
-    # Resolve the search window; unknown periods default to one month.
-    period_days = {"1週間": 7, "1か月": 30, "3か月": 90}
-    days = period_days.get(period, 30)
-    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
-    now = datetime.now(timezone.utc)
-    published_before = now.strftime(timestamp_format)
-    published_after = (now - timedelta(days=days)).strftime(timestamp_format)
-    video_stats_df = get_video_data(api_key, query, int(max_results), published_after, published_before)
-    if video_stats_df.empty:
-        # No hits: the frame has no columns, so neither page can be built from it.
-        return video_stats_df, None, None
-    if page == "Video Ranking":
-        video_stats_df, fig, csv_download_link = visualize_video_ranking(video_stats_df)
-        return video_stats_df, fig, csv_download_link
-    if page == "Title Analysis":
-        cluster_summary_df = analyze_titles(video_stats_df, openai_key, int(n_clusters))
-        return cluster_summary_df, None, None
-    return no_result
-# Gradio UI: seven inputs mapped positionally onto main()'s parameters, and
-# three outputs matching the 3-tuple that main() returns.
-iface = gr.Interface(
-    fn=main,
-    inputs=[
-        gr.components.Textbox(label="YouTube API Keyを入力してください", type="password"),
-        gr.components.Textbox(label="OpenAI API Keyを入力してください", type="password"),
-        gr.components.Textbox(label="Search query"),
-        # step=1 keeps both sliders on whole numbers. Without an explicit step
-        # Gradio derives one from the range, which may be fractional for a
-        # narrow range such as 2..10 - TODO confirm for the installed version.
-        gr.components.Slider(minimum=1, maximum=1000, value=5, step=1, label="Max results"),
-        # Explicit defaults so an untouched dropdown never submits None.
-        gr.components.Dropdown(["1週間", "1か月", "3か月"], value="1か月", label="Period"),
-        gr.components.Dropdown(["Video Ranking", "Title Analysis"], value="Video Ranking", label="Page"),
-        gr.components.Slider(minimum=2, maximum=10, value=5, step=1, label="Number of clusters")
-    ],
-    outputs=[
-        gr.components.Dataframe(label="Results"),
-        gr.components.Plot(label="Plot"),
-        gr.components.HTML(label="CSV Download Link")
-    ],
-    live=False,
-    title="YouTube Analysis Tool"
-)
-if __name__ == "__main__":
-    iface.launch()