# Spaces: Runtime error
import pandas as pd | |
import requests | |
import urllib.parse as urlparse | |
def get_video_id(url_video):
    """Extract the YouTube video id from a video URL.

    Recognised forms:

    * ``https://youtu.be/<id>``
    * ``https://[www.|m.|music.]youtube.com/watch?v=<id>``
    * ``https://[www.|m.|music.]youtube.com/{embed,v,shorts,live}/<id>``

    Args:
        url_video: The URL string to inspect.

    Returns:
        The video id as a string, or ``None`` when the URL does not match
        any recognised form (including a ``/watch`` URL without a ``v``
        query parameter, which previously raised ``KeyError``).
    """
    parsed = urlparse.urlparse(url_video)
    host = parsed.hostname  # already lower-cased by urlparse; None if absent

    if host == "youtu.be":
        # Short links carry the id as the first path segment.
        return parsed.path.strip("/").split("/")[0] or None

    if host in ("www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com"):
        if parsed.path == "/watch":
            values = urlparse.parse_qs(parsed.query).get("v")
            return values[0] if values else None
        # "/embed/<id>" splits into ["", "embed", "<id>", ...].
        segments = parsed.path.split("/")
        if len(segments) > 2 and segments[1] in ("embed", "v", "shorts", "live"):
            return segments[2] or None

    return None
def get_comments(api_key, video_id, timeout=10):
    """Fetch one page of top-level comments for a video.

    Sends a single GET request to the YouTube ``commentThreads`` endpoint
    with ``part=snippet`` and ``maxResults=100``.

    Args:
        api_key: Value sent as the ``key`` query parameter.
        video_id: Value sent as the ``videoId`` query parameter.
        timeout: Seconds passed to ``requests.get`` as its timeout. Optional;
            without a timeout the request could block indefinitely.

    Returns:
        A dict keyed by the 0-based position of each item in the response.
        Each value is ``{"text_comment": ..., "publish_data": ...}``, where
        the comment text has its line breaks replaced by spaces.
        ``None`` when the response JSON has no ``"items"`` key.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
    params = {
        "part": "snippet",
        "videoId": video_id,
        "maxResults": 100,
        "key": api_key,
    }
    response = requests.get(endpoint, params=params, timeout=timeout)
    payload = response.json()
    if "items" not in payload:
        # Preserve the existing contract: no "items" key means no result.
        return None

    comments = {}
    for num, item in enumerate(payload["items"]):
        snippet = item["snippet"]["topLevelComment"]["snippet"]
        comments[num] = {
            "text_comment": " ".join(snippet["textOriginal"].splitlines()),
            "publish_data": snippet["publishedAt"],
        }
    return comments
def get_sentim(data, headers, url, timeout=60):
    """POST ``data`` to a sentiment endpoint and return its first prediction.

    Args:
        data: Object sent as the JSON request body.
        headers: HTTP headers for the request.
        url: Endpoint URL.
        timeout: Seconds passed to ``requests.post`` as its timeout. Optional;
            without a timeout the request could block indefinitely.

    Returns:
        A ``(label, score)`` tuple taken from the ``"label"`` and ``"score"``
        keys of element ``[0][0]`` of the response JSON.

    Note:
        The response is indexed without validation, so a body of any other
        shape raises at the indexing step.
    """
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    prediction = response.json()[0][0]
    return prediction["label"], prediction["score"]
def pipeline_sentiment(url_video, api_key, headers, url):
    """Fetch a video's comments and attach a sentiment label and score to each.

    Args:
        url_video: Video URL, resolved to an id with ``get_video_id``.
        api_key: API key forwarded to ``get_comments``.
        headers: HTTP headers forwarded to ``get_sentim``.
        url: Sentiment endpoint URL forwarded to ``get_sentim``.

    Returns:
        A DataFrame with one row per comment and the columns
        ``text_comment``, ``publish_data``, ``sentiment`` and ``score``.
        When the URL yields no video id or no comments are returned, an empty
        DataFrame with those same columns is returned. Previously this case
        failed with ``KeyError: 'text_comment'``.
    """
    columns = ["text_comment", "publish_data", "sentiment", "score"]

    video_id = get_video_id(url_video)
    # Skip the API round trip when the URL could not be resolved to an id.
    comments = get_comments(api_key, video_id) if video_id else None
    if not comments:
        return pd.DataFrame(columns=columns)

    comments_df = pd.DataFrame(comments).T
    results = [get_sentim(text, headers, url) for text in comments_df["text_comment"]]
    # Assign plain lists so values are matched by position, not index labels.
    labels, scores = zip(*results)
    comments_df["sentiment"] = list(labels)
    comments_df["score"] = list(scores)
    return comments_df
def pipeline_stats(data):
    """Return each sentiment label's share of rows as a percentage.

    Reads the ``"sentiment"`` column of ``data``, computes the relative
    frequency of every distinct value, scales it to percent and rounds to
    two decimal places.
    """
    shares = data["sentiment"].value_counts(normalize=True)
    percentages = shares * 100
    return percentages.round(2)
def pipeline_summarize(data, headers, url, length=2000, max_length=35, timeout=60):
    """Summarize a collection of texts by sending them to an endpoint in chunks.

    The items of ``data`` are joined with single spaces, the result is cut
    into consecutive slices of ``length`` characters, and each slice is
    POSTed as ``{"inputs": <slice>, "parameters": {"max_length": max_length}}``.

    Args:
        data: Iterable of strings to summarize.
        headers: HTTP headers for each request.
        url: Endpoint URL.
        length: Number of characters per slice (used as the ``range`` step).
        max_length: Value sent as ``parameters.max_length`` with every slice.
        timeout: Seconds passed to ``requests.post`` as its timeout. Optional;
            without a timeout a request could block indefinitely.

    Returns:
        The ``"generated_text"`` of the first element of every response,
        joined with ``". "``. An empty string when the joined text is empty,
        because no request is made.
    """
    text = " ".join(data)
    summaries = []
    for start in range(0, len(text), length):
        payload = {
            "inputs": text[start : start + length],
            "parameters": {"max_length": max_length},
        }
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        summaries.append(response.json()[0]["generated_text"])
    return ". ".join(summaries)