macrdel committed on
Commit
5da36ef
1 Parent(s): 8843bcd

Add sentiment pipeline

Browse files
Files changed (2) hide show
  1. app/api.py +3 -1
  2. app/src/src.py +58 -0
app/api.py CHANGED
@@ -1,5 +1,7 @@
1
- from fastapi import FastAPI
2
  import config
 
 
 
3
  from pydantic import BaseModel
4
  from transformers import pipeline
5
  import uvicorn
 
 
1
  import config
2
+ from src import pipeline_sentiment
3
+
4
+ from fastapi import FastAPI
5
  from pydantic import BaseModel
6
  from transformers import pipeline
7
  import uvicorn
app/src/src.py CHANGED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import requests
3
+ import urllib.parse as urlparse
4
+
5
+
6
def get_video_id(url_video):
    """Extract the YouTube video id from a video URL.

    Recognised forms:

    * ``https://youtu.be/<id>``
    * ``https://[www.|m.]youtube.com/watch?v=<id>``
    * ``https://[www.|m.]youtube.com/embed/<id>``, ``/v/<id>`` and
      ``/shorts/<id>``

    Args:
        url_video: URL of the video, as a string.

    Returns:
        The video id as a string, or ``None`` when the URL is not one of the
        recognised forms or carries no id.
    """
    query = urlparse.urlparse(url_video)
    host = query.hostname
    video_id = None

    if host == 'youtu.be':
        # Short links: the id is the path without its leading slash.
        video_id = query.path[1:]
    elif host in ('www.youtube.com', 'youtube.com', 'm.youtube.com'):
        if query.path == '/watch':
            # .get() instead of ["v"]: a watch URL without a "v" parameter
            # must yield None rather than raise KeyError.
            values = urlparse.parse_qs(query.query).get("v")
            if values:
                video_id = values[0]
        elif query.path.startswith(('/embed/', '/v/', '/shorts/')):
            # Path looks like "/<kind>/<id>[/...]"; index 2 is the id.
            video_id = query.path.split('/')[2]

    # Normalise an empty id (e.g. "https://youtu.be/") to None so callers
    # only have to handle a single "not found" value.
    return video_id or None
17
+
18
def get_comments(api_key, video_id):
    """Fetch top-level comments of a YouTube video.

    Sends a single request to the YouTube Data API v3 ``commentThreads``
    endpoint asking for up to 100 threads; no further pages are fetched.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        video_id: id of the video, sent as the ``videoId`` query parameter.

    Returns:
        A dict keyed by the comment's position in the response. Each value
        is a dict with ``"text_comment"`` (the comment's ``textOriginal``
        with line breaks replaced by single spaces) and ``"publish_data"``
        (the comment's ``publishedAt`` value). ``None`` when the decoded
        response contains no ``"items"`` entry.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            the server does not answer within the timeout.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
    params = {
        "part": "snippet",
        "videoId": video_id,
        "maxResults": 100,
        "key": api_key,
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(endpoint, params=params, timeout=10)
    res = response.json()

    if "items" not in res:
        return None

    snippets = (
        item["snippet"]["topLevelComment"]["snippet"] for item in res["items"]
    )
    return {
        num: {
            # Flatten multi-line comments onto one line.
            "text_comment": " ".join(snippet["textOriginal"].splitlines()),
            "publish_data": snippet["publishedAt"],
        }
        for num, snippet in enumerate(snippets)
    }
44
+
45
def get_sentim(data, model):
    """Run the sentiment model on one input and unpack its first prediction.

    Args:
        data: input handed to ``model`` unchanged.
        model: callable whose result is indexable; its first element must
            provide the ``"label"`` and ``"score"`` keys.

    Returns:
        A ``(label, score)`` tuple taken from the first element of the
        model output.
    """
    first_prediction = model(data)[0]
    label = first_prediction['label']
    score = first_prediction['score']
    return label, score
49
+
50
def pipeline_sentiment(url_video, api_key, model):
    """Run sentiment analysis over the comments of a YouTube video.

    Resolves the video id from the URL, downloads the comments, scores each
    comment text with ``model`` and returns everything as one table.

    Args:
        url_video: URL of the YouTube video.
        api_key: YouTube Data API key passed on to ``get_comments``.
        model: sentiment model passed on to ``get_sentim`` for every comment.

    Returns:
        A ``pandas.DataFrame`` with one row per comment: the fields of each
        comment record (which must include ``"text_comment"``) plus the
        ``"sentiment"`` and ``"score"`` columns. When no comments are
        available, an empty frame with the columns ``"text_comment"``,
        ``"publish_data"``, ``"sentiment"`` and ``"score"``.

    Raises:
        ValueError: if no video id can be extracted from ``url_video``.
    """
    video_id = get_video_id(url_video)
    if not video_id:
        # Fail early with a clear message instead of sending a request
        # without a video id and crashing later on a missing column.
        raise ValueError(f"Cannot extract a YouTube video id from {url_video!r}")

    comments = get_comments(api_key, video_id)
    if not comments:
        # get_comments gave None or an empty dict: nothing to score.
        return pd.DataFrame(
            columns=["text_comment", "publish_data", "sentiment", "score"]
        )

    # One row per comment, one column per field of the comment record.
    comments_df = pd.DataFrame.from_dict(comments, orient="index")

    results = [get_sentim(text, model) for text in comments_df["text_comment"]]
    # Reuse the frame's own index so the assignment lines up row by row
    # regardless of how the comments are keyed.
    comments_df[["sentiment", "score"]] = pd.DataFrame(
        results, columns=["sentiment", "score"], index=comments_df.index
    )
    return comments_df