File size: 2,400 Bytes
7688b91
f66e0ee
7688b91
 
 
 
 
 
 
6a77b20
7688b91
 
0df545e
22c2dba
 
 
 
 
 
7688b91
f66e0ee
 
 
 
 
7688b91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
'''
Run 'python3 app.py' in terminal
Follow local URL
'''

import praw
import pandas as pd
from praw.models import MoreComments
from transformers import pipeline
#from transformers import DistilBertTokenizerFast
import gradio as gr
import numpy as np


reddit_keys = {'client_id' : "Q1w42RHhLq2fgwljAk_k-Q",		 # your client id
'client_secret': "enUJfFthiZRynGfPQtoK1nCxRer2Dw",	 # your client secret
'usernme' : "xl395", #profile username
'password' : "12xiao34quanAria!", #profile password
'user_agent' :"706_post"}	 # your user agent

reddit= praw.Reddit(client_id=reddit_keys['client_id'],		 # your client id
					client_secret=reddit_keys['client_secret'],	 # your client secret
                    usernme = reddit_keys['usernme'], #profile username
                    password = reddit_keys['password'], #profile password
					user_agent=reddit_keys['user_agent'])	 # your user agent


classifier = pipeline("sentiment-analysis", model="michellejieli/NSFW_text_classifier")

#input_url = "https://www.reddit.com/r/europe/comments/r0hthg/sweden_is_taking_the_lead_to_persuade_the_rest_of/"

def extract_comments(input_url):
    submission = reddit.submission(url=input_url)
    #posts_dict = {"Post text":[],}
    posts_dict = {"Post text":[], "class": []}
    for top_level_comment in submission.comments:
        if isinstance(top_level_comment, MoreComments):
            continue
        
        posts_dict["Post text"].append(top_level_comment.body)
        posts_dict["class"].append(classifier(top_level_comment.body)[0]['label'])
    df = pd.DataFrame(posts_dict)
    percent_exp = np.count_nonzero(np.array(df["class"]) == 'NSFW') / df.shape[0]
    output_msg = "Reddit page is contains no explicit content. Page is safe for users under 18."
    if percent_exp > 0:
        output_msg = f"Reddit page contains some explicit content. Users under the age of 18 should proceed with caution. \n{percent_exp * 100:.0f}% explicit."
    if percent_exp > 0.5:
        output_msg = f"Reddit page contains major explicit content. Users be wary. \n{percent_exp * 100:.0f}% explicit."
    return output_msg

# use gradio to create a web interface take a wikipedia page and summarize it
iface = gr.Interface(
    fn=extract_comments,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter Reddit page link",
    ),
    outputs="text",
)


if __name__ == "__main__":
    iface.launch()