from flask import Flask, request, jsonify
from flask_cors import CORS
import os
from dotenv import load_dotenv
from transformers import pipeline
import feedparser
import json
from dateutil import parser
import re

load_dotenv()
# Load the sentiment classifier
sentiment_analysis = pipeline(
    "sentiment-analysis", model="siebert/sentiment-roberta-large-english"
)
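# Each pipeline call returns a list with one dict per input, e.g.
# [{"label": "POSITIVE", "score": 0.9987}]; labels are POSITIVE or NEGATIVE.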
app = Flask(__name__, static_url_path="", static_folder="dist")

CORS(app)


@app.route("/")
def index():
    return app.send_static_file("index.html")


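# Example request (illustrative feed URL):
#   GET /api/news?feed_url=https://feeds.bbci.co.uk/news/rss.xml
# Responds with the feed entries, each annotated with a "sentiment" score.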
@app.route("/api/news")
def get_news():
    feed_url = request.args.get("feed_url")
    # check that a feed URL was actually provided
    if not feed_url:
        return jsonify({"error": "No feed_url provided"}), 400

    # derive a cache file name from the URL (strip the scheme and separators)
    file_name = "".join(re.split(r"https://|\.|/", feed_url))

    feed_entries = get_feed(feed_url)

    # load the cached predictions for this feed, if any
    try:
        with open(f"{file_name}_cache.json") as file:
            cache = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        cache = {}

    # if the feed is newer than the cache, recompute predictions and update the cache
    print("new date", feed_entries["last_update"])
    print("old date", cache.get("last_update", "None"))
    if not cache.get("last_update") or parser.parse(
        feed_entries["last_update"]
    ) > parser.parse(cache["last_update"]):
        print("Updating cache with new predictions")
        # keep only the titles for sentiment analysis
        titles = [entry["title"] for entry in feed_entries["entries"]]
        # run sentiment analysis on titles
        predictions = [sentiment_analysis(sentence) for sentence in titles]
        # parse Negative and Positive, normalize to -1 to 1
        predictions = [
            -prediction[0]["score"]
            if prediction[0]["label"] == "NEGATIVE"
            else prediction[0]["score"]
            for prediction in predictions
        ]
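        # e.g. {"label": "NEGATIVE", "score": 0.98} becomes -0.98 and
        # {"label": "POSITIVE", "score": 0.95} stays 0.95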
        # merge RSS data with predictions
        entries_predictions = [
            {**entry, "sentiment": prediction}
            for entry, prediction in zip(feed_entries["entries"], predictions)
        ]
        output = {
            "entries": entries_predictions,
            "last_update": feed_entries["last_update"],
        }
        # update the predictions cache
        with open(f"{file_name}_cache.json", "w") as file:
            json.dump(output, file)
        # send back json
        return jsonify(output)
    else:
        print("Returning cached predictions")
        return jsonify(cache)


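# Example request (illustrative payload):
#   POST /api/predict  with body {"sentences": ["Markets rallied today."]}
# Responds with [{"sentence": "Markets rallied today.", "sentiment": 0.99}, ...]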
@app.route("/api/predict", methods=["POST"])
def predict():
    # body is expected to be {"sentences": ["...", "..."]}
    data = request.get_json(silent=True)
    if data is None or data.get("sentences") is None:
        return jsonify({"error": "No text provided"}), 400
    sentences = data["sentences"]
    # predict a sentiment for each sentence
    predictions = [sentiment_analysis(sentence) for sentence in sentences]
    # parse NEGATIVE/POSITIVE labels, normalize to -1 to 1
    predictions = [
        -prediction[0]["score"]
        if prediction[0]["label"] == "NEGATIVE"
        else prediction[0]["score"]
        for prediction in predictions
    ]
    output = [
        dict(sentence=sentence, sentiment=prediction)
        for sentence, prediction in zip(sentences, predictions)
    ]
    # send back json
    return jsonify(output)


def get_feed(feed_url):
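    # feedparser exposes the feed-level timestamp as feed["feed"]["updated"];
    # this raises KeyError for feeds that do not provide one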
    feed = feedparser.parse(feed_url)
    return {"entries": feed["entries"], "last_update": feed["feed"]["updated"]}


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
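# To run locally (assuming the dependencies above are installed):
#   python app.py
# The server listens on port 7860 unless PORT is set in the environment.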