File size: 2,999 Bytes
5fa49b4
 
 
 
 
 
cffb37c
5fa49b4
 
 
 
 
7e36539
5fa49b4
 
 
486bb8b
daa46b8
5fa49b4
 
 
 
 
 
 
 
 
a2ad80c
643929c
5fa49b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
feacad7
 
7bf36f0
feacad7
7bf36f0
 
 
 
5fa49b4
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import numpy as np
import requests
import streamlit as st
import json
def _query_model(url, text, api_key, timeout=30):
    """Send *text* to one model endpoint and return its first prediction.

    Args:
        url: Full URL of the REST endpoint to POST to.
        text: Review text; sent as the JSON body ``{"inputs": text}``.
        api_key: Value placed in the ``x-api-key`` request header.
        timeout: Seconds to wait for the endpoint before giving up, so a
            stalled endpoint cannot hang the app indefinitely.

    Returns:
        The element at index 0 of the decoded JSON response, or ``None``
        when the request fails or times out, the body is not valid JSON,
        or the decoded payload cannot be indexed at position 0.
    """
    try:
        response = requests.post(
            url,
            data=json.dumps({'inputs': text}),
            headers={'x-api-key': api_key},
            timeout=timeout,
        )
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Connection problems, timeouts, or a body that is not JSON.
        return None

    try:
        return payload[0]
    except (KeyError, IndexError, TypeError):
        # A JSON object, an empty list, or a scalar instead of a
        # non-empty list of predictions.
        return None


def main():
    """Render the Streamlit page and show each model's rating prediction.

    Reads the API key from ``st.secrets['AWS-key']``, posts the text from
    the text area to every endpoint in ``checkpoints`` and writes the
    model name, the predicted label (``LABEL_i`` mapped to the integer
    ``i``) and the score to the page. If an endpoint gives no usable
    response, a retry message is written and the next model is tried.
    """

    st.title("Sentiment Analysis for Book Reviews")

    st.write("This application lets you perform sentiment analysis on book reviews.\
        Simply input a review into the text below and the application will give two predictions for what the \
             rating is on a scale of 0-5. The models will also produce the score they assigned their prediction. The score is\
             between 0 and 1 and quantifies the confidence the model has in its prediction.\
             \n\n Specifically, we consider two pre-trained models, [BERT-tiny](https://huggingface.co/dhmeltzer/bert-tiny-goodreads-wandb) and [DistilBERT](https://huggingface.co/dhmeltzer/distilbert-goodreads-wandb)\
              which have been fine-tuned on a dataset of Goodreads book \
             reviews, see [here](https://www.kaggle.com/competitions/goodreads-books-reviews-290312/data) for the original dataset. \
             These models are deployed on AWS and are accessed using a REST API. To deploy the models we used a combination of AWS Sagemaker, Lambda, and API Gateway.\
             There may be a cold start problem when you first use the application, but the models will respond quicker to any subsequent queries.\
             \n\n To read more about this project and specifically how we cleaned the data and trained the models, see the following GitHub [repository](https://github.com/david-meltzer/Goodreads-Sentiment-Analysis).")

    AWS_key = st.secrets['AWS-key']

    # Display name of each model -> REST endpoint that serves it.
    checkpoints = {
        'DistilBERT': 'https://85a720iwy2.execute-api.us-east-1.amazonaws.com/add_apis/distilbert-goodreads',
        'BERT-tiny': 'https://055dugvmzl.execute-api.us-east-1.amazonaws.com/beta/',
    }

    # Review input, pre-filled with an example review.
    user_input = st.text_area("Search box", """I loved the Lord of the Rings trilogy. It is a classic and beautifully written story. \
My favorite part of the book though was when the hobbits met Tom Bombadil, it's too bad he was not in the movies.""")

    # Maps the label strings 'LABEL_0' .. 'LABEL_5' to the integers 0 .. 5.
    convert_dict = {f'LABEL_{i}': i for i in range(6)}

    # Nothing to classify when the text area is empty.
    if not user_input:
        return

    for model_name, URL in checkpoints.items():
        prediction = _query_model(URL, user_input, AWS_key)
        if prediction is None:
            st.write("Model loading timed out. Please enter the text again.")
            continue

        label, score = convert_dict[prediction['label']], prediction['score']

        st.write(f"**Model Name**: {model_name}")
        st.write(f"**Predicted Review**: {label}")
        st.write(f"**Confidence**: {score}")
        st.write("-"*20)

# Entry point: build the page only when this file is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()