Spaces:
Sleeping
Sleeping
File size: 2,999 Bytes
5fa49b4 cffb37c 5fa49b4 7e36539 5fa49b4 486bb8b daa46b8 5fa49b4 a2ad80c 643929c 5fa49b4 feacad7 7bf36f0 feacad7 7bf36f0 5fa49b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import numpy as np
import requests
import streamlit as st
import json
def main():
    """Render the Streamlit page and show each model's rating prediction.

    Writes the title and introduction, reads the review text from a text
    area and, when that text is non-empty, POSTs it as JSON to every
    configured endpoint using the API key stored in
    ``st.secrets['AWS-key']``.

    Each endpoint is expected to reply with a JSON list whose first item
    holds a ``label`` (``LABEL_0`` .. ``LABEL_5``) and a ``score``. The
    label is mapped to its integer rating and written to the page together
    with the score. If the request fails, times out, or the reply does not
    have that shape, a retry message is written for that model and the
    remaining models are still queried.
    """
    st.title("Sentiment Analysis for Book Reviews")

    # Adjacent literals are concatenated into one Markdown string; each
    # fragment carries its own trailing space so sentences do not run together.
    st.write(
        "This application lets you perform sentiment analysis on book reviews. "
        "Simply input a review into the text below and the application will give two predictions for what the "
        "rating is on a scale of 0-5. The models will also produce the score they assigned their prediction. The score is "
        "between 0 and 1 and quantifies the confidence the model has in its prediction."
        "\n\n Specifically, we consider two pre-trained models, [BERT-tiny](https://huggingface.co/dhmeltzer/bert-tiny-goodreads-wandb) and [DistilBERT](https://huggingface.co/dhmeltzer/distilbert-goodreads-wandb) "
        "which have been fine-tuned on a dataset of Goodreads book "
        "reviews, see [here](https://www.kaggle.com/competitions/goodreads-books-reviews-290312/data) for the original dataset. "
        "These models are deployed on AWS and are accessed using a REST API. To deploy the models we used a combination of AWS Sagemaker, Lambda, and API Gateway. "
        "There may be a cold start problem when you first use the application, but the models will respond quicker to any subsequent queries."
        "\n\n To read more about this project and specifically how we cleaned the data and trained the models, see the following GitHub [repository](https://github.com/david-meltzer/Goodreads-Sentiment-Analysis)."
    )

    aws_key = st.secrets['AWS-key']

    # Display name -> REST endpoint; insertion order is the display order.
    checkpoints = {
        'DistilBERT': 'https://85a720iwy2.execute-api.us-east-1.amazonaws.com/add_apis/distilbert-goodreads',
        'BERT-tiny': 'https://055dugvmzl.execute-api.us-east-1.amazonaws.com/beta/',
    }

    # Review text box, pre-filled with an example review.
    user_input = st.text_area(
        "Search box",
        "I loved the Lord of the Rings trilogy. It is a classic and beautifully written story. "
        "My favorite part of the book though was when the hobbits met Tom Bombadil, it's too bad he was not in the movies.",
    )

    # 'LABEL_0' .. 'LABEL_5' -> integer rating 0 .. 5.
    label_to_rating = {f'LABEL_{i}': i for i in range(6)}

    if not user_input:
        return

    # The headers and JSON body are the same for every model, so build them once.
    headers = {'x-api-key': aws_key}
    payload = json.dumps({'inputs': user_input})
    # Upper bound in seconds so a stalled endpoint cannot hang the app.
    request_timeout = 30

    for model_name, url in checkpoints.items():
        try:
            response = requests.post(
                url,
                data=payload,
                headers=headers,
                timeout=request_timeout,
            )
            top = response.json()[0]
            label = label_to_rating[top['label']]
            score = top['score']
        except (requests.RequestException, ValueError,
                KeyError, IndexError, TypeError):
            # Network failure or timeout, a non-JSON body, or a reply that is
            # not a list of {'label', 'score'} items (for example an error
            # object returned instead of predictions).
            st.write("Model loading timed out. Please enter the text again.")
            continue

        st.write(f"**Model Name**: {model_name}")
        st.write(f"**Predicted Review**: {label}")
        st.write(f"**Confidence**: {score}")
        st.write("-"*20)
# Script entry point: run the app only when this file is executed directly.
if __name__ == "__main__":
    main()
|