import streamlit as st
import pandas as pd
import numpy as np
import nltk 
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import os

from src.doc2vec import inference
from src.abstractive_sum import summarize_text_with_model

CUSTOM_MODEL_NAME = "utkarshsaboo45/ClearlyDefinedLicenseSummarizer"

# Fetch the NLTK 'punkt' resource at startup (presumably required by the
# helpers imported from `src` -- confirm against those modules). Kept at module
# level so a failed download is retried on the next script run.
nltk.download('punkt')
# Set the Hugging Face tokenizers parallelism switch to "false".
os.environ["TOKENIZERS_PARALLELISM"] = "false"
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Streamlit re-executes this whole script on every widget interaction, so an
# uncached `from_pretrained` would reload the model each time the user
# types. Pick the resource-caching decorator this Streamlit version offers,
# falling back to a no-op decorator (i.e. the previous uncached behaviour).
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or (lambda func: func)
)


@_cache_resource
def _load_model_and_tokenizer(model_name, device_name):
    """Load the seq2seq model and its tokenizer once per server process.

    Args:
        model_name: Hugging Face model identifier passed to `from_pretrained`.
        device_name: Device string (e.g. "cpu" or "cuda"); passed as a plain
            string so the caching layer can hash the argument.

    Returns:
        A `(model, tokenizer)` tuple, with the model moved to `device_name`.
    """
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(
        torch.device(device_name)
    )
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    return loaded_model, loaded_tokenizer


with st.spinner('Loading...'):
    model, tokenizer = _load_model_and_tokenizer(CUSTOM_MODEL_NAME, str(device))

st.title('Clearly Defined: License Summarizer')
# Named `license_text` rather than `input` so the builtin is not shadowed.
license_text = st.text_area('Enter contents of the license')

# Only run the models when there is real content: an empty or whitespace-only
# text area is skipped instead of being summarized.
if license_text.strip():
    with st.spinner('Loading...'):
        # Abstractive summary produced by the loaded seq2seq model.
        summary = summarize_text_with_model(license_text, model, tokenizer)
        st.header('Summary')
        st.write(summary)

        # Scores returned by the doc2vec `inference` helper, shown as a table.
        prediction_scores = inference(license_text)
        st.header('Similarity Index')
        st.dataframe(prediction_scores)