"""Streamlit front end for the Clearly Defined license summarizer.

The page takes the text of a license, shows a summary produced by a
sequence-to-sequence model and then shows the scores returned by the
doc2vec ``inference`` helper under a "Similarity Index" heading.
"""

import os

import nltk
import numpy as np
import pandas as pd
import streamlit as st
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from src.abstractive_sum import summarize_text_with_model
from src.doc2vec import inference

# Hugging Face Hub identifier of the fine-tuned summarization checkpoint.
CUSTOM_MODEL_NAME = "utkarshsaboo45/ClearlyDefinedLicenseSummarizer"

# Fetch the NLTK "punkt" data up front.
# NOTE(review): presumably required by the helpers in src/ -- confirm.
nltk.download('punkt')

# Explicitly disable parallelism in the tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model and tokenizer are loaded again each time. Consider wrapping the loading
# in a cached function (e.g. st.cache_resource) if the deployed Streamlit
# version provides it.
with st.spinner('Loading...'):
    model = AutoModelForSeq2SeqLM.from_pretrained(CUSTOM_MODEL_NAME).to(device)
    tokenizer = AutoTokenizer.from_pretrained(CUSTOM_MODEL_NAME)

st.title('Clearly Defined: License Summarizer')

# Named license_text rather than `input` so the builtin is not shadowed.
license_text = st.text_area('Enter contents of the license')

# Skip empty and whitespace-only submissions; there is nothing to summarize.
if license_text.strip():
    # NOTE(review): the original nesting was lost; the spinner is assumed to
    # cover both the summarization and the similarity computation.
    with st.spinner('Loading...'):
        summary = summarize_text_with_model(license_text, model, tokenizer)
        st.header('Summary')
        st.write(summary)

        prediction_scores = inference(license_text)
        st.header('Similarity Index')
        st.dataframe(prediction_scores)