Spaces:
Runtime error
Runtime error
File size: 2,482 Bytes
a804ced fc6772f a804ced fc6772f a804ced fc6772f 0599777 fc6772f a804ced fc6772f a804ced fc6772f a804ced |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import os
import nltk
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import streamlit as st
from src.doc2vec import inference
from src.abstractive_sum import summarize_text_with_model
from src.textrank import custom_textrank_summarizer
from src.clean import clean_license_text
# Hugging Face Hub id of the seq2seq checkpoint used for abstractive summaries.
CUSTOM_MODEL_NAME = "utkarshsaboo45/ClearlyDefinedLicenseSummarizer"

# Fetch the NLTK 'punkt' data package at start-up.
nltk.download('punkt')

# Must be set before any tokenizer is instantiated below.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def _resource_cache():
    """Return a Streamlit decorator that caches heavy, unserialisable objects.

    Streamlit re-executes this whole script on every widget interaction, so
    anything expensive has to be cached explicitly. The decorator name has
    changed across Streamlit releases; pick the newest one that the installed
    version provides so the app keeps working on older runtimes.
    """
    for attr in ("cache_resource", "experimental_singleton"):
        decorator = getattr(st, attr, None)
        if decorator is not None:
            return decorator(show_spinner=False)
    # Legacy API: allow_output_mutation skips re-hashing the returned model.
    return st.cache(allow_output_mutation=True, show_spinner=False)


@_resource_cache()
def _load_summarizer(model_name):
    """Load the seq2seq model and its tokenizer for ``model_name``.

    Args:
        model_name: Hugging Face Hub id (or local path) of the checkpoint.

    Returns:
        A ``(model, tokenizer)`` tuple. The result is cached, so the
        checkpoint is only read once per server process instead of on
        every script rerun.
    """
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    return loaded_model, loaded_tokenizer


with st.spinner('Loading...'):
    model, tokenizer = _load_summarizer(CUSTOM_MODEL_NAME)
    # Moving an already-placed module to the same device is a no-op, so this
    # is cheap on reruns that hit the cache.
    model = model.to(device)
# Sidebar caption for each summarization mode; the key order is also the
# order in which the modes are offered in the select box.
_MODE_CAPTIONS = {
    "Abstractive": 'Summary will be generated by the T5 Transformer Model',
    "Extractive": 'Summary will be generated by a custom TextRank Algorithm',
    "Both": 'The License text will be first passed through the custom TextRank algorithm and then passed on to the T5 Transformer Model to generate a summary.',
}

summarization_type = st.sidebar.selectbox(
    "Select summarization type.",
    tuple(_MODE_CAPTIONS),
)
st.sidebar.caption(_MODE_CAPTIONS[summarization_type])

# The length slider is only offered (and `summary_len` only defined) in
# extractive mode.
if summarization_type == 'Extractive':
    summary_len = st.sidebar.slider(
        'Summary length percentage',
        min_value=1,
        max_value=10,
        value=3,
    )

# Opt-in display of the cleaned license text on the main page.
clean_text = st.sidebar.checkbox('Show cleaned license text')
st.title('Clearly Defined: License Summarizer')

# Named `license_text` rather than `input` so the builtin is not shadowed.
license_text = st.text_area('Enter contents of the license')

# Do nothing until the user has entered something other than whitespace.
if license_text.strip():
    with st.spinner('Loading...'):
        # The three modes are mutually exclusive, hence a single elif chain.
        if summarization_type == 'Abstractive':
            summary, definitions = summarize_text_with_model(license_text, model, tokenizer)
        elif summarization_type == 'Extractive':
            # The slider yields an integer 1-10; it is divided by 10 before
            # being handed to the summarizer.
            summary, definitions = custom_textrank_summarizer(
                license_text, summary_len=summary_len / 10
            )
        elif summarization_type == 'Both':
            # NOTE(review): the sidebar caption for this mode says TextRank
            # runs first and the T5 model second, but the calls below run the
            # model first and TextRank on its output. Confirm which order is
            # intended and align the caption or the code.
            summary, definitions = summarize_text_with_model(license_text, model, tokenizer)
            summary, _ = custom_textrank_summarizer(summary, summary_len=1)

        if clean_text:
            st.header('Cleaned License Text')
            # Only the first element of the cleaner's result is displayed.
            st.write(clean_license_text(license_text)[0])

        st.header('Summary')
        st.write(summary)

        # Similarity scores are computed on the raw input, not the summary.
        prediction_scores = inference(license_text)
        st.header('Similarity Index')
        st.dataframe(prediction_scores)

        # The definitions section is omitted when nothing was extracted.
        if definitions:
            st.header('Definitions')
            st.write(definitions)
|