Spaces:
Runtime error
Runtime error
import streamlit as st | |
import torch | |
import pandas as pd | |
from io import StringIO | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
class preProcess:
    """Load a CSV table and flatten it, together with a title, into one string.

    The flattened form is ``<title>\n  labels <col> - <col> values <row> , <row>``
    where every row is its cell values joined by single spaces.
    NOTE(review): presumably this string is the text fed to ``Model`` -- confirm
    against the caller.
    """

    def __init__(self, filename, titlename):
        """Remember the CSV source and the title (a newline is appended to it).

        Args:
            filename: Path or file-like object accepted by ``pandas.read_csv``.
            titlename: Title string placed in front of the flattened table.
        """
        self.filename = filename
        self.title = titlename + '\n'

    def read_data(self):
        """Return the CSV at ``self.filename`` parsed into a DataFrame."""
        return pd.read_csv(self.filename)

    def check_columns(self, df):
        """Return True when ``df`` has between 1 and 3 columns.

        On failure an error is shown through ``st.error`` and False is returned.
        """
        column_count = len(df.columns)
        if column_count > 3:
            st.error('File has more than 3 columns.')
            return False
        if column_count == 0:
            st.error('File has no column.')
            return False
        return True

    def format_data(self, df):
        """Flatten ``df`` into ``' labels a - b values r1c1 r1c2 , r2c1 r2c2'``.

        Column labels and cell values are both converted with ``str`` so that
        non-string headers (e.g. integer years) do not break the join.
        """
        # One list of cell values per column, then transpose into rows.
        columns = [list(df[name]) for name in df.columns]
        rows = [' '.join(map(str, row)) for row in zip(*columns)]
        labels = ' - '.join(map(str, df.columns))
        return ' labels ' + labels + ' values ' + ' , '.join(rows)

    def combine_title_data(self, df):
        """Return the title and the flattened table joined by a single space."""
        return ' '.join([self.title, self.format_data(df)])
class Model:
    """Wrap a seq2seq checkpoint that turns flattened chart text into a summary.

    The checkpoint is chosen by ``mode`` and loaded through ``AutoTokenizer`` /
    ``AutoModelForSeq2SeqLM``; it is moved to ``cuda:0`` when CUDA is available,
    otherwise it stays on the CPU.
    """

    # Lower-cased ``mode`` value -> Hugging Face checkpoint name to load.
    _CHECKPOINTS = {
        'simple': 'saadob12/t5_C2T_big',
        'analytical': 'saadob12/t5_C2T_autochart',
    }

    def __init__(self, text, mode):
        """Load the tokenizer and model for ``mode`` and store the input text.

        Args:
            text: Input string; ``self.prefix`` is prepended to it in ``generate``.
            mode: ``'simple'`` or ``'analytical'`` (case-insensitive).

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        # Validate before loading anything: an unknown mode used to leave
        # ``tokenizer``/``model`` unset and only fail later inside ``generate``.
        checkpoint = self._CHECKPOINTS.get(mode.lower())
        if checkpoint is None:
            raise ValueError(
                "Unsupported mode %r; expected one of %s"
                % (mode, sorted(self._CHECKPOINTS))
            )

        self.padding = 'max_length'
        self.truncation = True
        self.prefix = 'C2T: '
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.text = text
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(self.device)

    def generate(self):
        """Return the generated summary for ``self.text`` as a string.

        The prefixed text is encoded with truncation and ``max_length`` padding,
        decoded with 4 beams up to 256 tokens, and the first returned sequence is
        converted back to text without special tokens.
        """
        tokens = self.tokenizer.encode(
            self.prefix + self.text,
            truncation=self.truncation,
            padding=self.padding,
            return_tensors='pt',
        ).to(self.device)
        generated = self.model.generate(tokens, num_beams=4, max_length=256)
        tgt_text = self.tokenizer.decode(
            generated[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        # Trim any leading/trailing '[', ']' or '"' characters from the decoded text.
        return str(tgt_text).strip('[]""')
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # Show the raw bytes of the upload.
    bytes_data = uploaded_file.getvalue()
    st.write(bytes_data)

    # Wrap the UTF-8 decoded content in a text-mode, file-like object.
    stringio = StringIO(bytes_data.decode("utf-8"))
    st.write(stringio)

    # Pull the whole content out as a single string.
    string_data = stringio.read()
    st.write(string_data)

    # The upload itself is file-like, so pandas can parse it directly.
    dataframe = pd.read_csv(uploaded_file)
    st.write(dataframe)