"""CSV-to-prompt helpers, a seq2seq summary model wrapper, and an upload demo.

``preProcess`` flattens a small CSV table (plus a title) into a single text
prompt, ``Model`` feeds a prompt to one of the ``saadob12/t5_C2T_*``
checkpoints, and the statements at the bottom render a Streamlit file
uploader that echoes the uploaded file back in several forms.
"""

from io import StringIO

import pandas as pd
import streamlit as st
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


class preProcess:
    """Read a CSV file and flatten it, together with a title, into one string."""

    def __init__(self, filename, titlename):
        """Store the CSV location and the title (a newline is appended to it).

        Args:
            filename: Path or file-like object handed to ``pandas.read_csv``.
            titlename: Title string placed in front of the formatted table.
        """
        self.filename = filename
        self.title = titlename + '\n'

    def read_data(self):
        """Return the CSV at ``self.filename`` parsed into a DataFrame."""
        return pd.read_csv(self.filename)

    def check_columns(self, df):
        """Return True when ``df`` has between 1 and 3 columns.

        Otherwise an error is shown through ``st.error`` and False is
        returned.
        """
        n_columns = len(df.columns)
        if n_columns > 3:
            st.error('File has more than 3 coloumns.')
            return False
        if n_columns == 0:
            st.error('File has no column.')
            return False
        return True

    def format_data(self, df):
        """Flatten ``df`` into ``' labels <cols> values <rows>'``.

        Column labels are joined with ``' - '``; each row's values are
        converted with ``str`` and joined with spaces, and rows are joined
        with ``' , '``.  For columns ``a, b`` and rows ``(1, 2), (3, 4)`` the
        result is ``' labels a - b values 1 2 , 3 4'``.
        """
        column_values = [list(df[label]) for label in df.columns]
        rows = [' '.join(map(str, row)) for row in zip(*column_values)]
        # Labels are stringified so non-string headers (e.g. integer labels)
        # do not make ``str.join`` raise TypeError.
        labels = ' - '.join(map(str, df.columns))
        return ' labels ' + labels + ' values ' + ' , '.join(rows)

    def combine_title_data(self, df):
        """Return the title and the formatted table joined by one space."""
        return ' '.join([self.title, self.format_data(df)])


class Model:
    """Wrap a pretrained seq2seq checkpoint selected by ``mode``."""

    # Maps the lower-cased ``mode`` argument to the checkpoint to load.
    _CHECKPOINTS = {
        'simple': 'saadob12/t5_C2T_big',
        'analytical': 'saadob12/t5_C2T_autochart',
    }

    def __init__(self, text, mode):
        """Load the tokenizer and model for ``mode`` onto the chosen device.

        Args:
            text: Input text; ``generate`` prepends ``self.prefix`` to it.
            mode: ``'simple'`` or ``'analytical'`` (case-insensitive).

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
                Without this check the instance would be left without
                ``tokenizer``/``model`` and ``generate`` would fail later
                with an AttributeError.
        """
        self.padding = 'max_length'
        self.truncation = True
        self.prefix = 'C2T: '
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.text = text

        try:
            checkpoint = self._CHECKPOINTS[mode.lower()]
        except KeyError:
            supported = ', '.join(repr(name) for name in self._CHECKPOINTS)
            raise ValueError(
                f"Unsupported mode {mode!r}; expected one of: {supported}"
            ) from None

        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(self.device)

    def generate(self):
        """Return the decoded model output for ``self.prefix + self.text``.

        The input is tokenized with truncation and ``'max_length'`` padding,
        decoded from the first sequence of a 4-beam search capped at 256
        tokens, and any leading/trailing ``[``, ``]`` or ``"`` characters
        are stripped from the result.
        """
        tokens = self.tokenizer.encode(
            self.prefix + self.text,
            truncation=self.truncation,
            padding=self.padding,
            return_tensors='pt',
        ).to(self.device)
        generated = self.model.generate(tokens, num_beams=4, max_length=256)
        tgt_text = self.tokenizer.decode(
            generated[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        return str(tgt_text).strip('[]""')


uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # To read file as bytes:
    bytes_data = uploaded_file.getvalue()
    st.write(bytes_data)

    # To convert to a string based IO (reuses the bytes read above):
    stringio = StringIO(bytes_data.decode("utf-8"))
    st.write(stringio)

    # To read file as string:
    string_data = stringio.read()
    st.write(string_data)

    # Can be used wherever a "file-like" object is accepted:
    dataframe = pd.read_csv(uploaded_file)
    st.write(dataframe)