Spaces:
Runtime error
Runtime error
File size: 2,827 Bytes
97beac5 4be804d 70f757d 4be804d 97beac5 4be804d aff4412 4be804d 529aa95 a02613f 8cd9729 529aa95 4be804d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import streamlit as st
import torch
import pandas as pd
from io import StringIO
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
class preProcess:
    """Turn a chart's CSV data and its title into one flat text string.

    The string produced by ``combine_title_data`` has the form
    ``"<title>\\n  labels <col> - <col> values <row> , <row> ..."`` where
    each row is its cell values joined by single spaces.
    """

    def __init__(self, filename, titlename):
        """Store the CSV source and the chart title.

        Args:
            filename: Anything ``pandas.read_csv`` accepts (path or
                file-like object).
            titlename: Chart title; a trailing newline is appended to it.
        """
        self.filename = filename
        self.title = titlename + '\n'

    def read_data(self):
        """Read ``self.filename`` with ``pandas.read_csv``.

        Returns:
            The parsed ``pandas.DataFrame``.
        """
        return pd.read_csv(self.filename)

    def check_columns(self, df):
        """Check that ``df`` has between one and three columns.

        A message is shown with ``st.error`` when the check fails.

        Returns:
            True if the column count is 1, 2 or 3, otherwise False.
        """
        column_count = len(df.columns)
        if column_count > 3:
            st.error('File has more than 3 coloumns.')
            return False
        if column_count == 0:
            st.error('File has no column.')
            return False
        return True

    def format_data(self, df):
        """Flatten ``df`` into a ``labels ... values ...`` string.

        Args:
            df: DataFrame whose column labels become the "labels" part
                and whose rows become the "values" part.

        Returns:
            ``' labels ' + labels joined by ' - ' + ' values ' + rows
            joined by ' , '``, each row being its cells joined by a space.
        """
        # Walk the columns in lockstep so each tuple is one row of the table.
        rows = zip(*(df[name] for name in df.columns))
        res = [' '.join(map(str, row)) for row in rows]
        # Column labels are not guaranteed to be strings (e.g. integer
        # labels), so convert them before joining.
        labels = ' - '.join(map(str, df.columns))
        return ' labels ' + labels + ' values ' + ' , '.join(res)

    def combine_title_data(self, df):
        """Return the stored title and the formatted data joined by a space."""
        data = self.format_data(df)
        return ' '.join([self.title, data])
class Model:
    """Generate a text summary for formatted chart data with a seq2seq model.

    The tokenizer and model are loaded with ``AutoTokenizer`` and
    ``AutoModelForSeq2SeqLM`` from the checkpoint matching the chosen mode.
    """

    # Checkpoint name for each supported mode; keys are lower-case because
    # the mode is matched case-insensitively.
    _CHECKPOINTS = {
        'simple': 'saadob12/t5_C2T_big',
        'analytical': 'saadob12/t5_C2T_autochart',
    }

    def __init__(self, text, mode):
        """Load the tokenizer and model for ``mode`` and store ``text``.

        Args:
            text: Input string; ``generate`` prepends ``'C2T: '`` to it.
            mode: ``'simple'`` or ``'analytical'`` (case-insensitive).

        Raises:
            ValueError: If ``mode`` is not one of the supported modes.
        """
        mode_key = mode.lower()
        # Fail here rather than leaving tokenizer/model unset, which would
        # only surface later as an AttributeError inside generate().
        if mode_key not in self._CHECKPOINTS:
            supported = ', '.join(sorted(self._CHECKPOINTS))
            raise ValueError(
                f"Unsupported mode {mode!r}; expected one of: {supported}"
            )
        checkpoint = self._CHECKPOINTS[mode_key]

        self.padding = 'max_length'
        self.truncation = True
        self.prefix = 'C2T: '
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.text = text
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(self.device)

    def generate(self):
        """Encode ``prefix + text``, run beam search and decode the result.

        Returns:
            The first decoded sequence, with any leading or trailing
            ``[``, ``]`` and ``"`` characters stripped.
        """
        tokens = self.tokenizer.encode(
            self.prefix + self.text,
            truncation=self.truncation,
            padding=self.padding,
            return_tensors='pt',
        ).to(self.device)
        generated = self.model.generate(tokens, num_beams=4, max_length=256)
        tgt_text = self.tokenizer.decode(
            generated[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        return str(tgt_text).strip('[]""')
# Upload widget: once a file is chosen, echo it back as raw bytes, as a
# text stream, as a decoded string and finally as a parsed DataFrame.
uploaded_file = st.file_uploader("Choose a file")
if uploaded_file is not None:
    # To read file as bytes:
    bytes_data = uploaded_file.getvalue()
    st.write(bytes_data)

    # To convert to a string based IO (reuses the bytes fetched above
    # instead of calling getvalue() a second time):
    stringio = StringIO(bytes_data.decode("utf-8"))
    st.write(stringio)

    # To read file as string:
    string_data = stringio.read()
    st.write(string_data)

    # Can be used wherever a "file-like" object is accepted:
    dataframe = pd.read_csv(uploaded_file)
    st.write(dataframe)
|