Spaces:
Runtime error
Runtime error
File size: 2,429 Bytes
926183f 46a030d d4c02d7 9ee38ea 89a2a73 46a030d e3ce53f 46a030d 926183f 46a030d 926183f 89a2a73 46a030d 97b1ca5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import re
import pickle
import numpy as np
import random
import torch
from solver import TrainSolver
from model import PointerNetworks
import gensim
import MeCab
import pysbd
import io
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that loads embedded torch storages with ``map_location='cpu'``."""

    def find_class(self, module, name):
        """Resolve a pickled global, substituting torch's byte-storage loader.

        When the stream references ``torch.storage._load_from_bytes``, a
        replacement callable is returned that calls ``torch.load`` on the raw
        bytes with ``map_location='cpu'``. Every other lookup is delegated to
        ``pickle.Unpickler.find_class``.
        """
        is_storage_loader = (module, name) == ('torch.storage', '_load_from_bytes')
        if not is_storage_loader:
            return super().find_class(module, name)

        def load_on_cpu(b):
            # Wrap the raw bytes in a file-like object so torch.load can read them.
            return torch.load(io.BytesIO(b), map_location='cpu')

        return load_on_cpu
def _vocab_index(fm, token):
    """Return ``fm.vocab[token].index``, falling back to the ``"<unk>"`` entry."""
    try:
        return fm.vocab[token].index
    except KeyError:
        return fm.vocab["<unk>"].index


def create_data(doc, fm, split_method):
    """Turn raw text into per-sentence index arrays, label arrays and tokens.

    The document is split into sentences, each sentence is tokenized with a
    MeCab tagger configured for wakati output, and every token is mapped to
    its index in ``fm.vocab``.

    Args:
        doc: Input text as a string.
        fm: Object exposing ``fm.vocab[token].index``. Tokens missing from
            the vocabulary use the index of its ``"<unk>"`` entry.
        split_method: ``"pySBD"`` splits sentences with pySBD; any other
            value splits after every ``"。"`` and ``"."``.

    Returns:
        A tuple ``(altex, allab, fukugenss)``. Each element is a one-item
        list wrapping a per-sentence list:

        * ``altex[0]``: NumPy arrays of vocabulary indices.
        * ``allab[0]``: NumPy arrays of labels, all 0 except a 1 on the
          last token of the sentence.
        * ``fukugenss[0]``: lists of the surface tokens themselves.

        Blank lines, and lines that produce no tokens, are skipped.
    """
    wakati = MeCab.Tagger("-Owakati -b 81920 -r /etc/mecabrc -d /home/user/app/mecab-ipadic-neologd")

    if split_method == "pySBD":
        # Build the segmenter only on the path that actually uses it.
        lines = pysbd.Segmenter(language="ja", clean=False).segment(doc)
    else:
        # Put a line break after each period, then collapse repeated breaks.
        doc = doc.strip().replace("。","。\n").replace(".",".\n")
        doc = re.sub("(\n)+","\n",doc)
        lines = doc.split("\n")

    labels, text, fukugens = [], [], []
    for line in lines:
        line = line.strip()
        if not line:
            continue

        # Drop the last split element (presumably the line terminator MeCab
        # appends to wakati output -- the original discards it unconditionally).
        tokens = wakati.parse(line).split(" ")[:-1]
        if not tokens:
            # Nothing to label; indexing label[-1] below would raise IndexError.
            continue

        # Every token is labelled 0 except the last one, which is labelled 1.
        label = [0] * len(tokens)
        label[-1] = 1

        labels.append(np.array(label))
        text.append(np.array([_vocab_index(fm, token) for token in tokens]))
        fukugens.append(list(tokens))

    # Callers expect an outer list with one entry for this single document.
    return [text], [labels], [fukugens]
def generate(doc, mymodel, fm, index2word, split_method):
    """Preprocess ``doc`` with ``create_data`` and pass the result to the model.

    The three lists returned by ``create_data`` are forwarded, together with
    ``index2word``, to ``mymodel.check_accuracy``; whatever that call returns
    is returned unchanged.
    """
    index_seqs, label_seqs, surface_tokens = create_data(doc, fm, split_method)
    return mymodel.check_accuracy(index_seqs, label_seqs, index2word, surface_tokens)
def setup():
    """Load the three pickled artefacts from the current working directory.

    Reads ``index2word.pickle`` and ``fm.pickle`` with ``pickle.load`` and
    ``model.pickle`` with ``CPU_Unpickler``.

    Returns:
        A tuple ``(mysolver, fm, index2word)``.
    """
    with open('index2word.pickle', 'rb') as index_file:
        index2word = pickle.load(index_file)

    # The model goes through CPU_Unpickler rather than plain pickle.load.
    with open('model.pickle', 'rb') as model_file:
        unpickler = CPU_Unpickler(model_file)
        mysolver = unpickler.load()

    with open('fm.pickle', 'rb') as embedding_file:
        fm = pickle.load(embedding_file)

    return mysolver, fm, index2word
|