mmir_usersim / captioning /models /AttEnsemble.py
yashonwu
add captioning
9bf9e42
raw
history blame
4.27 kB
# This file is the implementation for ensemble evaluation.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import *
from .CaptionModel import CaptionModel
from .AttModel import pack_wrapper, AttModel
class AttEnsemble(AttModel):
def __init__(self, models, weights=None):
CaptionModel.__init__(self)
# super(AttEnsemble, self).__init__()
self.models = nn.ModuleList(models)
self.vocab_size = models[0].vocab_size
self.seq_length = models[0].seq_length
self.bad_endings_ix = models[0].bad_endings_ix
self.ss_prob = 0
weights = weights or [1.0] * len(self.models)
self.register_buffer('weights', torch.tensor(weights))
def init_hidden(self, batch_size):
state = [m.init_hidden(batch_size) for m in self.models]
return self.pack_state(state)
def pack_state(self, state):
self.state_lengths = [len(_) for _ in state]
return sum([list(_) for _ in state], [])
def unpack_state(self, state):
out = []
for l in self.state_lengths:
out.append(state[:l])
state = state[l:]
return out
def embed(self, it):
return [m.embed(it) for m in self.models]
def core(self, *args):
return zip(*[m.core(*_) for m, _ in zip(self.models, zip(*args))])
def get_logprobs_state(self, it, tmp_fc_feats, tmp_att_feats, tmp_p_att_feats, tmp_att_masks, state, output_logsoftmax=1):
# 'it' contains a word index
xt = self.embed(it)
state = self.unpack_state(state)
output, state = self.core(xt, tmp_fc_feats, tmp_att_feats, tmp_p_att_feats, state, tmp_att_masks)
logprobs = torch.stack([F.softmax(m.logit(output[i]), dim=1) for i,m in enumerate(self.models)], 2).mul(self.weights).div(self.weights.sum()).sum(-1).log()
return logprobs, self.pack_state(state)
def _prepare_feature(self, *args):
return tuple(zip(*[m._prepare_feature(*args) for m in self.models]))
def _old_sample_beam(self, fc_feats, att_feats, att_masks=None, opt={}):
beam_size = opt.get('beam_size', 10)
batch_size = fc_feats.size(0)
fc_feats, att_feats, p_att_feats, att_masks = self._prepare_feature(fc_feats, att_feats, att_masks)
assert beam_size <= self.vocab_size + 1, 'lets assume this for now, otherwise this corner case causes a few headaches down the road. can be dealt with in future if needed'
seq = torch.LongTensor(self.seq_length, batch_size).zero_()
seqLogprobs = torch.FloatTensor(self.seq_length, batch_size, self.vocab_size + 1)
# lets process every image independently for now, for simplicity
self.done_beams = [[] for _ in range(batch_size)]
for k in range(batch_size):
state = self.init_hidden(beam_size)
tmp_fc_feats = [fc_feats[i][k:k+1].expand(beam_size, fc_feats[i].size(1)) for i,m in enumerate(self.models)]
tmp_att_feats = [att_feats[i][k:k+1].expand(*((beam_size,)+att_feats[i].size()[1:])).contiguous() for i,m in enumerate(self.models)]
tmp_p_att_feats = [p_att_feats[i][k:k+1].expand(*((beam_size,)+p_att_feats[i].size()[1:])).contiguous() for i,m in enumerate(self.models)]
tmp_att_masks = [att_masks[i][k:k+1].expand(*((beam_size,)+att_masks[i].size()[1:])).contiguous() if att_masks[i] is not None else att_masks[i] for i,m in enumerate(self.models)]
it = fc_feats[0].data.new(beam_size).long().zero_()
logprobs, state = self.get_logprobs_state(it, tmp_fc_feats, tmp_att_feats, tmp_p_att_feats, tmp_att_masks, state)
self.done_beams[k] = self.old_beam_search(state, logprobs, tmp_fc_feats, tmp_att_feats, tmp_p_att_feats, tmp_att_masks, opt=opt)
seq[:, k] = self.done_beams[k][0]['seq'] # the first beam has highest cumulative score
seqLogprobs[:, k] = self.done_beams[k][0]['logps']
# return the samples and their log likelihoods
return seq.transpose(0, 1), seqLogprobs.transpose(0, 1)
# return the samples and their log likelihoods