sakharamg's picture
Uploading all files
158b61b
#!/usr/bin/env python
#
#
#
import heapq
import math
import random
import sys
from bleu import BleuScorer
class Sample:
    """A pair of hypotheses, and their score difference.

    Instances are ordered by ``diff`` so that they can be kept in a
    ``heapq`` min-heap, where the pair with the smallest score difference
    sits at the root.
    """

    def __init__(self, hyp1, hyp2):
        """Pair up two hypotheses.

        Args:
            hyp1: First hypothesis; must expose a numeric ``score`` attribute.
            hyp2: Second hypothesis; must expose a numeric ``score`` attribute.
        """
        self.hyp1 = hyp1
        self.hyp2 = hyp2
        # Absolute gap: the order in which the pair was drawn is irrelevant.
        self.diff = abs(hyp1.score - hyp2.score)

    def __lt__(self, other):
        """Return True when this pair's score gap is smaller than other's.

        ``heapq`` and ``sorted`` only need ``<``.  Python 3 ignores
        ``__cmp__`` entirely, so without this method the heap operations
        fail with ``TypeError`` there.
        """
        return self.diff < other.diff

    def __cmp__(self, other):
        """Three-way comparison on ``diff`` (-1, 0 or 1).

        Kept for Python 2 callers.  Written without the ``cmp()`` builtin,
        which no longer exists in Python 3.
        """
        return (self.diff > other.diff) - (self.diff < other.diff)
class HopkinsMaySampler:
    """Implements Hopkins & May sampling.

    Random pairs of hypotheses are drawn from an n-best list; pairs whose
    scores are too close are discarded, and of the remainder only the pairs
    with the largest score difference are kept.
    """

    def __init__(self, ncandidates=5000, nsamples=50, min_diff=0.05):
        """Configure the sampler.

        Args:
            ncandidates: Number of random pairs to draw
                (Gamma in Hopkins and May).
            nsamples: Maximum number of pairs to return
                (Xi in Hopkins and May).
            min_diff: Minimum scoring difference; pairs whose scores differ
                by less than this are skipped.
        """
        self.ncandidates = ncandidates
        self.nsamples = nsamples
        self.min_diff = min_diff

    def sample(self, nbest):
        """Draw hypothesis pairs from ``nbest`` and keep the widest ones.

        Args:
            nbest: Object with a ``hyps`` sequence; each hypothesis must
                expose a numeric ``score`` attribute.

        Returns:
            A list of at most ``nsamples`` ``Sample`` objects, arranged as a
            ``heapq`` min-heap on ``diff`` (so not fully sorted).  Empty when
            ``nbest.hyps`` is empty or no drawn pair reaches ``min_diff``.
        """
        samples = []
        hyps = nbest.hyps
        if not hyps:
            # random.choice raises IndexError on an empty sequence; there is
            # simply nothing to pair up.
            return samples
        # range (not xrange) so the loop runs on both Python 2 and Python 3.
        for _ in range(self.ncandidates):
            # Both draws are independent, so a hypothesis may be paired with
            # itself; such a pair has diff 0.
            hyp1 = random.choice(hyps)
            hyp2 = random.choice(hyps)
            sample = Sample(hyp1, hyp2)
            if sample.diff < self.min_diff:
                continue
            # Maintain the nsamples biggest samples: the min-heap root is the
            # smallest diff, so popping evicts the least useful pair.
            heapq.heappush(samples, sample)
            while len(samples) > self.nsamples:
                heapq.heappop(samples)
        return samples