File size: 1,024 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
#!/usr/bin/env python
#
#
#
import heapq
import math
import random
import sys
from bleu import BleuScorer
class Sample:
    """A pair of hypotheses and the absolute difference of their scores.

    Instances order by ``diff`` so they can be kept in a ``heapq`` min-heap,
    where the pair with the smallest score difference sits at the root.
    """

    def __init__(self, hyp1, hyp2):
        """Store both hypotheses and cache ``abs(hyp1.score - hyp2.score)``.

        Both arguments must expose a numeric ``score`` attribute.
        """
        self.hyp1 = hyp1
        self.hyp2 = hyp2
        self.diff = abs(hyp1.score - hyp2.score)

    def __lt__(self, other):
        """Return True when this pair's score difference is the smaller one.

        heapq compares elements with ``<``.  Python 3 ignores ``__cmp__``,
        so without this method pushing a second Sample raises TypeError.
        """
        return self.diff < other.diff

    def __cmp__(self, other):
        """Three-way comparison on ``diff``: returns -1, 0 or 1.

        Kept for Python 2 callers.  Written without the ``cmp`` builtin,
        which no longer exists in Python 3.
        """
        return (self.diff > other.diff) - (self.diff < other.diff)
class HopkinsMaySampler:
    """Implements Hopkins & May sampling.

    Random hypothesis pairs are drawn from an n-best list, pairs whose
    score difference is below a threshold are discarded, and only the
    pairs with the largest differences are kept.
    """

    def __init__(self, ncandidates=5000, nsamples=50, min_diff=0.05):
        """Configure the sampler; the defaults match the original constants.

        ncandidates -- number of random pairs drawn per call to sample()
        nsamples    -- maximum number of pairs returned by sample()
        min_diff    -- pairs with a smaller score difference are dropped
        """
        self.ncandidates = ncandidates  # Gamma in Hopkins and May
        self.nsamples = nsamples  # Xi in Hopkins and May
        self.min_diff = min_diff  # Minimum scoring difference

    def sample(self, nbest):
        """Return up to ``nsamples`` Sample pairs with the largest ``diff``.

        ``nbest.hyps`` must be a sequence of hypotheses that each carry a
        ``score`` attribute.  Hypotheses are drawn with replacement, so a
        pair may contain the same hypothesis twice; such a pair has
        diff 0 and survives only if ``min_diff`` is 0 or less.

        The returned list is in heap order (smallest kept difference
        first), not fully sorted.  Fewer than ``nsamples`` pairs, possibly
        none, are returned when too few candidates pass the threshold.
        """
        hyps = nbest.hyps
        # random.choice raises IndexError on an empty sequence, and a single
        # hypothesis can only be paired with itself, so return early.
        if len(hyps) < 2:
            return []

        # Min-heap of (diff, seq, sample).  The unique draw index breaks
        # ties, so two Sample objects are never compared with each other.
        heap = []
        for seq in range(self.ncandidates):
            candidate = Sample(random.choice(hyps), random.choice(hyps))
            if candidate.diff < self.min_diff:
                continue
            entry = (candidate.diff, seq, candidate)
            if len(heap) < self.nsamples:
                heapq.heappush(heap, entry)
            else:
                # Heap is full: insert, then evict the smallest difference,
                # which keeps the nsamples biggest samples.
                heapq.heappushpop(heap, entry)
        return [kept[2] for kept in heap]
|