|
|
|
|
|
|
|
|
|
|
|
|
|
import heapq |
|
import math |
|
import random |
|
import sys |
|
|
|
from bleu import BleuScorer |
|
|
|
|
|
class Sample:
    """A pair of hypotheses, and their score difference.

    Attributes:
        hyp1: first hypothesis of the pair (must expose a ``score``).
        hyp2: second hypothesis of the pair (must expose a ``score``).
        diff: absolute difference between the two hypothesis scores.

    Samples are ordered and tested for equality by ``diff`` only, so they
    can be stored directly in a ``heapq`` heap or passed to ``sorted``.
    """

    def __init__(self, hyp1, hyp2):
        self.hyp1 = hyp1
        self.hyp2 = hyp2
        self.diff = abs(hyp1.score - hyp2.score)

    def __cmp__(self, other):
        """Three-way comparison on ``diff``: returns -1, 0 or 1.

        Kept for callers that invoke it explicitly; written without the
        ``cmp`` builtin, which does not exist on Python 3.
        """
        return (self.diff > other.diff) - (self.diff < other.diff)

    # Rich comparisons: Python 3 ignores __cmp__, and heapq/sorted need
    # at least __lt__.  Returning NotImplemented for foreign types lets
    # the interpreter apply its normal fallback rules.
    def __lt__(self, other):
        if not isinstance(other, Sample):
            return NotImplemented
        return self.diff < other.diff

    def __le__(self, other):
        if not isinstance(other, Sample):
            return NotImplemented
        return self.diff <= other.diff

    def __gt__(self, other):
        if not isinstance(other, Sample):
            return NotImplemented
        return self.diff > other.diff

    def __ge__(self, other):
        if not isinstance(other, Sample):
            return NotImplemented
        return self.diff >= other.diff

    def __eq__(self, other):
        if not isinstance(other, Sample):
            return NotImplemented
        return self.diff == other.diff

    def __ne__(self, other):
        if not isinstance(other, Sample):
            return NotImplemented
        return self.diff != other.diff
|
|
|
class HopkinsMaySampler:
    """Implements Hopkins & May sampling.

    Random pairs of hypotheses are drawn from an n-best list, pairs whose
    score difference is too small are rejected, and the pairs with the
    largest score difference are retained.

    Attributes:
        ncandidates: number of random pairs drawn per call to ``sample``.
        nsamples: maximum number of pairs returned by ``sample``.
        min_diff: pairs with a score difference below this are rejected.
    """

    def __init__(self, ncandidates=5000, nsamples=50, min_diff=0.05):
        self.ncandidates = ncandidates
        self.nsamples = nsamples
        self.min_diff = min_diff

    def sample(self, nbest):
        """Return up to ``nsamples`` pairs with the largest score difference.

        Args:
            nbest: object whose ``hyps`` attribute is a sequence of
                hypotheses, each exposing a ``score``.

        Returns:
            A list of ``Sample`` objects in heap order (smallest ``diff``
            first, otherwise unsorted).  Empty when ``nbest.hyps`` is empty
            or no drawn pair reaches ``min_diff``.
        """
        hyps = nbest.hyps
        if not hyps:
            # random.choice raises IndexError on an empty sequence; an
            # n-best list without hypotheses simply yields no samples.
            return []

        kept = []
        for _ in range(self.ncandidates):
            # Both members are drawn independently, so a hypothesis may be
            # paired with itself; such a pair has diff 0.
            pair = Sample(random.choice(hyps), random.choice(hyps))
            if pair.diff < self.min_diff:
                continue

            # Min-heap on diff: evicting the root whenever the heap
            # overflows leaves the pairs with the largest differences.
            heapq.heappush(kept, pair)
            if len(kept) > self.nsamples:
                heapq.heappop(kept)
        return kept
|
|
|
|