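# NOTE(review): "Spaces: Sleeping" was page-status residue captured with the code during extraction; it is not part of the module.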
import time
from collections import OrderedDict, defaultdict
from itertools import tee
from random import uniform

import numpy as np
# ----------------------------------------------- | |
def find_ngrams(input_list, n):
    """Return the contiguous n-grams of ``input_list`` as tuples.

    Args:
        input_list: Sliceable sequence of items (e.g. a list of tokens).
        n: Number of consecutive items in each n-gram.

    Returns:
        The result of ``zip`` over ``n`` shifted slices of ``input_list``
        (a one-shot iterator under Python 3). Each element is an ``n``-tuple
        of consecutive items. Nothing is produced when the sequence is
        shorter than ``n`` or when ``n <= 0``.
    """
    # Zipping n progressively shifted slices lines up every window of n
    # consecutive items; zip stops at the shortest slice, which drops the
    # incomplete windows at the tail.
    return zip(*[input_list[i:] for i in range(n)])
def compute_div_n(caps, n=1):
    """Compute a per-key distinct-n-gram ratio and the mean over all keys.

    For each key in ``caps``, every caption is split on whitespace, the set
    of distinct n-grams over all of that key's captions is collected, and
    its size is divided by the key's total token count.

    Args:
        caps: Mapping from a key to an iterable of caption strings.
        n: n-gram order (default 1).

    Returns:
        A tuple ``(mean_score, scores)``. ``scores`` is a NumPy array with
        one ratio per key, in the iteration order of ``caps``.
        ``mean_score`` is the mean of ``scores``, or ``0.0`` when ``caps``
        is empty.
    """
    scores = []
    for key in caps:
        distinct = set()
        token_count = 0
        for caption in caps[key]:
            tokens = caption.split()
            token_count += len(tokens)
            distinct.update(find_ngrams(tokens, n))
        # The epsilon makes a key with no tokens score 0.0 instead of
        # raising ZeroDivisionError.
        scores.append(float(len(distinct)) / (1e-6 + float(token_count)))

    scores = np.array(scores)
    if scores.size == 0:
        # Mean of an empty array would be NaN plus a RuntimeWarning; report
        # 0.0, consistent with the score given to a key without tokens.
        return 0.0, scores
    return scores.mean(), scores
def compute_global_div_n(caps, n=1):
    """Compute one distinct-n-gram score pooled over every caption in ``caps``.

    All captions of all keys are split on whitespace and their n-grams are
    pooled into a single set.

    Args:
        caps: Mapping from a key to an iterable of caption strings; must
            support ``len``.
        n: n-gram order (default 1).

    Returns:
        A tuple ``(score, per_key)``. For ``n == 1`` the score is the raw
        number of distinct unigrams (not normalised); for any other ``n``
        it is the number of distinct n-grams divided by the total token
        count. ``per_key`` is a NumPy array repeating ``score`` once per
        key in ``caps``.
    """
    distinct = set()
    token_count = 0
    for key in caps:
        for caption in caps[key]:
            tokens = caption.split()
            token_count += len(tokens)
            distinct.update(find_ngrams(tokens, n))

    if n == 1:
        # Unigram case reports the absolute count of distinct tokens.
        score = float(len(distinct))
    else:
        # The epsilon keeps the ratio at 0.0 when there are no tokens at all.
        score = float(len(distinct)) / (1e-6 + float(token_count))

    return score, np.repeat(np.array([score]), len(caps))