# Spaces:
# Runtime error
# Runtime error
from typing import List, Iterable, Union | |
from pprint import pprint | |
class KeyphraseCount:
    '''
    A keyphrase together with an integer count.

    Attributes are plain public fields: `keyphrase` and `count`.
    '''

    def __init__(self, keyphrase: str, count: int) -> None:
        '''
        keyphrase: the keyphrase text
        count: the count stored alongside the keyphrase
        '''
        super().__init__()
        self.keyphrase = keyphrase
        self.count = count

    @classmethod
    def reduce(cls, kcs: list):
        '''
        Merge several KeyphraseCount objects into a single one.

        kcs: List[KeyphraseCount] (any iterable of them is accepted)

        Returns a new KeyphraseCount whose keyphrase is the input
        keyphrases joined with '/' (in input order) and whose count is the
        sum of the input counts. An empty input yields keyphrase '' and
        count 0.

        Declared as a classmethod so that it can be called on the class
        itself, i.e. KeyphraseCount.reduce(kcs); calling it on an instance
        keeps working as well.
        '''
        # Materialise once so that one-shot iterables survive the two passes.
        items = list(kcs)
        keys = '/'.join(kc.keyphrase for kc in items)
        count = sum(kc.count for kc in items)
        return KeyphraseCount(keys, count)
class SingleCluster:
    '''
    One cluster: a list of member ids plus a tally of keyphrases.
    '''

    def __init__(self):
        '''
        Create a cluster with no members and no keyphrases.
        '''
        # These names both start and end with double underscores, so Python
        # does not name-mangle them; they are kept unchanged for any code
        # that reads them directly.
        self.__container__ = []     # member ids, in the order they were added
        self.__keyphrases__ = {}    # keyphrase -> number of times it was added

    def add(self, id: int):
        '''
        Append the member `id` to this cluster.
        '''
        self.__container__.append(id)

    def __str__(self) -> str:
        '''
        Return the string form of the list of member ids.
        '''
        return str(self.__container__)

    def elements(self) -> List:
        '''
        Return the internal list of member ids (the list itself, not a copy).
        '''
        return self.__container__

    def get_keyphrases(self):
        '''
        Return a list with one KeyphraseCount per distinct keyphrase, in the
        iteration order of the internal tally.
        '''
        return [
            KeyphraseCount(key, count)
            for key, count in self.__keyphrases__.items()
        ]

    def add_keyphrase(self, keyphrase: Union[str, Iterable]):
        '''
        Count one occurrence of a keyphrase, or of each keyphrase in an
        iterable (nested iterables are walked recursively).

        A value that is neither a str nor an iterable is ignored.
        '''
        # The str check must come first: a str is itself iterable, and
        # recursing over its characters would never terminate.
        if isinstance(keyphrase, str):
            tally = self.__keyphrases__
            tally[keyphrase] = tally.get(keyphrase, 0) + 1
        elif isinstance(keyphrase, Iterable):
            for item in keyphrase:
                self.add_keyphrase(item)

    def __len__(self):
        '''
        Return the number of member ids in this cluster.
        '''
        return len(self.__container__)

    def print_keyphrases(self):
        '''
        Pretty-print the keyphrase tally to standard output.
        '''
        pprint(self.__keyphrases__)
class ClusterList:
    '''
    An ordered collection of k SingleCluster objects that supports
    indexing, slicing, iteration, len() and sorting by cluster size.
    '''

    def __init__(self, k: int):
        '''
        k: number of (initially empty) clusters to create
        '''
        # Double leading AND trailing underscores: not name-mangled.
        self.__clusters__ = [SingleCluster() for _ in range(k)]

    # subscriptable and slice-able
    def __getitem__(self, idx):
        '''
        Return the cluster at an integer index, or a list of clusters for a
        slice.

        Indexing is delegated to the underlying list, so omitted slice
        fields take the usual list defaults (a missing step is 1, never 0),
        negative steps work, and an unsupported index type raises TypeError
        instead of silently returning None.
        '''
        return self.__clusters__[idx]

    def instantiate(self, labels: Iterable):
        '''
        Distribute item ids over the clusters.

        labels: iterable in which the value at position i is the index of
        the cluster that item i is added to. Indices follow list indexing
        rules, so an out-of-range label raises IndexError.
        '''
        for item_id, label in enumerate(labels):
            self.__clusters__[label].add(item_id)

    def __str__(self):
        '''
        Return a multi-line summary: a header with the number of clusters
        followed by one line per cluster.
        '''
        parts = [f'There are {len(self.__clusters__)} clusters:\n']
        parts.extend(
            f'cluster {position} contains: {cluster}.\n'
            for position, cluster in enumerate(self.__clusters__)
        )
        return ''.join(parts)

    # return an iterator that can be used in for loop etc.
    def __iter__(self):
        return iter(self.__clusters__)

    def __len__(self):
        '''
        Return the number of clusters.
        '''
        return len(self.__clusters__)

    def sort(self):
        '''
        Sort the clusters in place, largest first (by len of each cluster).
        '''
        self.__clusters__.sort(key=len, reverse=True)