Spaces:
Runtime error
Runtime error
File size: 2,684 Bytes
16188ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
from typing import List, Iterable, Union
from pprint import pprint
class KeyphraseCount:
    """A keyphrase paired with an occurrence count."""

    def __init__(self, keyphrase: str, count: int) -> None:
        """Store the keyphrase text and its count.

        Args:
            keyphrase: The keyphrase text.
            count: The count associated with the keyphrase.
        """
        super().__init__()
        self.keyphrase = keyphrase
        self.count = count

    @classmethod
    def reduce(cls, kcs: list) -> "KeyphraseCount":
        """Merge several KeyphraseCount objects into a single one.

        The merged keyphrase is the individual keyphrases joined with '/'
        in their original order; the merged count is the sum of the counts.

        Args:
            kcs: The KeyphraseCount objects to merge (List[KeyphraseCount]).
                Any iterable of such objects is accepted.

        Returns:
            A new KeyphraseCount. For empty input the result has an empty
            keyphrase and a count of 0.
        """
        # Materialize once so the input can be walked twice, even when it
        # is a one-shot iterator.
        items = list(kcs)
        keys = '/'.join(kc.keyphrase for kc in items)
        count = sum(kc.count for kc in items)
        # Built as KeyphraseCount explicitly (not cls) so the returned type
        # stays the same when reduce is called through a subclass.
        return KeyphraseCount(keys, count)
class SingleCluster:
    """One cluster: a list of member ids plus a keyphrase -> count table."""

    def __init__(self):
        # Member ids, kept in insertion order.
        self.__container__ = []
        # Maps each keyphrase string to how many times it was added.
        self.__keyphrases__ = {}

    def add(self, id: int):
        """Append a member id to this cluster."""
        self.__container__.append(id)

    def __str__(self) -> str:
        """Return the string form of the member list."""
        return str(self.__container__)

    def elements(self) -> List:
        """Return the member list itself (not a copy)."""
        return self.__container__

    def get_keyphrases(self):
        """Return one KeyphraseCount per recorded keyphrase."""
        return [
            KeyphraseCount(phrase, occurrences)
            for phrase, occurrences in self.__keyphrases__.items()
        ]

    def add_keyphrase(self, keyphrase: Union[str, Iterable]):
        """Record a keyphrase, or every keyphrase inside an iterable.

        A string increments its own counter. Any other iterable is walked
        and each item is handled recursively. Values that are neither a
        string nor an iterable are ignored.
        """
        if isinstance(keyphrase, str):
            table = self.__keyphrases__
            table[keyphrase] = table.get(keyphrase, 0) + 1
            return
        if isinstance(keyphrase, Iterable):
            for item in keyphrase:
                self.add_keyphrase(item)

    def __len__(self):
        """Return the number of member ids in the cluster."""
        return len(self.__container__)

    def print_keyphrases(self):
        """Pretty-print the keyphrase -> count table."""
        pprint(self.__keyphrases__)
class ClusterList:
    """A fixed-size collection of SingleCluster objects.

    Supports indexing, slicing, iteration, len() and sorting by size.
    """

    def __init__(self, k: int):
        """Create k empty clusters.

        Args:
            k: Number of clusters to create.
        """
        self.__clusters__ = [SingleCluster() for _ in range(k)]

    # subscriptable and slice-able
    def __getitem__(self, idx):
        """Return the cluster at an index, or a list of clusters for a slice.

        Indexing is delegated to the underlying list, so negative indices
        and slices with omitted start/stop/step behave as they do on a
        list. A missing slice step must not be replaced by 0: a zero step
        makes list slicing raise ValueError.

        Raises:
            IndexError: If an integer index is out of range.
            TypeError: If idx is not a valid list index or slice.
        """
        return self.__clusters__[idx]

    def instantiate(self, labels: Iterable):
        """Assign each position in labels to the cluster its label names.

        The position of a label in the sequence is the id added to the
        cluster at index label.

        Args:
            labels: Iterable of cluster indices, one per item.
        """
        # NOTE(review): a negative label indexes from the end of the
        # cluster list rather than being rejected -- confirm callers never
        # pass negative labels.
        for item_id, label in enumerate(labels):
            self.__clusters__[label].add(item_id)

    def __str__(self):
        """Return a multi-line summary listing every cluster's contents."""
        parts = [f'There are {len(self.__clusters__)} clusters:\n']
        parts.extend(
            f'cluster {index} contains: {cluster}.\n'
            for index, cluster in enumerate(self.__clusters__)
        )
        return ''.join(parts)

    # return an iterator that can be used in for loop etc.
    def __iter__(self):
        """Return an iterator over the clusters."""
        return iter(self.__clusters__)

    def __len__(self):
        """Return the number of clusters."""
        return len(self.__clusters__)

    def sort(self):
        """Sort the clusters in place, largest first."""
        self.__clusters__.sort(key=len, reverse=True)
|