"""Map free-text help requests to help categories and aggregate them per location."""

from enum import Enum
from itertools import chain
from typing import Iterable, List

import nltk
import pandas as pd
from nltk.stem import WordNetLemmatizer

from const import KEYS_HOUSE, KEYS_FOOD, KEYS_CLOTHES, KEYS_MEDICAL

try:
    # NOTE(review): the original file used PHRASE_NO_PROBLEMS without importing
    # or defining it, so every call to to_category() raised NameError. It is
    # presumably declared next to the KEYS_* constants -- confirm.
    from const import PHRASE_NO_PROBLEMS
except ImportError:
    # Fallback: no phrase is treated as "no problems".
    PHRASE_NO_PROBLEMS = ()

# Weight of each emergency degree when several reports are summed up.
DEGREE_SCORE = {'High': 9, 'Medium': 3, 'Low': 1}

# WordNet corpora required by WordNetLemmatizer (fetched at import time,
# exactly as the original module did).
nltk.download('wordnet')
nltk.download('omw-1.4')

lemmatizer = WordNetLemmatizer()
lemmatize_house = [lemmatizer.lemmatize(word) for word in KEYS_HOUSE]
lemmatize_food = [lemmatizer.lemmatize(word) for word in KEYS_FOOD]
lemmatize_clothes = [lemmatizer.lemmatize(word) for word in KEYS_CLOTHES]
lemmatize_medical = [lemmatizer.lemmatize(word) for word in KEYS_MEDICAL]


class HelpCategory(Enum):
    """Kinds of help a request can be classified into."""

    HOUSE = 'house'
    FOOD = 'food'
    CLOTHES = 'clothes'
    MEDICAL = 'medical'
    # Spelling left untouched: renaming the member or its value would change
    # the module's public interface.
    UNKNOW = 'unknow'


# (category, raw keywords, lemmatized keywords) in the order categories are
# reported. Sets give O(1) membership tests inside the per-word loop.
_CATEGORY_KEYWORDS = (
    (HelpCategory.HOUSE, frozenset(KEYS_HOUSE), frozenset(lemmatize_house)),
    (HelpCategory.FOOD, frozenset(KEYS_FOOD), frozenset(lemmatize_food)),
    (HelpCategory.CLOTHES, frozenset(KEYS_CLOTHES), frozenset(lemmatize_clothes)),
    (HelpCategory.MEDICAL, frozenset(KEYS_MEDICAL), frozenset(lemmatize_medical)),
)


def to_category(text: str) -> List[HelpCategory]:
    """Return the help categories mentioned in a single help string.

    A whitespace-separated word matches a category when the word itself is
    one of the category's keywords or when its lemma equals the lemma of one
    of them.

    Args:
        text: One help request fragment.

    Returns:
        ``[]`` when ``text in PHRASE_NO_PROBLEMS``; otherwise every matched
        category exactly once, in order of first match, or
        ``[HelpCategory.UNKNOW]`` when no word matches.
    """
    if text in PHRASE_NO_PROBLEMS:
        return []

    categories: List[HelpCategory] = []
    for word in text.split():
        # Lemmatize once per word instead of once per category.
        lemma = lemmatizer.lemmatize(word)
        for category, keywords, lemmas in _CATEGORY_KEYWORDS:
            if category in categories:
                continue
            if word in keywords or lemma in lemmas:
                categories.append(category)

    return categories or [HelpCategory.UNKNOW]


def clean(text: str) -> str:
    """Normalize a raw help text before it is split into separate requests.

    'Housing/Shelter' is rewritten to a single token first so that the
    following slash-to-comma replacement does not split it; the text is then
    lower-cased and stripped of surrounding whitespace.
    """
    text = text.replace('Housing/Shelter', 'housing_shelter')
    text = text.replace('/', ',')
    return text.lower().strip()


def to_list(text: str) -> List[str]:
    """Split a text on commas, turning dots into spaces and trimming each part."""
    return [part.replace('.', ' ').strip() for part in text.split(',')]


def help_text_to_help_category(helps: List[str]) -> List[HelpCategory]:
    """Return the distinct categories found across several help strings.

    Args:
        helps: Help strings, each classified with ``to_category``.

    Returns:
        Every category found at least once, listed in ``HelpCategory``
        definition order so the result is the same on every run.
    """
    found = set()
    for help_string in helps:
        found.update(to_category(help_string))
    return [category for category in HelpCategory if category in found]


def aggregate_degree(degrees: Iterable[str]) -> str:
    """Combine several emergency degrees into a single one.

    The ``DEGREE_SCORE`` values of all degrees are summed; a total of 9 or
    more is 'High', 3 or more is 'Medium', anything lower is 'Low'.

    Raises:
        KeyError: If a degree is not a key of ``DEGREE_SCORE``.
    """
    total_score = sum(DEGREE_SCORE[degree] for degree in degrees)
    if total_score >= 9:
        return 'High'
    if total_score >= 3:
        return 'Medium'
    return 'Low'


def add_category(df: pd.DataFrame) -> pd.DataFrame:
    """Add a 'help_category' column computed from the 'Help Details' column.

    The frame is modified in place and also returned.
    """
    df['help_category'] = (
        df['Help Details']
        .apply(clean)
        .apply(to_list)
        .apply(help_text_to_help_category)
    )
    return df


def _merge_unique(groups: Iterable[Iterable]) -> list:
    """Flatten an iterable of iterables, keeping the first occurrence of each item."""
    return list(dict.fromkeys(chain.from_iterable(groups)))


def aggregate_locations(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate needs and emergency degree per 'Location Details' value.

    Returns:
        A frame indexed by location with a 'help_category' column (distinct
        categories of all rows at that location) and an 'Emergency Degree'
        column (the rows' degrees combined by ``aggregate_degree``).
    """
    by_location = df.groupby('Location Details')
    need = by_location['help_category'].apply(list).apply(_merge_unique)
    emergency_degree = (
        by_location['Emergency Degree'].apply(list).apply(aggregate_degree)
    )
    return pd.merge(need, emergency_degree, left_index=True, right_index=True)


def filter_category(category: HelpCategory, request: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``request`` whose 'help_category' contains ``category``."""
    in_category = request['help_category'].apply(lambda found: category in found)
    return request[in_category]