Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import nltk
import pandas as pd
from nltk.stem import WordNetLemmatizer

from const import KEYS_HOUSE, KEYS_FOOD, KEYS_CLOTHES, KEYS_MEDICAL
# NOTE(review): PHRASE_NO_PROBLEMS is read by to_category but was never
# imported; it is assumed to live in const next to the KEYS_* lists — confirm.
from const import PHRASE_NO_PROBLEMS
# Score contributed by each emergency degree label when degrees are summed.
DEGREE_SCORE = {'High': 9, 'Medium': 3, 'Low': 1}

# Download the NLTK resources before the lemmatizer below is first used.
nltk.download('wordnet')
nltk.download('omw-1.4')

lemmatizer = WordNetLemmatizer()

# Lemmatized copy of every keyword list, so that inflected forms of a word
# can be matched against the keywords by comparing lemmas.
lemmatize_house = list(map(lemmatizer.lemmatize, KEYS_HOUSE))
lemmatize_food = list(map(lemmatizer.lemmatize, KEYS_FOOD))
lemmatize_clothes = list(map(lemmatizer.lemmatize, KEYS_CLOTHES))
lemmatize_medical = list(map(lemmatizer.lemmatize, KEYS_MEDICAL))
from typing import List | |
from enum import Enum | |
class HelpCategory(Enum):
    """Kinds of help a request can be classified into."""

    HOUSE = 'house'
    FOOD = 'food'
    CLOTHES = 'clothes'
    MEDICAL = 'medical'
    # Fallback for text that matches no keyword.  The misspelled name and
    # value stay the canonical member so existing references keep working.
    UNKNOW = 'unknow'
    # Correctly spelled alias: it shares UNKNOW's value, so Enum treats it as
    # the very same member (it does not appear as an extra item on iteration).
    UNKNOWN = 'unknow'
def to_category(text: str) -> List[HelpCategory]:
    """Map one help description to the help categories it mentions.

    The text is split on whitespace and every word is compared, both verbatim
    and by its lemma, against the keyword list of each category.

    Args:
        text: A single help description.

    Returns:
        An empty list when ``text in PHRASE_NO_PROBLEMS``; otherwise the
        matched categories, each listed once in the order first seen, or
        ``[HelpCategory.UNKNOW]`` when nothing matched.
    """
    if text in PHRASE_NO_PROBLEMS:
        return []

    # (category, raw keywords, lemmatized keywords), in reporting order.
    keyword_groups = (
        (HelpCategory.HOUSE, KEYS_HOUSE, lemmatize_house),
        (HelpCategory.FOOD, KEYS_FOOD, lemmatize_food),
        (HelpCategory.CLOTHES, KEYS_CLOTHES, lemmatize_clothes),
        (HelpCategory.MEDICAL, KEYS_MEDICAL, lemmatize_medical),
    )

    categories = []
    for word in text.split():
        # Lemmatize once per word instead of once per category.
        lemma = lemmatizer.lemmatize(word)
        for category, keywords, lemmas in keyword_groups:
            if category in categories:
                # Already reported; a second hit must not add a duplicate.
                continue
            if word in keywords or lemma in lemmas:
                categories.append(category)

    return categories or [HelpCategory.UNKNOW]
def clean(text: str) -> str:
    """Normalize a raw help description before it is split into items.

    'Housing/Shelter' is turned into the single token 'housing_shelter'
    first, so that its slash is not converted; every remaining '/' becomes
    ','. The result is lower-cased and stripped of surrounding whitespace.
    """
    # The replacement is case-sensitive, so it must run before lower().
    protected = text.replace('Housing/Shelter', 'housing_shelter')
    return protected.replace('/', ',').lower().strip()
def to_list(text: str) -> List[str]:
    """Split a comma-separated help description into individual items.

    Each item has its '.' characters replaced by spaces and is then stripped
    of surrounding whitespace.
    """
    return [part.replace('.', ' ').strip() for part in text.split(',')]
def help_text_to_help_category(helps: List[str]) -> List["HelpCategory"]:
    """Collect the distinct help categories found across several help texts.

    Args:
        helps: Help descriptions, each classified with ``to_category``.

    Returns:
        The distinct categories over all texts, as ``HelpCategory`` members
        (not strings). They pass through a set, so the order of the returned
        list is not meaningful.
    """
    all_categories = set()
    for help_string in helps:
        all_categories.update(to_category(help_string))
    return list(all_categories)
def aggregate_degree(degrees):
    """Combine several emergency degree labels into a single label.

    Each label is looked up in DEGREE_SCORE and the scores are summed:
    a total of 9 or more yields 'High', 3 to 8 yields 'Medium', and
    anything lower yields 'Low'. A label missing from DEGREE_SCORE raises
    KeyError.
    """
    total_score = sum(DEGREE_SCORE[level] for level in degrees)
    if total_score < 3:
        return 'Low'
    return 'Medium' if total_score < 9 else 'High'
def add_category(df):
    """Add a 'help_category' column derived from the 'Help Details' column.

    Every 'Help Details' value is cleaned, split into items and classified.
    The frame is modified in place and also returned.
    """
    cleaned = df['Help Details'].apply(clean)
    help_items = cleaned.apply(to_list)
    df['help_category'] = help_items.apply(help_text_to_help_category)
    return df
def aggregate_locations(df):
    """Summarize needs and urgency per 'Location Details' value.

    For every location, the 'help_category' lists of its rows are merged
    into one list of distinct categories, and its 'Emergency Degree' values
    are reduced to a single label with ``aggregate_degree``. The two results
    are joined on the location index.
    """
    def _merge_unique(category_lists):
        # Flatten the per-row lists and drop repeated categories.
        return list({item for sublist in category_lists for item in sublist})

    by_location = df.groupby('Location Details')
    need = by_location['help_category'].apply(list).apply(_merge_unique)
    emergency_degree = by_location['Emergency Degree'].apply(list).apply(aggregate_degree)
    return pd.merge(need, emergency_degree, left_index=True, right_index=True)
def filter_category(category: HelpCategory, request: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``request`` whose 'help_category' contains ``category``."""
    def _has_category(row_categories):
        return category in row_categories

    mask = request['help_category'].apply(_has_category)
    return request[mask]