jules.lambert1
code of category and ranking
2067d6a
raw
history blame
3.37 kB
import nltk
import pandas as pd
from nltk.stem import WordNetLemmatizer

from const import KEYS_HOUSE, KEYS_FOOD, KEYS_CLOTHES, KEYS_MEDICAL
# Points contributed by each emergency degree label; aggregate_degree() sums
# these per group and compares the total against fixed thresholds.
DEGREE_SCORE = {'High': 9, 'Medium': 3, 'Low': 1}

# Fetch the NLTK resources at import time, before the lemmatizer is first
# used below (presumably the data the WordNet lemmatizer relies on - confirm).
for _resource in ('wordnet', 'omw-1.4'):
    nltk.download(_resource)

lemmatizer = WordNetLemmatizer()


def _lemmatize_all(words):
    """Return the lemma of every entry of *words*, in order, as a list."""
    return [lemmatizer.lemmatize(word) for word in words]


# Lemmatized form of each keyword collection; to_category() compares the
# lemma of every input word against these lists.
lemmatize_house = _lemmatize_all(KEYS_HOUSE)
lemmatize_food = _lemmatize_all(KEYS_FOOD)
lemmatize_clothes = _lemmatize_all(KEYS_CLOTHES)
lemmatize_medical = _lemmatize_all(KEYS_MEDICAL)
from typing import List
from enum import Enum
class HelpCategory(Enum):
    """Categories of help that a request text can be classified into."""

    # One member per keyword family checked by to_category().
    HOUSE = 'house'
    FOOD = 'food'
    CLOTHES = 'clothes'
    MEDICAL = 'medical'
    # Used by to_category() when no keyword of the other categories matched.
    # NOTE(review): name and value are spelled 'unknow' (sic); both are
    # runtime identifiers, so they are deliberately left as they are.
    UNKNOW = 'unknow'
def to_category(text: str) -> List[HelpCategory]:
    """Classify one help description into the categories its words match.

    The text is split on whitespace and each word is lemmatized once; the
    lemma is then looked up in the lemmatized keyword list of every category.
    The lemmatized lists are built from the raw ``KEYS_*`` keywords, so a raw
    keyword hit always implies a lemma hit and a separate raw comparison
    would only report the same category a second time.

    Args:
        text: A single help description.

    Returns:
        ``[]`` when *text* is in ``PHRASE_NO_PROBLEMS``; otherwise the matched
        categories, each listed once in first-seen order, or
        ``[HelpCategory.UNKNOW]`` when no word matched any category.
    """
    # NOTE(review): PHRASE_NO_PROBLEMS is not among the names imported from
    # `const` in the visible part of this file - confirm it is in scope.
    if text in PHRASE_NO_PROBLEMS:
        return []

    # Order fixes the order in which categories are reported for one word.
    lemma_tables = (
        (HelpCategory.HOUSE, lemmatize_house),
        (HelpCategory.FOOD, lemmatize_food),
        (HelpCategory.CLOTHES, lemmatize_clothes),
        (HelpCategory.MEDICAL, lemmatize_medical),
    )

    categories = []
    for word in text.split():
        lemma = lemmatizer.lemmatize(word)  # lemmatize once, not per category
        for category, lemmas in lemma_tables:
            if lemma in lemmas and category not in categories:
                categories.append(category)

    return categories or [HelpCategory.UNKNOW]
def clean(text: str) -> str:
    """Normalize a raw help description before it is split into items.

    The exact phrase 'Housing/Shelter' is turned into the single token
    'housing_shelter' first, so that its slash is not treated as a separator;
    every remaining '/' becomes ','. The result is lower-cased and stripped
    of leading and trailing whitespace.
    """
    normalized = text.replace('Housing/Shelter', 'housing_shelter').replace('/', ',')
    return normalized.lower().strip()
def to_list(text: str) -> List[str]:
    """Split *text* on commas into individual help strings.

    Within each piece every '.' is replaced by a space and surrounding
    whitespace is removed. Empty pieces are kept as empty strings.
    """
    return [piece.replace('.', ' ').strip() for piece in text.split(',')]
def help_text_to_help_category(helps: List[str]) -> List['HelpCategory']:
    """Collect the distinct categories matched by any of the help strings.

    Args:
        helps: Help descriptions, each classified with ``to_category``.

    Returns:
        Every category returned for at least one entry of *helps*, listed
        once, in the order in which it was first encountered. An empty list
        is returned when *helps* is empty or no entry yields a category.
    """
    # A dict is used as an ordered set: converting a plain set to a list
    # gives an order that can differ from one run to the next.
    ordered = {}
    for help_string in helps:
        ordered.update(dict.fromkeys(to_category(help_string)))
    return list(ordered)
def aggregate_degree(degrees):
    """Combine several emergency degree labels into a single label.

    Each label is converted to its ``DEGREE_SCORE`` value and the values are
    summed. A total of 9 or more gives 'High', a total of 3 to 8 gives
    'Medium', and anything lower (including no labels at all) gives 'Low'.

    Raises:
        KeyError: If a label is not a key of ``DEGREE_SCORE``.
    """
    score = 0
    for degree in degrees:
        score += DEGREE_SCORE[degree]

    if score < 3:
        return 'Low'
    return 'High' if score >= 9 else 'Medium'
def add_category(df):
    """Add a 'help_category' column derived from the 'Help Details' column.

    Each 'Help Details' value is passed through ``clean``, ``to_list`` and
    ``help_text_to_help_category`` in turn. The column is written onto *df*
    itself, and that same object is returned.
    """
    details = df['Help Details']
    help_items = details.apply(clean).apply(to_list)
    df['help_category'] = help_items.apply(help_text_to_help_category)
    return df
def aggregate_locations(df):
    """Summarize needs and urgency for each value of 'Location Details'.

    For every location group, the 'help_category' lists of its rows are
    flattened into one list of distinct categories, and its
    'Emergency Degree' values are combined with ``aggregate_degree``.

    Returns:
        The two per-location results merged on their index.
    """
    def distinct_categories(category_lists):
        # Flatten the per-row lists, then drop duplicates.
        merged = [category for categories in category_lists for category in categories]
        return list(set(merged))

    by_location = df.groupby('Location Details')
    need = by_location['help_category'].apply(list).apply(distinct_categories)
    emergency_degree = by_location['Emergency Degree'].apply(list).apply(aggregate_degree)
    return pd.merge(need, emergency_degree, left_index=True, right_index=True)
def filter_category(category: HelpCategory, request: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of *request* whose 'help_category' contains *category*."""
    def has_category(row_categories):
        return category in row_categories

    return request[request['help_category'].apply(has_category)]