Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
jules.lambert1
committed on
Commit
•
2067d6a
1
Parent(s):
597fb2d
code of category and ranking
Browse files- app.py +3 -0
- src/filter/const.py +45 -0
- src/filter/filter.py +116 -0
- src/ranking/ranking.py +91 -0
app.py
CHANGED
@@ -17,6 +17,9 @@ from src.text_content import (
|
|
17 |
from src.utils import add_latlng_col, init_map, parse_gg_sheet, is_request_in_list, parse_json_file
|
18 |
from src.map_utils import get_legend_macro
|
19 |
from src.dataframes import display_dataframe
|
|
|
|
|
|
|
20 |
|
21 |
TOKEN = os.environ.get("HF_TOKEN", None)
|
22 |
VERIFIED_REQUESTS_URL = (
|
|
|
17 |
from src.utils import add_latlng_col, init_map, parse_gg_sheet, is_request_in_list, parse_json_file
|
18 |
from src.map_utils import get_legend_macro
|
19 |
from src.dataframes import display_dataframe
|
20 |
+
from src.filter.filter import add_category
|
21 |
+
from src.filter.filter import HelpCategory
|
22 |
+
from src.ranking.ranking import sort_request
|
23 |
|
24 |
TOKEN = os.environ.get("HF_TOKEN", None)
|
25 |
VERIFIED_REQUESTS_URL = (
|
src/filter/const.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Whole answers that are compared verbatim against a help text; a text equal
# to one of these is treated as carrying no need at all.
PHRASE_NO_PROBLEMS = [
    'got food',
    'got food and clothes',
    'got food and covers',
]

# Keywords tagging a help text as a housing / shelter need.
KEYS_HOUSE = [
    "shelters", "mattresses", "pillows", "blankets",
    "shelter", "tentes", "housing", "couvertures",
    "tents", "covers", "sdader", "housing_shelter",
]

# Keywords tagging a help text as a food need.
KEYS_FOOD = [
    "groceries", "nouriture", "food", "water", "gaz",
    "dishes", "oil", "sugar", "tea", "hungry",
]

# Keywords tagging a help text as a clothing need.
KEYS_CLOTHES = ["clothes", "clothing", "hygiene"]

# Keywords tagging a help text as a medical need.
KEYS_MEDICAL = [
    "betadine", "medical", "diabetics", "medicaments",
    "diabetes", "doliprane", "vitamines", "drugs",
]
|
src/filter/filter.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum
from typing import List

import nltk
import pandas as pd
from nltk.stem import WordNetLemmatizer

# Absolute package path, matching how app.py imports this module
# (``from src.filter.filter import ...``); a bare ``from const import`` cannot
# be resolved when the module is loaded that way.
# PHRASE_NO_PROBLEMS is used by to_category() and pd by the DataFrame helpers
# further down, so both must be in scope here.
from src.filter.const import (
    KEYS_CLOTHES,
    KEYS_FOOD,
    KEYS_HOUSE,
    KEYS_MEDICAL,
    PHRASE_NO_PROBLEMS,
)

# Weight of each emergency degree when several requests are summed up.
DEGREE_SCORE = {'High': 9, 'Medium': 3, 'Low': 1}

# Resources fetched at import time for the WordNet lemmatizer.
nltk.download('wordnet')
nltk.download('omw-1.4')
lemmatizer = WordNetLemmatizer()

# Lemmatized form of every keyword list, so that inflected words in a help
# text can be matched against the base form of a keyword.
lemmatize_house = [lemmatizer.lemmatize(word) for word in KEYS_HOUSE]
lemmatize_food = [lemmatizer.lemmatize(word) for word in KEYS_FOOD]
lemmatize_clothes = [lemmatizer.lemmatize(word) for word in KEYS_CLOTHES]
lemmatize_medical = [lemmatizer.lemmatize(word) for word in KEYS_MEDICAL]
|
23 |
+
|
24 |
+
|
25 |
+
class HelpCategory(Enum):
    """Kinds of help a request can be tagged with."""

    HOUSE = 'house'
    FOOD = 'food'
    CLOTHES = 'clothes'
    MEDICAL = 'medical'
    # Fallback tag for a text in which no keyword was recognised.
    UNKNOW = 'unknow'
|
31 |
+
|
32 |
+
|
33 |
+
def to_category(text: str) -> List[HelpCategory]:
    """Map one help text to the help categories its words point to.

    A text that is exactly one of ``PHRASE_NO_PROBLEMS`` yields no category.
    Otherwise every whitespace-separated word is compared, both as written
    and in lemmatized form, against the keyword list of each category.

    Args:
        text: A single help description.

    Returns:
        The matched categories, each listed once, in order of first match;
        ``[HelpCategory.UNKNOW]`` when no word matches; ``[]`` for a
        "no problem" phrase.
    """
    if text in PHRASE_NO_PROBLEMS:
        return []

    keyword_tables = (
        (HelpCategory.HOUSE, KEYS_HOUSE, lemmatize_house),
        (HelpCategory.FOOD, KEYS_FOOD, lemmatize_food),
        (HelpCategory.CLOTHES, KEYS_CLOTHES, lemmatize_clothes),
        (HelpCategory.MEDICAL, KEYS_MEDICAL, lemmatize_medical),
    )

    categories = []
    for word in text.split():
        # Lemmatize once per word rather than once per category.
        lemma = lemmatizer.lemmatize(word)
        for category, keys, lemmas in keyword_tables:
            matched = word in keys or lemma in lemmas
            # A word matching both as written and as a lemma must not add
            # the same category twice.
            if matched and category not in categories:
                categories.append(category)

    return categories or [HelpCategory.UNKNOW]
|
59 |
+
|
60 |
+
|
61 |
+
def clean(text: str) -> str:
    """Normalise a raw help description before it is split into items.

    The label ``Housing/Shelter`` becomes the single token
    ``housing_shelter``, every remaining slash becomes a comma, and the
    result is lower-cased and stripped of surrounding whitespace.
    """
    # Rewrite the compound label first so its slash is not turned into a comma.
    protected = text.replace('Housing/Shelter', 'housing_shelter')
    return protected.replace('/', ',').lower().strip()
|
67 |
+
|
68 |
+
|
69 |
+
def to_list(text: str) -> List[str]:
    """Split a comma-separated help text into trimmed items.

    Dots inside an item are replaced by spaces before the item is stripped.
    """
    items = []
    for fragment in text.split(','):
        items.append(fragment.replace('.', ' ').strip())
    return items
|
73 |
+
|
74 |
+
|
75 |
+
def help_text_to_help_category(helps: List[str]) -> List[HelpCategory]:
    """Collect the distinct help categories found across several help texts.

    Args:
        helps: Individual help descriptions, each passed to ``to_category``.

    Returns:
        Every category returned for at least one text, without duplicates,
        sorted by the category's value so that the result is the same from
        one run to the next.
    """
    found = set()
    for help_string in helps:
        found.update(to_category(help_string))
    # Set iteration order is not stable between runs; sort for a repeatable result.
    return sorted(found, key=lambda category: category.value)
|
81 |
+
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
+
|
86 |
+
|
87 |
+
def aggregate_degree(degrees):
    """Combine several emergency degrees into a single one.

    Each degree is converted to its ``DEGREE_SCORE`` weight and the weights
    are added up: a total of 9 or more is 'High', 3 or more is 'Medium',
    anything lower is 'Low'.
    """
    combined = 0
    for level in degrees:
        combined += DEGREE_SCORE[level]

    if combined < 3:
        return 'Low'
    if combined < 9:
        return 'Medium'
    return 'High'
|
95 |
+
|
96 |
+
def add_category(df):
    """Add a ``help_category`` column derived from ``Help Details``.

    Each cell is cleaned, split into items and mapped to its help
    categories. The frame is modified in place and also returned.
    """
    cleaned = df['Help Details'].apply(clean)
    items = cleaned.apply(to_list)
    df['help_category'] = items.apply(help_text_to_help_category)
    return df
|
99 |
+
|
100 |
+
|
101 |
+
def aggregate_locations(df):
    """Summarise the requests of each location.

    Rows are grouped by ``Location Details``. For every group the
    ``help_category`` lists are flattened and de-duplicated, and the
    ``Emergency Degree`` values are reduced with ``aggregate_degree``.
    The two per-location results are joined on the location index.
    """
    def unique_flat(nested):
        # Flatten one level, then drop duplicates.
        flat = [item for sublist in nested for item in sublist]
        return list(set(flat))

    by_location = df.groupby('Location Details')
    need = by_location['help_category'].apply(list).apply(unique_flat)
    emergency_degree = by_location['Emergency Degree'].apply(list).apply(aggregate_degree)

    return pd.merge(need, emergency_degree, left_index=True, right_index=True)
|
109 |
+
|
110 |
+
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
+
def filter_category(category: HelpCategory, request: pd.DataFrame) -> pd.DataFrame:
    """Keep only the rows whose ``help_category`` contains ``category``."""
    def has_category(categories):
        return category in categories

    mask = request['help_category'].apply(has_category)
    return request[mask]
|
src/ranking/ranking.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datetime
|
2 |
+
|
3 |
+
def calculate_score(row):
    """Compute the priority score of one request row.

    The score is the number of minutes elapsed since ``row['Horodateur']``
    plus the bonuses returned by ``get_text_score`` and ``get_score_temp``.
    """
    # NOTE(review): assumes row['Horodateur'] can be subtracted from a
    # datetime (i.e. is datetime-like, not text) — confirm against the loader.
    elapsed = datetime.datetime.now() - row['Horodateur']
    minutes_waiting = elapsed.total_seconds() / 60

    return minutes_waiting + get_text_score(row) + get_score_temp(row)
|
13 |
+
|
14 |
+
def get_temp(lat, lon):
    """Return the mean forecast minimum temperature for a location.

    Queries the OpenWeatherMap forecast endpoint and averages
    ``main.temp_min`` over every forecast point in the response.

    Args:
        lat: Latitude, inserted into the query string as given.
        lon: Longitude, inserted into the query string as given.

    Returns:
        The mean ``temp_min`` of the returned forecast points, or ``None``
        when the request fails, the status is not 200, or the response holds
        no forecast point. No ``units`` parameter is sent, so the value is in
        the API's default unit (Kelvin according to the OpenWeatherMap
        documentation — confirm).
    """
    # NOTE(review): ``requests`` and ``API_KEY`` are neither imported nor
    # defined in the visible part of this module — confirm they are provided.
    url = f'https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={API_KEY}'

    try:
        # A timeout keeps a stalled connection from blocking the ranking forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as error:
        print(f'Error: Unable to fetch weather data. {error}')
        return None

    if response.status_code != 200:
        print(f'Error: Unable to fetch weather data. Status code: {response.status_code}')
        return None

    points = response.json().get('list') or []
    if not points:
        return None

    # Divide by the number of points actually received rather than a fixed 40.
    return sum(point['main']['temp_min'] for point in points) / len(points)
|
25 |
+
|
26 |
+
# Header of the form column with the stated needs
# (Arabic: "What are your needs? (add them if they are not listed)").
NEED_COL = 'ما هي احتياجاتك؟ (أضفها إذا لم يتم ذكرها)'
# Header of the form column with the "lat, lon" coordinates of the location.
COOR_COL = 'هل يمكنك تقديم الإحداثيات الدقيقة للموقع؟ (ادا كنت لا توجد بعين المكان) متلاً \n31.01837503440344, -6.781405948842175'

# Bonus granted when the token is one of the space-separated words of the answer.
_NEED_TOKEN_SCORES = (
    # water
    ('وماء', 500),
    # food
    ('طعام', 500),
    # rescue ("secours")
    ('إغاثة', 800),
    # also tagged "secours" by the original author
    ('لنقود', 800),
    # tent
    ('الخيام', 500),
    # clothes
    ('ولملابس', 250),
    # covers
    ('الأغطية', 250),
    # mattress
    ('أفرشة', 100),
)

# Medical help is a two-word phrase, so it has to be looked up in the whole
# answer: it can never equal a single space-separated word.
_MEDICAL_PHRASE = 'مساعدة طبية'
_MEDICAL_SCORE = 1000


def get_text_score(row):
    """Score a request from the needs listed in its ``NEED_COL`` answer.

    Args:
        row: Mapping-like row (e.g. a pandas Series) indexed by column name.

    Returns:
        The sum of the bonuses of every recognised need; 0 when the answer
        is missing (not a string) or contains no recognised need.
    """
    need = row[NEED_COL]
    if not isinstance(need, str):
        # Empty cells come through as NaN; they carry no need.
        return 0

    words = set(need.split(' '))
    score = sum(points for token, points in _NEED_TOKEN_SCORES if token in words)
    if _MEDICAL_PHRASE in need:
        score += _MEDICAL_SCORE

    return score
|
54 |
+
|
55 |
+
def get_score_temp(row):
    """Return a cold-weather bonus for requests asking for tents, clothes or covers.

    The bonus only applies when one of those needs appears in the
    ``NEED_COL`` answer. The location's temperature is then obtained from
    ``get_temp`` using the "lat, lon" text found in ``COOR_COL``.

    Args:
        row: Mapping-like row (e.g. a pandas Series) indexed by column name.

    Returns:
        1000 when the temperature is below 283, 2000 when it is also below
        273, otherwise 0. Also 0 when the needs or coordinates are missing
        or malformed, or when no temperature is available.
    """
    need = row[NEED_COL]
    if not isinstance(need, str):
        return 0

    needs = need.split(' ')
    # tent, clothes or cover
    weather_sensitive = ('الخيام', 'ولملابس', 'الأغطية')
    if not any(token in needs for token in weather_sensitive):
        return 0

    coordinates = row[COOR_COL]
    if not isinstance(coordinates, str):
        # No coordinates given: nothing to look the weather up for.
        return 0
    parts = [part.strip() for part in coordinates.split(',')]
    if len(parts) != 2 or not all(parts):
        return 0
    lat, lon = parts

    average_temp = get_temp(lat, lon)
    if average_temp is None:
        # Temperature unknown: grant no bonus rather than fail the ranking.
        return 0

    # NOTE(review): thresholds look like Kelvin (about 10 °C and 0 °C) —
    # confirm the unit returned by get_temp.
    score = 0
    if average_temp < 283:
        score += 1000
    if average_temp < 273:
        score += 1000
    return score
|
74 |
+
|
75 |
+
def sort_request(requests):
    """Score every request and return them ordered by decreasing score.

    Missing ``Horodateur`` values are replaced by the current time, a
    ``score`` column is computed with ``calculate_score`` for each row, and
    the rows are sorted on it. The filled timestamps and the new column are
    written to the frame that was passed in.

    Args:
        requests: DataFrame of requests (the name shadows any ``requests``
            module inside this function only).

    Returns:
        A DataFrame with the same rows sorted by ``score``, highest first.
    """
    # Second precision, like the '%Y-%m-%d %H:%M:%S' text used previously,
    # but kept as a datetime so calculate_score can subtract it.
    now = datetime.datetime.now().replace(microsecond=0)

    # Assign the filled column back: fillna(inplace=True) on a selected
    # column is not guaranteed to update the frame itself.
    requests['Horodateur'] = requests['Horodateur'].fillna(now)

    requests['score'] = [calculate_score(row) for _, row in requests.iterrows()]

    return requests.sort_values(by='score', ascending=False)
|
89 |
+
|
90 |
+
|
91 |
+
|