Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,13 +4,14 @@ import jieba.analyse
|
|
4 |
import aiohttp
|
5 |
import asyncio
|
6 |
import ssl
|
7 |
-
from aiohttp import ClientSession
|
8 |
from functools import lru_cache
|
9 |
|
|
|
10 |
ssl_context = ssl.create_default_context()
|
11 |
ssl_context.check_hostname = False
|
12 |
ssl_context.verify_mode = ssl.CERT_NONE
|
13 |
|
|
|
14 |
area_data = {
|
15 |
'香港': {
|
16 |
'中西區': ["西環", "堅尼地城", "石塘咀", "西營盤", "上環", "中環", "金鐘", "西半山", "中半山", "半山", "山頂"],
|
@@ -38,15 +39,14 @@ area_data = {
|
|
38 |
}
|
39 |
}
|
40 |
|
|
|
41 |
@lru_cache(maxsize=None)
|
42 |
def load_user_dict_terms():
|
43 |
user_dict_terms = set()
|
44 |
dictionaries = [
|
45 |
-
'flag/RVT_AddressCh.txt',
|
46 |
-
'flag/
|
47 |
-
'flag/
|
48 |
-
'flag/RVT_BuildingCh.txt',
|
49 |
-
'flag/RVT_BuildingEn.txt'
|
50 |
]
|
51 |
for file_path in dictionaries:
|
52 |
try:
|
@@ -58,17 +58,17 @@ def load_user_dict_terms():
|
|
58 |
print(f'Error reading file {file_path}: {e}')
|
59 |
return user_dict_terms
|
60 |
|
|
|
61 |
def setup_jieba_dictionaries():
|
62 |
dictionaries = [
|
63 |
-
'flag/RVT_AddressCh.txt',
|
64 |
-
'flag/
|
65 |
-
'flag/
|
66 |
-
'flag/RVT_BuildingCh.txt',
|
67 |
-
'flag/RVT_BuildingEn.txt'
|
68 |
]
|
69 |
for file_path in dictionaries:
|
70 |
jieba.load_userdict(file_path)
|
71 |
|
|
|
72 |
def process_text(text):
|
73 |
setup_jieba_dictionaries()
|
74 |
user_dict_terms = load_user_dict_terms()
|
@@ -84,26 +84,24 @@ def process_text(text):
|
|
84 |
|
85 |
return results
|
86 |
|
|
|
87 |
def reformat_text(text):
|
88 |
return [line.strip() for line in text.splitlines() if line.strip()]
|
89 |
|
|
|
90 |
def process_text_only(text, reformat):
|
91 |
extracted_keywords = process_text(text)
|
92 |
if reformat:
|
93 |
extracted_keywords = reformat_text('\n'.join(extracted_keywords))
|
94 |
return '\n'.join(extracted_keywords)
|
95 |
|
|
|
96 |
async def lookup_address(query, language='zh-Hant'):
|
97 |
url = 'https://www.als.gov.hk/lookup'
|
98 |
-
headers = {
|
99 |
-
|
100 |
-
'Accept-Language': language
|
101 |
-
}
|
102 |
-
payload = {
|
103 |
-
'q': query
|
104 |
-
}
|
105 |
|
106 |
-
async with ClientSession() as session:
|
107 |
try:
|
108 |
async with session.post(url, headers=headers, data=payload, ssl=ssl_context) as response:
|
109 |
if response.status == 200:
|
@@ -118,20 +116,11 @@ async def lookup_address(query, language='zh-Hant'):
|
|
118 |
print(f'General Error: {e}')
|
119 |
return {'error': f'General Error: {e}'}
|
120 |
|
|
|
121 |
async def get_address_lookup_results(keywords):
|
122 |
results = []
|
123 |
-
tasks = [
|
124 |
-
|
125 |
-
for keyword in keywords:
|
126 |
-
keyword = keyword.strip()
|
127 |
-
if not keyword:
|
128 |
-
continue
|
129 |
-
|
130 |
-
if not is_valid_for_lookup(keyword):
|
131 |
-
|
132 |
-
continue
|
133 |
-
else:
|
134 |
-
tasks.append(lookup_address(keyword))
|
135 |
|
136 |
lookup_results = await asyncio.gather(*tasks)
|
137 |
|
@@ -163,11 +152,16 @@ async def get_address_lookup_results(keywords):
|
|
163 |
|
164 |
if len(result) > 1: # Only add if there's more than just the Keyword
|
165 |
results.append(result)
|
166 |
-
else:
|
167 |
-
pass
|
168 |
|
169 |
return results
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
async def gradio_function(text, reformat, perform_lookup):
|
172 |
extracted_keywords = process_text_only(text, reformat)
|
173 |
keywords_list = extracted_keywords.splitlines()
|
@@ -178,18 +172,11 @@ async def gradio_function(text, reformat, perform_lookup):
|
|
178 |
|
179 |
return extracted_keywords, address_results
|
180 |
|
181 |
-
def is_valid_for_lookup(keyword):
|
182 |
-
for region, districts in area_data.items():
|
183 |
-
if keyword in districts.keys():
|
184 |
-
return False
|
185 |
-
for subdistricts in districts.values():
|
186 |
-
if keyword in subdistricts:
|
187 |
-
return False
|
188 |
-
return True
|
189 |
|
190 |
def gradio_interface(text, reformat, perform_lookup):
|
191 |
return asyncio.run(gradio_function(text, reformat, perform_lookup))
|
192 |
|
|
|
193 |
interface = gr.Interface(
|
194 |
fn=gradio_interface,
|
195 |
inputs=[
|
|
|
4 |
import aiohttp
|
5 |
import asyncio
|
6 |
import ssl
|
|
|
7 |
from functools import lru_cache
|
8 |
|
9 |
+
# SSL context handed to the aiohttp request in lookup_address (ssl=ssl_context).
#
# SECURITY WARNING: the two overrides below switch off hostname checking and
# certificate verification, so ANY server certificate is accepted and traffic
# sent with this context can be intercepted by a man-in-the-middle. The values
# are deliberately left unchanged here so existing behavior is preserved;
# the proper fix is to repair the trust store (for example
# ssl.create_default_context(cafile=...)) and then delete both overrides.
ssl_context = ssl.create_default_context()
# check_hostname has to be disabled first: setting verify_mode to CERT_NONE
# while hostname checking is still enabled raises ValueError.
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
|
13 |
|
14 |
+
# Area data (unchanged)
|
15 |
area_data = {
|
16 |
'香港': {
|
17 |
'中西區': ["西環", "堅尼地城", "石塘咀", "西營盤", "上環", "中環", "金鐘", "西半山", "中半山", "半山", "山頂"],
|
|
|
39 |
}
|
40 |
}
|
41 |
|
42 |
+
|
43 |
@lru_cache(maxsize=None)
|
44 |
def load_user_dict_terms():
|
45 |
user_dict_terms = set()
|
46 |
dictionaries = [
|
47 |
+
'flag/RVT_AddressCh.txt', 'flag/RVT_AddressEn.txt', 'flag/RVT_Area.txt',
|
48 |
+
'flag/RVT_BuildingCh.txt', 'flag/RVT_BuildingEn.txt', 'flag/ChiVillage.txt',
|
49 |
+
'flag/ChiEstate.txt', 'flag/ChiStreet.txt', 'flag/ChiBuilding.txt'
|
|
|
|
|
50 |
]
|
51 |
for file_path in dictionaries:
|
52 |
try:
|
|
|
58 |
print(f'Error reading file {file_path}: {e}')
|
59 |
return user_dict_terms
|
60 |
|
61 |
+
|
62 |
@lru_cache(maxsize=None)
def setup_jieba_dictionaries():
    """Register the custom address dictionaries with jieba, once per process.

    process_text calls this function on every request; without the cache all
    nine dictionary files would be re-read and re-registered each time. With
    ``lru_cache`` the body runs on the first call only and later calls return
    immediately.

    Raises:
        Whatever ``jieba.load_userdict`` raises for an unreadable file. An
        exception is not cached, so the next call retries the full load.
    """
    dictionaries = [
        'flag/RVT_AddressCh.txt', 'flag/RVT_AddressEn.txt', 'flag/RVT_Area.txt',
        'flag/RVT_BuildingCh.txt', 'flag/RVT_BuildingEn.txt', 'flag/ChiVillage.txt',
        'flag/ChiEstate.txt', 'flag/ChiStreet.txt', 'flag/ChiBuilding.txt',
    ]
    for file_path in dictionaries:
        jieba.load_userdict(file_path)
|
70 |
|
71 |
+
|
72 |
def process_text(text):
|
73 |
setup_jieba_dictionaries()
|
74 |
user_dict_terms = load_user_dict_terms()
|
|
|
84 |
|
85 |
return results
|
86 |
|
87 |
+
|
88 |
def reformat_text(text):
    """Split *text* into lines, trim each one, and drop the empty ones.

    Args:
        text: A string that may contain several lines.

    Returns:
        A list of the non-blank lines of *text*, each with leading and
        trailing whitespace removed, in their original order.
    """
    stripped_lines = (line.strip() for line in text.splitlines())
    return [line for line in stripped_lines if line]
|
90 |
|
91 |
+
|
92 |
def process_text_only(text, reformat):
    """Extract keywords from *text* and return them as one newline-joined string.

    Args:
        text: Raw input text handed to process_text.
        reformat: When truthy, the extracted keywords are passed through
            reformat_text, which trims each line and removes blank ones.

    Returns:
        The keywords joined with ``'\\n'``, one keyword per line.
    """
    extracted_keywords = process_text(text)
    if reformat:
        # reformat_text works on a single string, so join before cleaning.
        extracted_keywords = reformat_text('\n'.join(extracted_keywords))
    return '\n'.join(extracted_keywords)
|
97 |
|
98 |
+
|
99 |
async def lookup_address(query, language='zh-Hant'):
|
100 |
url = 'https://www.als.gov.hk/lookup'
|
101 |
+
headers = {'Accept': 'application/json', 'Accept-Language': language}
|
102 |
+
payload = {'q': query}
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
+
async with aiohttp.ClientSession() as session:
|
105 |
try:
|
106 |
async with session.post(url, headers=headers, data=payload, ssl=ssl_context) as response:
|
107 |
if response.status == 200:
|
|
|
116 |
print(f'General Error: {e}')
|
117 |
return {'error': f'General Error: {e}'}
|
118 |
|
119 |
+
|
120 |
async def get_address_lookup_results(keywords):
|
121 |
results = []
|
122 |
+
tasks = [lookup_address(keyword.strip()) for keyword in keywords if
|
123 |
+
keyword.strip() and is_valid_for_lookup(keyword.strip())]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
lookup_results = await asyncio.gather(*tasks)
|
126 |
|
|
|
152 |
|
153 |
if len(result) > 1: # Only add if there's more than just the Keyword
|
154 |
results.append(result)
|
|
|
|
|
155 |
|
156 |
return results
|
157 |
|
158 |
+
|
159 |
+
def is_valid_for_lookup(keyword, areas=None):
    """Return True unless *keyword* is just a district or sub-district name.

    Args:
        keyword: The candidate keyword to check.
        areas: Optional mapping of region -> {district: [sub-districts]}.
            Defaults to the module-level ``area_data`` table.

    Returns:
        False when *keyword* equals a district name or appears in any
        district's sub-district collection; True otherwise. Top-level region
        names are not checked, so they are still considered valid.
    """
    if areas is None:
        areas = area_data
    for districts in areas.values():
        # Membership on the dict itself tests its keys (the district names).
        if keyword in districts:
            return False
        if any(keyword in subdistricts for subdistricts in districts.values()):
            return False
    return True
|
163 |
+
|
164 |
+
|
165 |
async def gradio_function(text, reformat, perform_lookup):
|
166 |
extracted_keywords = process_text_only(text, reformat)
|
167 |
keywords_list = extracted_keywords.splitlines()
|
|
|
172 |
|
173 |
return extracted_keywords, address_results
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
def gradio_interface(text, reformat, perform_lookup):
    """Synchronous wrapper that runs the async gradio_function to completion.

    All three arguments are forwarded unchanged to gradio_function, and its
    result is returned as-is.

    Note:
        ``asyncio.run`` starts a fresh event loop for each call and raises
        RuntimeError if invoked from a thread that already has a running loop.
    """
    return asyncio.run(gradio_function(text, reformat, perform_lookup))
|
178 |
|
179 |
+
|
180 |
interface = gr.Interface(
|
181 |
fn=gradio_interface,
|
182 |
inputs=[
|