OttoYu committed on
Commit
6b1ec2d
·
verified ·
1 Parent(s): 4f4ddab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -41
app.py CHANGED
@@ -4,13 +4,14 @@ import jieba.analyse
4
  import aiohttp
5
  import asyncio
6
  import ssl
7
- from aiohttp import ClientSession
8
  from functools import lru_cache
9
 
 
10
  ssl_context = ssl.create_default_context()
11
  ssl_context.check_hostname = False
12
  ssl_context.verify_mode = ssl.CERT_NONE
13
 
 
14
  area_data = {
15
  '香港': {
16
  '中西區': ["西環", "堅尼地城", "石塘咀", "西營盤", "上環", "中環", "金鐘", "西半山", "中半山", "半山", "山頂"],
@@ -38,15 +39,14 @@ area_data = {
38
  }
39
  }
40
 
 
41
  @lru_cache(maxsize=None)
42
  def load_user_dict_terms():
43
  user_dict_terms = set()
44
  dictionaries = [
45
- 'flag/RVT_AddressCh.txt',
46
- 'flag/RVT_AddressEn.txt',
47
- 'flag/RVT_Area.txt',
48
- 'flag/RVT_BuildingCh.txt',
49
- 'flag/RVT_BuildingEn.txt'
50
  ]
51
  for file_path in dictionaries:
52
  try:
@@ -58,17 +58,17 @@ def load_user_dict_terms():
58
  print(f'Error reading file {file_path}: {e}')
59
  return user_dict_terms
60
 
 
61
  def setup_jieba_dictionaries():
62
  dictionaries = [
63
- 'flag/RVT_AddressCh.txt',
64
- 'flag/RVT_AddressEn.txt',
65
- 'flag/RVT_Area.txt',
66
- 'flag/RVT_BuildingCh.txt',
67
- 'flag/RVT_BuildingEn.txt'
68
  ]
69
  for file_path in dictionaries:
70
  jieba.load_userdict(file_path)
71
 
 
72
  def process_text(text):
73
  setup_jieba_dictionaries()
74
  user_dict_terms = load_user_dict_terms()
@@ -84,26 +84,24 @@ def process_text(text):
84
 
85
  return results
86
 
 
87
  def reformat_text(text):
88
  return [line.strip() for line in text.splitlines() if line.strip()]
89
 
 
90
  def process_text_only(text, reformat):
91
  extracted_keywords = process_text(text)
92
  if reformat:
93
  extracted_keywords = reformat_text('\n'.join(extracted_keywords))
94
  return '\n'.join(extracted_keywords)
95
 
 
96
  async def lookup_address(query, language='zh-Hant'):
97
  url = 'https://www.als.gov.hk/lookup'
98
- headers = {
99
- 'Accept': 'application/json',
100
- 'Accept-Language': language
101
- }
102
- payload = {
103
- 'q': query
104
- }
105
 
106
- async with ClientSession() as session:
107
  try:
108
  async with session.post(url, headers=headers, data=payload, ssl=ssl_context) as response:
109
  if response.status == 200:
@@ -118,20 +116,11 @@ async def lookup_address(query, language='zh-Hant'):
118
  print(f'General Error: {e}')
119
  return {'error': f'General Error: {e}'}
120
 
 
121
  async def get_address_lookup_results(keywords):
122
  results = []
123
- tasks = []
124
-
125
- for keyword in keywords:
126
- keyword = keyword.strip()
127
- if not keyword:
128
- continue
129
-
130
- if not is_valid_for_lookup(keyword):
131
-
132
- continue
133
- else:
134
- tasks.append(lookup_address(keyword))
135
 
136
  lookup_results = await asyncio.gather(*tasks)
137
 
@@ -163,11 +152,16 @@ async def get_address_lookup_results(keywords):
163
 
164
  if len(result) > 1: # Only add if there's more than just the Keyword
165
  results.append(result)
166
- else:
167
- pass
168
 
169
  return results
170
 
 
 
 
 
 
 
 
171
  async def gradio_function(text, reformat, perform_lookup):
172
  extracted_keywords = process_text_only(text, reformat)
173
  keywords_list = extracted_keywords.splitlines()
@@ -178,18 +172,11 @@ async def gradio_function(text, reformat, perform_lookup):
178
 
179
  return extracted_keywords, address_results
180
 
181
- def is_valid_for_lookup(keyword):
182
- for region, districts in area_data.items():
183
- if keyword in districts.keys():
184
- return False
185
- for subdistricts in districts.values():
186
- if keyword in subdistricts:
187
- return False
188
- return True
189
 
190
  def gradio_interface(text, reformat, perform_lookup):
191
  return asyncio.run(gradio_function(text, reformat, perform_lookup))
192
 
 
193
  interface = gr.Interface(
194
  fn=gradio_interface,
195
  inputs=[
 
4
  import aiohttp
5
  import asyncio
6
  import ssl
 
7
  from functools import lru_cache
8
 
9
+ # SSL context setup
10
  ssl_context = ssl.create_default_context()
11
  ssl_context.check_hostname = False
12
  ssl_context.verify_mode = ssl.CERT_NONE
13
 
14
+ # Area data (unchanged)
15
  area_data = {
16
  '香港': {
17
  '中西區': ["西環", "堅尼地城", "石塘咀", "西營盤", "上環", "中環", "金鐘", "西半山", "中半山", "半山", "山頂"],
 
39
  }
40
  }
41
 
42
+
43
  @lru_cache(maxsize=None)
44
  def load_user_dict_terms():
45
  user_dict_terms = set()
46
  dictionaries = [
47
+ 'flag/RVT_AddressCh.txt', 'flag/RVT_AddressEn.txt', 'flag/RVT_Area.txt',
48
+ 'flag/RVT_BuildingCh.txt', 'flag/RVT_BuildingEn.txt', 'flag/ChiVillage.txt',
49
+ 'flag/ChiEstate.txt', 'flag/ChiStreet.txt', 'flag/ChiBuilding.txt'
 
 
50
  ]
51
  for file_path in dictionaries:
52
  try:
 
58
  print(f'Error reading file {file_path}: {e}')
59
  return user_dict_terms
60
 
61
+
62
  def setup_jieba_dictionaries():
63
  dictionaries = [
64
+ 'flag/RVT_AddressCh.txt', 'flag/RVT_AddressEn.txt', 'flag/RVT_Area.txt',
65
+ 'flag/RVT_BuildingCh.txt', 'flag/RVT_BuildingEn.txt', 'flag/ChiVillage.txt',
66
+ 'flag/ChiEstate.txt', 'flag/ChiStreet.txt', 'flag/ChiBuilding.txt'
 
 
67
  ]
68
  for file_path in dictionaries:
69
  jieba.load_userdict(file_path)
70
 
71
+
72
  def process_text(text):
73
  setup_jieba_dictionaries()
74
  user_dict_terms = load_user_dict_terms()
 
84
 
85
  return results
86
 
87
+
88
  def reformat_text(text):
89
  return [line.strip() for line in text.splitlines() if line.strip()]
90
 
91
+
92
  def process_text_only(text, reformat):
93
  extracted_keywords = process_text(text)
94
  if reformat:
95
  extracted_keywords = reformat_text('\n'.join(extracted_keywords))
96
  return '\n'.join(extracted_keywords)
97
 
98
+
99
  async def lookup_address(query, language='zh-Hant'):
100
  url = 'https://www.als.gov.hk/lookup'
101
+ headers = {'Accept': 'application/json', 'Accept-Language': language}
102
+ payload = {'q': query}
 
 
 
 
 
103
 
104
+ async with aiohttp.ClientSession() as session:
105
  try:
106
  async with session.post(url, headers=headers, data=payload, ssl=ssl_context) as response:
107
  if response.status == 200:
 
116
  print(f'General Error: {e}')
117
  return {'error': f'General Error: {e}'}
118
 
119
+
120
  async def get_address_lookup_results(keywords):
121
  results = []
122
+ tasks = [lookup_address(keyword.strip()) for keyword in keywords if
123
+ keyword.strip() and is_valid_for_lookup(keyword.strip())]
 
 
 
 
 
 
 
 
 
 
124
 
125
  lookup_results = await asyncio.gather(*tasks)
126
 
 
152
 
153
  if len(result) > 1: # Only add if there's more than just the Keyword
154
  results.append(result)
 
 
155
 
156
  return results
157
 
158
+
159
+ def is_valid_for_lookup(keyword):
160
+ return not any(keyword in districts.keys() or keyword in subdistrict
161
+ for districts in area_data.values()
162
+ for subdistrict in districts.values())
163
+
164
+
165
  async def gradio_function(text, reformat, perform_lookup):
166
  extracted_keywords = process_text_only(text, reformat)
167
  keywords_list = extracted_keywords.splitlines()
 
172
 
173
  return extracted_keywords, address_results
174
 
 
 
 
 
 
 
 
 
175
 
176
  def gradio_interface(text, reformat, perform_lookup):
177
  return asyncio.run(gradio_function(text, reformat, perform_lookup))
178
 
179
+
180
  interface = gr.Interface(
181
  fn=gradio_interface,
182
  inputs=[