# Address-Lookup / app.py
# OttoYu's picture
# Rename main.py to app.py
# ec84976 verified
# raw
# history blame
# 5.65 kB
import gradio as gr
import jieba
import jieba.analyse
import aiohttp
import asyncio
import ssl
from aiohttp import ClientSession
# Shared TLS context handed to the aiohttp request made in lookup_address().
# NOTE(review): the two assignments below turn off hostname checking and
# certificate verification, so the HTTPS peer is not authenticated.
# Confirm this is intentional before relying on the lookup results.
ssl_context = ssl.create_default_context()
# Order matters: check_hostname has to be cleared first, otherwise setting
# verify_mode to CERT_NONE raises ValueError.
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
def setup_jieba_dictionaries():
    """Load the address/building user dictionaries into jieba, once.

    Each of the five ``flag/RVT_*.txt`` files is passed to
    ``jieba.load_userdict``. The function is called on every request (from
    ``process_text``), so a flag stored on the function object makes repeat
    calls return immediately instead of re-loading every dictionary.

    Any exception raised by ``jieba.load_userdict`` (for example for a
    missing file) propagates to the caller; in that case the flag is not set
    and the next call retries the load.
    """
    if getattr(setup_jieba_dictionaries, '_loaded', False):
        return

    dictionaries = (
        'flag/RVT_AddressCh.txt',
        'flag/RVT_AddressEn.txt',
        'flag/RVT_Area.txt',
        'flag/RVT_BuildingCh.txt',
        'flag/RVT_BuildingEn.txt',
    )
    for file_path in dictionaries:
        jieba.load_userdict(file_path)

    # Only mark as done after every dictionary loaded successfully.
    setup_jieba_dictionaries._loaded = True
def process_text(text):
    """Extract dictionary-backed keywords from each line of *text*.

    The jieba user dictionaries are set up, then the first whitespace-
    separated token of every dictionary line is collected into a set of
    known terms. Each input line is stripped and passed to
    ``jieba.analyse.textrank`` (``topK=20``, ``allowPOS=('ns', 'n', 'vn',
    'v')``); only keywords that are also known dictionary terms are kept.

    Args:
        text: Multi-line input string; every line is processed separately.

    Returns:
        A list with one string per input line (blank lines included), each
        being the kept keywords joined by single spaces, or ``''`` when no
        keyword matched.
    """
    setup_jieba_dictionaries()

    dictionary_files = (
        'flag/RVT_AddressCh.txt',
        'flag/RVT_AddressEn.txt',
        'flag/RVT_Area.txt',
        'flag/RVT_BuildingCh.txt',
        'flag/RVT_BuildingEn.txt',
    )
    user_dict_terms = set()
    for file_path in dictionary_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for entry in f:
                    fields = entry.split()
                    # A blank or whitespace-only line has no first token.
                    # Indexing it used to raise IndexError, which the broad
                    # handler below swallowed, silently dropping the rest of
                    # the file. Skip such lines instead.
                    if fields:
                        user_dict_terms.add(fields[0])
        except FileNotFoundError:
            print(f'File not found: {file_path}')
        except Exception as e:
            # Deliberately best-effort: a bad dictionary file must not
            # prevent the remaining files from being read.
            print(f'Error reading file {file_path}: {e}')

    results = []
    for line in text.splitlines():
        keywords = jieba.analyse.textrank(
            line.strip(),
            topK=20,
            withWeight=False,
            allowPOS=('ns', 'n', 'vn', 'v'),
        )
        results.append(
            ' '.join(keyword for keyword in keywords if keyword in user_dict_terms)
        )
    return results
def reformat_text(text):
    """Return the non-blank lines of *text*, each stripped of surrounding whitespace."""
    stripped_lines = map(str.strip, text.splitlines())
    return [entry for entry in stripped_lines if entry]
def process_text_only(text, reformat):
    """Run keyword extraction on *text* and return it as one newline-joined string.

    Args:
        text: Multi-line input forwarded to ``process_text``.
        reformat: When truthy, the joined result is passed through
            ``reformat_text`` so empty lines are removed.

    Returns:
        The extracted keyword lines joined with ``'\\n'``.
    """
    keyword_lines = process_text(text)
    if not reformat:
        return '\n'.join(keyword_lines)
    # Round-trip through a single string because reformat_text works on text.
    compacted = reformat_text('\n'.join(keyword_lines))
    return '\n'.join(compacted)
async def lookup_address(query, language='zh-Hant'):
    """POST *query* to the ALS lookup endpoint and return the decoded JSON.

    A new ``ClientSession`` is opened for each call and the request uses the
    module-level ``ssl_context``.

    Args:
        query: Search text sent as the ``q`` form field.
        language: Value of the ``Accept-Language`` request header.

    Returns:
        The parsed JSON body when the response status is 200. Otherwise a
        dict ``{'error': <message>}``; the same message is also printed.
        Exceptions raised during the request are caught and reported this
        way rather than propagated.
    """
    endpoint = 'https://www.als.gov.hk/lookup'
    request_headers = {'Accept': 'application/json', 'Accept-Language': language}
    form_fields = {'q': query}

    async with ClientSession() as session:
        try:
            async with session.post(
                endpoint,
                headers=request_headers,
                data=form_fields,
                ssl=ssl_context,
            ) as response:
                if response.status != 200:
                    message = f'Error fetching data: Status Code {response.status}'
                    print(message)
                    return {'error': message}
                return await response.json()
        except aiohttp.ClientError as e:
            message = f'Client Error: {e}'
            print(message)
            return {'error': message}
        except Exception as e:
            message = f'General Error: {e}'
            print(message)
            return {'error': message}
async def get_address_lookup_results(keywords):
    """Look up each keyword and summarise the first suggested address.

    Keywords are stripped; empty ones are skipped. The remaining keywords
    are looked up one after another with ``lookup_address``.

    Args:
        keywords: Iterable of keyword strings.

    Returns:
        A list of dicts with the keys ``'Keyword'``, ``'Full Address'``,
        ``'Geo Address'``, ``'Latitude'`` and ``'Longitude'``. When the
        lookup yields no usable first match the entry carries
        ``'No matches found'`` / ``'N/A'`` placeholders. ``'Full Address'``
        holds the Chinese premises ``BuildingName`` of the first match, or
        ``'No Building Name'`` when that field is absent.
    """
    results = []
    for keyword in keywords:
        keyword = keyword.strip()
        if not keyword:  # Skip empty keywords
            continue

        lookup_results = await lookup_address(keyword)

        # Start from the "nothing found" record and overwrite on success.
        record = {
            'Keyword': keyword,
            'Full Address': 'No matches found',
            'Geo Address': 'N/A',
            'Latitude': 'N/A',
            'Longitude': 'N/A',
        }

        # The response may be an error dict, a non-dict JSON value, or carry
        # an empty suggestion list; indexing [0] unguarded raised IndexError.
        suggestions = None
        if isinstance(lookup_results, dict):
            suggestions = lookup_results.get('SuggestedAddress')
        first_match = None
        if isinstance(suggestions, list) and suggestions:
            first_match = suggestions[0]  # Use the first match

        if isinstance(first_match, dict) and first_match:
            # Missing intermediate keys used to raise KeyError; treat them as
            # empty so the per-field defaults below apply.
            address = first_match.get('Address') or {}
            premises_address = address.get('PremisesAddress') or {}
            raw_address = premises_address.get('ChiPremisesAddress') or {}
            geo_info = premises_address.get('GeospatialInformation') or {}

            record['Full Address'] = raw_address.get('BuildingName', 'No Building Name')
            record['Geo Address'] = premises_address.get('GeoAddress', 'N/A')
            record['Latitude'] = geo_info.get('Latitude', 'N/A')
            record['Longitude'] = geo_info.get('Longitude', 'N/A')

        results.append(record)
    return results
async def gradio_function(text, reformat, perform_lookup):
    """Extract keywords from *text* and optionally look each line up.

    Args:
        text: Raw multi-line input.
        reformat: Forwarded to ``process_text_only``.
        perform_lookup: When truthy, every line of the extracted keyword
            text is passed to ``get_address_lookup_results``.

    Returns:
        A tuple ``(keyword_text, lookup_results)``; ``lookup_results`` is an
        empty list when no lookup was requested.
    """
    keyword_text = process_text_only(text, reformat)
    if not perform_lookup:
        return keyword_text, []
    lookup_rows = await get_address_lookup_results(keyword_text.splitlines())
    return keyword_text, lookup_rows
def gradio_interface(text, reformat, perform_lookup):
    """Synchronous entry point: run ``gradio_function`` to completion via ``asyncio.run``."""
    pending = gradio_function(text, reformat, perform_lookup)
    return asyncio.run(pending)
# Gradio UI definition. The three inputs map positionally onto the
# (text, reformat, perform_lookup) parameters of gradio_interface, and its
# returned (keyword text, lookup results) pair maps onto the two outputs.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(lines=20, placeholder="Paste text here, each line will be processed separately..."),
        gr.Checkbox(label="Reformat text (remove empty lines)"),
        gr.Checkbox(label="Perform Address Lookup")
    ],
    outputs=[
        gr.Textbox(label="Extracted Address Keywords"),
        gr.JSON(label="Address Lookup Results")
    ],
    title="Address Extraction and Lookup with Natural Language Processing",
    description="Extract address keywords using NLP and optionally perform address lookup using ALS."
)
# NOTE(review): launch() runs at import time because there is no
# `if __name__ == "__main__":` guard, so importing this module starts the app.
interface.launch()