Address-Lookup / main.py
OttoYu's picture
Upload 7 files
d2594f1 verified
raw
history blame
5.65 kB
import gradio as gr
import jieba
import jieba.analyse
import aiohttp
import asyncio
import ssl
from aiohttp import ClientSession
def _build_unverified_ssl_context():
    """Return a default SSL context with hostname and certificate checks off.

    NOTE(review): with verification disabled, connections made with this
    context are not protected against man-in-the-middle interception.
    Confirm that this is really required for the target server.
    """
    context = ssl.create_default_context()
    # check_hostname has to be cleared first: the ssl module refuses to set
    # verify_mode to CERT_NONE while hostname checking is still enabled.
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    return context


# Module-wide SSL context used for outgoing HTTPS requests.
ssl_context = _build_unverified_ssl_context()
def setup_jieba_dictionaries():
    """Load each of the project's user dictionary files into jieba."""
    for dictionary_path in (
        'flag/RVT_AddressCh.txt',
        'flag/RVT_AddressEn.txt',
        'flag/RVT_Area.txt',
        'flag/RVT_BuildingCh.txt',
        'flag/RVT_BuildingEn.txt',
    ):
        jieba.load_userdict(dictionary_path)
# Dictionary files whose first column supplies the accepted keyword terms.
_USER_DICT_TERM_FILES = (
    'flag/RVT_AddressCh.txt',
    'flag/RVT_AddressEn.txt',
    'flag/RVT_Area.txt',
    'flag/RVT_BuildingCh.txt',
    'flag/RVT_BuildingEn.txt',
)

# Filled on the first process_text() call and reused afterwards, so the
# dictionaries are not reloaded and re-read for every request.
_user_dict_terms_cache = None


def _read_user_dict_terms():
    """Collect the first whitespace-separated token of every dictionary line."""
    terms = set()
    for file_path in _USER_DICT_TERM_FILES:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for raw_line in f:
                    tokens = raw_line.split()
                    # Blank lines have no tokens; skipping them avoids the
                    # IndexError that used to abort the rest of the file.
                    if tokens:
                        terms.add(tokens[0])
        except FileNotFoundError:
            print(f'File not found: {file_path}')
        except Exception as e:
            print(f'Error reading file {file_path}: {e}')
    return terms


def process_text(text):
    """Extract dictionary-backed keywords from every line of *text*.

    Each line is stripped and run through jieba's TextRank keyword
    extraction (top 20, restricted to the 'ns', 'n', 'vn' and 'v' tags).
    Only keywords that also appear as the first token of a line in one of
    the user dictionary files are kept.

    Args:
        text: Input text; it is split with ``str.splitlines``.

    Returns:
        A list with one string per input line, holding that line's accepted
        keywords joined by single spaces (empty when none were accepted).
    """
    global _user_dict_terms_cache
    if _user_dict_terms_cache is None:
        # One-time setup: register the dictionaries with jieba and build the
        # lookup set. Nothing is cached if setup raises, so a later call
        # will retry.
        setup_jieba_dictionaries()
        _user_dict_terms_cache = _read_user_dict_terms()
    user_dict_terms = _user_dict_terms_cache

    results = []
    for line in text.splitlines():
        keywords = jieba.analyse.textrank(
            line.strip(),
            topK=20,
            withWeight=False,
            allowPOS=('ns', 'n', 'vn', 'v'),
        )
        results.append(
            ' '.join(keyword for keyword in keywords if keyword in user_dict_terms)
        )
    return results
def reformat_text(text):
    """Return the non-blank lines of *text* with surrounding whitespace removed."""
    cleaned_lines = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if stripped:
            cleaned_lines.append(stripped)
    return cleaned_lines
def process_text_only(text, reformat):
    """Run keyword extraction on *text* and return it as one string.

    Args:
        text: Raw input text, processed line by line by ``process_text``.
        reformat: When truthy, the extracted lines are passed through
            ``reformat_text`` before being joined.

    Returns:
        The per-line keyword strings joined with newline characters.
    """
    keyword_lines = process_text(text)
    if not reformat:
        return '\n'.join(keyword_lines)
    compacted_lines = reformat_text('\n'.join(keyword_lines))
    return '\n'.join(compacted_lines)
async def lookup_address(query, language='zh-Hant'):
    """POST *query* to the ALS lookup endpoint and return the decoded JSON.

    Args:
        query: Text sent as the ``q`` form field.
        language: Value sent in the ``Accept-Language`` request header.

    Returns:
        The parsed JSON body when the response status is 200. Otherwise a
        dict with a single ``'error'`` key describing the failure; the same
        message is also printed.
    """
    endpoint = 'https://www.als.gov.hk/lookup'
    request_headers = {'Accept': 'application/json', 'Accept-Language': language}
    form_fields = {'q': query}

    async with ClientSession() as session:
        try:
            async with session.post(
                endpoint,
                headers=request_headers,
                data=form_fields,
                ssl=ssl_context,
            ) as response:
                if response.status != 200:
                    message = f'Error fetching data: Status Code {response.status}'
                    print(message)
                    return {'error': message}
                return await response.json()
        except aiohttp.ClientError as e:
            message = f'Client Error: {e}'
            print(message)
            return {'error': message}
        except Exception as e:
            message = f'General Error: {e}'
            print(message)
            return {'error': message}
def _summarise_lookup(keyword, lookup_results):
    """Build the result row for *keyword* from one raw lookup response."""
    row = {
        'Keyword': keyword,
        'Full Address': 'No matches found',
        'Geo Address': 'N/A',
        'Latitude': 'N/A',
        'Longitude': 'N/A',
    }

    # An error payload, a non-dict body or an empty suggestion list all
    # count as "no match" instead of raising.
    if not isinstance(lookup_results, dict):
        return row
    suggestions = lookup_results.get('SuggestedAddress')
    if not isinstance(suggestions, list) or not suggestions:
        return row

    first_match = suggestions[0]  # Only the first suggestion is reported.
    if not first_match or not isinstance(first_match, dict):
        return row

    # Missing or null nested sections fall back to empty dicts, so the
    # per-field defaults below apply rather than a KeyError.
    address = first_match.get('Address') or {}
    premises_address = address.get('PremisesAddress') or {}
    chinese_address = premises_address.get('ChiPremisesAddress') or {}
    geo_info = premises_address.get('GeospatialInformation') or {}

    # 'Full Address' carries the Chinese building name, as before.
    row['Full Address'] = chinese_address.get('BuildingName', 'No Building Name')
    row['Geo Address'] = premises_address.get('GeoAddress', 'N/A')
    row['Latitude'] = geo_info.get('Latitude', 'N/A')
    row['Longitude'] = geo_info.get('Longitude', 'N/A')
    return row


async def get_address_lookup_results(keywords):
    """Look up every non-blank keyword and summarise its first match.

    Lookups are awaited one at a time, in input order.

    Args:
        keywords: Iterable of keyword strings. Each is stripped, and entries
            that are empty after stripping are skipped.

    Returns:
        A list of dicts with the keys ``'Keyword'``, ``'Full Address'``,
        ``'Geo Address'``, ``'Latitude'`` and ``'Longitude'``, one per
        keyword looked up. Keywords with no usable match get
        ``'No matches found'`` / ``'N/A'`` placeholders.
    """
    results = []
    for keyword in keywords:
        keyword = keyword.strip()
        if not keyword:  # Skip empty keywords
            continue
        lookup_results = await lookup_address(keyword)
        results.append(_summarise_lookup(keyword, lookup_results))
    return results
async def gradio_function(text, reformat, perform_lookup):
    """Extract keywords from *text* and optionally look them up.

    Args:
        text: Raw input text.
        reformat: Forwarded to ``process_text_only``.
        perform_lookup: When truthy, each extracted keyword line is sent
            through ``get_address_lookup_results``.

    Returns:
        A ``(extracted_keywords, address_results)`` tuple: the
        newline-joined keyword string, and the list of lookup results
        (empty when no lookup was requested).
    """
    extracted_keywords = process_text_only(text, reformat)
    if not perform_lookup:
        return extracted_keywords, []
    keyword_lines = extracted_keywords.splitlines()
    address_results = await get_address_lookup_results(keyword_lines)
    return extracted_keywords, address_results
def gradio_interface(text, reformat, perform_lookup):
    """Synchronous wrapper that runs ``gradio_function`` to completion."""
    pipeline = gradio_function(text, reformat, perform_lookup)
    return asyncio.run(pipeline)
# Input widgets, in the positional order of gradio_interface's parameters:
# text, reformat, perform_lookup.
input_components = [
    gr.Textbox(lines=20, placeholder="Paste text here, each line will be processed separately..."),
    gr.Checkbox(label="Reformat text (remove empty lines)"),
    gr.Checkbox(label="Perform Address Lookup"),
]

# Output widgets, matching the (keywords, lookup results) pair that
# gradio_interface returns.
output_components = [
    gr.Textbox(label="Extracted Address Keywords"),
    gr.JSON(label="Address Lookup Results"),
]

interface = gr.Interface(
    fn=gradio_interface,
    inputs=input_components,
    outputs=output_components,
    title="Address Extraction and Lookup with Natural Language Processing",
    description="Extract address keywords using NLP and optionally perform address lookup using ALS.",
)

# Start the web UI as soon as the module is executed.
interface.launch()