Spaces:
Runtime error
Runtime error
import os | |
import gradio as gr | |
import pandas as pd | |
from dotenv import load_dotenv | |
import jieba | |
# Load jieba's dictionary eagerly at import time so the first request does not
# pay for it. ``jieba.cut`` is a generator function: calling it without
# iterating the result never runs any code, so it cannot serve as a warm-up.
jieba.initialize()
from wordcloud import WordCloud | |
from PIL import Image | |
import matplotlib.pyplot as plt | |
from loguru import logger | |
from sheet import compose_query, get_serp, get_condensed_result, extract_results, postprocess_result, format_output, category2supercategory | |
# Load environment variables (FONT_PATH, DEFAULT_PROVIDER, DEFAULT_MODEL, ...)
# before anything below reads them.
load_dotenv()

# Category labels handed to the extractor; empty keys are skipped.
classes = [name for name in category2supercategory.keys() if len(name) > 0]
def plot_wordcloud(text):
    """Render a word cloud image for ``text``.

    The text is segmented with ``jieba.cut`` and the tokens are joined with
    single spaces before being passed to ``WordCloud.generate``.

    Args:
        text: The string to visualise.

    Returns:
        The object returned by ``to_image()`` on the generated word cloud.
    """
    # Read the variable once. An unset *or empty* FONT_PATH falls back to
    # WordCloud's built-in font instead of being passed on as a font path.
    font_path = os.getenv("FONT_PATH")
    wc_generator = WordCloud(font_path=font_path) if font_path else WordCloud()

    cloud = wc_generator.generate(" ".join(jieba.cut(text)))
    return cloud.to_image()
def format_category(formatted_results):
    """Build the Markdown summary of the classification result.

    Only element 0 of each column's ``.values`` is used.

    Args:
        formatted_results: Table-like object indexable by the column names
            ``supercategory``, ``category``, ``provide_alcohol``,
            ``store_name``, ``phone_number`` and ``description``.

    Returns:
        Six ``> label:value`` lines separated by blank lines.
    """

    def first(column):
        # First entry of the requested column.
        return formatted_results[column].values[0]

    supercategory = first("supercategory")
    category = first("category")
    # Any truthy value is reported as "yes", everything else as "no".
    alcohol = "是" if first("provide_alcohol") else "否"
    store_name = first("store_name")
    phone_number = first("phone_number")
    description = first("description")

    summary_lines = (
        f"> 大類別:{supercategory}",
        f"> 小類別:{category}",
        f"> 推測提供酒品:{alcohol}",
        f"> 商家名稱:{store_name}",
        f"> 電話:{phone_number}",
        f"> 描述:{description}",
    )
    return "\n\n".join(summary_lines)
def do(business_name: str, address: str):
    """Classify a single business from its name and address.

    Pipeline: compose a search query, fetch the search results, condense
    them into evidence, run the extractor, post-process and format the
    result, then render a word cloud of the formatted evidence.

    Args:
        business_name: Name of the shop or company.
        address: Address of the business.

    Returns:
        A 3-tuple matching the three output components this handler is wired
        to: (search-result markdown, word-cloud image, classification
        markdown). If the search request fails the tuple is
        ``("Error: <message>", None, "")`` so the number of returned values
        still matches the number of outputs.
    """
    import tempfile  # local import: only this handler needs it

    provider = os.environ.get("DEFAULT_PROVIDER", "openai")
    # The fallback must be the bare model id; the previous default contained
    # literal quote characters inside the string.
    model = os.environ.get("DEFAULT_MODEL", "gpt-4o")
    google_domain = "google.com.tw"
    gl = "tw"
    lr = "lang_zh-TW"
    # NOTE(review): hard-coded id, presumably a placeholder for single
    # lookups - confirm nothing downstream relies on it being real.
    business_id = 12345678

    query = compose_query(address, business_name)
    try:
        res = get_serp(query, google_domain, gl, lr)
    except Exception as e:
        logger.exception("SERP lookup failed")
        # One value per output component (markdown, image, markdown).
        return f"Error: {e}", None, ""

    cond_res = get_condensed_result(res)
    crawled_results = pd.DataFrame(
        [
            {
                "index": 0,
                "business_id": business_id,
                "business_name": business_name,
                "serp": res,
                "evidence": cond_res,
                "address": address,
            }
        ]
    )

    extracted = extract_results(
        crawled_results, classes=classes, provider=provider, model=model
    )
    extracted_results = extracted["extracted_results"][
        [
            "business_id",
            "business_name",
            "address",
            "category",
            "evidence",
            "phone_number",
            "description",
            "store_name",
            "provide_alcohol",
        ]
    ]
    logger.debug(extracted_results["category"])

    # Use a private scratch directory per call: a fixed path under /tmp is
    # shared by concurrent requests and was left behind (or missing at
    # removal time) whenever post-processing did not complete normally.
    with tempfile.TemporaryDirectory() as tmp_dir:
        postprocessed_results = postprocess_result(
            extracted_results,
            postprocessed_results_path=os.path.join(
                tmp_dir, "postprocessed_results.joblib"
            ),
            category_hierarchy=category2supercategory,
        )

    formatted_results = format_output(postprocessed_results)
    logger.debug(formatted_results)

    formatted_output = format_category(formatted_results)
    evidence = formatted_results["formatted_evidence"].values[0]
    img = plot_wordcloud(evidence)
    # The first six characters of the evidence are dropped for display.
    # NOTE(review): presumably a fixed prefix added by format_output - confirm.
    return f"【搜尋結果】\n{evidence[6:]}", img, f"【判斷結果】\n{formatted_output}"
def load(blob, progress=gr.Progress()):
    """Read an uploaded header-less CSV into a DataFrame.

    The file is parsed with ``COLUMNS`` as the column names and no header
    row.

    Args:
        blob: Either a path string or an object whose ``.name`` attribute is
            the path of the uploaded file. ``None`` (nothing uploaded yet)
            yields an empty table instead of raising ``AttributeError``.
        progress: Gradio progress tracker; not used in the body.

    Returns:
        The parsed ``pandas.DataFrame`` (empty, with ``COLUMNS`` as columns,
        when ``blob`` is ``None``).
    """
    if blob is None:
        return pd.DataFrame(columns=COLUMNS)

    csv_path = blob if isinstance(blob, str) else blob.name
    df = pd.read_csv(csv_path, names=COLUMNS, header=None)
    print(df.head())
    return df
## --- interface --- ## | |
# outputs = [gr.Dataframe(row_count = (1, "dynamic"), col_count=(6,"dynamic"), label="output data", interactive=1)] | |
# demo = gr.Interface( | |
# fn=do, | |
# inputs=[ "text", "text", "text"], | |
# outputs=outputs, | |
# ) | |
# Column names applied to the header-less batch CSV, in file order.
COLUMNS = [
    '營業地址',        # business address
    '統一編號',        # unified business number
    '總機構統一編號',  # head-office unified business number
    '營業人名稱',      # business name
    '資本額',          # capital
    '設立日期',        # establishment date
    '組織別名稱',      # organisation type
    '使用統一發票',    # uses uniform invoices
    # Up to four (industry code, name) pairs.
    '行業代號', '名稱',
    '行業代號1', '名稱1',
    '行業代號2', '名稱2',
    '行業代號3', '名稱3',
]

# Stylesheet passed to gr.Blocks: centres the page's <h1> title.
CSS = """
h1 {
text-align: center;
display:block;
}
"""
## --- block --- ## | |
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("# 🌟 自動分類餐廳型態 🌟")

    # Single-lookup tab: name + address in; evidence, word cloud and
    # classification out.
    with gr.Tab('單筆'):
        with gr.Row():
            inputs = [
                gr.Textbox(label="商家名稱", placeholder="輸入商家或公司名稱"),
                gr.Textbox(label="地址", placeholder="至少輸入縣市,完整地址更好"),
            ]
        with gr.Row():
            btn = gr.Button("Submit")
        with gr.Row():
            outputs = [
                gr.Markdown(label="參考資料(google search)"),
                gr.Image(label="文字雲"),
                gr.Markdown(label="類別"),
            ]
        btn.click(fn=do, inputs=inputs, outputs=outputs)

    # Batch tab: upload a CSV and show it as a table.
    with gr.Tab('批次'):
        with gr.Row():
            batch_inputs = [gr.UploadButton("上傳檔案", file_count="single")]
        with gr.Row():
            batch_btn = gr.Button("批量處理")
        with gr.Row():
            batch_outputs = [
                gr.Dataframe(
                    headers=COLUMNS,
                    # One "str" entry per column, derived from COLUMNS so the
                    # two cannot drift apart.
                    datatype=["str"] * len(COLUMNS),
                )
            ]
        batch_btn.click(fn=load, inputs=batch_inputs, outputs=batch_outputs)
if __name__ == "__main__":
    # Listen on all interfaces; pass share=True to launch() for a public link.
    host = '0.0.0.0'
    # Login credentials are read from the environment.
    # NOTE(review): when USERNAME or PASSWORD is unset the tuple contains
    # None - confirm how the installed Gradio version treats that.
    credentials = (os.environ.get('USERNAME'), os.environ.get('PASSWORD'))
    demo.launch(server_name=host, auth=credentials)