Spaces:
Runtime error
Runtime error
File size: 5,591 Bytes
4925baf 07d2942 4925baf 07d2942 4925baf 7cfd43a 4925baf 07d2942 4925baf 8aa4241 4925baf 07d2942 4925baf 8aa4241 4925baf 8aa4241 4925baf 07d2942 60274d1 07d2942 4925baf 60274d1 07d2942 4925baf 07d2942 4925baf 07d2942 4925baf 07d2942 4925baf 07d2942 4925baf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import os
import gradio as gr
import pandas as pd
from dotenv import load_dotenv
import jieba
# NOTE(review): jieba.cut() returns a lazy generator, so this bare call neither
# segments anything nor loads jieba's dictionary by itself. If the intent is to
# warm jieba up at import time, jieba.initialize() would do that -- confirm intent.
jieba.cut('你好')
from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt
from loguru import logger
from sheet import compose_query, get_serp, get_condensed_result, extract_results, postprocess_result, format_output, category2supercategory
load_dotenv()
# logger = logging.getLogger(__name__)
# logger.setLevel(logging.DEBUG)
# Candidate category labels: every non-empty key of the category -> supercategory map.
classes = [label for label in category2supercategory if len(label) > 0]
def plot_wordcloud(text):
    """Build a word-cloud image from ``text``.

    The text is segmented with ``jieba.cut`` and the segments are joined with
    spaces before being handed to ``WordCloud.generate``. When the
    ``FONT_PATH`` environment variable is set, its value is passed to
    ``WordCloud`` as ``font_path``; otherwise the default font is used.

    Args:
        text: The text to visualise.

    Returns:
        The image produced by ``WordCloud.to_image()``.
    """
    font_path = os.getenv("FONT_PATH", None)
    if font_path is None:
        cloud_builder = WordCloud()
    else:
        cloud_builder = WordCloud(font_path=font_path)

    segmented_text = " ".join(jieba.cut(text))
    return cloud_builder.generate(segmented_text).to_image()
def format_category(formatted_results):
    """Render the first row of ``formatted_results`` as a Markdown quote block.

    Args:
        formatted_results: A table-like object (e.g. a pandas DataFrame) that
            supports ``obj[column].values[0]`` for the columns
            ``supercategory``, ``category``, ``provide_alcohol``,
            ``store_name``, ``phone_number`` and ``description``.

    Returns:
        A string of six ``> label:value`` lines separated by blank lines.
    """
    def first_value(column):
        # Only the first row is ever displayed.
        return formatted_results[column].values[0]

    quote_lines = []
    quote_lines.append(f"> 大類別:{first_value('supercategory')}")
    quote_lines.append(f"> 小類別:{first_value('category')}")
    if first_value('provide_alcohol'):
        alcohol_label = '是'
    else:
        alcohol_label = '否'
    quote_lines.append(f"> 推測提供酒品:{alcohol_label}")
    quote_lines.append(f"> 商家名稱:{first_value('store_name')}")
    quote_lines.append(f"> 電話:{first_value('phone_number')}")
    quote_lines.append(f"> 描述:{first_value('description')}")
    return "\n\n".join(quote_lines)
def do(business_name: str, address: str):
    """Classify a single business from its name and address.

    Composes a search query, fetches the search results with ``get_serp``,
    then runs them through ``extract_results``, ``postprocess_result`` and
    ``format_output`` (all from ``sheet``) and renders the outcome for the UI.

    Args:
        business_name: Name of the business or company to look up.
        address: Address of the business.

    Returns:
        A 3-tuple matching the three output components wired to this handler:
        ``(search-evidence markdown, word-cloud image, classification markdown)``.
        When the search request fails, the tuple is
        ``("Error: <message>", None, "")`` so that the number of returned
        values still matches the number of outputs.
    """
    provider = os.environ.get("DEFAULT_PROVIDER", "openai")
    # The fallback used to be the literal "'gpt-4o'" (quotes included), which
    # is not a valid model name.
    model = os.environ.get("DEFAULT_MODEL", "gpt-4o")
    google_domain = "google.com.tw"
    gl = 'tw'
    lr = 'lang_zh-TW'
    # NOTE(review): looks like a placeholder id for single lookups -- confirm.
    business_id = 12345678

    query = compose_query(address, business_name)
    try:
        res = get_serp(query, google_domain, gl, lr)
    except Exception as e:
        # Return one value per output component; a lone string would not
        # match the three outputs bound to this handler.
        return f"Error: {e}", None, ""
    cond_res = get_condensed_result(res)

    crawled_results = pd.DataFrame([{
        "index": 0,
        "business_id": business_id,
        "business_name": business_name,
        "serp": res,
        "evidence": cond_res,
        "address": address,
    }])

    extracted_results = extract_results(crawled_results, classes=classes, provider=provider, model=model)
    extracted_results = extracted_results['extracted_results'][[
        'business_id', 'business_name', 'address', 'category', 'evidence',
        'phone_number', 'description', 'store_name', 'provide_alcohol',
    ]]
    logger.debug(extracted_results['category'])

    postprocessed_path = "/tmp/postprocessed_results.joblib"
    try:
        postprocessed_results = postprocess_result(
            extracted_results,
            postprocessed_results_path=postprocessed_path,
            category_hierarchy=category2supercategory,
        )
    finally:
        # Clean up even when post-processing fails, and do not crash if the
        # file was never created.
        if os.path.exists(postprocessed_path):
            os.remove(postprocessed_path)

    formatted_results = format_output(postprocessed_results)
    logger.debug(formatted_results)
    formatted_output = format_category(formatted_results)

    evidence = formatted_results['formatted_evidence'].values[0]
    img = plot_wordcloud(evidence)
    # The first six characters of the evidence are dropped for display
    # (presumably a fixed prefix added by format_output -- confirm).
    return f"【搜尋結果】\n{evidence[6:]}", img, f"【判斷結果】\n{formatted_output}"
def load(blob, progress=gr.Progress()):
    """Read an uploaded, header-less CSV into a DataFrame.

    Args:
        blob: Either a path string, or an object whose ``name`` attribute
            holds the path of the uploaded file.
        progress: Gradio progress tracker (not used in the body).

    Returns:
        The parsed DataFrame, with ``COLUMNS`` as its column names.
    """
    csv_path = blob if isinstance(blob, str) else blob.name
    frame = pd.read_csv(csv_path, names=COLUMNS, header=None)
    print(frame.head())
    return frame
## --- interface --- ##
# outputs = [gr.Dataframe(row_count = (1, "dynamic"), col_count=(6,"dynamic"), label="output data", interactive=1)]
# demo = gr.Interface(
# fn=do,
# inputs=[ "text", "text", "text"],
# outputs=outputs,
# )
# Column names for the header-less batch CSV. Used both as `names=` when the
# upload is read with pd.read_csv and as the headers of the batch output table.
COLUMNS = ['營業地址', '統一編號', '總機構統一編號', '營業人名稱', '資本額', '設立日期', '組織別名稱', '使用統一發票',
'行業代號', '名稱', '行業代號1', '名稱1', '行業代號2', '名稱2', '行業代號3', '名稱3']
# Custom stylesheet handed to gr.Blocks(css=...): centers <h1> headings.
CSS = """
h1 {
text-align: center;
display:block;
}
"""
## --- block --- ##
# Two-tab UI: a single-lookup tab wired to `do`, and a batch tab wired to `load`.
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("# 🌟 自動分類餐廳型態 🌟")

    # Single lookup: business name + address in, evidence / word cloud / category out.
    with gr.Tab('單筆'):
        with gr.Row():
            inputs = [
                gr.Textbox(label="商家名稱", placeholder="輸入商家或公司名稱"),
                gr.Textbox(label="地址", placeholder="至少輸入縣市,完整地址更好"),
            ]
        with gr.Row():
            btn = gr.Button("Submit")
        with gr.Row():
            outputs = [
                gr.Markdown(label="參考資料(google search)"),
                gr.Image(label="文字雲"),
                gr.Markdown(label="類別"),
            ]
        btn.click(fn=do, inputs=inputs, outputs=outputs)

    # Batch: upload a CSV and show it as a table.
    with gr.Tab('批次'):
        with gr.Row():
            batch_inputs = [gr.UploadButton("上傳檔案", file_count="single")]
        with gr.Row():
            batch_btn = gr.Button("批量處理")
        with gr.Row():
            batch_outputs = [
                gr.Dataframe(
                    headers=COLUMNS,
                    # One "str" datatype per column; derived from COLUMNS so the
                    # two cannot drift apart (previously a hard-coded 16).
                    datatype=["str"] * len(COLUMNS),
                )
            ]
        batch_btn.click(fn=load, inputs=batch_inputs, outputs=batch_outputs)
if __name__ == "__main__":
    # Login credentials come from the USERNAME / PASSWORD environment variables.
    credentials = (os.environ.get('USERNAME'), os.environ.get('PASSWORD'))
    # Bind to all interfaces; public sharing (share=True) is left disabled.
    demo.launch(server_name='0.0.0.0', auth=credentials)
|