Spaces:
Runtime error
Runtime error
import os | |
import sys | |
import json | |
import time | |
import yaml | |
import joblib | |
import argparse | |
import jinja2 | |
import anthropic | |
import pandas as pd | |
from tqdm import tqdm | |
from loguru import logger | |
from openai import OpenAI | |
from dotenv import load_dotenv | |
import google.generativeai as genai | |
from google.generativeai.types import HarmCategory, HarmBlockThreshold | |
from utils import parse_json_garbage, compose_query | |
# Swap loguru's default handler (id 0) for a stderr sink at INFO level.
# Only the removal is guarded: a ValueError there means handler 0 no longer
# exists (presumably already replaced elsewhere), in which case the current
# sinks are left untouched. Errors from adding the new sink are not hidden.
try:
    logger.remove(0)
except ValueError:
    pass
else:
    logger.add(sys.stderr, level="INFO")

# Load settings from a .env file so the os.getenv() lookups for API keys
# further down can find them.
load_dotenv()
def llm(provider: str, model: str, system_prompt: str, user_content: str, delay: int = 0):
    """Invoke an LLM service and return its reply.

    Argument
    --------
    provider: str
        One of 'openai', 'anthropic' or 'google'
    model: str
        Model name for the API
    system_prompt: str
        System prompt for the API
    user_content: str
        User prompt for the API
    delay: int
        Seconds to sleep before sending the request; 0 (default) sends
        the request immediately

    Return
    ------
    response: str
        Text of the model's reply. For the 'google' provider only: when the
        reply text cannot be read from the response, the error is logged and
        the raw response object is returned instead so the caller can
        inspect it.

    Raise
    -----
    ValueError
        If `provider` is not one of the supported values. Errors raised by
        the provider SDKs while sending the request propagate unchanged.
    """
    if delay:
        time.sleep(delay)

    if provider == 'openai':
        client = OpenAI(organization=os.getenv('ORGANIZATION_ID'))
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_content,
                }
            ],
            model=model,
            response_format={"type": "json_object"},
            temperature=0,
            max_tokens=4096,
        )
        response = chat_completion.choices[0].message.content
    elif provider == 'anthropic':
        client = anthropic.Client(api_key=os.getenv('ANTHROPIC_API_KEY'))
        message = client.messages.create(
            model=model,
            system=system_prompt,
            messages=[
                {"role": "user", "content": user_content}
            ],
            max_tokens=4000
        )
        response = message.content[0].text
    elif provider == 'google':
        genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
        # Use a distinct name so the `model` argument (a model name string)
        # is not shadowed by the client object.
        gemini = genai.GenerativeModel(
            model_name=model,
            system_instruction=system_prompt,
            generation_config={
                "temperature": 0,
                "max_output_tokens": 8192,
                "response_mime_type": "application/json"
            })
        safety_settings = {
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        }
        messages = [{
            'role': 'user',
            'parts': [user_content]
        }]
        # The request is sent outside the try block: if it fails there is no
        # response object to fall back on, so the SDK error must propagate
        # (previously the handler touched an unbound variable and masked it).
        result = gemini.generate_content(messages, safety_settings=safety_settings)
        try:
            response = result.text
        except Exception as e:
            # Best effort: reading `.text` failed, so log what came back and
            # hand the raw response object to the caller.
            logger.error(f"Error (will still return response) -> {e}")
            logger.error(f"response.candidates -> {result.candidates}")
            response = result
    else:
        raise ValueError(f"Invalid provider: {provider!r}")
    return response
if __name__ == "__main__":
    # Command-line entry point: loads the YAML config and, for the `extract`
    # task, sends one hard-coded sample query through `llm` and prints the reply.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", type=str, default='config/config.yml', help="Path to the configuration file")
    # `--task` is required. Its former default ('prepare_batch') was not one of
    # the choices, so running without `-t` always ended in "Invalid task".
    parser.add_argument("-t", "--task", type=str, required=True, choices=['extract', 'classify'])
    parser.add_argument("-i", "--input_path", type=str, default='')
    parser.add_argument("-o", "--output_path", type=str, default='')
    parser.add_argument("-topn", "--topn", type=int, default=None)
    args = parser.parse_args()

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.exists(args.config):
        raise FileNotFoundError(f"File not found: {args.config}")
    # Close the file deterministically and read it as UTF-8 regardless of the
    # platform's locale encoding.
    with open(args.config, "r", encoding="utf-8") as config_file:
        config = yaml.safe_load(config_file)

    if args.task == 'extract':
        # The extraction prompt is a Jinja2 template filled with the `classes`
        # and `traits` entries of the config.
        jenv = jinja2.Environment()
        template = jenv.from_string(config['extraction_prompt'])
        system_prompt = template.render(classes=config['classes'], traits=config['traits'])

        # Hard-coded sample query and search results used as the user message.
        query = "山の迴饗"
        search_results = str([{"title": "山の迴饗", "snippet": "謝謝大家這麼支持山の迴饗 我們會繼續努力用心做出美味的料理 ————————— ⛰️ 山の迴饗地址:台東縣關山鎮中華路56號訂位專線:0975-957-056 · #山的迴饗 · #夢想起飛"}, {"title": "山的迴饗餐館- 店家介紹", "snippet": "營業登記資料 · 統一編號. 92433454 · 公司狀況. 營業中 · 公司名稱. 山的迴饗餐館 · 公司類型. 獨資 · 資本總額. 30000 · 所在地. 臺東縣關山鎮中福里中華路56號 · 使用發票."}, {"title": "關山漫遊| 💥山の迴饗x night bar", "snippet": "山の迴饗x night bar 即將在12/1號台東關山開幕! 別再煩惱池上、鹿野找不到宵夜餐酒館 各位敬請期待並關注我們✨ night bar❌山的迴饗 12/1 ..."}, {"title": "山的迴饗| 中西複合式餐廳|焗烤飯|義大利麵 - 台灣美食網", "snippet": "山的迴饗| 中西複合式餐廳|焗烤飯|義大利麵|台式三杯雞|滷肉飯|便當|CP美食營業時間 ; 星期一, 休息 ; 星期二, 10:00–14:00 16:00–21:00 ; 星期三, 10:00–14:00 16:00– ..."}, {"title": "便當|CP美食- 山的迴饗| 中西複合式餐廳|焗烤飯|義大利麵", "snippet": "餐廳山的迴饗| 中西複合式餐廳|焗烤飯|義大利麵|台式三杯雞|滷肉飯|便當|CP美食google map 導航. 臺東縣關山鎮中華路56號 +886 975 957 056 ..."}, {"title": "山的迴饗餐館", "snippet": "山的迴饗餐館,統編:92433454,地址:臺東縣關山鎮中福里中華路56號,負責人姓名:周偉慈,設立日期:112年11月15日."}, {"title": "山的迴饗餐館", "snippet": "山的迴饗餐館. 資本總額(元), 30,000. 負責人, 周偉慈. 登記地址, 看地圖 臺東縣關山鎮中福里中華路56號 郵遞區號查詢. 設立日期, 2023-11-15. 資料管理 ..."}, {"title": "山的迴饗餐館, 公司統一編號92433454 - 食品業者登錄資料集", "snippet": "公司或商業登記名稱山的迴饗餐館的公司統一編號是92433454, 登錄項目是餐飲場所, 業者地址是台東縣關山鎮中福里中華路56號, 食品業者登錄字號是V-202257990-00001-5."}, {"title": "山的迴饗餐館, 公司統一編號92433454 - 食品業者登錄資料集", "snippet": "公司或商業登記名稱山的迴饗餐館的公司統一編號是92433454, 登錄項目是公司/商業登記, 業者地址是台東縣關山鎮中福里中華路56號, 食品業者登錄字號是V-202257990-00000-4 ..."}, {"title": "山的迴饗餐館", "snippet": "負責人, 周偉慈 ; 登記地址, 台東縣關山鎮中福里中華路56號 ; 公司狀態, 核准設立 「查詢最新營業狀況請至財政部稅務入口網 」 ; 資本額, 30,000元 ; 所在縣市 ..."}, {"title": "山的迴饗 | 關山美食|焗烤飯|酒吧|義大利麵|台式三杯雞|滷肉飯|便當|CP美食", "顧客評價": "324晚餐餐點豬排簡餐加白醬焗烤等等餐點。\t店家也提供免費的紅茶 綠茶 白開水 多種的調味料自取 總而言之 CP值真的很讚\t空間舒適涼爽,店員服務周到"}, {"title": "類似的店", "snippet": "['中國菜']\t['客家料理']\t['餐廳']\t['熟食店']\t['餐廳']"}, {"telephone_number": "0975 957 056"}])
        user_content = f'''
`query`: `{query}`,
`search_results`: {search_results}
'''
        print(f"user_content -> {user_content}")
        resp = llm(config['provider'], config['model'], system_prompt, user_content)
        print(resp)
    elif args.task == 'classify':
        # NOTE(review): the classification prompt is loaded but no request is
        # sent in the code visible here - TODO confirm whether this branch is
        # unfinished.
        system_prompt = config['classification_prompt']
    else:
        # Defensive: argparse `choices` should already reject other values.
        raise ValueError("Invalid task")