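"""FastAPI app for a Hugging Face Space: exposes LangChain chains that wrap
the hosted Qwen1.5-4B-Chat and Meta-Llama-3-8B-Instruct text-generation
endpoints behind /conversation, /conversation2, and /inference routes."""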
import os
from typing import Union

from fastapi import FastAPI
from pydantic import BaseModel
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint, HuggingFacePipeline

from custom_llm import CustomLLM  # local wrapper, used only by the commented-out chains below
class ConversationPost(BaseModel):
    tenant: Union[str, None] = None   # accepted but not used by the handlers below
    module: Union[str, None] = None   # accepted but not used by the handlers below
    question: str


class InferencePost(BaseModel):
    question: str
    with_template: Union[str, None] = None  # 'llama', 'qwen', or None for a raw prompt
# Forward the HF token (assumed to be set as a Space secret named HF_API_KEY)
# to the variable that langchain_huggingface reads for authentication.
API_TOKEN = os.environ['HF_API_KEY']
os.environ["HUGGINGFACEHUB_API_TOKEN"] = API_TOKEN
app = FastAPI()
# ChatML-format prompt for Qwen. The Indonesian system line translates to:
# "You are an AI Assistant developed by Jonthan Jordan."
prompt_qwen = PromptTemplate.from_template("""<|im_start|>system
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant
""")
# Llama 3 header-format prompt carrying the same system message.
prompt_llama = PromptTemplate.from_template("""<|start_header_id|>system<|end_header_id|>
Kamu adalah Asisten AI yang dikembangkan oleh Jonthan Jordan. Answer strictly in Bahasa Indonesia<|eot_id|><|start_header_id|>user<|end_header_id|>
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""")
# Earlier local-pipeline alternative, kept for reference (runs the model
# in-process instead of calling the hosted endpoint):
# qwen_chain = prompt_qwen | HuggingFacePipeline.from_model_id(
#     model_id="Qwen/Qwen2-1.5B-Instruct",
#     task="text-generation",
#     pipeline_kwargs={
#         "max_new_tokens": 150,
#         "return_full_text": False,
#     },
# )
# Hosted text-generation endpoints on the Hugging Face Inference API;
# greedy decoding (do_sample=False), capped at 150 new tokens.
llama = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="text-generation",
    max_new_tokens=150,
    do_sample=False,
)

qwen = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen1.5-4B-Chat",
    task="text-generation",
    max_new_tokens=150,
    do_sample=False,
)
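# LCEL composition: `prompt | endpoint` yields a chain whose .invoke() renders
# the template first, then sends the resulting string to the hosted model.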
qwen_chain = prompt_qwen | qwen
llama_chain = prompt_llama | llama
# Earlier alternative, kept for reference: a custom endpoint wrapper with a
# ChatML stop token (CustomLLM is the local module imported above).
# qwen_chain = prompt_qwen | CustomLLM(repo_id="Qwen/Qwen-VL-Chat", model_type='text-generation', api_token=API_TOKEN, max_new_tokens=150).bind(stop=['<|im_end|>'])
@app.get("/")
def greet_json():
    return {"Hello": "World!"}


@app.post("/conversation")
async def conversation(data: ConversationPost):
    # Qwen chain; `tenant` and `module` are accepted but not used yet.
    return {"output": qwen_chain.invoke({"question": data.question})}


@app.post("/conversation2")
async def conversation2(data: ConversationPost):
    # Same contract as /conversation, served by the Llama 3 chain.
    return {"output": llama_chain.invoke({"question": data.question})}


@app.post("/inference")
async def inference(data: InferencePost):
    # `with_template` picks a chat template: 'llama' or 'qwen' wraps the
    # question in the matching prompt; anything else sends the raw question
    # straight to the Llama endpoint.
    if data.with_template == 'llama':
        out = llama_chain.invoke(data.question)
    elif data.with_template == 'qwen':
        out = qwen_chain.invoke(data.question)
    else:
        out = llama.invoke(data.question)
    return {"output": out}