from fastapi import FastAPI
import os
import requests
from llama_cpp import Llama
from transformers import AutoTokenizer

# Secrets are injected through the Space's environment variables.
access_token = os.getenv("access_token")
privateurl = os.getenv("privateurl")

# The tokenizers are only used to render chat-formatted prompts; generation
# itself runs on the GGUF models loaded below.
tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer2 = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=access_token)
tokenizer3 = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
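
# Quantized GGUF builds are fetched from the Hub by llama-cpp-python; the
# filename glob selects which quantization variant to download.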
llm1 = Llama.from_pretrained(
    repo_id="Qwen/Qwen2-1.5B-Instruct-GGUF",
    filename="*q8_0.gguf",
    verbose=False
)
llm2 = Llama.from_pretrained(
    repo_id="NexaAIDev/gemma-2-2b-it-GGUF",
    filename="*q4_K_S.gguf",
    verbose=False
)
llm3 = Llama.from_pretrained(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    filename="*q4.gguf",
    verbose=False
)

app = FastAPI()

@app.get("/")
async def read_root():
    return {"Hello": "World!"}
def modelResp1(cookie, target, token, prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"}
    ]
    text = tokenizer1.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    output = llm1(
        text,
        max_tokens=64,  # Generate up to 64 tokens
        echo=False,     # Do not echo the prompt in the output
    )
    response = output['choices'][0]['text']
    # Fill in the shared request template and forward the reply.
    headers['Cookie'] = f"{cookie}"
    payload['token'] = f"{token}"
    payload['target'] = f"{target}"
    payload['content'] = response
    requests.post(privateurl, headers=headers, data=payload)
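
# Gemma variant: returns the reply directly. gemma-2's chat template does not
# accept a system role, so the persona is set by the first user/assistant turn.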
def modelResp2(prompt):
    messages = [
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"}
    ]
    text = tokenizer2.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    output = llm2(
        text,
        max_tokens=64,  # Generate up to 64 tokens
        echo=False,     # Do not echo the prompt in the output
    )
    response = output['choices'][0]['text']
    return response
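
# Phi-3 variant: same persona setup, prompted with tokenizer3 and run on the
# Phi-3 GGUF model.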
def modelResp3(prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"}
    ]
    text = tokenizer3.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    output = llm3(
        text,
        max_tokens=64,  # Generate up to 64 tokens
        echo=False,     # Do not echo the prompt in the output
    )
    response = output['choices'][0]['text']
    return response
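
# HTTP endpoints: each accepts a JSON body and dispatches to one of the
# helpers above.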
@app.post("/modelapi1")
async def modelApi(data: dict):
target = data.get("target_id")
cookie = data.get("Cookie")
token = data.get("token")
prompt = data.get("prompt")
modelResp1(cookie, target, token, prompt)
return {"Hello": "World!"}
@app.post("/modelapi2")
async def modelApi(data: dict):
prompt = data.get("prompt")
#response = modelResp2(prompt)
return {"Hello": "World!"}
@app.post("/modelapi3")
async def modelApi1(data: dict):
prompt = data.get("prompt")
response = modelResp3(prompt)
return response
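
# Shared request template for the private-URL callback; modelResp1 fills in
# Cookie, token, target, and content per call.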
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': '',
    'Sec-Ch-Ua': '"Opera";v="95", "Chromium";v="109", "Not;A=Brand";v="24"',
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': '"Windows"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0',
    'X-Requested-With': 'XMLHttpRequest'
}

payload = {
    'target': '',
    'content': '',
    'token': ''
}
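
# Example invocation (a sketch; the JSON field names follow the handlers
# above, and 7860 is assumed as the Space's port):
#   curl -X POST http://localhost:7860/modelapi3 \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What can you do?"}'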