# LearnAndThink / llm.py
# weiwei1392 · init · 9dc1a52 · 4.52 kB
from typing import Optional, List
# from langchain.llms.utils import enforce_stop_tokens
# import torch
import requests
# import logging
# from transformers import AutoTokenizer, AutoModel, AutoConfig
# logging.basicConfig(filename='chat_log.txt', level=logging.INFO)
# Device name for the local torch helper; that helper is currently commented
# out in this file, so nothing visible here reads this value.
DEVICE: str = 'cuda'
# NOTE(review): this looks like a live API credential committed to source
# control - confirm, rotate it, and load it from configuration instead.
FORWARD_KEY: str = "fk198719-EQCwtk94jYVqrgSbSX61Rmy08KQFdZE7"
# def torch_gc():
# if torch.cuda.is_available():
# with torch.cuda.device(DEVICE):
# torch.cuda.empty_cache()
# torch.cuda.ipc_collect()
class ChatGLM:
    """Client for a ChatGLM model served over HTTP.

    Calling an instance POSTs the prompt, the trimmed history and the
    sampling settings as JSON to ``URL`` and returns the ``'response'``
    field of the JSON reply.
    """

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None  # forwarded unchanged under the 'tokenizers' key
    model: object = None  # not referenced by this class
    history_len: int = 10  # maximum number of history entries sent per call
    history = []  # not referenced by this class
    URL = 'http://183.131.3.48:9200'
    HEADERS = {'Content-Type': 'application/json'}
    # Passed straight to requests.post(); None keeps the previous behaviour of
    # waiting without a limit. Set a number of seconds to bound the wait.
    timeout: Optional[float] = None

    @property
    def _llm_type(self) -> str:
        """Return the identifier string of this backend."""
        return "ChatGLM"

    def _trim_history(self, history: Optional[List[List[str]]]) -> Optional[List[List[str]]]:
        """Drop entries whose first item is None and keep the newest ``history_len``.

        A falsy ``history`` (``None`` or empty) is returned unchanged.
        """
        if not history:
            return history
        # Entries with a None question are system messages; they are not sent.
        kept = [turn for turn in history if turn[0] is not None]
        # kept[-0:] would return the whole list, so a non-positive limit is
        # handled explicitly and yields no history at all.
        return kept[-self.history_len:] if self.history_len > 0 else []

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` to the server and return its answer.

        Args:
            prompt: Text sent under the ``'prompt'`` key.
            history: Earlier turns; each entry is indexed as ``entry[0]``.
                Entries whose first item is ``None`` are removed and only the
                last ``history_len`` remaining entries are sent.
            stop: Accepted for signature compatibility; currently unused.

        Returns:
            The value stored under ``'response'`` in the JSON reply.

        Raises:
            KeyError: If the JSON reply has no ``'response'`` key.
        """
        params = {
            'tokenizers': self.tokenizer,
            'prompt': prompt,
            'history': self._trim_history(history),
            'top_p': self.top_p,
            'max_length': self.max_length,
            'temperature': self.temperature,
        }
        response = requests.post(self.URL, headers=self.HEADERS, json=params,
                                 timeout=self.timeout).json()
        return response['response']
class LocalChatGLM:
    """Wrapper around an in-process chat model.

    ``model`` and ``tokenizer`` must be assigned before an instance is
    called; the call is delegated to ``model.chat``.
    """

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9  # not passed to model.chat
    tokenizer: object = None
    model: object = None
    history_len: int = 10  # maximum number of history entries given to the model
    # Class-level default; __call__ rebinds ``self.history`` to a new list, so
    # this shared list itself is never mutated.
    history = []

    @property
    def _llm_type(self) -> str:
        """Return the identifier string of this backend."""
        return "ChatGLM"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Run ``prompt`` through ``model.chat`` and return the response.

        Args:
            prompt: Full prompt text handed to the model.
            history: Earlier turns; ``None`` is treated as no history. Only
                the last ``history_len`` entries are passed on, and none at
                all when ``history_len`` is not positive.
            stop: Accepted for signature compatibility; currently unused.

        Returns:
            The first element of the pair returned by ``model.chat``.
        """
        if history is None:
            history = []
        recent = history[-self.history_len:] if self.history_len > 0 else []
        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=recent,
            max_length=self.max_length,
            temperature=self.temperature,
        )
        # Only the text after the last 'question:\n' marker is recorded as the
        # question of this turn (the whole prompt when the marker is absent).
        question = prompt.split('question:\n')[-1]
        self.history = self.history + [[question, response]]
        return response
class OpenAI3:
    """Client for the ``gpt-3.5-turbo`` chat-completions endpoint at ``URL``.

    Each call sends a single user message; no earlier turns are included.
    """

    max_length: int = 10000  # not sent with the request
    temperature: float = 0.2
    top_p = 0.9  # not sent with the request
    tokenizer: object = None  # not referenced by this class
    model: object = None  # not referenced by this class
    history_len: int = 10  # not referenced by this class
    history = []  # not referenced by this class
    # NOTE(review): this looks like a live bearer credential committed to
    # source control - confirm, rotate it, and load it from configuration.
    HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-EQCwtk94jYVqrgSbSX61Rmy08KQFdZE7'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-3.5-turbo"
    # Passed straight to requests.post(); None keeps the previous behaviour of
    # waiting without a limit. Set a number of seconds to bound the wait.
    timeout: Optional[float] = None

    @property
    def _llm_type(self) -> str:
        """Return the identifier string of this backend."""
        return "OPENAI3"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` as one user message and return the reply text.

        Args:
            prompt: Content of the single user message.
            history: Accepted for signature compatibility; currently unused.
            stop: Accepted for signature compatibility; currently unused.

        Returns:
            ``choices[0].message.content`` from the JSON reply.

        Raises:
            KeyError: If the JSON reply lacks one of the expected keys.
        """
        messages = [{"role": "user", "content": prompt}]
        payload = {"model": self.MODEL_NAME, "messages": messages, 'temperature': self.temperature}
        reply = requests.post(self.URL, headers=self.HEADERS, json=payload,
                              timeout=self.timeout).json()
        return reply['choices'][0]['message']['content']
class OpenAI4:
    """Client for the ``gpt-4`` chat-completions endpoint at ``URL``.

    Each call sends a single user message; no earlier turns are included.
    """

    max_length: int = 10000  # not sent with the request
    temperature: float = 0.2
    top_p = 0.9  # not sent with the request
    tokenizer: object = None  # not referenced by this class
    model: object = None  # not referenced by this class
    history_len: int = 10  # not referenced by this class
    history = []  # not referenced by this class
    # NOTE(review): this looks like a live bearer credential committed to
    # source control - confirm, rotate it, and load it from configuration.
    HEADERS = {'Content-Type': 'application/json', 'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-4"
    # Passed straight to requests.post(); None keeps the previous behaviour of
    # waiting without a limit. Set a number of seconds to bound the wait.
    timeout: Optional[float] = None

    @property
    def _llm_type(self) -> str:
        """Return the identifier string of this backend."""
        return "OPENAI4"

    def __call__(self,
                 prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` as one user message and return the reply text.

        Args:
            prompt: Content of the single user message.
            history: Accepted for signature compatibility; currently unused.
            stop: Accepted for signature compatibility; currently unused.

        Returns:
            ``choices[0].message.content`` from the JSON reply.

        Raises:
            KeyError: If the JSON reply lacks one of the expected keys.
        """
        messages = [{"role": "user", "content": prompt}]
        payload = {"model": self.MODEL_NAME, "messages": messages, 'temperature': self.temperature}
        reply = requests.post(self.URL, headers=self.HEADERS, json=payload,
                              timeout=self.timeout).json()
        return reply['choices'][0]['message']['content']