from typing import Optional, List
# from langchain.llms.utils import enforce_stop_tokens
# import torch
import requests
# import logging
# from transformers import AutoTokenizer, AutoModel, AutoConfig

# logging.basicConfig(filename='chat_log.txt', level=logging.INFO)

DEVICE = "cuda"
FORWARD_KEY = 'fk198719-EQCwtk94jYVqrgSbSX61Rmy08KQFdZE7'


# def torch_gc():
#     if torch.cuda.is_available():
#         with torch.cuda.device(DEVICE):
#             torch.cuda.empty_cache()
#             torch.cuda.ipc_collect()


class ChatGLM:
    """Client for a ChatGLM model served over HTTP."""

    max_length: int = 10000
    temperature: float = 0
    top_p: float = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    URL = 'http://183.131.3.48:9200'
    HEADERS = {'Content-Type': 'application/json'}

    @property
    def _llm_type(self) -> str:
        return "ChatGLM"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        if history:
            # Clear out the system message, then keep only the most recent turns.
            history = [i for i in history if i[0] is not None]
            history = history[-self.history_len:]
        params = {'tokenizers': self.tokenizer, 'prompt': prompt, 'history': history,
                  'top_p': self.top_p, 'max_length': self.max_length,
                  'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['response']
        return answer


class LocalChatGLM:
    """Wrapper around a locally loaded ChatGLM model; set `model` and `tokenizer` before calling."""

    max_length: int = 10000
    temperature: float = 0
    top_p: float = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []

    @property
    def _llm_type(self) -> str:
        return "ChatGLM"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        # Avoid a mutable default argument: fall back to an empty history.
        history = history if history is not None else []
        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=history[-self.history_len:] if self.history_len > 0 else [],
            max_length=self.max_length,
            temperature=self.temperature,
        )
        # torch_gc()
        # if stop is not None:
        #     response = enforce_stop_tokens(response, stop)
        # Store only the question part of the templated prompt in the running history.
        question = prompt.split('question:\n')[-1]
        self.history = self.history + [[question, response]]
        return response


class OpenAI3:
    """Client for gpt-3.5-turbo via the api2d OpenAI-compatible endpoint."""

    max_length: int = 10000
    temperature: float = 0.2
    top_p: float = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json',
               'Authorization': f'Bearer {FORWARD_KEY}'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-3.5-turbo"

    @property
    def _llm_type(self) -> str:
        return "OPENAI3"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME, "messages": message,
                  'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer


class OpenAI4:
    """Client for gpt-4 via the api2d OpenAI-compatible endpoint."""

    max_length: int = 10000
    temperature: float = 0.2
    top_p: float = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json',
               'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-4"

    @property
    def _llm_type(self) -> str:
        return "OPENAI4"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME, "messages": message,
                  'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer
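

# --- Usage sketch (illustrative only) ---
# A minimal driver for the wrappers above. It assumes the api2d key and the
# ChatGLM endpoint URL hardcoded in the classes are live, which is
# environment-specific; the prompts are placeholders.
if __name__ == '__main__':
    llm = OpenAI3()
    print(llm('Summarize what a vector database is in one sentence.'))

    glm = ChatGLM()
    # `history` is a list of [question, answer] pairs; pass None for a fresh chat.
    print(glm('What is ChatGLM?', history=None))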