zxcgqq committed on
Commit b5e593e · 1 Parent(s): cedfb75

Upload 8 files

Files changed (8)
  1. app.py +93 -0
  2. conversation.py +159 -0
  3. demo.py +24 -0
  4. demo1.py +91 -0
  5. demotool.py +67 -0
  6. llmLoader.py +55 -0
  7. loader.py +171 -0
  8. singleton.py +24 -0
app.py ADDED
@@ -0,0 +1,93 @@
+ import gradio as gr
+ from langchain.agents import initialize_agent
+ # from langchain.llms import OpenAI
+ # from langchain.chat_models import ChatOpenAI
+
+ from langchain.tools import BaseTool, StructuredTool, Tool, tool
+ from PIL import Image
+ from demotool import *
+ from loader import *
+ # from llmLoader import *
+ import re
+ from gradio_tools.tools import (StableDiffusionTool, ImageCaptioningTool, StableDiffusionPromptGeneratorTool,
+                                 TextToVideoTool)
+
+ from langchain.memory import ConversationBufferMemory
+ # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+ from langchain import PromptTemplate, HuggingFaceHub, LLMChain
+
+ def init_model_config():
+     llm = ChatLLM()
+     llm.model_type = 'chatglm'
+     llm.model_name_or_path = llm_model_dict['chatglm'][
+         'ChatGLM-6B-int4']
+     llm.load_llm()
+     return llm
+
+ # initialize HF LLM
+ # flan_t5 = HuggingFaceHub(
+ #     repo_id="google/flan-t5-xl",
+ #     model_kwargs={"temperature": 1e-10},
+ #     huggingfacehub_api_token="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"
+ # )
+
+
+ # llm = ChatOpenAI(openai_api_key="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", temperature=0, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])
+
+ # llm = ModelLoader()
+ # llm.loader()
+ # chatLLM = ModelLoader()
+ # chatLLM.loader()
+ memory = ConversationBufferMemory(memory_key="chat_history")
+
+
+ # tools = [Text2Image()]
+
+ # tools = [Tool.from_function(
+ #     func=search,
+ #     name="Search",
+ #     description="useful for when you need to answer questions about current events"
+ # )]
+
+ # tools = [Tool.from_function(
+ #     func=optimizationProblem,
+ #     name="optimizationProblem",
+ #     description="you must use this tool when you need to add more information"
+ # )]
+
+
+ # tools = [StableDiffusionPromptGeneratorTool().langchain]
+
+ tools = []
+
+
+ agent = initialize_agent(tools, init_model_config(), memory=memory, agent="conversational-react-description", verbose=True)
+
+ def run_text(text, state):
+     # print("stat:" + text)
+     # res = llm_chain.run(text)
+     # print("res:" + res)
+     res = agent.run(input=text)
+     response = re.sub(r'(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res)
+     state = state + [(text, response)]
+     return state, state
+
+ with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
+     chatbot = gr.Chatbot(elem_id="chatbot", show_label=False)
+     state = gr.State([])
+     with gr.Row() as input_raws:
+         with gr.Column(scale=0.6):
+             txt = gr.Textbox(show_label=False).style(container=False)
+         with gr.Column(scale=0.20, min_width=0):
+             run = gr.Button("🏃‍♂️Run")
+         with gr.Column(scale=0.20, min_width=0):
+             clear = gr.Button("🔄Clear️")
+
+     txt.submit(run_text, [txt, state], [chatbot, state])
+     txt.submit(lambda: "", None, txt)
+     run.click(run_text, [txt, state], [chatbot, state])
+
+ demo.queue(concurrency_count=10).launch(server_name="0.0.0.0", server_port=7865)
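
In run_text, the agent's reply is post-processed so that any saved image path of the form "image/<name>.png" (the location Text2Image writes to) is rewritten into Markdown that Gradio can serve through its /file= route. A minimal sketch of that substitution, using a hypothetical path and the same regular expression as above:

import re

res = "Here is your picture: image/3fa2b1c0.png"
response = re.sub(r'(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res)
print(response)
# Here is your picture: ![](/file=image/3fa2b1c0.png)*image/3fa2b1c0.png*
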
conversation.py ADDED
@@ -0,0 +1,159 @@
+ import dataclasses
+ from enum import auto, Enum
+ from typing import List, Tuple, Any
+
+
+ class SeparatorStyle(Enum):
+     """Different separator style."""
+     SINGLE = auto()
+     TWO = auto()
+
+
+ @dataclasses.dataclass
+ class Conversation:
+     """A class that keeps all conversation history."""
+     system: str
+     roles: List[str]
+     messages: List[List[str]]
+     offset: int
+     sep_style: SeparatorStyle = SeparatorStyle.SINGLE
+     sep: str = "###"
+     sep2: str = None
+
+     skip_next: bool = False
+     conv_id: Any = None
+
+     def get_prompt(self):
+         if self.sep_style == SeparatorStyle.SINGLE:
+             ret = self.system + self.sep
+             for role, message in self.messages:
+                 if message:
+                     ret += role + ": " + message + self.sep
+                 else:
+                     ret += role + ":"
+             return ret
+         elif self.sep_style == SeparatorStyle.TWO:
+             seps = [self.sep, self.sep2]
+             ret = self.system + seps[0]
+             for i, (role, message) in enumerate(self.messages):
+                 if message:
+                     ret += role + ": " + message + seps[i % 2]
+                 else:
+                     ret += role + ":"
+             return ret
+         else:
+             raise ValueError(f"Invalid style: {self.sep_style}")
+
+     def append_message(self, role, message):
+         self.messages.append([role, message])
+
+     def to_gradio_chatbot(self):
+         ret = []
+         for i, (role, msg) in enumerate(self.messages[self.offset:]):
+             if i % 2 == 0:
+                 ret.append([msg, None])
+             else:
+                 ret[-1][-1] = msg
+         return ret
+
+     def copy(self):
+         return Conversation(
+             system=self.system,
+             roles=self.roles,
+             messages=[[x, y] for x, y in self.messages],
+             offset=self.offset,
+             sep_style=self.sep_style,
+             sep=self.sep,
+             sep2=self.sep2,
+             conv_id=self.conv_id)
+
+     def dict(self):
+         return {
+             "system": self.system,
+             "roles": self.roles,
+             "messages": self.messages,
+             "offset": self.offset,
+             "sep": self.sep,
+             "sep2": self.sep2,
+             "conv_id": self.conv_id,
+         }
+
+
+ conv_v1 = Conversation(
+     system="A chat between a curious human and an artificial intelligence assistant. "
+            "The assistant gives helpful, detailed, and polite answers to the human's questions.",
+     roles=("Human", "Assistant"),
+     messages=(
+         ("Human", "Give three tips for staying healthy."),
+         ("Assistant",
+          "Sure, here are three tips for staying healthy:\n"
+          "1. Exercise regularly: Regular physical activity can help improve your overall health and wellbeing. "
+          "It can also help reduce your risk of chronic conditions such as obesity, diabetes, heart disease, "
+          "and certain cancers. Aim for at least 150 minutes of moderate-intensity aerobic exercise or "
+          "75 minutes of vigorous-intensity aerobic exercise per week, along with muscle-strengthening "
+          "activities at least two days per week.\n"
+          "2. Eat a balanced diet: Eating a balanced diet that is rich in fruits, "
+          "vegetables, whole grains, lean proteins, and healthy fats can help support "
+          "your overall health. Try to limit your intake of processed and high-sugar foods, "
+          "and aim to drink plenty of water throughout the day.\n"
+          "3. Get enough sleep: Getting enough quality sleep is essential for your physical "
+          "and mental health. Adults should aim for seven to nine hours of sleep per night. "
+          "Establish a regular sleep schedule and try to create a relaxing bedtime routine to "
+          "help improve the quality of your sleep.")
+     ),
+     offset=2,
+     sep_style=SeparatorStyle.SINGLE,
+     sep="###",
+ )
+
+ conv_v1_2 = Conversation(
+     system="A chat between a curious human and an artificial intelligence assistant. "
+            "The assistant gives helpful, detailed, and polite answers to the human's questions.",
+     roles=("Human", "Assistant"),
+     messages=(
+         ("Human", "What are the key differences between renewable and non-renewable energy sources?"),
+         ("Assistant",
+          "Renewable energy sources are those that can be replenished naturally in a relatively "
+          "short amount of time, such as solar, wind, hydro, geothermal, and biomass. "
+          "Non-renewable energy sources, on the other hand, are finite and will eventually be "
+          "depleted, such as coal, oil, and natural gas. Here are some key differences between "
+          "renewable and non-renewable energy sources:\n"
+          "1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable "
+          "energy sources are finite and will eventually run out.\n"
+          "2. Environmental impact: Renewable energy sources have a much lower environmental impact "
+          "than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, "
+          "and other negative effects.\n"
+          "3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically "
+          "have lower operational costs than non-renewable sources.\n"
+          "4. Reliability: Renewable energy sources are often more reliable and can be used in more remote "
+          "locations than non-renewable sources.\n"
+          "5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different "
+          "situations and needs, while non-renewable sources are more rigid and inflexible.\n"
+          "6. Sustainability: Renewable energy sources are more sustainable over the long term, while "
+          "non-renewable sources are not, and their depletion can lead to economic and social instability.\n")
+     ),
+     offset=2,
+     sep_style=SeparatorStyle.SINGLE,
+     sep="###",
+ )
+
+ conv_bair_v1 = Conversation(
+     system="BEGINNING OF CONVERSATION:",
+     roles=("USER", "GPT"),
+     messages=(),
+     offset=0,
+     sep_style=SeparatorStyle.TWO,
+     sep=" ",
+     sep2="</s>",
+ )
+
+
+ default_conversation = conv_v1_2
+ conv_templates = {
+     "v1": conv_v1_2,
+     "bair_v1": conv_bair_v1,
+ }
+
+
+ if __name__ == "__main__":
+     print(default_conversation.get_prompt())
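
The templates in conv_templates are meant to be copied rather than mutated in place: a copy gets its own message list, new turns are appended with append_message, and the same object can render either a model prompt (get_prompt) or Gradio chat pairs (to_gradio_chatbot). A small usage sketch, assuming conversation.py is importable from the working directory:

from conversation import conv_templates

conv = conv_templates["v1"].copy()                 # copy so the shared template is not modified
conv.append_message(conv.roles[0], "How do solar panels work?")
conv.append_message(conv.roles[1], None)           # empty slot the model is expected to fill
print(conv.get_prompt())                           # ends with "...Human: How do solar panels work?###Assistant:"
print(conv.to_gradio_chatbot())                    # [question, answer] pairs, starting at `offset`
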
demo.py ADDED
@@ -0,0 +1,24 @@
+ from langchain import PromptTemplate, HuggingFaceHub, LLMChain
+
+
+ # initialize HF LLM
+ flan_t5 = HuggingFaceHub(
+     repo_id="google/flan-t5-xl",
+     model_kwargs={"temperature": 1e-10},
+     huggingfacehub_api_token="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"
+ )
+
+ # build prompt template for simple question-answering
+ template = """Question: {question}
+
+ Answer: """
+ prompt = PromptTemplate(template=template, input_variables=["question"])
+
+ llm_chain = LLMChain(
+     prompt=prompt,
+     llm=flan_t5
+ )
+
+ question = "Which NFL team won the Super Bowl in the 2010 season?"
+
+ print(llm_chain.run(question))
demo1.py ADDED
@@ -0,0 +1,91 @@
+ import gradio as gr
+ from langchain.agents import initialize_agent
+ # from langchain.llms import OpenAI
+ # from langchain.chat_models import ChatOpenAI
+
+ from langchain.tools import BaseTool, StructuredTool, Tool, tool
+ from PIL import Image
+ from demotool import *
+ from loader import *
+ from llmLoader import *
+ import re
+ from gradio_tools.tools import (StableDiffusionTool, ImageCaptioningTool, StableDiffusionPromptGeneratorTool,
+                                 TextToVideoTool)
+
+ from langchain.memory import ConversationBufferMemory
+ # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+ from langchain import PromptTemplate, HuggingFaceHub, LLMChain
+
+ # def init_model_config():
+ #     llm = ChatLLM()
+ #     llm.model_type = 'chatglm'
+ #     llm.model_name_or_path = llm_model_dict['chatglm'][
+ #         'ChatGLM-6B-int4']
+ #     llm.load_llm()
+ #     return llm
+
+ # initialize HF LLM
+ # flan_t5 = HuggingFaceHub(
+ #     repo_id="google/flan-t5-xl",
+ #     model_kwargs={"temperature": 1e-10},
+ #     huggingfacehub_api_token="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"
+ # )
+
+
+ # llm = ChatOpenAI(openai_api_key="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", temperature=0, streaming=True, callbacks=[StreamingStdOutCallbackHandler()])
+
+ chatLLMm = ModelLoader()
+ chatLLMm.load_model()
+ memory = ConversationBufferMemory(memory_key="chat_history")
+
+
+ # tools = [Text2Image()]
+
+ # tools = [Tool.from_function(
+ #     func=search,
+ #     name="Search",
+ #     description="useful for when you need to answer questions about current events"
+ # )]
+
+ # tools = [Tool.from_function(
+ #     func=optimizationProblem,
+ #     name="optimizationProblem",
+ #     description="you must use this tool when you need to add more information"
+ # )]
+
+
+ # tools = [StableDiffusionPromptGeneratorTool().langchain]
+
+ tools = []
+
+
+ agent = initialize_agent(tools, chatLLMm, memory=memory, agent="conversational-react-description", verbose=True)
+
+ def run_text(text, state):
+     # print("stat:" + text)
+     # res = llm_chain.run(text)
+     # print("res:" + res)
+     res = agent.run(input=text)
+     response = re.sub(r'(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res)
+     state = state + [(text, response)]
+     return state, state
+
+ with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
+     chatbot = gr.Chatbot(elem_id="chatbot", show_label=False)
+     state = gr.State([])
+     with gr.Row() as input_raws:
+         with gr.Column(scale=0.6):
+             txt = gr.Textbox(show_label=False).style(container=False)
+         with gr.Column(scale=0.20, min_width=0):
+             run = gr.Button("🏃‍♂️Run")
+         with gr.Column(scale=0.20, min_width=0):
+             clear = gr.Button("🔄Clear️")
+
+     txt.submit(run_text, [txt, state], [chatbot, state])
+     txt.submit(lambda: "", None, txt)
+     run.click(run_text, [txt, state], [chatbot, state])
+
+ demo.queue(concurrency_count=10).launch(server_name="0.0.0.0", server_port=7865)
demotool.py ADDED
@@ -0,0 +1,67 @@
+ from langchain.tools import BaseTool, StructuredTool, Tool, tool
+ from typing import Optional, Type
+ from langchain.callbacks.manager import AsyncCallbackManagerForToolRun, CallbackManagerForToolRun
+ import requests
+ import base64
+ import os
+ import uuid
+ from PIL import Image, ImageOps, ImageDraw, ImageFont
+
+ def optimizationProblem(query):
+     query = query + " What's the date today?"
+     return query
+
+ class CustomWeatherTool(BaseTool):
+     name = "weather"
+     description = "useful for looking up the weather; the input to this tool should be a city"
+
+     def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
+         return "The weather in " + query
+
+     async def _arun(self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None) -> str:
+         """Use the tool asynchronously."""
+         raise NotImplementedError("custom_search does not support async")
+
+
+ class Text2Image(BaseTool):
+     name = "Generate Image From User Input Text"
+     description = "useful when you want to generate an image from a user input text and save it to a file. like: generate an image of an object or something, or generate an image that includes some objects. The input to this tool should be a string, representing the text used to generate image."
+     return_direct = True
+
+     def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
+         url = "http://region-9.seetacloud.com:39487/sdapi/v1/txt2img"
+         body = {
+             "negative_prompt": "",
+             "width": "900",
+             "prompt": query,
+             "steps": "30",
+             "cfg_scale": "8",
+             "height": "900"
+         }
+
+         image_filename = ""  # fall back to an empty path if the request fails
+         try:
+             result = requests.post(url, json=body, stream=True)
+             result.raise_for_status()  # Raise an exception if request was not successful
+             response_data = result.json()
+             images_json = response_data["images"]
+             if len(images_json) > 0:
+                 image_data = images_json[0].split(",", 1)[0]
+                 image_bytes = base64.b64decode(image_data)
+                 image_filename = os.path.join('image', f"{str(uuid.uuid4())[:8]}.png")
+                 with open(image_filename, "wb") as file:
+                     file.write(image_bytes)
+
+         except requests.exceptions.RequestException as e:
+             print("An error occurred:", e)
+
+         return image_filename
+
+     async def _arun(self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None) -> str:
+         """Use the tool asynchronously."""
+         raise NotImplementedError("custom_search does not support async")
+
+
+ @tool("search", return_direct=True)
+ def search_api(query: str) -> str:
+     """Searches the API for the query."""
+     return f"Results for query {query}"
llmLoader.py ADDED
@@ -0,0 +1,55 @@
+ from typing import List, Optional
+ from langchain.llms.base import LLM
+ import torch
+ from transformers import AutoModel, AutoTokenizer
+ from langchain.llms.utils import enforce_stop_tokens
+ from fastchat.conversation import (compute_skip_echo_len,
+                                    get_default_conv_template)
+
+
+ class ModelLoader(LLM):
+     tokenizer: object = None
+     model: object = None
+     max_token: int = 10000
+     temperature: float = 0.1
+     top_p = 0.9
+     history = []
+
+     def __init__(self):
+         super().__init__()
+
+     @property
+     def _llm_type(self) -> str:
+         return "ChatLLM"
+
+     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+         conv = get_default_conv_template("/DATA/gpt/lang/model_cache/THUDM/chatglm-6b-int8").copy()
+         conv.append_message(conv.roles[0], prompt)
+         conv.append_message(conv.roles[1], None)
+         prompt = conv.get_prompt()
+         inputs = self.tokenizer([prompt])
+         output_ids = self.model.generate(
+             torch.as_tensor(inputs.input_ids).cuda(),
+             do_sample=True,
+             temperature=self.temperature,
+             max_new_tokens=self.max_token,
+         )
+         outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
+         skip_echo_len = compute_skip_echo_len("/DATA/gpt/lang/model_cache/THUDM/chatglm-6b-int8", conv, prompt)
+         response = outputs[skip_echo_len:]
+         if stop is not None:
+             response = enforce_stop_tokens(response, stop)
+         self.history = [[None, response]]
+
+         return response
+
+     def load_model(self, model_name_or_path: str = "/DATA/gpt/lang/model_cache/THUDM/chatglm-6b-int8"):
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             "/DATA/gpt/mingpt-7b/MiniGPT-4-LLaMA-7B",
+             trust_remote_code=True
+         )
+         self.model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True)
+         self.model = self.model.eval()
+
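
ModelLoader exposes a locally cached ChatGLM checkpoint through the LangChain LLM interface: load_model builds the tokenizer and model, and _call wraps the prompt in a FastChat conversation template before generating. A minimal usage sketch, assuming the hard-coded /DATA/... checkpoints and a CUDA device are available on the host:

from llmLoader import ModelLoader

llm = ModelLoader()
llm.load_model()   # loads tokenizer and model from the hard-coded local paths
print(llm("Summarize what a vector database is in one sentence."))
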
loader.py ADDED
@@ -0,0 +1,171 @@
+ import os
+ from typing import Dict, List, Optional, Tuple, Union
+
+ import torch
+ from fastchat.conversation import (compute_skip_echo_len,
+                                    get_default_conv_template)
+ from fastchat.serve.inference import load_model as load_fastchat_model
+ from langchain.llms.base import LLM
+ from langchain.llms.utils import enforce_stop_tokens
+ from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer
+
+
+ MODEL_CACHE_PATH = os.path.join(os.path.dirname(__file__), 'model_cache')
+
+ llm_model_dict = {
+     "chatglm": {
+         "ChatGLM-6B": "THUDM/chatglm-6b",
+         "ChatGLM-6B-int4": "THUDM/chatglm-6b-int4",
+         "ChatGLM-6B-int8": "THUDM/chatglm-6b-int8",
+         "ChatGLM-6b-int4-qe": "THUDM/chatglm-6b-int4-qe"
+     },
+     "belle": {
+         "BELLE-LLaMA-Local": "/pretrainmodel/belle",
+     },
+     "vicuna": {
+         "Vicuna-Local": "/pretrainmodel/vicuna",
+     }
+ }
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+ DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+ DEVICE_ID = "0"
+ CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE
+
+
+ def torch_gc():
+     if torch.cuda.is_available():
+         with torch.cuda.device(CUDA_DEVICE):
+             torch.cuda.empty_cache()
+             torch.cuda.ipc_collect()
+
+
+ def auto_configure_device_map(num_gpus: int) -> Dict[str, int]:
+     num_trans_layers = 28
+     per_gpu_layers = 30 / num_gpus
+
+     device_map = {
+         'transformer.word_embeddings': 0,
+         'transformer.final_layernorm': 0,
+         'lm_head': 0
+     }
+
+     used = 2
+     gpu_target = 0
+     for i in range(num_trans_layers):
+         if used >= per_gpu_layers:
+             gpu_target += 1
+             used = 0
+         assert gpu_target < num_gpus
+         device_map[f'transformer.layers.{i}'] = gpu_target
+         used += 1
+
+     return device_map
+
+
+ class ChatLLM(LLM):
+     max_token: int = 10000
+     temperature: float = 0.1
+     top_p = 0.9
+     history = []
+     model_type: str = "chatglm"
+     model_name_or_path: str = "ChatGLM-6B-int4"
+     tokenizer: object = None
+     model: object = None
+
+     def __init__(self):
+         super().__init__()
+
+     @property
+     def _llm_type(self) -> str:
+         return "ChatLLM"
+
+     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+
+         if self.model_type == 'vicuna':
+             conv = get_default_conv_template(self.model_name_or_path).copy()
+             conv.append_message(conv.roles[0], prompt)
+             conv.append_message(conv.roles[1], None)
+             prompt = conv.get_prompt()
+             inputs = self.tokenizer([prompt])
+             output_ids = self.model.generate(
+                 torch.as_tensor(inputs.input_ids).cuda(),
+                 do_sample=True,
+                 temperature=self.temperature,
+                 max_new_tokens=self.max_token,
+             )
+             outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
+             skip_echo_len = compute_skip_echo_len(self.model_name_or_path, conv, prompt)
+             response = outputs[skip_echo_len:]
+             torch_gc()
+             if stop is not None:
+                 response = enforce_stop_tokens(response, stop)
+             self.history = [[None, response]]
+
+         elif self.model_type == 'belle':
+             prompt = "Human: " + prompt + " \n\nAssistant: "
+             input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids.to(DEVICE)
+             generate_ids = self.model.generate(input_ids, max_new_tokens=self.max_token, do_sample=True, top_k=30, top_p=self.top_p, temperature=self.temperature, repetition_penalty=1., eos_token_id=2, bos_token_id=1, pad_token_id=0)
+             output = self.tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+             response = output[len(prompt)+1:]
+             torch_gc()
+             if stop is not None:
+                 response = enforce_stop_tokens(response, stop)
+             self.history = [[None, response]]
+
+         elif self.model_type == 'chatglm':
+             response, _ = self.model.chat(
+                 self.tokenizer,
+                 prompt,
+                 history=self.history,
+                 max_length=self.max_token,
+                 temperature=self.temperature,
+             )
+             torch_gc()
+             if stop is not None:
+                 response = enforce_stop_tokens(response, stop)
+             self.history = self.history + [[None, response]]
+
+         return response
+
+     def load_llm(self,
+                  llm_device=DEVICE,
+                  num_gpus=torch.cuda.device_count(),
+                  device_map: Optional[Dict[str, int]] = None,
+                  **kwargs):
+         if 'chatglm' in self.model_name_or_path.lower():
+             self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path,
+                                                            trust_remote_code=True, cache_dir=os.path.join(MODEL_CACHE_PATH, self.model_name_or_path))
+             if torch.cuda.is_available() and llm_device.lower().startswith("cuda"):
+                 num_gpus = torch.cuda.device_count()
+                 if num_gpus < 2 and device_map is None:
+                     self.model = (AutoModel.from_pretrained(
+                         self.model_name_or_path, trust_remote_code=True, cache_dir=os.path.join(MODEL_CACHE_PATH, self.model_name_or_path),
+                         **kwargs).half().cuda())
+                 else:
+                     from accelerate import dispatch_model
+
+                     model = AutoModel.from_pretrained(self.model_name_or_path,
+                                                       trust_remote_code=True, cache_dir=os.path.join(MODEL_CACHE_PATH, self.model_name_or_path),
+                                                       **kwargs).half()
+
+                     if device_map is None:
+                         device_map = auto_configure_device_map(num_gpus)
+
+                     self.model = dispatch_model(model, device_map=device_map)
+             else:
+                 self.model = (AutoModel.from_pretrained(
+                     self.model_name_or_path,
+                     trust_remote_code=True, cache_dir=os.path.join(MODEL_CACHE_PATH, self.model_name_or_path)).float().to(llm_device))
+             self.model = self.model.eval()
+
+         else:
+             self.model, self.tokenizer = load_fastchat_model(
+                 model_path=self.model_name_or_path,
+                 device=llm_device,
+                 num_gpus=num_gpus
+             )
+
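
auto_configure_device_map places the embeddings, final layernorm, and lm_head on GPU 0 and then spreads the 28 ChatGLM transformer layers across the available GPUs; load_llm only uses it when more than one GPU is present and no explicit device_map is passed. A small sketch of the resulting map for a hypothetical two-GPU machine:

from loader import auto_configure_device_map

device_map = auto_configure_device_map(num_gpus=2)
print(device_map['transformer.word_embeddings'])   # 0
print(device_map['transformer.layers.0'])          # 0  (early layers stay on the first GPU)
print(device_map['transformer.layers.27'])         # 1  (later layers spill onto the second GPU)
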
singleton.py ADDED
@@ -0,0 +1,24 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """The singleton metaclass for ensuring only one instance of a class."""
+ import abc
+ from typing import Any
+
+
+ class Singleton(abc.ABCMeta, type):
+     """Singleton metaclass for ensuring only one instance of a class"""
+
+     _instances = {}
+
+     def __call__(cls, *args: Any, **kwargs: Any) -> Any:
+         """Call method for the singleton metaclass"""
+         if cls not in cls._instances:
+             cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
+         return cls._instances[cls]
+
+
+ class AbstractSingleton(abc.ABC, metaclass=Singleton):
+     """Abstract singleton class for ensuring only one instance of a class"""
+
+     pass
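
singleton.py is not referenced by the other uploaded files; it provides a metaclass so that any subclass of AbstractSingleton is constructed once and the same instance is returned on every later call. A short usage sketch with a hypothetical Config subclass:

from singleton import AbstractSingleton

class Config(AbstractSingleton):
    """Hypothetical settings holder; constructed only once."""
    def __init__(self):
        self.server_port = 7865

a = Config()
b = Config()
print(a is b)   # True: the metaclass returns the cached instance
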