File size: 6,299 Bytes
bb2872c
 
 
 
 
 
 
 
 
 
 
 
 
 
d148285
c62da2b
 
 
e495a0c
c62da2b
 
 
 
bb2872c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27a367c
bb2872c
f6842b2
 
bb2872c
f6842b2
bb2872c
16a199e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb2872c
 
 
 
27a367c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb2872c
 
 
c62da2b
 
 
 
 
 
 
 
bb2872c
 
 
 
 
 
 
 
 
 
16a199e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Bounds referenced by the (currently commented-out) "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024

# Longest prompt, in tokens, that is fed to the model; longer prompts are
# trimmed. The value can be overridden through the environment.
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Markdown rendered as the first component of the page (see the gr.Blocks
# layout at the bottom of this file). A CPU-only notice may be appended below.
DESCRIPTION = """\
# Machine Mindset [https://huggingface.co/FarReelAILab](https://huggingface.co/FarReelAILab)

Machine Mindset (MM) series models represent a groundbreaking collaboration between FarReel AI Lab, formerly known as the ChatLaw project, and Peking University's Deep Research Institute. These cutting-edge models cater to various MBTI personality types, offering extensive language capabilities in both Chinese and English. They are designed to provide users with a unique and insightful experience while interacting with AI.

## Limited Access(INTJ Model)

Due to the high computational costs involved in running these models, we regret that we can only open access to two publicly available spaces for testing. These spaces are dedicated to the INTJ model, both in Chinese and English. This limitation ensures that we can provide a high-quality and responsive experience to our users.

For a more comprehensive understanding of the Machine Mindset project and to explore detailed information about the INTJ model and its unique features, we invite you to visit our official GitHub website at [https://github.com/PKU-YuanGroup/Machine-Mindset](https://github.com/PKU-YuanGroup/Machine-Mindset).
"""

# Markdown rendered after the chat interface: licensing terms for the code
# and for the model weights.
LICENSE = """
---
* Our code adheres to the Apache 2.0 open-source license. Please refer to the [LICENSE](https://github.com/PKU-YuanGroup/Machine-Mindset/blob/main/LICENSE) for specific details of the open-source agreement.
* Our model weights are subject to an open-source agreement based on the original weights, with specific details provided in the Chinese version under the baichuan open-source license. For commercial use, please refer to [model_LICENSE](https://huggingface.co/JessyTsu1/Machine_Mindset_zh_INTP/resolve/main/Machine_Mindset%E5%9F%BA%E4%BA%8Ebaichuan%E7%9A%84%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) for further information.
* The English version follows the open-source agreement under the [llama2 license](https://ai.meta.com/resources/models-and-libraries/llama-downloads/).
"""

# The model and tokenizer are only loaded when a CUDA device is reported;
# otherwise the page description gets a notice that the demo cannot run.
if torch.cuda.is_available():
    model_id = "FarReelAILab/Machine_Mindset_en_INTJ"
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False
else:
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"


@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str = (
        "You are a person with distinctive personality traits. "
        "Your answers should reflect your personality either implicitly or explicitly."
    ),
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.95,
    top_p: float = 0.7,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
) -> Iterator[str]:
    """Stream the model's reply to ``message`` given the prior conversation.

    The conversation (optional system prompt, past turns, new user message)
    is rendered through the tokenizer's chat template. If the resulting
    prompt is longer than ``MAX_INPUT_TOKEN_LENGTH`` tokens, only the last
    ``MAX_INPUT_TOKEN_LENGTH`` tokens are kept and a UI warning is issued.
    Generation runs in a background thread and its output is consumed
    through a ``TextIteratorStreamer``.

    Args:
        message: The new user message.
        chat_history: Previous ``(user, assistant)`` turns, oldest first.
        system_prompt: System message placed first; skipped when empty.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        top_k: Top-k cutoff forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.

    Yields:
        The reply text accumulated so far, once per chunk produced by the
        streamer (each yielded value is a prefix of the next one).

    Raises:
        gr.Error: If no CUDA device is available. The model and tokenizer
            are only loaded at import time when CUDA is present.
    """
    # Without CUDA the module never defines `model` / `tokenizer`; fail with
    # a readable message instead of a NameError further down.
    if not torch.cuda.is_available():
        raise gr.Error("This demo does not work on CPU.")

    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep the tail so the most recent turns survive the trim.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "top_p": top_p,
        "top_k": top_k,
        "temperature": temperature,
        "num_beams": 1,
        "repetition_penalty": repetition_penalty,
    }
    # model.generate blocks until completion, so it runs in its own thread
    # while this generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    # Grow one running string rather than re-joining every chunk each time.
    reply = ""
    for text in streamer:
        reply += text
        yield reply


# Sample prompts passed to the chat interface; each becomes a one-field example row.
_EXAMPLE_PROMPTS = (
    "What is your MBTI personality type?",
    "What kind of birthday gift do you hope to receive?",
    "What is the meaning of life?",
    "How do you perceive death?",
    "I'm feeling very sad due to the loss of a loved one, what should I do?",
    "What type of work do you think people should pursue?",
    "How should one choose between career and love?",
    "What do you enjoy doing on weekends?",
)

chat_interface = gr.ChatInterface(
    fn=generate,
    # No additional inputs are wired in, so `generate` presumably runs with
    # its keyword defaults for everything but the message and history.
    # Disabled controls, kept for reference:
    #   gr.Textbox(label="System prompt", lines=6)
    #   gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
    #   gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
    #   gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
    #   gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
    #   gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)
    additional_inputs=[],
    stop_btn=None,
    examples=[[prompt] for prompt in _EXAMPLE_PROMPTS],
)

# Page layout. Components are declared in this order: description, duplicate
# button, chat interface, license notes.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    # chat_interface was constructed outside this context; render() adds it here.
    chat_interface.render()
    gr.Markdown(LICENSE)

# Launch only when executed as a script; the queue is configured with max_size=20.
if __name__ == "__main__":
    demo.queue(max_size=20).launch()