radce/Llama-3.2-3B-ru · Hugging Face

Модель намного лучше отвечает, если дать ей любую инструкцию, хотя бы `Ты полезный чат-бот`.

Модель умеет запускать ipython, вызывать функции и немного рассуждать, как o1 (это пока плохо получается). Также она обучена говорить на русском.

Пример чата 1:

<|begin_of_text|><|eot_id|><|start_header_id|>system<|end_header_id|>

Ты можешь запускать ipython.
Ты можешь блокировать ПК, используя функцию lock_pc()<|eot_id|><|start_header_id|>user<|end_header_id|>

Заблокируй ПК<|eot_id|><|start_header_id|>assistant<|end_header_id|>

<|python_tag|>lock_pc()<|eom_id|><|start_header_id|>tool<|end_header_id|>

ПК успешно заблокирован.<|eom_id|><|start_header_id|>assistant<|end_header_id|>

Покажу сообщение об успешной блокировки ПК.<|eot_id|>

Пример чата 2:

<|begin_of_text|><|eot_id|><|start_header_id|>system<|end_header_id|>

Ты можешь запускать ipython.<|eot_id|><|start_header_id|>user<|end_header_id|>

Сколько будет 7 в кубе?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

<|python_tag|>7 ** 3<|eom_id|><|start_header_id|>tool<|end_header_id|>

343<|eom_id|><|start_header_id|>assistant<|end_header_id|>

7 в кубе равно 343.<|eot_id|>

Токен <|eot_id|> — конец ответа. Токен <|eom_id|> — конец сообщения: после него модель будет использовать ipython (или писать сообщение, если сейчас выполнялся ipython).

В поле tool записывается вывод кода из поля ipython.

Код для запуска:

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch.nn.functional as F
import torch
from transformers import logging
import warnings
from colorama import Fore, Style, init
import time
import datetime
import locale
import io
from contextlib import redirect_stdout
import ctypes
# Console setup: colorama is initialised with autoreset, and Python warnings
# plus transformers log messages are silenced to keep the chat output clean.
init(autoreset=True)
warnings.filterwarnings("ignore")
logging.set_verbosity_error()

# A Russian LC_TIME locale is wanted so that strftime('%A') (used by gen() for
# the <|DAY_OF_WEEK|> placeholder) yields a Russian weekday name.
# 'Russian_Russia.1251' is a Windows-only locale name, so POSIX spellings are
# tried as fallbacks; if none is installed the system default is kept instead
# of crashing at start-up.
for _locale_name in ('Russian_Russia.1251', 'ru_RU.UTF-8', 'ru_RU.utf8', 'ru_RU'):
    try:
        locale.setlocale(locale.LC_TIME, _locale_name)
    except locale.Error:
        continue
    break
else:
    print("Не удалось установить русскую локаль; день недели будет выведен на языке системы.")

# Prefer the GPU when CUDA is available; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the checkpoint, convert the weights to float16 and move them to the device.
name = "radce/Llama-3.2-3B-ru"
model = AutoModelForCausalLM.from_pretrained(name).half().to(device)
tokenizer = AutoTokenizer.from_pretrained(name)
model.eval()  # switch the module to evaluation mode

# Report the model size once at start-up.
total_params = 0
for _param in model.parameters():
    total_params += _param.numel()
print(f"Количество параметров в модели: {total_params:,}")

def execute_ipython_code(code: str, functions: list = None):
    """Run a code snippet IPython-style and return its textual result.

    All statements are executed; if the final statement is an expression, its
    value is evaluated separately and appended to whatever the snippet printed.

    Args:
        code: Python source produced by the model.
        functions: Callables exposed to the snippet under their ``__name__``.

    Returns:
        ``captured_stdout + str(value_of_last_expression)`` (the value is
        ``None`` when the snippet does not end with an expression), an error
        message string if the snippet fails to parse or raises, or ``None``
        when the snippet ends with a dangling ``'''`` line.

    NOTE(security): this runs arbitrary model-generated code via exec/eval in
    the current process with no sandboxing.
    """
    import ast  # local import keeps this block self-contained

    # One shared namespace for globals and locals, so functions and
    # comprehensions defined by the snippet can see its variables.
    namespace = {func.__name__: func for func in (functions or [])}

    lines = code.strip().split('\n')
    # A trailing ''' line is treated as "no value to return": the lines before
    # it are still executed, but the call yields None.
    dangling_quote = lines[-1].strip() == "'''"
    if dangling_quote:
        lines = lines[:-1]
    source = '\n'.join(lines)

    captured = io.StringIO()
    result = None
    try:
        tree = ast.parse(source, mode='exec')
        final_expr = None
        # Split off a trailing expression statement so its value can be returned.
        if not dangling_quote and tree.body and isinstance(tree.body[-1], ast.Expr):
            final_expr = ast.Expression(tree.body.pop().value)
        with redirect_stdout(captured):
            exec(compile(tree, '<ipython>', 'exec'), namespace)
            if final_expr is not None:
                result = eval(compile(final_expr, '<ipython>', 'eval'), namespace)
    except Exception as e:
        # Report the failure back to the model instead of silently returning "None".
        return f"Произошла ошибка при выполнении кода:\n```\n{e}\n```"

    if dangling_quote:
        return None

    # Printed output followed by the value of the last expression.
    return captured.getvalue() + str(result)
    

def gen(text: str, function_call: list = [], verbose: bool = True, temperature: float = 0.3, top_k: int = 8, top_p: float = 0.9, max_new_tokens: int = 512, eos_token_id: list = [128001, 128008, 128009], max_length: int = (131072, float('inf'))[0], repetition_penalty: float = 2.1):
    """Stream a model reply token by token, running tool code when requested.

    The prompt is ``'<|begin_of_text|>' + text`` with the ``<|DATETIME|>`` and
    ``<|DAY_OF_WEEK|>`` placeholders substituted. Tokens are sampled one at a
    time with temperature, top-k, top-p and a repetition penalty. If the first
    generated token is ``<|python_tag|>``, the message is treated as ipython
    code: when it ends, the code is run through ``execute_ipython_code`` and
    its result is appended as a ``tool`` message before generation continues.

    Args:
        text: Prompt in the chat format (without ``<|begin_of_text|>``).
        function_call: Callables made available to executed code. Never
            mutated here, so the shared default list is harmless.
        verbose: When False, tokens of non-assistant (ipython) messages are
            not yielded.
        temperature: Logits are divided by this value when it is > 0.
        top_k: Keep only the k highest logits when > 0.
        top_p: Nucleus-sampling threshold, applied when < 1.0.
        max_new_tokens: Loop bound on tokens added after the prompt (injected
            tool output counts as well).
        eos_token_id: Token ids that terminate a message. 128008 is treated
            specially: it does not end an assistant message (presumably
            ``<|eom_id|>`` — confirm against the tokenizer).
        max_length: Hard cap on the total sequence length.
        repetition_penalty: Penalty applied to every token id already present
            in the sequence; 1.0 disables it.

    Yields:
        ``(token_text, False, probability)`` for each emitted token,
        ``('\\n', False, 1.0)`` after a tool call has been executed, and
        finally ``(full_decoded_sequence, True, 0)``.
    """
    with torch.no_grad():
        with torch.cuda.amp.autocast():
            text = text.replace('<|DATETIME|>', time.ctime())
            day_of_week = datetime.datetime.now().strftime('%A')
            text = text.replace('<|DAY_OF_WEEK|>', day_of_week)
            ids = tokenizer.encode('<|begin_of_text|>' + text, return_tensors="pt", add_special_tokens=False).to(device)
            len_ids = ids.size(1)

            type_message = "assistant"
            # NOTE(review): is_start is only True for the very first token; it is
            # not reset after a tool call — confirm this is intended.
            is_start = True
            while ids.size(1) - len_ids < max_new_tokens:
                find_type = False
                # NOTE(review): the whole sequence is re-encoded on every step; the
                # cache returned because of use_cache=True is not reused.
                outputs = model(ids, use_cache=True)
                logits = outputs.logits[:, -1, :]

                if ids[0, -1] == 128006: # <|start_header_id|>
                    # Right after a header start only a role token is allowed
                    # (presumably 78191 = "assistant"; 128011 unknown — TODO confirm).
                    allowed_tokens = torch.tensor([78191, 128011], device=logits.device)

                    # Remember the original scores of the allowed tokens
                    allowed_logits = logits[0, allowed_tokens]

                    # Mask everything out
                    logits = torch.full_like(logits, float('-inf'))

                    # Restore the scores of the allowed tokens
                    logits[0, allowed_tokens] = allowed_logits
                    find_type = True

                if is_start:
                    # Never let the reply start with an end-of-message token.
                    for token_id in eos_token_id:
                        logits[0, token_id] = float('-inf')

                if temperature > 0:
                    logits = logits / temperature

                if top_k > 0:
                    top_k_values, top_k_indices = torch.topk(logits, top_k, dim=-1)
                    logits = torch.full_like(logits, float('-inf'))
                    logits.scatter_(1, top_k_indices, top_k_values)

                if top_p < 1.0:
                    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                    cumulative_probs = torch.cumsum(F.softmax(sorted_logits.float(), dim=-1), dim=-1)  # computed in float32
                    sorted_indices_to_remove = cumulative_probs > top_p
                    # Shift right so the first token crossing the threshold is kept.
                    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                    sorted_indices_to_remove[..., 0] = 0
                    indices_to_remove = sorted_indices[sorted_indices_to_remove]
                    logits[0, indices_to_remove] = float('-inf')

                if repetition_penalty != 1.0:
                    # Sign-aware penalty: dividing a negative logit would make the
                    # token MORE likely, so negative scores are multiplied instead.
                    seen_tokens = torch.unique(ids)
                    seen_scores = logits[0, seen_tokens]
                    logits[0, seen_tokens] = torch.where(
                        seen_scores < 0,
                        seen_scores * repetition_penalty,
                        seen_scores / repetition_penalty,
                    )

                logits = F.softmax(logits.float(), dim=-1)  # probabilities, in float32

                if find_type:
                    # The role token is chosen greedily.
                    next_token_id = torch.argmax(logits, dim=1).unsqueeze(0)
                else:
                    next_token_id = torch.multinomial(logits, num_samples=1)


                if next_token_id.item() in eos_token_id:
                    if type_message == 'assistant' and next_token_id.item() != 128008:
                        break
                    elif type_message == 'ipython':
                        # End of a code message: run it and feed the result back as a tool message.
                        type_message = 'assistant'
                        code = tokenizer.decode(ids[0, len_ids:], skip_special_tokens=False).split('<|python_tag|>')[-1].split('<|eo')[0]
                        try:
                            result_code = execute_ipython_code(code, function_call)
                        except Exception as e:
                            result_code = f'Error: {e}'
                        ids = torch.cat([ids, tokenizer.encode('<|eom_id|><|start_header_id|>tool<|end_header_id|>\n\n' + str(result_code) + '<|eom_id|><|start_header_id|>', return_tensors="pt", add_special_tokens=False).to(device)], dim=-1)
                        yield '\n', False, 1.0
                        continue

                elif ids.size(1) > max_length:
                    break


                ids = torch.cat([ids, next_token_id], dim=-1)

                next_token = tokenizer.decode(next_token_id.item(), skip_special_tokens=False)

                if is_start:
                    is_start = False
                    if next_token == '<|python_tag|>':
                        type_message = 'ipython'


                if find_type:
                    # Close the header that was just opened; nothing is yielded for it.
                    next_token += '<|end_header_id|>\n\n'
                    ids = torch.cat([ids, tokenizer.encode('<|end_header_id|>\n\n', return_tensors="pt", add_special_tokens=False).to(device)], dim=-1)

                    continue


                if type_message != 'assistant':
                    # Code tokens are shown in green, or hidden when not verbose.
                    next_token = Fore.GREEN + next_token + Style.RESET_ALL
                    if not verbose:
                        continue


                yield next_token, False, logits[0, next_token_id.item()].item()
            yield tokenizer.decode(ids[0], skip_special_tokens=False), True, 0


def clear_last_lines(n=1):
    """Erase the last n lines of the terminal."""
    # "\033[F" moves the cursor up one line, "\033[K" clears that line.
    print("\033[F\033[K" * n, end='')

def pre(user_input: str, instruction: str = 'Вы полезный чат-бот.'):
    """Build the prompt for one user turn, ending with an open assistant header.

    A system message (with trailing whitespace stripped) is included only when
    ``instruction`` is non-empty.
    """
    pieces = []
    if instruction:
        pieces.append(f"<|eot_id|><|start_header_id|>system<|end_header_id|>\n\n{instruction.rstrip()}")
    pieces.append(f"<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n")
    return ''.join(pieces)

def preprocess_data(example):
    """Render a dataset record as chat-format text.

    The optional keys 'instruction', 'input' and 'output' become the system,
    user and assistant messages respectively; missing keys are skipped.
    """
    sections = (('instruction', 'system'), ('input', 'user'), ('output', 'assistant'))
    return ''.join(
        f"<|eot_id|><|start_header_id|>{role}<|end_header_id|>\n\n{example[key]}"
        for key, role in sections
        if key in example
    )


def get_color(prob):
    """Map a token probability to a colorama foreground colour.

    High probabilities map to green shades, medium to yellow, low to red and
    magenta.
    """
    # Bands whose lower bound is inclusive (prob >= bound), highest first.
    inclusive_bands = (
        (0.95, Fore.LIGHTGREEN_EX),
        (0.9, Fore.GREEN),
        (0.75, Fore.LIGHTYELLOW_EX),
        (0.5, Fore.YELLOW),
        (0.25, Fore.LIGHTRED_EX),
    )
    for bound, shade in inclusive_bands:
        if prob >= bound:
            return shade

    # The two lowest bands use a strict comparison (prob > bound).
    exclusive_bands = (
        (0.1, Fore.RED),
        (0.03, Fore.LIGHTMAGENTA_EX),
    )
    for bound, shade in exclusive_bands:
        if prob > bound:
            return shade

    return Fore.MAGENTA

def lock_pc():
    """Lock the workstation via user32.LockWorkStation and report the outcome.

    Returns a human-readable status string; any exception (for example when
    ``ctypes.WinDLL`` is unavailable) is reported in the string rather than raised.
    """
    try:
        status = ctypes.WinDLL('user32').LockWorkStation()
    except Exception as e:
        return f"Произошла ошибка: {e}"

    # A zero return value is treated as failure.
    if status == 0:
        return "Не удалось заблокировать ПК. Убедитесь, что вы имеете необходимые права."
    return "ПК успешно заблокирован."

# Make sure the instruction file exists; on first run create it with the
# default system instructions and tell the user where to edit them.
try:
    with open('data.txt'):
        pass
except FileNotFoundError:
    _default_instructions = 'Точное время: <|DATETIME|>\nДень недели: <|DAY_OF_WEEK|>\nТы можешь запускать ipython.\nТы можешь блокировать ПК, используя функцию lock_pc()'
    with open('data.txt', 'w', encoding='utf-8') as f:
        f.write(_default_instructions)
    print(Fore.YELLOW + "Инструкции для нейросети находятся в файле data.txt" + Style.RESET_ALL)

# Interactive chat loop: read a user message, build the prompt, stream the
# reply token by token, then print the full decoded conversation.
context = ''
while True:
    try:
        inp = input('>>> ')
        context = '' # Comment this line out if you want the conversation context to be kept.
        if inp == 'clear':
            context = ''
            continue
        if context == '':
            # First turn: the system instructions come from data.txt
            # (the file is closed right after reading).
            with open('data.txt', encoding='utf-8') as instruction_file:
                context += pre(inp, instruction=instruction_file.read())
        else:
            context += pre(inp)
        # The instructions must tell the model which functions it may call, e.g.
        # "You can generate an image with generate(text: str), text - image description."
        # prob is the sampled token's probability; get_color(prob) could be
        # prepended to the token to colour it by confidence.
        for c, fin, prob in gen(context, function_call=[lock_pc], verbose=True):
            if not fin:
                # Printing token by token may occasionally show "�" instead of a
                # letter when a character is split across tokens.
                print(c + Style.RESET_ALL, end='', flush=True)
            else:
                print(c) # Print the whole conversation once generation has finished.
                # c already holds the entire decoded conversation (prompt + reply),
                # so it replaces the context instead of being appended to it;
                # gen() adds <|begin_of_text|> itself, hence the prefix is dropped.
                if c.startswith('<|begin_of_text|>'):
                    c = c[len('<|begin_of_text|>'):]
                context = c
        print()
    except KeyboardInterrupt:
        print(Style.RESET_ALL + '\nГенерация прервана пользователем.')
    except EOFError:
        # stdin was closed: leave the loop instead of crashing with a traceback.
        break

Модель намного лучше отвечает, если дать ей любую инструкцию, хотя бы Ты полезный чат-бот.

Модель намного лучше отвечает, если дать ей любую инструкцию, хотя бы `Ты полезный чат-бот`.