from typing import Dict, List

from llama_cpp import Llama


class Model:
    """Base class for llama.cpp-backed models, each with its own chat template."""

    modelname = "Model"  # overridden by each subclass

    def __init__(self):
        pass

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        """Run a completion on an already formatted prompt string."""
        raise NotImplementedError

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        """Format a list of {'role': ..., 'content': ...} messages as a prompt."""
        raise NotImplementedError

    def starttok(self, user: str) -> str:
        """Return the header that opens a new turn for the given role."""
        raise NotImplementedError

    def close(self):
        pass

class Phi35RPMax(Model):
    modelname = "Phi35RPMax-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-GGUF",
            filename="ArliAI-RPMax-3.8B-v1.1-fp16.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Phi-3.5 chat template: <|role|>\n{content}<|end|>
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str) -> str:
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()

class Phi35(Model):
    modelname = "Phi35-IQ3_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
            filename="Phi-3.5-mini-instruct-IQ3_XS.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Same Phi-3.5 chat template as above.
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str) -> str:
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()

# TODO: Gemma 2 requires accepting Google's license; maybe try it in the future,
# but it doesn't seem worth it right now.
# class Gemma2(Model):
#     modelname = "Gemma2-2b-it-GGUF"
#
#     def __init__(self):
#         self.llm = Llama.from_pretrained(
#             repo_id="google/gemma-2-2b-it-GGUF",
#             filename="2b_it_v2.gguf",
#         )
#
#     def __call__(self, msg: str, stop: List[str], max_tokens: int):
#         return self.llm(msg, stop=stop, max_tokens=max_tokens)
#
#     def conv(self, msgs: List[Dict[str, str]]) -> str:
#         # https://ai.google.dev/gemma/docs/formatting?hl=de
#         return "\n".join(self.formatmessage(msg["content"], msg["role"]) for msg in msgs)
#
#     def formatmessage(self, msg: str, role: str) -> str:
#         # https://ai.google.dev/gemma/docs/formatting?hl=de
#         if role == "system":
#             # Gemma 2 does not support system messages / isn't trained for them.
#             # TODO: turn them into assistant messages and test whether that improves results.
#             return ""
#         if role == "assistant":
#             role = "model"
#         return f"<start_of_turn>{role}\n{msg}<end_of_turn>"
#
#     def starttok(self, user: str) -> str:
#         return f"<start_of_turn>{user}\n"
#
#     def close(self):
#         self.llm.close()

class Llama31uncensored(Model):
    modelname = "Llama31-uncensored-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF",
            filename="Llama-3.1-8B-Lexi-Uncensored_V2_F16.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        # Llama 3.1 chat template. Note: strictly, <|begin_of_text|> belongs only
        # once at the very start of the prompt; here it is repeated per message.
        return "\n".join(
            f"<|begin_of_text|><|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str) -> str:
        return f"<|begin_of_text|><|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()

class Llama31(Model):
    modelname = "Llama31-IQ4_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
            filename="Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]) -> str:
        return "\n".join(
            f"<|begin_of_text|><|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str) -> str:
        # The "\n\n" after the header matches the Llama 3.1 template (and the
        # class above); it was missing here.
        return f"<|begin_of_text|><|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()

# Registry of all usable model wrappers.
models = [Phi35RPMax, Phi35, Llama31uncensored, Llama31]
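
# Minimal usage sketch. Assumptions not shown in this file: callers build the
# prompt as conv(history) plus starttok("assistant") and stop on the template's
# end-of-turn token ("<|end|>" for the Phi-3.5 format).
if __name__ == "__main__":
    model = Phi35()  # smallest quant here; downloads the GGUF on first use
    try:
        history = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Say hello in one short sentence."},
        ]
        prompt = model.conv(history) + "\n" + model.starttok("assistant")
        out = model(prompt, stop=["<|end|>"], max_tokens=64)
        print(out["choices"][0]["text"])
    finally:
        model.close()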