from typing import Dict, List

from llama_cpp import Llama


class Model:
    """Common interface for the llama.cpp-backed chat models below."""

    def __init__(self):
        pass

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        """Run a completion on an already-formatted prompt string."""
        raise NotImplementedError

    def conv(self, msgs: List[Dict[str, str]]):
        """Format a list of {role, content} messages into a prompt string."""
        raise NotImplementedError

    def starttok(self, user: str):
        """Return the tokens that open a new turn for the given role."""
        raise NotImplementedError

    def close(self):
        pass
class Phi35RPMax(Model):
    modelname = "Phi35RPMax-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="ArliAI/Phi-3.5-mini-3.8B-ArliAI-RPMax-v1.1-GGUF",
            filename="ArliAI-RPMax-3.8B-v1.1-fp16.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]):
        # Phi-3.5 chat format: <|role|>\n{content}<|end|>
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str):
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
class Phi35(Model):
    modelname = "Phi35-IQ3_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
            filename="Phi-3.5-mini-instruct-IQ3_XS.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]):
        return "\n".join(f"<|{msg['role']}|>\n{msg['content']}<|end|>" for msg in msgs)

    def starttok(self, user: str):
        return f"<|{user}|>\n"

    def close(self):
        self.llm.close()
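# Illustrative example of the Phi-3.5 prompt these helpers build (assuming
# the caller joins conv() and starttok() with a newline):
#   conv([{"role": "user", "content": "Hi"}]) -> "<|user|>\nHi<|end|>"
#   starttok("assistant")                     -> "<|assistant|>\n"
#   full prompt: "<|user|>\nHi<|end|>\n<|assistant|>\n"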
# TODO: Gemma2 requires accepting a license; maybe try it in the future,
# but it doesn't seem worth it right now.
# class Gemma2(Model):
#     modelname = "Gemma2-2b-it-GGUF"
#
#     def __init__(self):
#         self.llm = Llama.from_pretrained(
#             repo_id="google/gemma-2-2b-it-GGUF",
#             filename="2b_it_v2.gguf",
#         )
#
#     def __call__(self, msg: str, stop: List[str], max_tokens: int):
#         return self.llm(msg, stop=stop, max_tokens=max_tokens)
#
#     def conv(self, msgs: List[Dict[str, str]]):
#         # Skip messages formatmessage() drops (system messages return "").
#         return "\n".join(
#             s for s in (self.formatmessage(msg["content"], msg["role"]) for msg in msgs) if s
#         )
#
#     def formatmessage(self, msg: str, role: str):  # https://ai.google.dev/gemma/docs/formatting?hl=de
#         if role == "system":
#             # Gemma2 does not support system messages / isn't trained for them.
#             # TODO: turn them into assistant messages and test whether that
#             # improves the results.
#             return ""
#         if role == "assistant":
#             role = "model"
#         return f"<start_of_turn>{role}\n{msg}<end_of_turn>"
#
#     def starttok(self, user: str):
#         return f"<start_of_turn>{user}\n"
#
#     def close(self):
#         self.llm.close()
class Llama31uncensored(Model):
    modelname = "Llama31-uncensored-fp16"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF",
            filename="Llama-3.1-8B-Lexi-Uncensored_V2_F16.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]):
        # Llama 3.1 chat format: <|begin_of_text|> once at the start, then one
        # <|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|> per message.
        return "<|begin_of_text|>" + "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str):
        # <|begin_of_text|> is emitted once by conv(), not per turn.
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
class Llama31(Model):
    modelname = "Llama31-IQ4_XS"

    def __init__(self):
        self.llm = Llama.from_pretrained(
            repo_id="lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
            filename="Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf",
        )

    def __call__(self, msg: str, stop: List[str], max_tokens: int):
        return self.llm(msg, stop=stop, max_tokens=max_tokens)

    def conv(self, msgs: List[Dict[str, str]]):
        return "<|begin_of_text|>" + "\n".join(
            f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
            for msg in msgs
        )

    def starttok(self, user: str):
        # The Llama 3.1 format expects a blank line after the turn header.
        return f"<|start_header_id|>{user}<|end_header_id|>\n\n"

    def close(self):
        self.llm.close()
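# Illustrative example of the Llama 3.1 prompt format used above:
#   conv([{"role": "user", "content": "Hi"}])
#     -> "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|>"
#   starttok("assistant") -> "<|start_header_id|>assistant<|end_header_id|>\n\n"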
models = [Phi35RPMax, Phi35, Llama31uncensored, Llama31]
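# Minimal usage sketch (illustrative, not part of the app): the model choice,
# stop token, and max_tokens below are assumptions; the caller is expected to
# assemble the prompt from conv() and starttok() itself.
if __name__ == "__main__":
    model = Phi35()
    msgs = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ]
    prompt = model.conv(msgs) + "\n" + model.starttok("assistant")
    out = model(prompt, stop=["<|end|>"], max_tokens=64)
    # llama-cpp-python returns an OpenAI-style completion dict.
    print(out["choices"][0]["text"])
    model.close()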