tevykuch committed
Commit e7e5617 · verified · 1 Parent(s): 2f670d7

Update app.py

Files changed (1)
  1. app.py +23 -22
app.py CHANGED
@@ -1,4 +1,4 @@
- from ctransformers import AutoModelForCausalLM
  import gradio as gr

  sys_message = """
@@ -6,28 +6,29 @@ This model can generate untruths, lies or inappropriate things. Only for testing
  """


- llm = AutoModelForCausalLM.from_pretrained("tevykuch/sftsl0th",
-     model_type='mistral',
-     max_new_tokens = 2048,
-     threads = 2,
-     temperature = 0.50,
-     top_p = 0.95,
-     top_k = 30,
-     repetition_penalty = 1.1,
-     stop=['### Instruction:']
- )

- def stream(prompt, UL):
-     system_prompt = 'You are a helpful chatbot. You only answer in Khmer. User is based in Cambodia. Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.'
-     E_INST = " "
-     system, user, assistant = "###Instruction", "###Input", "###Response"
-     prompt = f"{system}\n{system_prompt}{E_INST}\n{user}\n{prompt.strip()}{E_INST}\n{assistant}\n"
-
-     output = ""
-     for response in llm(prompt, stream=True):
-         output += response
-         yield output
-     return output


  chat_interface = gr.ChatInterface(
 
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  import gradio as gr

  sys_message = """

  """


+ # Configuration settings for model generation (example)
+ generation_config = {
+     "max_new_tokens": 2048,
+     "do_sample": True,  # sampling must be enabled for temperature/top_p/top_k to take effect
+     "temperature": 0.50,
+     "top_p": 0.95,
+     "top_k": 30,
+     "repetition_penalty": 1.1,
+     # Note: the '### Instruction:' stop sequence from the ctransformers version is not a
+     # generate() keyword argument, so it is not passed here.
+ }
+
+ llm = AutoModelForCausalLM.from_pretrained("tevykuch/sftsl0th")
+ # llm = AutoModelForCausalLM.from_pretrained("tevykuch/sl0th", hf=True)
+ tokenizer = AutoTokenizer.from_pretrained("tevykuch/sftsl0th")  # load the tokenizer from the model id, not the model object
+
+ def stream(prompt, history):
+     # Tokenize the prompt (gr.ChatInterface passes the user message and the chat history)
+     inputs = tokenizer.encode(prompt, return_tensors="pt")
+     # Generate a response (despite the name, this returns the full text rather than streaming tokens)
+     output_ids = llm.generate(inputs, **generation_config)
+     # Decode the generated ids to a string
+     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     return response


  chat_interface = gr.ChatInterface(