eddyejembi committed on
Commit 48d7b1f · 1 Parent(s): a09366e

Create app.py

Files changed (1)
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
import torch
import gradio as gr
from transformers import AutoTokenizer, pipeline

model = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model, use_auth_token=True)

# Build the text-generation pipeline once at startup so every request
# reuses the loaded weights
llama_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

SYSTEM_PROMPT = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

"""

# Format the current message and chat history into the Llama 2 prompt template
def message_format(message: str, history: list, memory_limit: int = 5) -> str:
    # Always keep len(history) <= memory_limit
    if len(history) > memory_limit:
        history = history[-memory_limit:]

    if len(history) == 0:
        return SYSTEM_PROMPT + f"{message} [/INST]"

    # The first exchange continues the [INST] block opened by the system prompt
    formatted_message = SYSTEM_PROMPT + f"{history[0][0]} [/INST] {history[0][1]} </s>"

    # Handle the rest of the conversation history
    for user_msg, model_answer in history[1:]:
        formatted_message += f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"

    # Handle the current message
    formatted_message += f"<s>[INST] {message} [/INST]"

    return formatted_message

# Generate a response from the Llama model
def llama_response(message: str, history: list) -> str:
    query = message_format(message, history)

    sequences = llama_pipeline(
        query,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=1050,
    )

    generated_text = sequences[0]['generated_text']
    response = generated_text[len(query):]  # Remove the prompt from the output

    print("Chatbot:", response.strip())
    return response.strip()

gr.ChatInterface(llama_response).launch()
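
For reference, a minimal sketch (not part of the commit) of the prompt string message_format builds for one prior exchange. The history values here are hypothetical; the [[user, assistant], ...] shape matches what gr.ChatInterface passes to its callback, and message_format can be exercised without loading the model:

history = [["Hi", "Hello! How can I help you today?"]]
print(message_format("What is Llama 2?", history))

# Expected output (SYSTEM_PROMPT abbreviated):
# <s>[INST] <<SYS>>
# ...
# <</SYS>>
#
# Hi [/INST] Hello! How can I help you today? </s><s>[INST] What is Llama 2? [/INST]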