MuntasirHossain committed
Commit a947bc1 · verified · 1 Parent(s): a9cc21f

Update app.py

Files changed (1)
  app.py  +15 -36
app.py CHANGED
@@ -2,9 +2,7 @@ import gradio as gr
 import os
 import requests
 from llama_cpp import Llama
-from transformers import AutoTokenizer
-import transformers
-import torch
+
 
 llm_name = "MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF"
 llm_path = os.path.basename(llm_name)
@@ -12,15 +10,9 @@ llm_path = os.path.basename(llm_name)
 # download gguf model
 def download_llms(llm_name):
     """Download GGUF model"""
-
     download_url = ""
     print("Downloading " + llm_name)
     download_url = "https://huggingface.co/MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF/resolve/main/Q4_K_M.gguf"
-
-    # elif selected_llm == 'microsoft/Phi-3-mini-4k-instruct':
-    #     download_url = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
-    # elif selected_llm == 'mistralai/Mistral-7B-Instruct-v0.2':
-    #     download_url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q2_K.gguf"
 
     if not os.path.exists("model"):
         os.makedirs("model")
@@ -43,42 +35,33 @@ def download_llms(llm_name):
     print(f"Model download completed {response.status_code}")
 
 # define model pipeline with llama-cpp
-def initialize_llm(llm_model):
+def initialize_llm(llm_model):
     model_path = ""
     if llm_model == llm_name:
         model_path = "model/Q4_K_M.gguf"
         download_llms(llm_model)
     llm = Llama(
         model_path=model_path,
-        # temperature=temperature,
-        # max_tokens=256,
-        # top_p=1,
-        # top_k= top_k,
-        n_ctx=1024,
+        n_ctx=1024, # input text context length, 0 = from model
         verbose=False
     )
     return llm
 
 llm = initialize_llm(llm_name)
 
-# format prompt as per the chat template on the official model page: https://huggingface.co/google/gemma-7b-it
+# format prompt as per the ChatML template. The model was fine-tuned with this chat template
 def format_prompt(input_text, history):
-    system_prompt = "You are a helpful AI assistant. You are truthful in your response."
+    system_prompt = """You are a helpful AI assistant. You are truthful in your response for real-world matters
+    but you are also creative for imaginative/fictional tasks."""
     prompt = ""
     if history:
         for previous_prompt, response in history:
             prompt += f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{previous_prompt}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>"
-            # <start_of_turn>user
-            # {previous_prompt}<end_of_turn>
-            # <start_of_turn>model
-            # {response}<end_of_turn>
     prompt += f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{input_text}<|im_end|>\n<|im_start|>assistant"
-    # <start_of_turn>user
-    # {input_text}<end_of_turn>
-    # <start_of_turn>model"""
     return prompt
 
-def generate(prompt, history, max_new_tokens=256): # temperature=0.95, top_p=0.9, repetition_penalty=1.0
+# generate llm response
+def generate(prompt, history, max_new_tokens=256): # temperature=0.95, top_p=0.9
     if not history:
         history = []
 
@@ -89,39 +72,35 @@ def generate(prompt, history, max_new_tokens=256): # temperature=0.95, top_p=0.9
         # temperature=temperature,
         max_tokens=max_new_tokens,
         # top_p=top_p,
-        # repetition_penalty=repetition_penalty,
-        # do_sample=True,
         stop=["<|im_end|>"]
     )
 
     formatted_prompt = format_prompt(prompt, history)
 
+    # generate a streaming response
     response = llm(formatted_prompt, **kwargs, stream=True)
     output = ""
     for chunk in response:
         output += chunk['choices'][0]['text']
         yield output
     return output
-    # output = ""
-    # for chunk in response:
-    #     output += chunk.token.text
-    #     yield output
-    # return output
-
+
+    # # generate response without streaming
     # response = llm(formatted_prompt, **kwargs)
     # return response['choices'][0]['text']
 
 chatbot = gr.Chatbot(height=500)
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
+with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink")) as demo:
     gr.HTML("<center><h1>Fine-tuned Meta-Llama-3-8B</h1><center>")
+    gr.Markdown("<b>This AI agent is using the MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF model for text-generation.</b>")
     gr.ChatInterface(
         generate,
         chatbot=chatbot,
         retry_btn=None,
         undo_btn=None,
         clear_btn="Clear",
-        description="This AI agent is using the MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF model for text-generation",
+        # description="This AI agent is using the MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF model for text-generation.",
         # additional_inputs=additional_inputs,
-        examples=[["Explain artificial intelligence in a few lines."]]
+        examples=[["What is a large language model?"], ["What is the meaning of life?"], ["Write a short fictional story about a planet named 'Orca'."]]
    )
 demo.queue().launch()
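Note: the diff elides the body of the download loop inside download_llms, since it is unchanged by this commit. For readers following along, a typical implementation consistent with the shown context (a sketch under that assumption, not the commit's actual code) would stream the file to disk with requests:

# Assumed sketch, not from the commit: stream the GGUF file into model/.
import os
import requests

download_url = "https://huggingface.co/MuntasirHossain/Meta-Llama-3-8B-OpenOrca-GGUF/resolve/main/Q4_K_M.gguf"
os.makedirs("model", exist_ok=True)
response = requests.get(download_url, stream=True)
with open("model/Q4_K_M.gguf", "wb") as f:
    for block in response.iter_content(chunk_size=8192):  # write in 8 KB chunks
        f.write(block)
print(f"Model download completed {response.status_code}")  # matches the diff's context line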
 
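On the template change: the commit removes the leftover Gemma-style comments and documents the ChatML markup that format_prompt emits. As a minimal sketch (illustrative strings, not part of the commit), the prompt built for a single user turn with empty history is:

# Sketch: mirrors what format_prompt returns for one turn with no history.
system_prompt = "You are a helpful AI assistant."  # abbreviated for illustration
input_text = "What is a large language model?"     # hypothetical user input
prompt = (
    f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
    f"<|im_start|>user\n{input_text}<|im_end|>\n"
    f"<|im_start|>assistant"
)

The stop=["<|im_end|>"] argument in generate pairs with this markup, so decoding halts once the assistant turn is closed.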
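Usage note: generate yields the accumulated response after every llama-cpp chunk, which is what lets gr.ChatInterface stream partial replies. A minimal sketch (an assumption, not part of the commit) of driving the generator outside Gradio:

# Sketch: consume the streaming generator directly
# (assumes app.py has been run, so generate and llm are defined).
for partial in generate("What is a large language model?", history=[]):
    print(partial)  # each yield is the full response so far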