ruggsea committed
Commit 2ec628c · 1 Parent(s): 01ef28b

Updated to use ruggsea/Llama3.1-8B-SEP-Chat with multi-turn support

Files changed (2)
  1. app.py +29 -23
  2. requirements.txt +7 -8
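
For reference, a minimal sketch of what the new multi-turn flow does with the chat history before generation. The (user, assistant) tuple format and the conversation assembly mirror the diff below; rendering the prompt with tokenize=False is purely an inspection aid and assumes the tokenizer ships a standard Llama 3.1 chat template.

```python
# Sketch only: mirrors the conversation assembly in generate() in the diff below.
# The tokenize=False rendering is an inspection aid, not part of the app.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ruggsea/Llama3.1-8B-SEP-Chat")

chat_history = [("What is epistemology?", "Epistemology is the branch of philosophy that studies knowledge.")]
conversation = [{"role": "system", "content": "You are a knowledgeable philosophy professor."}]
for user, assistant in chat_history:
    conversation.extend([
        {"role": "user", "content": user},
        {"role": "assistant", "content": assistant},
    ])
conversation.append({"role": "user", "content": "How does it differ from ontology?"})

# Render the full multi-turn prompt string the model will see on the next turn.
prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
print(prompt)
```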
app.py CHANGED
@@ -12,48 +12,49 @@ DEFAULT_MAX_NEW_TOKENS = 4000
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 DESCRIPTION = """\
-# Llama-3 8B Stanford Encyclopedia of Philosophy QA
-
-This Space showcases the llama3-stanford-encyclopedia-philosophy-QA model from ruggsea, a fine-tuned version of the Meta-Llama-3-8B-Instruct model, specifically tailored for answering philosophical inquiries with a formal and informative tone. The model was meticulously trained using the Stanford Encyclopedia of Philosophy-instruct dataset and a carefully crafted system prompt, emulating the expertise of a university professor in philosophy.
-
-Feel free to interact with the model by asking philosophical questions and exploring its informative responses.
+# Llama-3.1 8B Stanford Encyclopedia of Philosophy Chat
+
+This Space showcases the Llama3.1-8B-SEP-Chat model from ruggsea, a fine-tuned version of Meta's Llama 3.1 8B model, specifically tailored for philosophical discussions with a formal and informative tone. The model was trained using the Stanford Encyclopedia of Philosophy dataset and carefully crafted prompts.
+
+Feel free to engage in philosophical discussions and ask questions. The model supports multi-turn conversations and will maintain context.
 """
 
 LICENSE = """
 <p/>
 
 ---
-As a derivate work of [Llama-3-8b-instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) by Meta,
-this demo is governed by the original [license](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/LICENSE) and [acceptable use policy](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/USE_POLICY.md).
+As a derivative work of Llama 3.1, this demo is governed by the original Meta license and acceptable use policy.
 """
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-
+# Initialize model and tokenizer
 if torch.cuda.is_available():
-    model_id = "ruggsea/Llama3.1-Chat-stanford-encyclopedia-philosophy"
+    model_id = "ruggsea/Llama3.1-8B-SEP-Chat"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 
-
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 1024,
-    temperature: float = 0.6,
+    temperature: float = 0.7,
     top_p: float = 0.9,
     top_k: int = 50,
-    repetition_penalty: float = 1.2,
+    repetition_penalty: float = 1.1,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+        conversation.extend([
+            {"role": "user", "content": user},
+            {"role": "assistant", "content": assistant}
+        ])
     conversation.append({"role": "user", "content": message})
 
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
@@ -64,7 +65,7 @@ def generate(
 
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        {"input_ids": input_ids},
+        input_ids=input_ids,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
@@ -82,12 +83,13 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
 
-
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
-        gr.Textbox(label="System prompt", lines=6,
-            value="You are a Philosophy university professor. Answer questions in raw markdown format, no excessive newlines and no numbered paragraphs"
+        gr.Textbox(
+            label="System prompt",
+            lines=6,
+            value="You are a knowledgeable philosophy professor. Provide clear, accurate responses using markdown formatting. Focus on philosophical concepts and maintain academic rigor while being accessible."
         ),
         gr.Slider(
             label="Max new tokens",
@@ -101,7 +103,7 @@ chat_interface = gr.ChatInterface(
             minimum=0.1,
             maximum=4.0,
             step=0.1,
-            value=0.6,
+            value=0.7,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
@@ -122,22 +124,26 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
+            value=1.1,
         ),
     ],
     stop_btn=None,
     examples=[
-        ["If you replace all the parts of a boat, is it still the same boat?"],
-        ["Can you explain briefly to me the difference between left and right hegelians?"],
-        ["Explain the Computational theory of mind"],
-        ["What is a justified true belief?"],
-        ["How does Wittgenstein define a 'language game'?"],
+        ["What is the trolley problem and what are its main ethical implications?"],
+        ["Can you explain Plato's Theory of Forms?"],
+        ["What is the difference between analytic and continental philosophy?"],
+        ["How does Kant's Categorical Imperative work?"],
+        ["What is the problem of consciousness in philosophy of mind?"],
     ],
+    title="Philosophy Chat with Llama 3.1",
 )
 
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+    gr.DuplicateButton(
+        value="Duplicate Space for private use",
+        elem_id="duplicate-button"
+    )
     chat_interface.render()
     gr.Markdown(LICENSE)
 
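The hunks above elide the body of generate() that actually drives the model. Based on the streamer and generate_kwargs shown, it presumably follows the standard TextIteratorStreamer pattern: model.generate runs on a background thread while the handler yields the accumulating text. A minimal reconstruction under that assumption (the input trimming against MAX_INPUT_TOKEN_LENGTH and the Thread wiring are not shown in the diff):

```python
# Hedged reconstruction of the elided streaming body of generate().
# The Thread wiring and input trimming are assumptions based on the common
# Hugging Face Space pattern; only the streamer and generate_kwargs names
# are taken from the diff.
from threading import Thread
from transformers import TextIteratorStreamer

MAX_INPUT_TOKEN_LENGTH = 4096  # assumption: mirrors the env-var default above

def stream_generate(model, tokenizer, input_ids, **sampling_kwargs):
    # Trim the prompt to the input budget, keeping the most recent tokens.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(input_ids=input_ids, streamer=streamer, **sampling_kwargs)
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    # Yield the growing string so Gradio can render partial output as it streams.
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
```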
requirements.txt CHANGED
@@ -1,8 +1,7 @@
-accelerate==0.28.0
-bitsandbytes==0.43.0
-gradio==4.26.0
-scipy==1.12.0
-sentencepiece==0.1.99
-spaces==0.26.0
-torch==2.0.0
-transformers==4.39.3
+gradio>=4.0.0
+torch
+transformers>=4.37.0
+accelerate
+bitsandbytes>=0.41.0
+scipy
+sentencepiece
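
One note on the loosened pins: app.py still passes load_in_4bit=True straight to from_pretrained, which recent transformers releases accept but deprecate in favor of an explicit BitsAndBytesConfig. A sketch of the equivalent explicit form (the compute dtype is a common choice, not something the diff specifies):

```python
# Equivalent explicit 4-bit load; recent transformers versions prefer
# quantization_config over the bare load_in_4bit=True kwarg.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption: common choice, not from the diff
)
model = AutoModelForCausalLM.from_pretrained(
    "ruggsea/Llama3.1-8B-SEP-Chat",
    device_map="auto",
    quantization_config=quant_config,
)
```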