Llama3-philosophy-demo

Sleeping

App Files Files Community

ruggsea committed on Jan 13

Commit

2ec628c

1 Parent(s): 01ef28b

Updated to use ruggsea/Llama3.1-8B-SEP-Chat with multi-turn support

Browse files

Files changed (2) hide show

app.py +29 -23
requirements.txt +7 -8

app.py CHANGED Viewed

@@ -12,48 +12,49 @@ DEFAULT_MAX_NEW_TOKENS = 4000
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DESCRIPTION = """\
-# Llama-3 8B Stanford Encyclopedia of Philosophy QA
-This Space showcases the llama3-stanford-encyclopedia-philosophy-QA model from ruggsea, a fine-tuned version of the Meta-Llama-3-8B-Instruct model, specifically tailored for answering philosophical inquiries with a formal and informative tone. The model was meticulously trained using the Stanford Encyclopedia of Philosophy-instruct dataset and a carefully crafted system prompt, emulating the expertise of a university professor in philosophy.
-Feel free to interact with the model by asking philosophical questions and exploring its informative responses.
 """
 LICENSE = """
 <p/>
 ---
-As a derivate work of [Llama-3-8b-instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) by Meta,
-this demo is governed by the original [license](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/LICENSE) and [acceptable use policy](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/USE_POLICY.md).
 """
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 if torch.cuda.is_available():
-    model_id = "ruggsea/Llama3.1-Chat-stanford-encyclopedia-philosophy"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 1024,
-    temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
-    repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
@@ -64,7 +65,7 @@ def generate(
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        {"input_ids": input_ids},
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
@@ -82,12 +83,13 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
-        gr.Textbox(label="System prompt", lines=6,
-                   value="You are a Philosophy university professor. Answer questions in raw markdown format, no excessive newlines and no numbered paragraphs"
         ),
         gr.Slider(
             label="Max new tokens",
@@ -101,7 +103,7 @@ chat_interface = gr.ChatInterface(
             minimum=0.1,
             maximum=4.0,
             step=0.1,
-            value=0.6,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
@@ -122,22 +124,26 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
         ),
     ],
     stop_btn=None,
     examples=[
-        ["If you replace all the parts of a boat, is it still the same boat?"],
-        ["Can you explain briefly to me the difference between left and right hegelians?"],
-        ["Explain the Computational theory of mind"],
-        ["What is a justified true belief?"],
-        ["How does Wittgenstein define a 'language game'?"],
     ],
 )
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
     chat_interface.render()
     gr.Markdown(LICENSE)

 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DESCRIPTION = """\
+# Llama-3.1 8B Stanford Encyclopedia of Philosophy Chat
+This Space showcases the Llama3.1-8B-SEP-Chat model from ruggsea, a fine-tuned version of Meta's Llama 3.1 8B model, specifically tailored for philosophical discussions with a formal and informative tone. The model was trained using the Stanford Encyclopedia of Philosophy dataset and carefully crafted prompts.
+Feel free to engage in philosophical discussions and ask questions. The model supports multi-turn conversations and will maintain context.
 """
 LICENSE = """
 <p/>
 ---
+As a derivative work of Llama 3.1, this demo is governed by the original Meta license and acceptable use policy.
 """
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+# Initialize model and tokenizer
 if torch.cuda.is_available():
+    model_id = "ruggsea/Llama3.1-8B-SEP-Chat"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 1024,
+    temperature: float = 0.7,
     top_p: float = 0.9,
     top_k: int = 50,
+    repetition_penalty: float = 1.1,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
+        conversation.extend([
+            {"role": "user", "content": user},
+            {"role": "assistant", "content": assistant}
+        ])
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
+        input_ids=input_ids,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         outputs.append(text)
         yield "".join(outputs)
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
+        gr.Textbox(
+            label="System prompt",
+            lines=6,
+            value="You are a knowledgeable philosophy professor. Provide clear, accurate responses using markdown formatting. Focus on philosophical concepts and maintain academic rigor while being accessible."
         ),
         gr.Slider(
             label="Max new tokens",
             minimum=0.1,
             maximum=4.0,
             step=0.1,
+            value=0.7,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=1.0,
             maximum=2.0,
             step=0.05,
+            value=1.1,
         ),
     ],
     stop_btn=None,
     examples=[
+        ["What is the trolley problem and what are its main ethical implications?"],
+        ["Can you explain Plato's Theory of Forms?"],
+        ["What is the difference between analytic and continental philosophy?"],
+        ["How does Kant's Categorical Imperative work?"],
+        ["What is the problem of consciousness in philosophy of mind?"],
     ],
+    title="Philosophy Chat with Llama 3.1",
 )
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(
+        value="Duplicate Space for private use",
+        elem_id="duplicate-button"
+    )
     chat_interface.render()
     gr.Markdown(LICENSE)

requirements.txt CHANGED Viewed

@@ -1,8 +1,7 @@
-accelerate==0.28.0
-bitsandbytes==0.43.0
-gradio==4.26.0
-scipy==1.12.0
-sentencepiece==0.1.99
-spaces==0.26.0
-torch==2.0.0
-transformers==4.39.3

+gradio>=4.0.0
+torch
+transformers>=4.37.0
+accelerate
+bitsandbytes>=0.41.0
+scipy
+sentencepiece