ysdede committed
Commit a928ce7 · 1 Parent(s): 012c0fa

Update default inference parameters in app.py


- Increased max_new_tokens to 2048
- Adjusted repetition_penalty to 1.0
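
For context, the imports visible in the diff (AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer) match the stock Hugging Face chat-template Space, where these defaults are forwarded directly into `model.generate` on a background thread while a `TextIteratorStreamer` yields text back to the UI. A minimal sketch of that pattern, assuming that layout — the checkpoint id below is a placeholder, not taken from this repo:

```python
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hypothetical model id; the actual checkpoint is defined elsewhere in app.py.
tokenizer = AutoTokenizer.from_pretrained("my-org/my-model")
model = AutoModelForCausalLM.from_pretrained("my-org/my-model")

input_ids = tokenizer("Hello", return_tensors="pt").input_ids
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# The changed defaults from this commit: max_new_tokens=2048 and
# repetition_penalty=1.0 (i.e. no repetition penalty applied).
generate_kwargs = dict(
    input_ids=input_ids,
    streamer=streamer,
    max_new_tokens=2048,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.0,
)

# generate() runs on a worker thread so the streamer can be consumed as tokens arrive.
Thread(target=model.generate, kwargs=generate_kwargs).start()
for text in streamer:
    print(text, end="", flush=True)
```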

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -8,7 +8,7 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
+DEFAULT_MAX_NEW_TOKENS = 8192
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 DESCRIPTION = """\
@@ -56,11 +56,11 @@ def generate(
     message: str,
     chat_history: list[dict],
     system_prompt: str = "",
-    max_new_tokens: int = 1024,
+    max_new_tokens: int = 2048,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
-    repetition_penalty: float = 1.2,
+    repetition_penalty: float = 1.0,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
@@ -141,7 +141,7 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.2,
+            value=1.0,
         ),
     ],
     stop_btn=None,
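
In transformers, a repetition penalty of exactly 1.0 leaves the logits unchanged, so the new default effectively disables the penalty rather than merely weakening it. A small standalone check with `RepetitionPenaltyLogitsProcessor` illustrates this; the toy tensors here are made up purely for illustration:

```python
import torch
from transformers import RepetitionPenaltyLogitsProcessor

# Token 5 has already been generated twice in this toy prefix.
input_ids = torch.tensor([[5, 5, 7]])
scores = torch.tensor([[0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0]])

no_penalty = RepetitionPenaltyLogitsProcessor(penalty=1.0)(input_ids, scores.clone())
with_penalty = RepetitionPenaltyLogitsProcessor(penalty=1.2)(input_ids, scores.clone())

print(no_penalty[0, 5])    # 2.0    -> unchanged: penalty=1.0 is a no-op
print(with_penalty[0, 5])  # ~1.667 -> positive logit divided by the penalty
```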