CineAI
/

Llama32-3B-CoT

text-generation-inference

Model card Files Files and versions Community

CineAI committed on Jan 13

Commit

84d7a8d

·

verified ·

1 Parent(s): 5697cd9

Update README.md

Files changed (1) hide show

README.md +19 -21

README.md CHANGED Viewed

@@ -163,27 +163,25 @@ datasets:
   You can use it with a script
-  if do_run_lora_model:
-    if is_own:
-      model, tokenizer = FastLanguageModel.from_pretrained(
-          model_name="CineAI/Llama32-3B-CoT",
-          max_seq_length=max_length,
-          dtype=dtype,
-          load_in_4bit=load_in_4bit
-      )
-      FastLanguageModel.for_inference(model)
-    inputs = tokenizer.apply_chat_template(
-        message,
-        tokenize = True,
-        add_generation_prompt = True, # Must add for generation
-        return_tensors = "pt",
-    ).to(device)
-    text_streamer = TextStreamer(tokenizer, skip_prompt = True)
-    _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = max_new_tokens,
-                      use_cache = True, temperature = temperature, min_p = min_p)
 # Uploaded  model

   You can use it with a script
+  model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name="CineAI/Llama32-3B-CoT",
+        max_seq_length=max_length,
+        dtype=dtype,
+        load_in_4bit=load_in_4bit
+    )
+  FastLanguageModel.for_inference(model)
+  inputs = tokenizer.apply_chat_template(
+      message,
+      tokenize = True,
+      add_generation_prompt = True, # Must add for generation
+      return_tensors = "pt",
+  ).to(device)
+  text_streamer = TextStreamer(tokenizer, skip_prompt = True)
+  _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = max_new_tokens,
+                    use_cache = True, temperature = temperature, min_p = min_p)
 # Uploaded  model