Maxmobi committed on
Commit
dc8ad76
1 Parent(s): bd4907d

Update app.py

Files changed (1)
  1. app.py +43 -32
app.py CHANGED
@@ -1,33 +1,44 @@
- >>> from llama_cpp import Llama
- >>> llm = Llama(
-       model_path="./models/7B/llama-model.gguf",
-       # n_gpu_layers=-1, # Uncomment to use GPU acceleration
-       # seed=1337, # Uncomment to set a specific seed
-       # n_ctx=2048, # Uncomment to increase the context window
  )
- >>> output = llm(
-       "Q: Name the planets in the solar system? A: ", # Prompt
-       max_tokens=32, # Generate up to 32 tokens, set to None to generate up to the end of the context window
-       stop=["Q:", "\n"], # Stop generating just before the model would generate a new question
-       echo=True # Echo the prompt back in the output
- ) # Generate a completion, can also call create_completion
- >>> print(output)
- {
-   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
-   "object": "text_completion",
-   "created": 1679561337,
-   "model": "./models/7B/llama-model.gguf",
-   "choices": [
-     {
-       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
-       "index": 0,
-       "logprobs": None,
-       "finish_reason": "stop"
-     }
-   ],
-   "usage": {
-     "prompt_tokens": 14,
-     "completion_tokens": 28,
-     "total_tokens": 42
-   }
- }
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "tiiuae/falcon-7b-instruct",
+     torch_dtype=torch.bfloat16,
+     trust_remote_code=True,
+     device_map="auto",
+     low_cpu_mem_usage=True,
  )
+ tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
+
+
+ def generate_text(input_text):
+     # Tokenize the prompt and move it to the same device as the model
+     input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
+     attention_mask = torch.ones_like(input_ids)
+
+     output = model.generate(
+         input_ids,
+         attention_mask=attention_mask,
+         max_length=200,
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+         pad_token_id=tokenizer.eos_token_id,
+     )
+
+     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     print(output_text)
+
+     # Remove the prompt echo from the generated text
+     cleaned_output_text = output_text.replace(input_text, "")
+     return cleaned_output_text
+
+
+ text_generation_interface = gr.Interface(
+     fn=generate_text,
+     inputs=[
+         gr.Textbox(label="Input Text"),
+     ],
+     outputs=gr.Textbox(label="Generated Text"),
+     title="Falcon-7B Instruct",
+ ).launch()
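
For reference, a minimal sketch of how the new generation path could be smoke-tested without the Gradio UI. It reuses the model and tokenizer objects defined in the diff above; the prompt and parameter values here are illustrative, not part of the commit:

    # Hypothetical smoke test: run one generation directly, bypassing gr.Interface
    prompt = "Q: Name the planets in the solar system? A: "
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    out = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=True,
        top_k=10,
        pad_token_id=tokenizer.eos_token_id,
    )
    print(tokenizer.decode(out[0], skip_special_tokens=True))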