Update app.py

app.py CHANGED
@@ -3,13 +3,17 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import pandas as pd
 import re
 
-# Load the Falcon-
+# Load the Falcon-3B-Instruct model and tokenizer
 @st.cache_resource
 def load_falcon_model():
-    """Load the Falcon-
-    model_name = "tiiuae/falcon-
+    """Load the Falcon-3B-Instruct model and tokenizer."""
+    model_name = "tiiuae/falcon-3b-instruct"  # Smaller model for faster loading
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype="auto",  # Use FP16 if supported
+        device_map="auto"    # Automatically distributes across available devices
+    )
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 qa_pipeline = load_falcon_model()
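For reference, a minimal sketch of what the updated loader looks like in context after this commit. The rest of app.py (the Streamlit UI and whatever uses pandas/re) is outside this hunk, so the prompt-handling lines at the bottom are illustrative assumptions, not part of the diff. Note that device_map="auto" requires the accelerate package to be installed in the Space, and torch_dtype="auto" falls back to FP32 on hardware without FP16/BF16 support.

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

@st.cache_resource  # cache the pipeline so it loads once per Space, not on every rerun
def load_falcon_model():
    """Load the Falcon-3B-Instruct model and tokenizer."""
    model_name = "tiiuae/falcon-3b-instruct"  # smaller model for faster loading
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",  # pick FP16/BF16 when the hardware supports it
        device_map="auto",   # spread layers across available GPUs/CPU (needs accelerate)
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

qa_pipeline = load_falcon_model()

# Illustrative usage (hypothetical, not in the commit): run one generation.
prompt = "Question: What does device_map='auto' do?\nAnswer:"
result = qa_pipeline(prompt, max_new_tokens=64, do_sample=False)
print(result[0]["generated_text"])

Because @st.cache_resource keys the cache on the function, the model download and weight loading happen only on the first request; subsequent Streamlit reruns reuse the same pipeline object, which is what makes the smaller 3B checkpoint plus caching a reasonable fit for a Space that sleeps and wakes.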