Update app.py
app.py CHANGED
@@ -6,10 +6,6 @@ from flask_cors import CORS
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 
-# Initialize Flask app
-app = Flask(__name__)
-CORS(app)
-
 # Global variables
 MODEL_ID = "microsoft/bitnet-b1.58-2B-4T"
 MAX_LENGTH = 2048
@@ -18,15 +14,24 @@ TEMPERATURE = 0.7
 TOP_P = 0.9
 THINKING_STEPS = 3 # Number of thinking steps
 
-# Load model and tokenizer
-
-def load_model():
+# Global variables for model and tokenizer
+model = None
+tokenizer = None
+
+# Function to load model and tokenizer
+def load_model_and_tokenizer():
     global model, tokenizer
 
+    if model is not None and tokenizer is not None:
+        return
+
     print(f"Loading model: {MODEL_ID}")
 
     # Load tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_ID,
+        use_fast=True,
+    )
 
     # Load model with optimizations for limited resources
     model = AutoModelForCausalLM.from_pretrained(
@@ -38,6 +43,10 @@ def load_model():
 
     print("Model and tokenizer loaded successfully!")
 
+# Initialize Flask app
+app = Flask(__name__)
+CORS(app)
+
 # Helper function for step-by-step thinking
 def generate_with_thinking(prompt, thinking_steps=THINKING_STEPS):
     # Initialize conversation with prompt
@@ -100,6 +109,10 @@ def generate_with_thinking(prompt, thinking_steps=THINKING_STEPS):
 @app.route('/api/chat', methods=['POST'])
 def chat():
     try:
+        # Ensure model is loaded
+        if model is None or tokenizer is None:
+            load_model_and_tokenizer()
+
         data = request.json
         prompt = data.get('prompt', '')
         include_thinking = data.get('include_thinking', False)
@@ -123,6 +136,9 @@ def chat():
         return jsonify(result)
 
     except Exception as e:
+        import traceback
+        print(f"Error in chat endpoint: {str(e)}")
+        print(traceback.format_exc())
         return jsonify({'error': str(e)}), 500
 
 # Simple health check endpoint
@@ -157,6 +173,10 @@ def create_ui():
         if not question.strip():
             return "", "Please enter a question"
 
+        # Ensure model is loaded
+        if model is None or tokenizer is None:
+            load_model_and_tokenizer()
+
         response = generate_with_thinking(question)
 
         if show_thinking:
@@ -180,8 +200,8 @@ def create_ui():
 
 # Create Gradio UI and launch the app
 if __name__ == "__main__":
-    # Load model at startup
-    load_model()
+    # Load model at startup
+    load_model_and_tokenizer()
 
     # Create and launch Gradio interface
     demo = create_ui()
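
For completeness, a hedged smoke test against the lazily loaded endpoint. The /api/chat path and the 'prompt'/'include_thinking' fields come from the diff; the host and port are assumptions (Flask's defaults), since the commit does not show how the Flask server is started alongside Gradio.

import requests

# Assumed host/port; adjust to wherever the Flask app is actually served.
resp = requests.post(
    "http://127.0.0.1:5000/api/chat",
    json={"prompt": "Why is the sky blue?", "include_thinking": True},
)
print(resp.status_code)  # 200 on success, 500 on a handled exception
print(resp.json())       # result payload, or {'error': ...}

The first request pays the full model-load cost because of the lazy-load guard; subsequent requests should respond at normal generation latency.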