Spaces:

YongdongWang
/

robot-task-planning

Runtime error

App Files Community

YongdongWang committed on Jun 22

Commit

0af933c

verified ·

1 Parent(s): e70a7f9

Force update Space with optimized robot planning interface

Browse files

Files changed (3) hide show

README.md +10 -25
app.py +79 -87
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -8,38 +8,23 @@ sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: llama3.1
 ---
-# Robot Task Planning - Llama 3.1 8B
-This Space demonstrates a fine-tuned version of Meta's Llama 3.1 8B model specialized for **robot task planning** using QLoRA technique.
-The model converts natural language commands into structured task sequences for construction robots like excavators and dump trucks.
 ## Model
-The model is available at: [YongdongWang/llama-3.1-8b-dart-qlora](https://huggingface.co/YongdongWang/llama-3.1-8b-dart-qlora)
 ## Features
-- **Robot Command Processing**: Convert natural language to structured robot tasks
-- **Multi-Robot Coordination**: Handle complex scenarios with multiple excavators and dump trucks
-- **Task Dependencies**: Generate proper task sequences with dependencies
-- **Real-time Planning**: Instant task generation powered by Gradio
 ## Usage
-Input natural language robot commands like "Deploy Excavator 1 to Soil Area 1" and the model will generate structured task sequences in JSON format for robot execution.
-## Technical Details
-- **Base Model**: meta-llama/Llama-3.1-8B
-- **Fine-tuning**: QLoRA (4-bit quantization + LoRA)
-- **Interface**: Gradio
-- **Hosting**: HuggingFace Spaces
-- **Input**: Natural language robot commands
-- **Output**: Structured JSON task sequences
-## Performance
-⚠️ **Note**: Model loading may take 3-5 minutes on first startup due to the large model size and quantization process.

 app_file: app.py
 pinned: false
 license: llama3.1
+hardware: t4-medium
 ---
+# 🤖 Robot Task Planning - Llama 3.1 8B
+Fine-tuned Llama 3.1 8B model for robot task planning using QLoRA technique.
 ## Model
+[YongdongWang/llama-3.1-8b-dart-qlora](https://huggingface.co/YongdongWang/llama-3.1-8b-dart-qlora)
 ## Features
+- Natural language to robot task conversion
+- Multi-robot coordination
+- Real-time task generation
+- Optimized with 4-bit quantization
 ## Usage
+Input robot commands and get structured task sequences for excavators, dump trucks, and other construction robots.
+Loading time: ~3-5 minutes on first startup.

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from peft import PeftModel
 import warnings
 warnings.filterwarnings("ignore")
 # 模型配置
@@ -23,7 +24,11 @@ def load_model():
         )
         # 加载分词器
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
@@ -33,11 +38,16 @@ def load_model():
             quantization_config=bnb_config,
             device_map="auto",
             torch_dtype=torch.float16,
-            trust_remote_code=True
         )
         # 加载 LoRA 适配器
-        model = PeftModel.from_pretrained(base_model, LORA_MODEL)
         model.eval()
         print("✅ Model loaded successfully!")
@@ -47,28 +57,47 @@ def load_model():
         print(f"❌ Model loading failed: {load_error}")
         return None, None
-# 全局变量存储模型
 model = None
 tokenizer = None
 def initialize_model():
-    """初始化模型 - 延迟加载"""
-    global model, tokenizer
-    if model is None or tokenizer is None:
         model, tokenizer = load_model()
-    return model is not None and tokenizer is not None
 def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
     """生成回复"""
     if not initialize_model():
-        return "❌ Model not loaded. Please check the logs or try again."
     try:
-        # 格式化输入 - 移除多余的字符串插值
-        formatted_prompt = prompt.strip()
         # 编码输入
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
         # 生成回复
         with torch.no_grad():
@@ -82,19 +111,18 @@ def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
                 eos_token_id=tokenizer.eos_token_id,
                 repetition_penalty=1.1,
                 early_stopping=True,
             )
         # 解码输出
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # 移除原始输入，只保留生成的部分
-        if len(response) > len(formatted_prompt):
             response = response[len(formatted_prompt):].strip()
-        # 如果回复包含特殊标记，进行清理
-        if "Assistant:" in response:
-            response = response.split("Assistant:")[-1].strip()
         return response if response else "❌ No response generated. Please try again."
     except Exception as generation_error:
@@ -115,104 +143,68 @@ def chat_interface(message, history, max_tokens, temperature, top_p):
         return history, ""
 # 创建 Gradio 应用
-with gr.Blocks(title="Robot Task Planning - Llama 3.1 8B", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
     # 🤖 Llama 3.1 8B - Robot Task Planning
-    This is a fine-tuned version of Meta's Llama 3.1 8B model specialized for **robot task planning** using QLoRA technique.
-    **Capabilities**: Convert natural language robot commands into structured task sequences for excavators, dump trucks, and other construction robots.
     **Model**: [YongdongWang/llama-3.1-8b-dart-qlora](https://huggingface.co/YongdongWang/llama-3.1-8b-dart-qlora)
-    ⚠️ **Note**: Model loading may take a few minutes on first startup.
     """)
     with gr.Row():
         with gr.Column(scale=3):
             chatbot = gr.Chatbot(
-                label="Task Planning Results",
-                height=400,
-                show_label=True,
-                container=True,
-                bubble_full_width=False
             )
             msg = gr.Textbox(
                 label="Robot Command",
-                placeholder="Enter robot task command (e.g., 'Deploy Excavator 1 to Soil Area 1')...",
-                lines=2,
-                max_lines=5,
-                show_label=True,
-                container=True
             )
             with gr.Row():
-                send_btn = gr.Button("Generate Tasks", variant="primary", size="sm")
-                clear_btn = gr.Button("Clear", variant="secondary", size="sm")
         with gr.Column(scale=1):
-            gr.Markdown("### ⚙️ Generation Settings")
-            max_tokens = gr.Slider(
-                minimum=50,
-                maximum=500,
-                value=200,
-                step=10,
-                label="Max Tokens",
-                info="Maximum number of tokens to generate"
-            )
-            temperature = gr.Slider(
-                minimum=0.1,
-                maximum=2.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature",
-                info="Controls randomness (lower = more focused)"
-            )
-            top_p = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.9,
-                step=0.05,
-                label="Top-p",
-                info="Nucleus sampling threshold"
-            )
-    # 示例对话
     gr.Examples(
         examples=[
             ["Deploy Excavator 1 to Soil Area 1 for excavation."],
-            ["Send Dump Truck 1 to collect material, then unload at storage area."],
-            ["Move all robots to avoid Puddle 1 after inspection."],
-            ["Deploy multiple excavators to different soil areas simultaneously."],
-            ["Coordinate dump trucks to transport materials from excavation site to storage."],
-            ["Send robot to inspect rock area, then avoid with all other robots."],
-            ["Return all robots to start position after completing tasks."],
         ],
         inputs=msg,
-        label="💡 Example Robot Commands"
     )
     # 事件处理
-    msg.submit(
-        chat_interface,
-        inputs=[msg, chatbot, max_tokens, temperature, top_p],
-        outputs=[chatbot, msg]
-    )
-    send_btn.click(
-        chat_interface,
-        inputs=[msg, chatbot, max_tokens, temperature, top_p],
-        outputs=[chatbot, msg]
-    )
-    clear_btn.click(
-        lambda: ([], ""),
-        outputs=[chatbot, msg]
-    )
 if __name__ == "__main__":
-    demo.launch()

 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from peft import PeftModel
 import warnings
+import os
 warnings.filterwarnings("ignore")
 # 模型配置
         )
         # 加载分词器
+        tokenizer = AutoTokenizer.from_pretrained(
+            MODEL_NAME,
+            use_fast=False,
+            trust_remote_code=True
+        )
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
             quantization_config=bnb_config,
             device_map="auto",
             torch_dtype=torch.float16,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
         )
         # 加载 LoRA 适配器
+        model = PeftModel.from_pretrained(
+            base_model,
+            LORA_MODEL,
+            torch_dtype=torch.float16
+        )
         model.eval()
         print("✅ Model loaded successfully!")
         print(f"❌ Model loading failed: {load_error}")
         return None, None
+# 全局变量
 model = None
 tokenizer = None
+model_loading = False
 def initialize_model():
+    """初始化模型"""
+    global model, tokenizer, model_loading
+    if model is not None and tokenizer is not None:
+        return True
+    if model_loading:
+        return False
+    model_loading = True
+    try:
         model, tokenizer = load_model()
+        return model is not None and tokenizer is not None
+    finally:
+        model_loading = False
 def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
     """生成回复"""
     if not initialize_model():
+        if model_loading:
+            return "🔄 Model is loading, please wait a few minutes and try again..."
+        else:
+            return "❌ Model failed to load. Please check the Space logs."
     try:
+        # 格式化输入
+        formatted_prompt = f"### Human: {prompt.strip()}\n### Assistant:"
         # 编码输入
+        inputs = tokenizer(
+            formatted_prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=2048
+        ).to(model.device)
         # 生成回复
         with torch.no_grad():
                 eos_token_id=tokenizer.eos_token_id,
                 repetition_penalty=1.1,
                 early_stopping=True,
+                no_repeat_ngram_size=3
             )
         # 解码输出
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # 提取生成的部分
+        if "### Assistant:" in response:
+            response = response.split("### Assistant:")[-1].strip()
+        elif len(response) > len(formatted_prompt):
             response = response[len(formatted_prompt):].strip()
         return response if response else "❌ No response generated. Please try again."
     except Exception as generation_error:
         return history, ""
 # 创建 Gradio 应用
+with gr.Blocks(
+    title="Robot Task Planning - Llama 3.1 8B",
+    theme=gr.themes.Soft(),
+    css="""
+    .gradio-container {
+        max-width: 1200px;
+        margin: auto;
+    }
+    """
+) as demo:
     gr.Markdown("""
     # 🤖 Llama 3.1 8B - Robot Task Planning
+    Fine-tuned version of Meta's Llama 3.1 8B for **robot task planning** using QLoRA.
     **Model**: [YongdongWang/llama-3.1-8b-dart-qlora](https://huggingface.co/YongdongWang/llama-3.1-8b-dart-qlora)
+    ⚠️ **First load takes 3-5 minutes**
     """)
     with gr.Row():
         with gr.Column(scale=3):
             chatbot = gr.Chatbot(
+                label="🤖 Task Planning Results",
+                height=500,
+                show_copy_button=True
             )
             msg = gr.Textbox(
                 label="Robot Command",
+                placeholder="e.g., 'Deploy Excavator 1 to Soil Area 1'...",
+                lines=2
             )
             with gr.Row():
+                send_btn = gr.Button("🚀 Generate", variant="primary")
+                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
         with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Settings")
+            max_tokens = gr.Slider(50, 500, 200, label="Max Tokens")
+            temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
+            top_p = gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top-p")
+    # 示例
     gr.Examples(
         examples=[
             ["Deploy Excavator 1 to Soil Area 1 for excavation."],
+            ["Send Dump Truck 1 to collect material and unload at storage."],
+            ["Move all robots to avoid dangerous Puddle 1."],
+            ["Coordinate multiple excavators across different areas."],
+            ["Create evacuation sequence for all robots."],
         ],
         inputs=msg,
+        label="💡 Try these examples"
     )
     # 事件处理
+    msg.submit(chat_interface, [msg, chatbot, max_tokens, temperature, top_p], [chatbot, msg])
+    send_btn.click(chat_interface, [msg, chatbot, max_tokens, temperature, top_p], [chatbot, msg])
+    clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg])
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt CHANGED Viewed

@@ -5,3 +5,5 @@ peft==0.7.1
 bitsandbytes==0.41.3
 accelerate==0.24.1
 scipy==1.11.4

 bitsandbytes==0.41.3
 accelerate==0.24.1
 scipy==1.11.4
+sentencepiece
+protobuf