Update app.py
app.py CHANGED

@@ -34,36 +34,57 @@ def load_model(model_id):
     return model, tokenizer
 
 def generate_description(image, model, tokenizer, max_length=100, temperature=0.7, top_p=0.9):
-
-
-    image
-
-
-
-
+    try:
+        # Convert and resize image
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        image = image.resize((32, 32))
+
+        # Format the input text
+        input_text = """Below is an image. Please describe it in detail.
 
 Image: [IMAGE]
 Description: """
-
-
-
-
-
-
-
-
-
-
-
-
-            num_return_sequences=1,
-            pad_token_id=tokenizer.pad_token_id,
-            eos_token_id=tokenizer.eos_token_id
+
+        # Ensure we have valid token IDs
+        if tokenizer.pad_token_id is None:
+            tokenizer.pad_token_id = tokenizer.eos_token_id
+
+        # Tokenize input with explicit token IDs
+        inputs = tokenizer(
+            input_text,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            add_special_tokens=True
         )
+
+        # Calculate minimum length to ensure we generate new tokens
+        min_length = inputs['input_ids'].shape[1] + 20
+
+        # Generate response
+        with torch.no_grad():
+            outputs = model.generate(
+                input_ids=inputs['input_ids'],
+                attention_mask=inputs['attention_mask'],
+                max_length=max(min_length, max_length),  # Ensure max_length is greater than input length
+                min_length=min_length,
+                temperature=temperature,
+                top_p=top_p,
+                do_sample=True,
+                num_return_sequences=1,
+                pad_token_id=tokenizer.pad_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                use_cache=True
+            )
+
+        # Decode and return the response
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return generated_text.split("Description: ")[-1].strip()
 
-
-
-
+    except Exception as e:
+        import traceback
+        return f"Error generating description: {str(e)}\n{traceback.format_exc()}"
 
 def create_demo(model_id):
     # Load model and tokenizer
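For context, here is a minimal sketch of how the patched generate_description could be wired into the Gradio app that create_demo builds. Only the two function signatures and the "# Load model and tokenizer" comment are visible in this hunk, so the loader body, the interface layout, the demo title, and the model id below are assumptions, not the Space's actual code:

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(model_id):
    # Assumed loader; the Space's real load_model is defined above this hunk.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    model.eval()
    return model, tokenizer

def create_demo(model_id):
    # Load model and tokenizer once, then close over them in the handler
    # so each request only pays for generation.
    model, tokenizer = load_model(model_id)

    def describe(image):
        return generate_description(image, model, tokenizer)

    return gr.Interface(
        fn=describe,
        inputs=gr.Image(type="pil"),        # PIL input matches image.convert/.resize above
        outputs=gr.Textbox(label="Description"),
        title="Image Description Demo",     # hypothetical title
    )

if __name__ == "__main__":
    create_demo("your-username/your-model").launch()  # hypothetical model id

Because generate_description now catches exceptions and returns the traceback as a string, a demo wired this way surfaces errors in the output textbox instead of crashing the Space.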