Update app.py
app.py CHANGED
@@ -2,6 +2,8 @@ import os
 import torch
 from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 import gradio as gr
+from PIL import Image
+from torchvision.transforms import ToTensor
 
 # Get API token from environment variable
 api_token = os.getenv("HF_TOKEN").strip()
@@ -30,27 +32,34 @@ tokenizer = AutoTokenizer.from_pretrained(
     token=api_token
 )
 
+# Preprocess image
+def preprocess_image(image):
+    transform = ToTensor()
+    return transform(image).unsqueeze(0).to(model.device)
+
 def analyze_input(image, question):
     try:
         # Prepare inputs
         if image:
-
-            # Convert image to RGB
+            # Process image
             image = image.convert('RGB')
-
+            pixel_values = preprocess_image(image)
+            prompt = f"Given the medical image and question: {question}\nPlease provide a detailed analysis."
+
+            # Model inputs for multimodal processing
             model_inputs = {
                 "input_ids": tokenizer(prompt, return_tensors="pt").input_ids.to(model.device),
-                "
+                "pixel_values": pixel_values
             }
         else:
+            # Text-only processing
             prompt = f"Medical question: {question}\nAnswer:"
             model_inputs = {
-                "input_ids": tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
-                "images": None
+                "input_ids": tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
             }
 
         # Generate response using model's custom method
-        outputs = model.generate(model_inputs
+        outputs = model.generate(**model_inputs, max_new_tokens=256)
 
         # Decode and clean response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
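
For context on the new preprocess_image helper: torchvision's ToTensor converts a PIL image into a (C, H, W) float tensor scaled to [0, 1], and unsqueeze(0) adds the batch dimension the model expects. A minimal standalone sketch of the same transform, with a dummy image and CPU in place of the app's Gradio upload and model.device:

from PIL import Image
from torchvision.transforms import ToTensor

# Dummy stand-in for a user upload; app.py receives a PIL image from Gradio.
image = Image.new("RGB", (224, 224))
pixel_values = ToTensor()(image).unsqueeze(0)  # shape (1, 3, 224, 224), values in [0, 1]
print(pixel_values.shape)  # torch.Size([1, 3, 224, 224])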
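
The main bug fix is the generate call: the old outputs = model.generate(model_inputs was unbalanced and passed the dict as a single positional argument, while the new call unpacks it into keyword arguments and bounds the output length:

# Equivalent to model.generate(input_ids=..., pixel_values=..., max_new_tokens=256);
# extra kwargs are forwarded to the model's generation code.
outputs = model.generate(**model_inputs, max_new_tokens=256)

Whether this model actually consumes a pixel_values keyword (rather than the images key the old code used) depends on its custom remote-code generate signature; the commit assumes pixel_values.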