sounar committed
Commit 65272a9 · verified · 1 Parent(s): 9698346

Update app.py

Files changed (1):
  1. app.py +16 -7
app.py CHANGED
@@ -2,6 +2,8 @@ import os
 import torch
 from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 import gradio as gr
+from PIL import Image
+from torchvision.transforms import ToTensor
 
 # Get API token from environment variable
 api_token = os.getenv("HF_TOKEN").strip()
@@ -30,27 +32,34 @@ tokenizer = AutoTokenizer.from_pretrained(
     token=api_token
 )
 
+# Preprocess image
+def preprocess_image(image):
+    transform = ToTensor()
+    return transform(image).unsqueeze(0).to(model.device)
+
 def analyze_input(image, question):
     try:
         # Prepare inputs
         if image:
-            prompt = f"Given the medical image and question: {question}\nPlease provide a detailed analysis."
-            # Convert image to RGB
+            # Process image
             image = image.convert('RGB')
-            # Custom model_inputs for multimodal generation
+            pixel_values = preprocess_image(image)
+            prompt = f"Given the medical image and question: {question}\nPlease provide a detailed analysis."
+
+            # Model inputs for multimodal processing
             model_inputs = {
                 "input_ids": tokenizer(prompt, return_tensors="pt").input_ids.to(model.device),
-                "images": [image]
+                "pixel_values": pixel_values
             }
         else:
+            # Text-only processing
            prompt = f"Medical question: {question}\nAnswer:"
            model_inputs = {
-                "input_ids": tokenizer(prompt, return_tensors="pt").input_ids.to(model.device),
-                "images": None
+                "input_ids": tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
            }
 
         # Generate response using model's custom method
-        outputs = model.generate(model_inputs=model_inputs, max_new_tokens=256)
+        outputs = model.generate(**model_inputs, max_new_tokens=256)
 
         # Decode and clean response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
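
For readers skimming the change: the new version stops passing everything through a single `model_inputs=` keyword and instead unpacks the dict with `**`, so `input_ids` (and, for multimodal calls, `pixel_values`) reach `generate` as real keyword arguments; `transformers` typically rejects an unrecognized `model_inputs` keyword rather than forwarding its contents. Below is a minimal sketch of the resulting input-building pattern; `build_model_inputs` is a hypothetical helper name, not part of the commit, and it assumes the checkpoint's `generate` accepts `input_ids` plus `pixel_values`.

from torchvision.transforms import ToTensor

def build_model_inputs(tokenizer, prompt, device, image=None):
    """Mirror the commit's logic: tokenize the prompt, and attach the
    image as a [1, 3, H, W] float tensor under "pixel_values" when given."""
    inputs = {
        "input_ids": tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    }
    if image is not None:
        # ToTensor converts a PIL image to a CxHxW tensor scaled to [0, 1];
        # unsqueeze(0) adds the batch dimension the model's forward expects.
        inputs["pixel_values"] = ToTensor()(image.convert("RGB")).unsqueeze(0).to(device)
    return inputs

# Usage (hypothetical):
# outputs = model.generate(**build_model_inputs(tokenizer, prompt, model.device, image=image),
#                          max_new_tokens=256)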