fix added

Files changed:
- SkinGPT.py  (+3, -149)
- app.py  (+64, -50)

SkinGPT.py  CHANGED
@@ -1,29 +1,21 @@
-import torch
 from torch import nn
 from torchvision import transforms
-from PIL import Image
 from transformers import LlamaForCausalLM, LlamaTokenizer, BertModel, BertConfig
 from eva_vit import create_eva_vit_g
 import requests
 from io import BytesIO
 import os
 from huggingface_hub import hf_hub_download
-from transformers import BitsAndBytesConfig
-from accelerate import init_empty_weights
 import torch
-from torch.cuda.amp import autocast
-import warnings
 MODEL_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 token = os.getenv("HF_TOKEN")
 import streamlit as st
-import torch.nn.functional as F
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 class Blip2QFormer(nn.Module):
     def __init__(self, num_query_tokens=32, vision_width=1408):
         super().__init__()
         self.num_query_tokens = num_query_tokens
-        # Load pre-trained Q-Former config
         self.bert_config = BertConfig(
             vocab_size=30522,
             hidden_size=768,
@@ -48,8 +40,6 @@ class Blip2QFormer(nn.Module):
             torch.zeros(1, num_query_tokens, self.bert_config.hidden_size)
         )
         self.vision_proj = nn.Linear(vision_width, self.bert_config.hidden_size)
-
-        # Initialize weights
         self._init_weights()
 
     def _init_weights(self):
@@ -67,16 +57,7 @@ class Blip2QFormer(nn.Module):
         msg = self.load_state_dict(state_dict, strict=False)
 
     def forward(self, visual_features):
-
-        print(
-            f"Visual features stats - min: {visual_features.min().item():.4f}, max: {visual_features.max().item():.4f}")
-
-        # Project visual features
         visual_embeds = self.vision_proj(visual_features.float())
-        print(f"Projected embeds stats - min: {visual_embeds.min().item():.4f}, max: {visual_embeds.max().item():.4f}")
-        # visual_embeds = self.vision_proj(visual_features.float())
-
-        # Expand query tokens
         query_tokens = self.query_tokens.expand(visual_embeds.shape[0], -1, -1)
         combined_input = torch.cat([query_tokens, visual_embeds], dim=1)
         attention_mask = torch.ones(
@@ -84,14 +65,11 @@ class Blip2QFormer(nn.Module):
             dtype=torch.long,
             device=combined_input.device
         )
-
-        # Forward through BERT
         outputs = self.bert(
             attention_mask=attention_mask,
             inputs_embeds=combined_input,
             return_dict=True
         )
-
         return outputs.last_hidden_state[:, :self.num_query_tokens]
 
 
@@ -100,17 +78,14 @@ class SkinGPT4(nn.Module):
     def __init__(self, vit_checkpoint_path,
                  q_former_model="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_flant5xxl.pth"):
         super().__init__()
-        # Image encoder parameters from paper
         self.device = device
-        # self.dtype = torch.float16
         self.dtype = MODEL_DTYPE
         self.H, self.W, self.C = 224, 224, 3
-        self.P = 14
-        self.D = 1408
+        self.P = 14
+        self.D = 1408
         self.num_query_tokens = 32
 
         self.vit = self._init_vit(vit_checkpoint_path).to(self.dtype)
-        print("Loaded ViT")
         self.ln_vision = nn.LayerNorm(self.D).to(self.dtype)
 
         self.q_former = Blip2QFormer(
@@ -120,27 +95,17 @@ class SkinGPT4(nn.Module):
         self.q_former.load_from_pretrained(q_former_model)
         for param in self.q_former.parameters():
             param.requires_grad = False
-
-        print("Loaded QFormer")
-
-
         self.llama = self._init_llama()
-
         self.llama_proj = nn.Linear(
             self.q_former.bert_config.hidden_size,
             self.llama.config.hidden_size
         ).to(self.dtype)
-
-        print(f"Q-Former output dim: {self.q_former.bert_config.hidden_size}")
-        print(f"LLaMA input dim: {self.llama.config.hidden_size}")
-
         for module in [self.vit, self.ln_vision, self.q_former, self.llama_proj, self.llama]:
             for param in module.parameters():
                 param.requires_grad = False
             module.eval()
 
     def _init_vit(self, vit_checkpoint_path):
-        """Initialize EVA-ViT-G with paper specifications"""
         vit = create_eva_vit_g(
             img_size=(self.H, self.W),
             patch_size=self.P,
@@ -156,24 +121,17 @@ class SkinGPT4(nn.Module):
         if not hasattr(vit, 'norm'):
             vit.norm = nn.LayerNorm(self.D)
         checkpoint = torch.load(vit_checkpoint_path, map_location='cpu')
-        # 3. Filter weights for ViT components only
         vit_weights = {k.replace("vit.", ""): v
                        for k, v in checkpoint.items()
                        if k.startswith("vit.")}
-
-        # 4. Load weights while ignoring classifier head
         vit.load_state_dict(vit_weights, strict=False)
-
-
         return vit.eval()
 
     def _init_llama(self):
-        """Initialize frozen LLaMA-2-13b-chat with proper error handling"""
        try:
             device_map = {
                 "": 0 if torch.cuda.is_available() else "cpu"
             }
-            # First try loading with device_map="auto"
             model = LlamaForCausalLM.from_pretrained(
                 "meta-llama/Llama-2-13b-chat-hf",
                 token=token,
@@ -181,9 +139,7 @@ class SkinGPT4(nn.Module):
                 device_map=device_map,
                 low_cpu_mem_usage=True
             )
-
             return model.eval()
-
         except Exception as e:
             raise ImportError(
                 f"Failed to load LLaMA model. Please ensure:\n"
@@ -194,143 +150,61 @@ class SkinGPT4(nn.Module):
             )
 
     def encode_image(self, x):
-        """Convert image to patch embeddings following Eq. (1)"""
-        # x: (B, C, H, W)
         x = x.to(self.dtype)
         if x.dim() == 3:
-            x = x.unsqueeze(0)
+            x = x.unsqueeze(0)
         if x.dim() != 4:
             raise ValueError(f"Input must be 4D tensor (got {x.dim()}D)")
-
-        B, C, H, W = x.shape
-        N = (H * W) // (self.P ** 2)
-
         x = self.vit.patch_embed(x)
-
         num_patches = x.shape[1]
         pos_embed = self.vit.pos_embed[:, 1:num_patches + 1, :]
         x = x + pos_embed
-
-        # Add class token
         class_token = self.vit.cls_token.expand(x.shape[0], -1, -1)
         x = torch.cat([class_token, x], dim=1)
         for blk in self.vit.blocks:
             x = blk(x)
         x = self.vit.norm(x)
         vit_features = self.ln_vision(x)
-        print(f"vit features (first 5): {vit_features[0, 0, :5]}")
-
-        # Q-Former forward pass
         with torch.no_grad():
             qformer_output = self.q_former(vit_features.float())
-            print(f"Q-Former output (first 5): {qformer_output[0, 0, :5]}")
         image_embeds = self.llama_proj(qformer_output.to(self.dtype))
-
-
         return image_embeds
 
     def generate(self, images, user_input=None, max_new_tokens=300):
-
         image_embeds = self.encode_image(images)
-
-        print(f"Aligned features : {image_embeds}")
-        # print(f"\n Images embeddings shape : {image_embeds.shape} \n Llama config hidden size : {self.llama.config.hidden_size}")
-
-        print(
-            f"\n[VALIDATION] Visual embeds - Mean: {image_embeds.mean().item():.4f}, Std: {image_embeds.std().item():.4f}")
-
         if image_embeds.shape[-1] != self.llama.config.hidden_size:
             raise ValueError(
                 f"Feature dimension mismatch. "
                 f"Q-Former output: {image_embeds.shape[-1]}, "
                 f"LLaMA expected: {self.llama.config.hidden_size}"
             )
-
-
-        # prompt = """"### Instruction: <Img><IMAGE></Img>
-        # Could you describe the skin condition in this image?
-        # Please provide a detailed analysis including possible diagnoses.
-        # ### Response:
-        # """
-
-        # prompt = """### Skin Diagnosis Analysis ###
-        # <IMAGE>
-        # Could you describe the skin condition in this image?
-        # Please provide a detailed analysis including possible diagnoses.
-        # ### Response:"""
-
         prompt = """### Instruction:
         <IMAGE>
         Could you describe the skin condition in this image?
         ### Response:"""
-
-
-        # print(f"\n[DEBUG] Raw Prompt:\n{prompt}")
-
         self.tokenizer = LlamaTokenizer.from_pretrained(
             "meta-llama/Llama-2-13b-chat-hf",
             token=token,
             padding_side="right"
         )
-        # self.tokenizer.add_special_tokens({'additional_special_tokens': ['<Img>', '</Img>', '<ImageHere>']})
         num_added = self.tokenizer.add_special_tokens({
             'additional_special_tokens': ['<IMAGE>']
         })
-        # num_added = self.tokenizer.add_special_tokens({
-        #     'additional_special_tokens': ['<Img>', '</Img>', '<IMAGE>']
-        # })
-
         if num_added == 0:
             raise ValueError("Failed to add <IMAGE> token!")
-
         self.llama.resize_token_embeddings(len(self.tokenizer))
-
         inputs = self.tokenizer(prompt, return_tensors="pt").to(images.device)
-
-        # print(f"\n[DEBUG] Tokenized input IDs:\n{inputs.input_ids}")
-        # print(f"[DEBUG] Special token positions: {self.tokenizer.all_special_tokens}")
-
-        # Prepare embeddings
         input_embeddings = self.llama.model.embed_tokens(inputs.input_ids)
         visual_embeds = image_embeds.mean(dim=1)
-
-        # image_token_id = self.tokenizer.convert_tokens_to_ids("<ImageHere>")
         image_token_id = self.tokenizer.convert_tokens_to_ids("<IMAGE>")
         replace_positions = (inputs.input_ids == image_token_id).nonzero()
-
         if len(replace_positions) == 0:
             raise ValueError("No <IMAGE> tokens found in prompt!")
-
         if len(replace_positions[0]) == 0:
             raise ValueError("Image token not found in prompt")
-
-        # print(f"\n[DEBUG] Image token found at position: {replace_positions}")
-
-
-        print(f"\n[DEBUG] Before replacement:")
-        # print(f"Text embeddings shape: {input_embeddings.shape}")
-        # print(f"Visual embeddings shape: {visual_embeds.shape}")
-        # print(f"Image token at {replace_positions[0][1].item()}:")
-        print(f"Image token embedding (before):\n{input_embeddings[0, replace_positions[0][1], :5]}...")
-
         for pos in replace_positions:
             input_embeddings[0, pos[1]] = visual_embeds[0]
 
-        print(f"\n[DEBUG] After replacement:")
-        print(f"Image token embedding (after):\n{input_embeddings[0, replace_positions[0][1], :5]}...")
-
-        # outputs = self.llama.generate(
-        #     inputs_embeds=input_embeddings,
-        #     max_new_tokens=max_new_tokens,
-        #     temperature=0.7,
-        #     top_k=40,
-        #     top_p=0.9,
-        #     repetition_penalty=1.1,
-        #     do_sample=True,
-        #     pad_token_id = self.tokenizer.eos_token_id,
-        #     eos_token_id = self.tokenizer.eos_token_id
-        # )
-
         outputs = self.llama.generate(
             inputs_embeds=input_embeddings,
             max_new_tokens=max_new_tokens,
@@ -340,27 +214,16 @@ class SkinGPT4(nn.Module):
             repetition_penalty=1.1,
             do_sample=True,
         )
-
-
         full_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        print(f"Full Output from llama : {full_output}")
         response = full_output.split("### Response:")[-1].strip()
-        # print(f"Response from llama : {full_output}")
-
         return response
 
 
 class SkinGPTClassifier:
     def __init__(self, device='cuda' if torch.cuda.is_available() else 'cpu'):
         self.device = torch.device(device)
-        self.conversation_history = []
-
         with st.spinner("Loading AI models (this may take several minutes)..."):
             self.model = self._load_model()
-            # print(f"Q-Former output shape: {self.model.q_former(torch.randn(1, 197, 1408)).shape}")
-            # print(f"Projection layer: {self.model.llama_proj}")
-
-        # Image transformations
         self.transform = transforms.Compose([
             transforms.Resize((224, 224)),
             transforms.ToTensor(),
@@ -378,18 +241,9 @@ class SkinGPTClassifier:
 
     def predict(self, image):
         image = image.convert('RGB')
-        print(f"Original image mode: {image.mode}, size: {image.size}")
-
         image_tensor = self.transform(image).unsqueeze(0).to(self.device)
-
-        print(f"Tensor shape: {image_tensor.shape}")
-        print(f"Tensor min/max: {image_tensor.min().item():.4f}/{image_tensor.max().item():.4f}")
-        print(f"Tensor mean: {image_tensor.mean().item():.4f}")
-
         with torch.no_grad():
             diagnosis = self.model.generate(image_tensor)
-
         return {
             "diagnosis": diagnosis,
-            "visual_features": None # Can return features if needed
         }
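For reference, the slimmed-down SkinGPT.py now exposes a single inference path: SkinGPTClassifier loads the frozen ViT, Q-Former, and LLaMA stack once, and predict() returns a dict whose "diagnosis" key holds the generated text. A minimal usage sketch, assuming the checkpoints resolved inside _load_model are available and HF_TOKEN is set for the gated LLaMA weights (the file name lesion.jpg is only illustrative):

from PIL import Image
from SkinGPT import SkinGPTClassifier

# Build the classifier once; this loads the frozen ViT + Q-Former + LLaMA stack.
classifier = SkinGPTClassifier()

# Run a single image through the pipeline and read back the generated description.
image = Image.open("lesion.jpg")  # hypothetical input path
result = classifier.predict(image)
print(result["diagnosis"])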
app.py  CHANGED

@@ -1,32 +1,18 @@
 import torch
 import random
 import numpy as np
-
 torch.manual_seed(42)
 random.seed(42)
 np.random.seed(42)
-
-
 import streamlit as st
 import io
-from fpdf import FPDF
-import nest_asyncio
-nest_asyncio.apply()
-device='cuda' if torch.cuda.is_available() else 'cpu'
-
-
-st.set_page_config(page_title="DermBOT", page_icon="🧬", layout="centered")
-
 from PIL import Image
 import os
 from transformers import logging
-
-import torch
 from SkinGPT import SkinGPTClassifier
-
-
-
-
+from fpdf import FPDF
+import nest_asyncio
+nest_asyncio.apply()
 torch.set_default_dtype(torch.float32) # Main computations in float32
 MODEL_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 import warnings
@@ -41,7 +27,11 @@ import warnings
 warnings.filterwarnings("ignore")
 
 
-
+device='cuda' if torch.cuda.is_available() else 'cpu'
+st.set_page_config(page_title="SkinGPT", page_icon="🧬", layout="centered")
+
+
+@st.cache_resource(show_spinner=False)
 def get_classifier():
     classifier = SkinGPTClassifier()
     for module in [classifier.model.vit,
@@ -53,12 +43,17 @@ def get_classifier():
 
     return classifier
 
-
+if 'app_models' not in st.session_state:
+    st.session_state.app_models = get_classifier()
+
+classifier = st.session_state.app_models
 
 # === Session Init ===
 if "messages" not in st.session_state:
     st.session_state.messages = []
 
+if "current_image" not in st.session_state:
+    st.session_state.current_image = None
 
 # === PDF Export ===
 def export_chat_to_pdf(messages):
@@ -77,37 +72,56 @@ def export_chat_to_pdf(messages):
 
 st.title("🧬 DermBOT – Skin AI Assistant")
 st.caption(f"🧠 Using model: SkinGPT")
-uploaded_file = st.file_uploader(
-
-
-
-
+uploaded_file = st.file_uploader(
+    "Upload a skin image",
+    type=["jpg", "jpeg", "png"],
+    key="file_uploader"
+)
+
+if uploaded_file is not None and uploaded_file != st.session_state.current_image:
+    st.session_state.messages = []
+    st.session_state.current_image = uploaded_file
+
     image = Image.open(uploaded_file).convert("RGB")
-
-
-
-
-
+    st.image(image, caption="Uploaded image", use_column_width=True)
+    with st.spinner("Analyzing the image..."):
+        result = classifier.predict(image)
+
+    st.session_state.messages.append({"role": "assistant", "content": result["diagnosis"]})
+
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+# === Chat Interface ===
+if prompt := st.chat_input("Ask a follow-up question..."):
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    with st.chat_message("assistant"):
+        with st.spinner("Thinking..."):
+            if len(st.session_state.messages) > 1:
+                conversation_context = "\n".join(
+                    f"{m['role']}: {m['content']}"
+                    for m in st.session_state.messages[:-1] # Exclude current prompt
+                )
+                augmented_prompt = (
+                    f"Conversation history:\n{conversation_context}\n\n"
+                    f"Current question: {prompt}"
+                )
+                result = classifier.predict(image)
             else:
-
-
-
-
-
-
-    st.session_state.conversation.append(("user", user_query))
-    with st.chat_message("user"):
-        st.markdown(user_query)
-
-    # Generate response with context
-    context = "\n".join([f"{role}: {msg}" for role, msg in st.session_state.conversation])
-    response = classifier.generate(image, user_input=context)
-
-    st.session_state.conversation.append(("assistant", response))
-    with st.chat_message("assistant"):
-        st.markdown(response)
-
-# === PDF Button ===
-if st.button("📄 Download Chat as PDF"):
+                result = classifier.predict(image)
+
+            st.markdown(result["diagnosis"])
+            st.session_state.messages.append({"role": "assistant", "content": result["diagnosis"]})
+
+if st.session_state.messages and st.button("📄 Download Chat as PDF"):
     pdf_file = export_chat_to_pdf(st.session_state.messages)
-    st.download_button(
+    st.download_button(
+        "Download PDF",
+        data=pdf_file,
+        file_name="skingpt_chat_history.pdf",
+        mime="application/pdf"
+    )
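One loose end visible in the new chat branch: augmented_prompt is assembled from the message history, but predict(image) is called without it, while SkinGPT4.generate already accepts a user_input argument (currently unused inside its body). A sketch of how the context could be threaded through, as a hypothetical follow-up rather than anything in this commit:

import torch
from SkinGPT import SkinGPTClassifier

class ContextAwareClassifier(SkinGPTClassifier):
    # Hypothetical subclass: forward the chat context to the underlying model call.
    def predict(self, image, user_input=None):
        image = image.convert('RGB')
        image_tensor = self.transform(image).unsqueeze(0).to(self.device)
        with torch.no_grad():
            # generate() accepts user_input today but does not yet fold it into the
            # prompt; doing so would be the matching change on the SkinGPT.py side.
            diagnosis = self.model.generate(image_tensor, user_input=user_input)
        return {"diagnosis": diagnosis}

# app.py could then call, for example:
# result = classifier.predict(image, user_input=augmented_prompt)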