|
import torch |
|
from torch import nn |
|
|
|
from LLAVA_Biovil.biovil_t.pretrained import get_biovil_t_image_encoder |
|
|
|
|
|
class ChexpertClassifier(nn.Module):
    """Classification head on top of the BioViL-T image encoder.

    The encoder's ``projected_patch_embeddings`` are average-pooled, flattened
    and passed through a two-layer MLP (``fc1`` -> ReLU -> ``fc2``). The
    output of ``fc2`` is returned as-is; no sigmoid/softmax is applied here.

    With the default settings ``fc1`` expects ``128 * 4 * 4`` input features,
    so the pooled feature map must have 128 channels on a 4x4 grid (e.g. a
    16x16 patch grid before pooling with kernel 4).
    NOTE(review): the encoder is defined outside this file; the
    ``(batch, 128, H, W)`` layout of its patch embeddings is assumed from the
    layer sizes used here - confirm against the encoder.
    """

    # Default pooling kernel, kept at class level so that instances created
    # without running this ``__init__`` still resolve ``self.pool_size``.
    pool_size = 4

    def __init__(
        self,
        num_classes: int,
        *,
        embed_channels: int = 128,
        pool_size: int = 4,
        pooled_grid: int = 4,
        hidden_dim: int = 512,
    ) -> None:
        """Build the encoder and the classification head.

        Args:
            num_classes: Number of output units of the final linear layer.
            embed_channels: Channel count of the patch embeddings fed to the
                head (default 128).
            pool_size: Kernel size (and stride) of the average pooling applied
                to the patch embeddings (default 4).
            pooled_grid: Side length of the feature map expected after
                pooling (default 4, i.e. a 4x4 grid).
            hidden_dim: Width of the hidden linear layer (default 512).
        """
        super().__init__()
        self.pool_size = pool_size
        self.biovil_encoder = get_biovil_t_image_encoder()

        # Flattened size of the pooled map: channels * grid * grid.
        flat_features = embed_channels * pooled_grid * pooled_grid
        self.fc1 = nn.Linear(flat_features, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` and return the raw class scores.

        Args:
            x: Input batch, passed unchanged to the BioViL-T encoder.

        Returns:
            Tensor with ``num_classes`` values per batch element (the
            un-activated output of ``fc2``).
        """
        patches = self.biovil_encoder(x).projected_patch_embeddings
        pooled = torch.nn.functional.avg_pool2d(patches, self.pool_size)
        # Collapse everything but the batch dimension for the linear layers.
        flat = torch.flatten(pooled, start_dim=1)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)
|
|