Spaces:

Souha-BH
/

HealthyOrNot

Sleeping

App Files Files Community

Souha Ben Hassine committed on Dec 10, 2024

Commit

eebb5f5

1 Parent(s): c32f512

initial commit

Browse files

Files changed (1) hide show

app.py +9 -50

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import torch
-from torch.utils.data import Dataset
-from transformers import AutoModel, AutoTokenizer, AutoConfig
 from torchvision import transforms, models
 import torch.nn as nn
 import os
@@ -8,16 +7,15 @@ import json
 import cv2
 from PIL import Image
 import gradio as gr
-import torch
 class MultimodalRiskBehaviorModel(nn.Module):
     def __init__(self, text_model_name="bert-base-uncased", hidden_dim=512, dropout=0.3):
         super(MultimodalRiskBehaviorModel, self).__init__()
-        # Text model
         self.text_model_name = text_model_name
-        self.text_model = AutoModel.from_pretrained(text_model_name)
         # Visual model (ResNet50)
         self.visual_model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
@@ -38,10 +36,10 @@ class MultimodalRiskBehaviorModel(nn.Module):
         attention_mask = encoding['attention_mask'].squeeze(1).to(device)
         # Text embeddings from BERT
-        text_features = self.text_model(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
         # Visual features from ResNet50
-        frames = frames.to(device)  # Move frames to the same device as the model
         batch_size, num_frames, channels, height, width = frames.size()
         frames = frames.view(batch_size * num_frames, channels, height, width)
@@ -55,56 +53,17 @@ class MultimodalRiskBehaviorModel(nn.Module):
         # Pass through the classifier
         x = self.dropout(torch.relu(self.fc1(combined_features)))
-        output = torch.sigmoid(self.fc2(x))  # Sigmoid for binary classification
         return output
-    def save_pretrained(self, save_directory):
-        os.makedirs(save_directory, exist_ok=True)
-        torch.save(self.state_dict(), os.path.join(save_directory, 'pytorch_model.bin'))
-        config = {
-            "text_model_name": self.text_model_name,
-            "hidden_dim": self.fc1.out_features
-        }
-        with open(os.path.join(save_directory, 'config.json'), 'w') as f:
-            json.dump(config, f)
-    @classmethod
-    def from_pretrained(cls, load_directory, map_location=None):
-        if os.path.exists(load_directory):
-            # Handling local paths
-            config_path = os.path.join(load_directory, 'config.json')
-            state_dict_path = os.path.join(load_directory, 'pytorch_model.bin')
-            if not os.path.exists(config_path):
-                raise FileNotFoundError(f"{config_path} not found.")
-            if not os.path.exists(state_dict_path):
-                raise FileNotFoundError(f"{state_dict_path} not found.")
-            with open(config_path, 'r') as f:
-                config_dict = json.load(f)
-            model = cls(text_model_name=config_dict["text_model_name"], hidden_dim=config_dict["hidden_dim"])
-            state_dict = torch.load(state_dict_path, map_location=map_location)
-            model.load_state_dict(state_dict)
-        else:
-            # Handling Hugging Face Hub paths
-            config = AutoConfig.from_pretrained(load_directory)
-            # Use AutoModel to load a pre-trained model with weights
-            hf_model = AutoModel.from_pretrained(load_directory, config=config)
-            model = cls(text_model_name=config.name_or_path, hidden_dim=hf_model.config.hidden_size)
-            model.text_model = hf_model
-        return model
 tokenizer = AutoTokenizer.from_pretrained('Souha-BH/BERT_Resnet50')
-model = MultimodalRiskBehaviorModel.from_pretrained('Souha-BH/BERT_Resnet50') # if cpu add arg map_location='cpu'
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 # Function to load frames from a video
 def load_frames_from_video(video_path, transform, num_frames=10):
     cap = cv2.VideoCapture(video_path)

 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
 from torchvision import transforms, models
 import torch.nn as nn
 import os
 import cv2
 from PIL import Image
 import gradio as gr
 class MultimodalRiskBehaviorModel(nn.Module):
     def __init__(self, text_model_name="bert-base-uncased", hidden_dim=512, dropout=0.3):
         super(MultimodalRiskBehaviorModel, self).__init__()
+        # Text model using AutoModelForSequenceClassification
         self.text_model_name = text_model_name
+        self.text_model = AutoModelForSequenceClassification.from_pretrained(text_model_name, num_labels=1)
         # Visual model (ResNet50)
         self.visual_model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
         attention_mask = encoding['attention_mask'].squeeze(1).to(device)
         # Text embeddings from BERT
+        text_features = self.text_model(input_ids=input_ids, attention_mask=attention_mask).logits
         # Visual features from ResNet50
+        frames = frames.to(device)
         batch_size, num_frames, channels, height, width = frames.size()
         frames = frames.view(batch_size * num_frames, channels, height, width)
         # Pass through the classifier
         x = self.dropout(torch.relu(self.fc1(combined_features)))
+        output = torch.sigmoid(self.fc2(x))
         return output
+# Loading the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('Souha-BH/BERT_Resnet50')
+model = MultimodalRiskBehaviorModel.from_pretrained('Souha-BH/BERT_Resnet50', map_location='cpu')
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 # Function to load frames from a video
 def load_frames_from_video(video_path, transform, num_frames=10):
     cap = cv2.VideoCapture(video_path)