sebastiansarasti committed
adding the files for the app

- data_processing.py +10 -0
- main.py +80 -0
- model.py +42 -0
- requirements.txt +3 -0
data_processing.py
ADDED
@@ -0,0 +1,10 @@
from transformers import DistilBertTokenizer

tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

def get_tokens(string):
    text = tokenizer(string, return_tensors="pt", padding="max_length", max_length=256, truncation=True)
    return {
        "input_ids": text["input_ids"],
        "attention_mask": text["attention_mask"]
    }
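A quick usage sketch of get_tokens (illustrative only; the sample string is made up, and the shapes follow from max_length=256 with return_tensors="pt"):

tokens = get_tokens("Remote data entry clerk, no experience required")
print(tokens["input_ids"].shape)       # torch.Size([1, 256])
print(tokens["attention_mask"].shape)  # torch.Size([1, 256])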
main.py
ADDED
@@ -0,0 +1,80 @@
import streamlit as st
from huggingface_hub import hf_hub_download
from model import JobFakeModel
import torch
import torch.nn.functional as F
from data_processing import get_tokens
import numpy as np
import time

def get_model_list():
    model_name = "sebastiansarasti/fakeJobs"
    filename = "best_model.pth"
    file_path = hf_hub_download(repo_id=model_name, filename=filename)
    return file_path

def load_model(path):
    model = JobFakeModel(base_model="distilbert", freeze_base=True)
    model.load_state_dict(torch.load(path))
    return model

st.title('Fake Jobs Streamlit App')
st.write('This app estimates the probability that a job posting is fake.')

# download the model
path = get_model_list()
model = load_model(path)

with st.sidebar:
    st.subheader('About the App')
    st.markdown('The data used for training comes from the following source: https://www.kaggle.com/datasets/shivamb/real-or-fake-fake-jobposting-prediction')
    st.empty()
    st.subheader('Author')
    st.markdown('Sebastián Sarasti Zambonino')
    st.markdown('Data Scientist - Machine Learning Engineer')
    st.markdown('https://www.linkedin.com/in/sebastiansarasti/')
    st.markdown('https://github.com/sebassaras02')

# Create columns for the inputs
col1, col2, col3 = st.columns(3)

with col1:
    # Create an input text box for the description
    description = st.text_area('Description', 'Enter the job description here')

with col2:
    # Create an input text box for the requirements
    requirements = st.text_area('Requirements', 'Enter the job requirements here')

with col3:
    # Create an input text box for the benefits
    benefits = st.text_area('Benefits', 'Enter the job benefits here')

# Fall back to a placeholder when the optional fields are left empty
if not benefits:
    benefits = 'Nothing'
if not requirements:
    requirements = 'Nothing'
if not description:
    raise ValueError('Description cannot be empty')

# Create a button to submit the job
with st.spinner('Wait for it...'):
    time.sleep(2)
    if st.button('Submit Job'):
        tokens_des = get_tokens(description)
        tokens_req = get_tokens(requirements)
        tokens_ben = get_tokens(benefits)
        model.eval()
        with torch.no_grad():
            output = model(tokens_des, tokens_req, tokens_ben)
        # calculate the probability
        output = F.sigmoid(output).item() * 100
        # Create a box to show the result
        st.metric('Fake Job Probability', f"{np.round(output, 5)}%")
        if output > 75:
            st.error('⚠️ High probability of being a fake job!')
        elif output > 50:
            st.warning('⚠️ Medium probability of being a fake job')
        else:
            st.success('✅ Low probability of being a fake job')
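If the best_model.pth checkpoint was saved from a GPU session, loading it in a CPU-only environment may fail; a hedged variant of load_model using map_location (an assumption, not part of this commit) would look like this:

import torch
from model import JobFakeModel

def load_model_cpu(path):
    # Same as load_model above, but forces all tensors onto the CPU
    # (assumes the state dict matches the JobFakeModel architecture).
    model = JobFakeModel(base_model="distilbert", freeze_base=True)
    state_dict = torch.load(path, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)
    return model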
model.py
ADDED
@@ -0,0 +1,42 @@
import torch.nn as nn
from torch import cat
from transformers import DistilBertModel

class JobFakeModel(nn.Module):
    def __init__(self, base_model, freeze_base):
        super(JobFakeModel, self).__init__()
        self.base_model = base_model
        self.fc = nn.Sequential(
            nn.Linear(768*3, 600),
            nn.ReLU(),
            nn.Linear(600, 300),
            nn.ReLU(),
            nn.Linear(300, 1)
        )
        self.head1, self.head2, self.head3 = self._create_base_model()

        if freeze_base:
            for param in self.head1.parameters():
                param.requires_grad = False
            for param in self.head2.parameters():
                param.requires_grad = False
            for param in self.head3.parameters():
                param.requires_grad = False

    def forward(self, x, y, z):
        x = self.head1(**x).last_hidden_state.mean(dim=1)
        y = self.head2(**y).last_hidden_state.mean(dim=1)
        z = self.head3(**z).last_hidden_state.mean(dim=1)
        output = cat([x, y, z], dim=1)
        output = self.fc(output)
        return output

    def _create_base_model(self):
        if self.base_model == "distilbert":
            model1 = DistilBertModel.from_pretrained("distilbert-base-uncased")
            model2 = DistilBertModel.from_pretrained("distilbert-base-uncased")
            model3 = DistilBertModel.from_pretrained("distilbert-base-uncased")
            return model1, model2, model3
        else:
            raise ValueError("Model not supported")
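A minimal sketch of how the three tokenized fields flow through JobFakeModel (the sample strings are made up; here the classifier head is randomly initialized, whereas the app loads the trained checkpoint first; the shapes follow from DistilBERT's hidden size of 768):

import torch
from model import JobFakeModel
from data_processing import get_tokens

model = JobFakeModel(base_model="distilbert", freeze_base=True)
model.eval()

des = get_tokens("Software engineer, fully remote")  # one tokenized dict per field
req = get_tokens("5 years of Python experience")
ben = get_tokens("Health insurance and stock options")

with torch.no_grad():
    logit = model(des, req, ben)   # three [1, 768] mean-pooled vectors -> cat -> [1, 2304] -> fc -> [1, 1]
print(torch.sigmoid(logit).item())  # probability that the posting is fake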
requirements.txt
ADDED
@@ -0,0 +1,3 @@
torch==2.4.0
huggingface-hub==0.26.5
numpy==2.2.0