Spaces:
Sleeping
Sleeping
finalizing
Browse files- .github/workflows/scraping.yml +1 -1
- .github/workflows/trends.yml +44 -0
- .github/workflows/visualization.yml +42 -0
- README.md +5 -1
- embedding_gen.py +7 -1
- plots/03-01-2024_3D_clustering.html +0 -0
- plots/03-01-2024_3D_projection.html +0 -0
- requirements.txt +5 -1
- trend_graph.py +1 -0
.github/workflows/scraping.yml
CHANGED
@@ -23,7 +23,7 @@ jobs:
|
|
23 |
- name: Install dependencies
|
24 |
run: |
|
25 |
python -m pip install --upgrade pip
|
26 |
-
pip install -r requirements.txt
|
27 |
|
28 |
- name: Run LinkedIn Scraping Script
|
29 |
env:
|
|
|
23 |
- name: Install dependencies
|
24 |
run: |
|
25 |
python -m pip install --upgrade pip
|
26 |
+
pip install -r all-requirements.txt
|
27 |
|
28 |
- name: Run LinkedIn Scraping Script
|
29 |
env:
|
.github/workflows/trends.yml
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Run trend graph Script
|
2 |
+
|
3 |
+
on:
|
4 |
+
workflow_dispatch:
|
5 |
+
schedule:
|
6 |
+
# Run weekly on Sundays at 00:00 UTC (the every-30-minutes schedule below is disabled)
|
7 |
+
#- cron: '*/30 * * * *'
|
8 |
+
- cron: '0 0 * * 0'
|
9 |
+
|
10 |
+
jobs:
|
11 |
+
run-scraper:
|
12 |
+
runs-on: ubuntu-latest
|
13 |
+
|
14 |
+
steps:
|
15 |
+
- name: Checkout repository
|
16 |
+
uses: actions/checkout@v3
|
17 |
+
|
18 |
+
- name: Set up Python
|
19 |
+
uses: actions/setup-python@v4
|
20 |
+
with:
|
21 |
+
python-version: '3.11'
|
22 |
+
|
23 |
+
- name: Install dependencies
|
24 |
+
run: |
|
25 |
+
python -m pip install --upgrade pip
|
26 |
+
pip install -r requirements.txt
|
27 |
+
|
28 |
+
- name: Run trend graph Script
|
29 |
+
env:
|
30 |
+
RAPID_API_KEY: ${{ secrets.RAPID_API_KEY }}
|
31 |
+
run: |
|
32 |
+
python trend_graph.py
|
33 |
+
- name: List plots folder
|
34 |
+
run: ls -R plots || echo "plots folder not found"
|
35 |
+
- name: Commit and Push Changes
|
36 |
+
run: |
|
37 |
+
git config --global user.name "github-actions[bot]"
|
38 |
+
git config --global user.email "github-actions[bot]@users.noreply.github.com"
|
39 |
+
git add job-postings
|
40 |
+
git commit -m "Add plots generated by scrip"
|
41 |
+
git push
|
42 |
+
env:
|
43 |
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
44 |
+
|
.github/workflows/visualization.yml
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Run Visualization Script
|
2 |
+
|
3 |
+
on:
|
4 |
+
workflow_dispatch:
|
5 |
+
schedule:
|
6 |
+
# Run weekly on Sundays at 00:00 UTC (the every-30-minutes schedule below is disabled)
|
7 |
+
#- cron: '*/30 * * * *'
|
8 |
+
- cron: '0 0 * * 0'
|
9 |
+
|
10 |
+
jobs:
|
11 |
+
run-scraper:
|
12 |
+
runs-on: ubuntu-latest
|
13 |
+
|
14 |
+
steps:
|
15 |
+
- name: Checkout repository
|
16 |
+
uses: actions/checkout@v3
|
17 |
+
|
18 |
+
- name: Set up Python
|
19 |
+
uses: actions/setup-python@v4
|
20 |
+
with:
|
21 |
+
python-version: '3.11'
|
22 |
+
|
23 |
+
- name: Install dependencies
|
24 |
+
run: |
|
25 |
+
python -m pip install --upgrade pip
|
26 |
+
pip install -r requirements.txt
|
27 |
+
|
28 |
+
- name: Run Visualization Script
|
29 |
+
run: |
|
30 |
+
python embedding_gen.py
|
31 |
+
- name: List plots folder
|
32 |
+
run: ls -R plots || echo "plots not found"
|
33 |
+
- name: Commit and Push Changes
|
34 |
+
run: |
|
35 |
+
git config --global user.name "github-actions[bot]"
|
36 |
+
git config --global user.email "github-actions[bot]@users.noreply.github.com"
|
37 |
+
git add plots
|
38 |
+
git commit -m "Add plots generated by script"
|
39 |
+
git push
|
40 |
+
env:
|
41 |
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
42 |
+
|
README.md
CHANGED
@@ -10,7 +10,11 @@ pinned: false
|
|
10 |
---
|
11 |
|
12 |
|
13 |
-
#
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Project outline
|
16 |
|
|
|
10 |
---
|
11 |
|
12 |
|
13 |
+
# In-demand Skill Monitoring for Machine Learning Industry
|
14 |
+
|
15 |
+
## About
|
16 |
+
|
17 |
+
This project strives to monitor in-demand skills for machine learning roles based in Stockholm, Sweden.
|
18 |
|
19 |
# Project outline
|
20 |
|
embedding_gen.py
CHANGED
@@ -87,7 +87,13 @@ def visualize3D(reduced_embeddings, labels, skills, n_clusters, output_folder, d
|
|
87 |
# fig.show()
|
88 |
return fig
|
89 |
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
# Main execution
|
93 |
base_folder = "./tags"
|
|
|
87 |
# fig.show()
|
88 |
return fig
|
89 |
|
90 |
+
# Main execution
|
91 |
+
base_folder = "./tags"
|
92 |
+
output_folder = "./plots"
|
93 |
+
specific_date = "03-01-2024" # Example date folder to process
|
94 |
+
# Get today's date in the desired format
|
95 |
+
# specific_date = datetime.now().strftime("%d-%m-%Y")
|
96 |
+
n_clusters = 5
|
97 |
|
98 |
# Main execution
|
99 |
base_folder = "./tags"
|
plots/03-01-2024_3D_clustering.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
plots/03-01-2024_3D_projection.html
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -7,4 +7,8 @@ langchain_openai
|
|
7 |
python-dotenv
|
8 |
torch
|
9 |
spacy
|
10 |
-
|
|
|
|
|
|
|
|
|
|
7 |
python-dotenv
|
8 |
torch
|
9 |
spacy
|
10 |
+
sentence-transformers
|
11 |
+
umap-learn
|
12 |
+
scikit-learn
|
13 |
+
matplotlib
|
14 |
+
plotly
|
trend_graph.py
CHANGED
@@ -46,6 +46,7 @@ top_skills = total_counts.nlargest(3).index
|
|
46 |
|
47 |
# Step 5: Plot and save separate graphs for the top 3 skills
|
48 |
for skill in top_skills:
|
|
|
49 |
plt.figure(figsize=(8, 5))
|
50 |
plt.plot(df.index, df[skill], marker="o", label=skill)
|
51 |
|
|
|
46 |
|
47 |
# Step 5: Plot and save separate graphs for the top 3 skills
|
48 |
for skill in top_skills:
|
49 |
+
print(f"Trend of {skill} Over Time")
|
50 |
plt.figure(figsize=(8, 5))
|
51 |
plt.plot(df.index, df[skill], marker="o", label=skill)
|
52 |
|