Robzy committed on
Commit
6a29073
·
2 Parent(s): 92fbfc8 c805022

finalizing

Browse files
.github/workflows/scraping.yml CHANGED
@@ -23,7 +23,7 @@ jobs:
23
  - name: Install dependencies
24
  run: |
25
  python -m pip install --upgrade pip
26
- pip install -r requirements.txt
27
 
28
  - name: Run LinkedIn Scraping Script
29
  env:
 
23
  - name: Install dependencies
24
  run: |
25
  python -m pip install --upgrade pip
26
+ pip install -r all-requirements.txt
27
 
28
  - name: Run LinkedIn Scraping Script
29
  env:
.github/workflows/trends.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run trend graph Script
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ schedule:
6
+ # Runs weekly (Sundays at 00:00 UTC); the every-30-minutes schedule below is disabled
7
+ #- cron: '*/30 * * * *'
8
+ - cron: '0 0 * * 0'
9
+
10
+ jobs:
11
+ run-scraper:
12
+ runs-on: ubuntu-latest
13
+
14
+ steps:
15
+ - name: Checkout repository
16
+ uses: actions/checkout@v3
17
+
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v4
20
+ with:
21
+ python-version: '3.11'
22
+
23
+ - name: Install dependencies
24
+ run: |
25
+ python -m pip install --upgrade pip
26
+ pip install -r requirements.txt
27
+
28
+ - name: Run trend graph Script
29
+ env:
30
+ RAPID_API_KEY: ${{ secrets.RAPID_API_KEY }}
31
+ run: |
32
+ python trend_graph.py
33
+ - name: List plots folder
34
+ run: ls -R plots || echo "plots folder not found"
35
+ - name: Commit and Push Changes
36
+ run: |
37
+ git config --global user.name "github-actions[bot]"
38
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
39
+ git add job-postings
40
+ git commit -m "Add plots generated by scrip"
41
+ git push
42
+ env:
43
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44
+
.github/workflows/visualization.yml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run Visualization Script
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ schedule:
6
+ # Runs weekly (Sundays at 00:00 UTC); the every-30-minutes schedule below is disabled
7
+ #- cron: '*/30 * * * *'
8
+ - cron: '0 0 * * 0'
9
+
10
+ jobs:
11
+ run-scraper:
12
+ runs-on: ubuntu-latest
13
+
14
+ steps:
15
+ - name: Checkout repository
16
+ uses: actions/checkout@v3
17
+
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v4
20
+ with:
21
+ python-version: '3.11'
22
+
23
+ - name: Install dependencies
24
+ run: |
25
+ python -m pip install --upgrade pip
26
+ pip install -r requirements.txt
27
+
28
+ - name: Run Visualization Script
29
+ run: |
30
+ python embedding_gen.py
31
+ - name: List plots folder
32
+ run: ls -R plots || echo "plots not found"
33
+ - name: Commit and Push Changes
34
+ run: |
35
+ git config --global user.name "github-actions[bot]"
36
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
37
+ git add plots
38
+ git commit -m "Add plots generated by script"
39
+ git push
40
+ env:
41
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
42
+
README.md CHANGED
@@ -10,7 +10,11 @@ pinned: false
10
  ---
11
 
12
 
13
- # Compilation of in-demand tech skills
 
 
 
 
14
 
15
  # Project outline
16
 
 
10
  ---
11
 
12
 
13
+ # In-demand Skill Monitoring for Machine Learning Industry
14
+
15
+ ## About
16
+
17
+ This project strives to monitor in-demand skills for machine learning roles based in Stockholm, Sweden.
18
 
19
  # Project outline
20
 
embedding_gen.py CHANGED
@@ -87,7 +87,13 @@ def visualize3D(reduced_embeddings, labels, skills, n_clusters, output_folder, d
87
  # fig.show()
88
  return fig
89
 
90
- if __name__ == "__main__":
 
 
 
 
 
 
91
 
92
  # Main execution
93
  base_folder = "./tags"
 
87
  # fig.show()
88
  return fig
89
 
90
+ # Main execution
91
+ base_folder = "./tags"
92
+ output_folder = "./plots"
93
+ specific_date = "03-01-2024" # Example date folder to process
94
+ # Get today's date in the desired format
95
+ # specific_date = datetime.now().strftime("%d-%m-%Y")
96
+ n_clusters = 5
97
 
98
  # Main execution
99
  base_folder = "./tags"
plots/03-01-2024_3D_clustering.html CHANGED
The diff for this file is too large to render. See raw diff
 
plots/03-01-2024_3D_projection.html CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -7,4 +7,8 @@ langchain_openai
7
  python-dotenv
8
  torch
9
  spacy
10
- plotly
 
 
 
 
 
7
  python-dotenv
8
  torch
9
  spacy
10
+ sentence-transformers
11
+ umap-learn
12
+ scikit-learn
13
+ matplotlib
14
+ plotly
trend_graph.py CHANGED
@@ -46,6 +46,7 @@ top_skills = total_counts.nlargest(3).index
46
 
47
  # Step 5: Plot and save separate graphs for the top 3 skills
48
  for skill in top_skills:
 
49
  plt.figure(figsize=(8, 5))
50
  plt.plot(df.index, df[skill], marker="o", label=skill)
51
 
 
46
 
47
  # Step 5: Plot and save separate graphs for the top 3 skills
48
  for skill in top_skills:
49
+ print(f"Trend of {skill} Over Time")
50
  plt.figure(figsize=(8, 5))
51
  plt.plot(df.index, df[skill], marker="o", label=skill)
52