Robzy committed on
Commit
acee5d9
·
1 Parent(s): 762e05d

creating two requirements files

Browse files
Files changed (3) hide show
  1. all-requirements.txt +11 -0
  2. app.py +55 -18
  3. requirements.txt +0 -1
all-requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ gradio
3
+ gradio-client
4
+ httpx
5
+ idna
6
+ langchain_openai
7
+ python-dotenv
8
+ torch
9
+ spacy
10
+ umap-learn
11
+ plotly
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- from embedding_gen import load_skills_from_date, visualize3D
4
  import numpy as np
5
  import pickle
6
 
7
- token_skill_classifier = pipeline(model="jjzha/jobbert_skill_extraction", aggregation_strategy="first")
8
- token_knowledge_classifier = pipeline(model="Robzy/jobbert_knowledge_extraction", aggregation_strategy="first")
 
9
 
10
 
11
  examples = [
@@ -30,19 +31,55 @@ def aggregate_span(results):
30
 
31
  return new_results
32
 
33
- def ner(text):
34
 
35
 
36
- output_knowledge = token_knowledge_classifier(text)
37
- for result in output_knowledge:
38
- if result.get("entity_group"):
39
- result["entity"] = "Knowledge"
40
- del result["entity_group"]
41
 
42
- if len(output_knowledge) > 0:
43
- output_knowledge = aggregate_span(output_knowledge)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- return {"text": text, "entities": output_knowledge}
46
 
47
 
48
  import plotly.express as px
@@ -61,12 +98,12 @@ fig.update_layout(
61
 
62
  with gr.Blocks() as demo:
63
 
64
- gr.Interface(fn=ner,
65
- inputs=gr.Textbox(placeholder="Enter sentence here..."),
66
- outputs=["highlight"],
67
- examples=examples,
68
- title="In-demand skills in machine learning (ML) industry"
69
- )
70
 
71
  # gr.Markdown("Embedding visualisation of sought skills in ML job posting in Stockholm, Sweden on LinkedIn")
72
  gr.Plot(fig)
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ # from embedding_gen import load_skills_from_date, visualize3D
4
  import numpy as np
5
  import pickle
6
 
7
+ # token_skill_classifier = pipeline(model="jjzha/jobbert_skill_extraction", aggregation_strategy="first")
8
+ # token_knowledge_classifier = pipeline(model="jjzha/jobbert_knowledge_extraction")
9
+ # token_knowledge_classifier = pipeline(model="Robzy/jobbert_knowledge_extraction")
10
 
11
 
12
  examples = [
 
31
 
32
  return new_results
33
 
34
+ # def ner(text):
35
 
36
 
37
+ # output_knowledge = token_knowledge_classifier(text)
38
+ # for result in output_knowledge:
39
+ # if result.get("entity_group"):
40
+ # result["entity"] = "Knowledge"
41
+ # del result["entity_group"]
42
 
43
+ # if len(output_knowledge) > 0:
44
+ # output_knowledge = aggregate_span(output_knowledge)
45
+
46
+ # return {"text": text, "entities": output_knowledge}
47
+
48
+ ### Visualisation 3D
49
+
50
+ import os
51
+
52
def load_skills_from_date(base_folder, date):
    """Collect the unique skills listed in the ``.txt`` files for one date.

    Looks in ``<base_folder>/<date>`` and reads every regular file whose name
    ends with ``.txt``, treating each non-blank line (stripped of surrounding
    whitespace) as one skill.

    Args:
        base_folder: Directory that contains one sub-folder per date.
        date: Name of the sub-folder to read.

    Returns:
        A sorted list of the distinct skills found. The list is empty when
        the date folder does not exist or holds no matching files.
    """
    date_folder = os.path.join(base_folder, date)
    # Best-effort: a missing date folder yields no skills, not an error.
    if not os.path.isdir(date_folder):
        return []

    all_skills = set()  # de-duplicates skills repeated across files
    for file_name in os.listdir(date_folder):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(date_folder, file_name)
        # A directory that happens to be named "*.txt" cannot be opened.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, "r", encoding="utf-8") as f:
            all_skills.update(line.strip() for line in f if line.strip())

    # Sorting makes the order stable; iterating a set of strings is not.
    return sorted(all_skills)
62
+
63
def visualize3D(reduced_embeddings, labels, skills, n_clusters, output_folder, date):
    """Build a 3D scatter figure of the reduced skill embeddings.

    Args:
        reduced_embeddings: 2-D indexable array; columns 0, 1 and 2 are used
            as the x, y and z coordinates.
        labels: Passed to the plot as the ``color`` of each point.
        skills: Passed to the plot as the ``text`` of each point.
        n_clusters: Number of clusters, shown in the plot title.
        output_folder: Accepted for interface compatibility; not used here,
            the figure is returned rather than written to disk.
        date: Date string, shown in the plot title.

    Returns:
        The figure created by ``px.scatter_3d``.
    """
    x_coords = reduced_embeddings[:, 0]
    y_coords = reduced_embeddings[:, 1]
    z_coords = reduced_embeddings[:, 2]
    plot_title = f"KMeans Clustering with {n_clusters} Clusters ({date})"

    return px.scatter_3d(
        x=x_coords,
        y=y_coords,
        z=z_coords,
        color=labels,
        text=skills,
        title=plot_title,
    )
82
 
 
83
 
84
 
85
  import plotly.express as px
 
98
 
99
  with gr.Blocks() as demo:
100
 
101
+ # gr.Interface(fn=ner,
102
+ # inputs=gr.Textbox(placeholder="Enter sentence here..."),
103
+ # outputs=["highlight"],
104
+ # examples=examples,
105
+ # title="In-demand skills in machine learning (ML) industry"
106
+ # )
107
 
108
  # gr.Markdown("Embedding visualisation of sought skills in ML job posting in Stockholm, Sweden on LinkedIn")
109
  gr.Plot(fig)
requirements.txt CHANGED
@@ -7,5 +7,4 @@ langchain_openai
7
  python-dotenv
8
  torch
9
  spacy
10
- umap-learn
11
  plotly
 
7
  python-dotenv
8
  torch
9
  spacy
 
10
  plotly