Robzy committed on
Commit
0049d2e
·
1 Parent(s): 6944ca9
debug.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ import re
3
+
4
+ nlp = spacy.load("en_core_web_sm")
5
+
6
def split_text_recursively(text):
    """Split *text* into its lines, using ``'\\n'`` as the only separator.

    The name is kept for backward compatibility, but the implementation is
    no longer recursive: the previous version recursed once per newline and
    therefore raised ``RecursionError`` on inputs with roughly a thousand or
    more lines.

    Args:
        text: The string to split.

    Returns:
        A list of the newline-separated pieces, in order. Empty pieces are
        preserved, and a string without any newline yields ``[text]``.
    """
    # str.split with an explicit separator yields exactly the pieces the
    # recursive head/tail split produced, including empty ones.
    return text.split('\n')
11
+
12
def parse_post(path):
    """Read a job posting from *path* and split it into sentences.

    The file is split into lines, blank lines are dropped, and each remaining
    line is segmented into sentences with the module-level spaCy pipeline
    ``nlp``. Every sentence is printed as it is found.

    Args:
        path: Location of the plain-text job posting.

    Returns:
        A list with the text of every sentence, in document order.
    """
    # Read the file. The encoding is explicit because the postings contain
    # non-ASCII characters and the platform default is not always UTF-8.
    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()

    # Sentence tokenization: one spaCy pass per non-empty, stripped line.
    sents = []
    for raw_line in split_text_recursively(text):
        line = raw_line.strip()
        if not line:
            continue
        for sent in nlp(line).sents:
            print(sent.text)
            sents.append(sent.text)

    return sents
33
+
34
+ # Skill/knowledge extraction
35
+
36
+
37
+
38
+
39
if __name__ == "__main__":
    # Run the demo only when executed as a script, so that importing this
    # module does not read a posting as a side effect.
    path = './job-postings/03-01-2024/2.txt'
    parse_post(path)
debug2.py ADDED
@@ -0,0 +1 @@
 
 
1
+ deb
job-postings/03-01-2024/1.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ About the job
2
+
3
+ Grow with us
4
+
5
+ About This Opportunity
6
+
7
+ Ericsson is a world-leading provider of telecommunications equipment and services to mobile and fixed network operators. Over 1,000 networks in more than 180 countries use Ericsson equipment, and more than 40 percent of the world's mobile traffic passes through Ericsson networks. Using innovation to empower people, business and society, Ericsson is working towards the Networked Society: a world connected in real time that will open opportunities to create freedom, transform society and drive solutions to some of our planet’s greatest challenges.
8
+
9
+ Ericsson's 6G vision, first introduced in 2020, remains pivotal for transforming business and society in the 2030s through secure, efficient, and sustainable communication services. As 6G development progresses into a more concrete phase of regulation and standardization we are looking for researchers that would like to join us, co-creating a cyber-physical world
10
+
11
+ Within Ericsson, Ericsson Research develops new communication solutions and standards which have made Ericsson the industry leader in defining five generations of mobile communication. As we gear up for the 6th generation, we would like to fully embrace and utilize cloud native principles, hyperscalers and internal cloud infrastructure in our research. We are now looking for a MLOps research engineer to develop and support our workflows.
12
+
13
+ In this role, you will
14
+
15
+ Contribute to the direction and implementation of ML-based ways of working
16
+ Study, design and develop workflows and solutions for AI based R&D
17
+ Work across internal compute and external cloud platforms
18
+ Working closely with researchers driving 6G standardization
19
+
20
+ Join our Team
21
+
22
+ Qualifications
23
+
24
+ MSc in Data Science or related field, or have equivalent practical experience
25
+ Technical skills and/or professional experience, particularly in:
26
+ Programming in various languages (Python, Go, etc)
27
+ MLOps technologies and tooling (e.g. MLFlow, Kubeflow)
28
+ Dispatching and computational Python packages (Hydra, numpy, TensorFlow, etc.)
29
+ DevOps and CI/CD experience, runner deployment & management, pipeline creation, testing etc. for validating ML-driven code
30
+ Familiarity in the following is a plus:
31
+ ML frameworks (PyTorch, TensorFlow, or Jax)
32
+ Containers technologies (engines, orchestration tools and frameworks such as Docker, Kaniko, Kubernetes, Helm, etc.)
33
+ Cloud ecosystems along with the respective infrastructure, in particular AWS
34
+ Infrastructure management (Ansible, Terraform, etc.)
35
+ Team skills is a necessity. Daily cross-functional collaboration and interaction with other skilled researchers are the basis for our ways of working.
36
+ You should enjoy working with people having diverse backgrounds and competences.
37
+ It is important that you have strong personal drive and a strong focus on the tasks at hand.
38
+ Ability to translate high-level objectives into detailed tasks and actionable steps.
39
+
40
+ Location: Luleå, Sweden
job-postings/03-01-2024/2.txt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ About the job
3
+
4
+ Share this job
5
+
6
+
7
+
8
+ For our research & development team at Transformers we are now looking for an R&D Engineer with expertise in the area of artificial intelligence and Automation. You will join our team to develop smart solutions for future generations of power transformers and transformer components. Your focus will be technical contribution towards data analysis, image recognition, automation solution, investigation, and coordination of improvements within Transformers. You will also have ability to work with cross-competence topics and contribute to development of new and innovative solutions in different areas.
9
+
10
+ You will work in dynamic, motivated, and creative team with a wide range of experience and competence. Through your research projects you will become part of global community that will shape and help you to develop your career into a technical or managerial leader!
11
+
12
+ Your Responsibilities
13
+
14
+ Run, within the Artificial Intelligence field, feasibility studies, propose, and develop new technical concepts, design, and implement new solutions.
15
+ Integrating AI projects within our production leading the way towards the industry 4.0 revolution.
16
+ Organize prototyping and testing activities to validate new technologies/algorithms.
17
+ Provide technical support to factories in the field of operational excellence and improving the processes as well as data analysis of different stages of the production.
18
+ Have global collaboration with scientists of Hitachi energy’s research teams and technology centers, technology managers and engineers from the various factories.
19
+ Be an active member in further strengthening the collaboration with universities and research institutes.
20
+
21
+ Your background
22
+
23
+ You have experience working in the field of Data Science and a degree in software engineering or equivalent.
24
+ You also have experience in integration of a system with different existing hardware as well as building new setups from scratch. Experience with Artificial Intelligence Vision Systems is a plus.
25
+ You have sound analytical skills and have ambition towards applying new and innovative solutions to improve the current solutions.
26
+ You are a team player who enjoys collaborating with others to achieve great results.
27
+ Preferably you also have experience in, or ability to develop, project management and technical leadership skills.
28
+ You have good communication skills and are fluent in English, both written and spoken
29
+
30
+ What We Offer
31
+
32
+ Collective agreement
33
+ Flexible working time
34
+ Health care and wellness allowance
35
+ Fantastic career possibilities within Hitachi Energy both within Sweden and globally
36
+ Mentor to support you throughout onboard phase
37
+ Various trainings and education supporting employee development
38
+ Diversified company with over 70+ nationalities working in Sweden
39
+ Supplementary compensation for parental leave
40
+ Employee Benefit Portal with thousands of discounts and perks
41
+ More benefits could be connected to this specific role
42
+
43
+ Additional Information
44
+
45
+ Are you ready for an exciting new challenge? Does the above description sound like you? Applications will be reviewed on an ongoing basis, so don’t delay – apply today!
46
+
47
+ Recruiting Manager Nima Sadr-Momtazi, [email protected], will answer your questions about the position.  Union representatives - Sveriges Ingenjörer: Håkan Blomquist, +46 107-38 31 52; Unionen: Tomas L. Gustafsson, +46 107-38 27 47; Ledarna: Frank Hollstedt, +46 107-38 70 43. All other questions can be directed to Recruiter Gustaf Hedström, [email protected].
48
+
49
+ Apply now
50
+
51
+ Location Ludvika, Dalarna County, Sweden Job type Full time Experience Experienced Job function Engineering & Science Contract Regular Publication date 2024-10-01 Reference number R0052858
job-postings/03-01-2024/3.txt ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ About the job
2
+
3
+ Tembi is looking for two senior python developers to help us develop our data collection engine and web scraping tech. Work with SQL, cloud infrastructure, APIs and large volumes of data. We have just secured a €3 million funding round having full focus on taking our Market Intelligence Platform to the next level.
4
+
5
+
6
+ Tembi's Market Intelligence platform connects data from multiple industries and uses advanced analytics to create tailored market insights and predictions for various industries. From relocation predictions to i.e. company growth projections, Tembi provides users with actionable market analyses to discover new growth and sales opportunities. With Tembi, B2B companies gain a better overview and understanding of their market's development. They use the data and insights to make more informed strategic decisions, and execute on them with Tembi's lead generation solution.
7
+
8
+
9
+ Our platform is developed by an internal team that possesses all the competencies involved in building data-driven SaaS web products. We have data engineers, data scientists, data analysts, UI/UX designers, front end developers and agile coaches. We are committed to our company values to innovate, collaborate and support each other. We celebrate our wins and take every opportunity there is to improve and to learn new things. We believe in agile ways of working and apply a start-up mentality in everything we do.
10
+
11
+
12
+ Location: The role is located in Copenhagen, Denmark. Our office is located 5 min walk from Nørreport station.
13
+
14
+
15
+ Who are you?
16
+
17
+ We are looking for a highly skilled senior backend developer with experience in Python along with infra and database experience. We utilize large-scale scraping technologies built in Python, hosted and being run on containerized environments in the cloud. Data is processed and served to our frontend via APIs written in Typescript running on a node server.
18
+
19
+
20
+ You will be a key-player in development and optimization of our scraping technologies, making sure they run smoothly, and data is served to our frontend and the customers. You will work very closely with the rest of the team in optimizing data for both analytics & our SaaS application platform.
21
+
22
+
23
+ On a day-to-day basis you will be a key player in our agile development process, and part of sprint planning, team retrospectives and setting the right product vision and road map. We want to take new ideas to our customers quickly and measure the outcome to make sure we create the right value. Your input on delivering fast while safeguarding quality will be a priority and how we will evaluate your success.
24
+
25
+
26
+ On a personal side, leadership skills, growth mindset and a get-things-done mentality is something we value a lot. Things move fast and we make adoptions all the time in favor of reaching our goals. There will be many opportunities for the right person to grow, try out new things and be part of a great team with a unicorn vision.
27
+
28
+
29
+ Qualifications:
30
+
31
+ At least 5+ years within backend development, with great experience in python
32
+ Experience with SQL and NoSQL databases
33
+ Experience with web scraping
34
+ Experience with API development
35
+ Bonus: Experience working with containerization and running computations in cloud environments. We mostly use Azure
36
+ Bonus: Experience with data processing in Databricks
37
+ Bonus: Previous work from start-up companies
38
+
39
+
40
+ What we offer:
41
+
42
+ A highly entrepreneurial environment with flat hierarchies and short decision paths.
43
+ We are 25 colleagues today, have clients in fourteen markets, and have an ambition to build a fast-growing organization. You’ll be part of the ride from nearly the start and learn how to build a software company from the ground up.
44
+ Outstanding engineering and product team, shipping high-quality product incredibly fast.
45
+ Salary and packages are tailored individually based on experience and fit for the team.
46
+ An office full of talented, mission-driven people.
47
+ A position with direct feedback from customers.
48
+ A strong, collaborative work environment with a history of successful startups.
49
+ Enjoyable lunches at our Cantina and a multicultural team that communicates in English.
50
+ Office stocked with free snacks, soda, and coffee
51
+ Located at Fiolstræde, centrally in Copenhagen near Nørreport. (25 min from Malmö)
52
+ Diverse team with members from (Denmark, Sweden, Chile, Iran, Spain, Ukraine, and India)
53
+
54
+
55
+ Application process:
56
+
57
+ The application process begins with a review by our team. Following this, candidates will undergo interviews conducted via voice or video calls. Successful candidates will then be invited for an in-person interview where they will have the opportunity to meet the team to ensure a good fit. During this process there may be a technical test and TT38 personality test. We care about finding the right person for the job with best fit for the team. The final stage includes contract negotiations. The entire hiring process is expected to take anywhere from a few days to a couple of weeks.
58
+
59
+
60
+ For any questions related to this role please contact our CTO, Joakim Erlandsson. +46 705 53 53 95
job-postings/04-01-2024/1.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ About the job
3
+
4
+ Defence projects, unlocked.
5
+
6
+
7
+ At Defence.Works, we match you with the industry's top R&D projects and help you deliver impact for a safer tomorrow.
8
+
9
+
10
+ With Defence.Works, you will get access to interesting projects without consuming time on sales and project hunting. Our platform allows us to keep our organization structure thin, which means less internal costs, more value to customers, and higher rates for you.
11
+
12
+
13
+ As an Artificial Intelligence Engineer you will join our trusted network to work on cutting-edge R&D projects in the defense sector, contributing to innovative solutions that enhance safety and security for a safer tomorrow. You will bring deep expertise in machine learning and artificial intelligence, ideally with prior experience in developing autonomous products, radar systems, or applications that heavily leverage AI capabilities. The ideal candidate will have a proven track record of building secure, high-performing solutions designed to meet the defense industry's rigorous standards.
14
+
15
+
16
+ Your presence can be in Northern Europe (Finland, Sweden, Norway, Denmark), United Kingdom, or Germany and you are eligible to pass security-clearance before starting on a project. Please notice that consultants might need to pass security clearance before starting on a project.
17
+
18
+
19
+ Responsibilities:
20
+
21
+ Design, implement, and optimize AI models and algorithms for defense sector applications, including autonomous systems, radars, and AI-driven decision-making tools.
22
+ Develop secure, high-performance software solutions tailored to meet the stringent requirements of defense industry projects.
23
+ Identify and address vulnerabilities in AI systems, ensuring robust cybersecurity measures are integrated throughout the development lifecycle.
24
+ Collaborate with cross-functional teams to align AI capabilities with overall system architectures, ensuring seamless integration and performance.
25
+ Conduct rigorous testing, validation, and documentation of AI solutions to ensure reliability, scalability, and compliance with industry standards.
26
+
27
+
28
+ Qualifications:
29
+
30
+ Master's or Ph.D. in Computer Science, Machine Learning, or related field
31
+ Strong skills in Pattern Recognition, Neural Networks, and Algorithms.
32
+ Experience from developing products which rely on AI/ML capabilities such as autonomous systems, radar technologies, or other AI-reliant defense applications.
33
+ Expertise in machine learning frameworks (e.g., TensorFlow, PyTorch) and programming languages such as Python, C++, or Java.
34
+ Familiarity with secure system design, cybersecurity principles, and compliance with defense-specific regulations.
35
+
36
+
37
+ Additional Requirements:
38
+
39
+ Security certifications or training (e.g., CISSP, CEH) are highly desirable.
40
+
41
+
42
+ Next steps:
43
+
44
+ Press "Apply"
45
+ Submit your resume on Hashlist platform (takes 5 minutes). Hashlist is our parental company which infrastructure we are utilizing to manage the profiles
46
+ We will review your profile and if your profile matches our criteria
47
+ We call you and make sure that your expectations and career ambitions are aligned with our offer.
48
+ You will get accepted to our trusted network of partners and our clients and receive offers from different clients who are looking for your expertise
49
+
job-postings/04-01-2024/2.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ About the job
3
+
4
+ Job Description
5
+
6
+ We are on the journey to transform our digital capabilities, bringing core business processes, people, data & technology together - an enabler for IKEA to become an even better home furnishing retailer in the future. A journey that needs passionate people who embrace change, dare to question and want to make a difference. If that sounds like you, come and join us. Together we can do great things!
7
+
8
+ We're on the lookout for passionate doers, idea makers, creative thinkers - people who want to help IKEA find answers to everyday problems in people's lives using Spatial Computing. Do you want to be part of making it happen? Then keep on reading.
9
+
10
+ The Team
11
+
12
+ The Spatial Computing Team drives the digital innovation work in the spatial computing area for all IKEA companies. We are looking for a Research Engineer with knowledge of Spatial Computing / XR Development. In this position you will use your technical expertise to find, explore, evaluate and transfer innovation insights. You are someone with an innovative mind and lateral thinking. Someone who thinks virtual worlds are cool but helping real people is even better. You know why robots and pie go well together and why point clouds on a sunny day don't matter at all.
13
+
14
+ Your Main Responsibilities Will Be To
15
+
16
+ Explore and evaluate new technology and its possibility to improve IKEA customer and co-worker experience
17
+ Develop real-time 3D applications to serve as inspiring proof of concepts
18
+ Collaborate with external experts and innovators in explorations of technical solutions
19
+ Collect and share expertise with IKEA stakeholders
20
+
21
+ About You
22
+
23
+ The person we're looking for is someone passionate about the future of 3D graphics. A person who wants to use game engine for more than just for games. Someone who likes the idea of conceptualizing the utopian future of the digital - human interfaces, to interact with people in a more natural way than ever before. Someone who thinks Swedish meatballs have the potential to taste even better in mixed reality. We're looking for a person who can break down high-level concepts like these and explore them singlehandedly or as part of a team.
24
+
25
+ To be successful in this role, the following knowledge, skills and experiences would be valuable:
26
+
27
+ Background in real-time 3D graphics with deep interest in the future of Real Time Graphics (VR/MR/AR).
28
+ Understanding of graphics pipelines related to real-time 3D environments.
29
+ Passion for how sound and audio design and haptics can be used to elevate immersive experiences
30
+ Experience with Unreal and/or Unity and/or native IOS/Android 3D development and/or Web based 3D engines
31
+ Experience with mobile application development and deployment.
32
+ Programming skills building applications communicating with back-ends and building applications of interactive worlds using game engines and 3D Graphics.
33
+ Good knowledge of at least 2 different core programming languages such as C,C#, Python, C++ or JavaScript
34
+ You'll have a passion for sharing the knowledge you've acquired, with the ability to communicate with both technical and non-technical stakeholders.
35
+ Ability to formulate new ideas surrounding technological innovations.
36
+ Ability to discuss problems with program code examples.
37
+ Strong collaboration skills, with experience developing solutions alongside other teammates.
38
+
39
+ Additional Information
40
+
41
+ This role is full-time (40 hours per week) and based in Älmhult. This role sits in the Range Operations and reports to Innovation Manager. If you would like to know more about the role, please connect with [email protected] or if you have questions about the recruitment process, please connect with [email protected]
42
+
43
+ At IKEA, we are looking for people who believe everyone deserves a seat at the table. You’re welcome no matter where you come from, what you believe, and what you look like. We don’t even care how you have furnished your home. We’re interested in you simply because you’re you. Even if your experience doesn’t align perfectly with every qualification in the job description, we encourage you to apply anyway. We believe that people’s different perspectives, backgrounds, and personalities make us better at understanding our customers dreams and needs. At IKEA, we’re all on the same project.
44
+
45
+ If you have a special need that requires accommodation in the recruitment process, just let us know.
46
+
47
+ Please note that due to the upcoming holiday season in December, our recruitment process may take a bit longer than usual. We will do our best to keep all candidates updated on their status before any breaks. Thank you for your patience during this time!
48
+
49
+ Interested? Submit your CV and let us know why you would be a good fit for this role, in English, by 7th of January 2025.
job-postings/04-01-2024/3.txt ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ About the job
3
+
4
+ Rillion is seeking a skilled AI Engineer to join our innovative Data/AI team. In this role, you’ll be pivotal in developing machine learning models that drive AI-powered AP (Accounts Payable) automation and enhance our products. Our team consists of data engineers, an AI/ML lead, a data lead, a UX designer, a product manager, and software engineers. Together, we collaborate across the entire product lifecycle—from brainstorming and design to implementation—unlocking AI’s potential in AP automation. If you’re an experienced AI/ML developer who thrives in a dynamic environment, we’d love to have you on board!
5
+
6
+
7
+ Responsibilities:
8
+
9
+ The AI Engineer plays a key role in developing and implementing AI technologies into our SaaS products, with a focus on technical execution rather than leadership. This role is critical in building, integrating, and optimizing AI/ML solutions to meet product goals.
10
+
11
+
12
+ Develop and implement scalable AI/ML models that support product objectives.
13
+ Collaborate closely with product, engineering, and data teams to integrate AI features into our products.
14
+ Stay up-to-date with emerging AI technologies and contribute to experimentation and innovation.
15
+ Build and maintain effective AI/ML pipelines and deployment infrastructure.
16
+
17
+
18
+ What You'll Bring:
19
+
20
+ 2+ years hands-on experience with putting self-developed machine learning solutions into a production environment.
21
+ High proficiency in Python and AI/ML frameworks, i.e. Pytorch.
22
+ Familiarity with working with containers in a cloud environment like Azure, AWS, and/or GCP, with experience scaling AI solutions in production.
23
+ A curious mindset with strong collaboration skills who is comfortable in environments without clear-cut processes.
24
+ Master's degree in engineering or similar.
25
+
26
+
27
+ Bonus skills:
28
+
29
+ Cloud Ops and IaC tools such as Terraform.
30
+ MLOps best practices and tools like Databricks.
31
+ Knowledge of working with visually rich documents (VRDs) and generative AI.
32
+ Experience working with RAG, LLM evaluation, API-driven microservices, cache management and production-level software.
33
+
34
+
35
+ What we offer:
36
+
37
+ Opportunity to work in a dynamic growth company
38
+ Talented colleagues ready to support the success in your career path
39
+ Social events with your colleagues (breakfast, candy-time, afterwork etc.)
40
+ A collection of different benefits, including a generous pension and insurance package
41
+ Hybrid working model, 2 days per week in the office
42
+ Come and enjoy our beautiful office in central Stockholm (on the 14th floor, with amazing views)
43
+
44
+
45
+ The recruitment process:
46
+
47
+ We review applications and invite for interviews continuously. A background check will be conducted on final candidates, pre-employment. If you have any questions regarding the role, please contact recruiter Victoria Stjernström at [email protected]
48
+
49
+
50
+ About Rillion:
51
+
52
+ We are a global company founded in Sweden with 30 years’ experience in the AP Automation industry. We help finance professionals transform how they manage invoices by digitalizing and automating the entire process.
53
+
54
+
55
+ By removing the manual steps of invoice handling, we enable finance teams to save time and effort, reducing the possibility of human error. Because we’re AP professionals ourselves, we understand how to give our customers everything they need, and nothing they don’t.
56
+
57
+
58
+ Together with our owners at Altor, we´re on a journey to expand in our home markets, while entering new territories. To complete our mission, we need more talented people!
59
+
60
+
61
+ Rillion is an equal opportunity employer. We believe that diversity is integral to our success, and do not discriminate based on race, color, religion, age, or any other basis protected by law.
requirements.txt CHANGED
@@ -5,4 +5,5 @@ httpx
5
  idna
6
  langchain_openai
7
  python-dotenv
8
- torch @ https://download.pytorch.org/whl/cpu
 
 
5
  idna
6
  langchain_openai
7
  python-dotenv
8
+ torch
9
+ spacy
tag-posting.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ import re
3
+
4
+ nlp = spacy.load("en_core_web_sm")
5
+
6
def split_text_recursively(text):
    """Split *text* into its lines, using ``'\\n'`` as the only separator.

    The name is kept for backward compatibility, but the implementation is
    no longer recursive: the previous version recursed once per newline and
    therefore raised ``RecursionError`` on inputs with roughly a thousand or
    more lines.

    Args:
        text: The string to split.

    Returns:
        A list of the newline-separated pieces, in order. Empty pieces are
        preserved, and a string without any newline yields ``[text]``.
    """
    # str.split with an explicit separator yields exactly the pieces the
    # recursive head/tail split produced, including empty ones.
    return text.split('\n')
11
+
12
def parse_post(path):
    """Read a job posting from *path* and split it into sentences.

    The file is split into lines, blank lines are dropped, and each remaining
    line is segmented into sentences with the module-level spaCy pipeline
    ``nlp``. Every sentence is printed as it is found.

    Args:
        path: Location of the plain-text job posting.

    Returns:
        A list with the text of every sentence, in document order.
    """
    # Read the file. The encoding is explicit because the postings contain
    # non-ASCII characters and the platform default is not always UTF-8.
    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()

    # Sentence tokenization: one spaCy pass per non-empty, stripped line.
    sents = []
    for raw_line in split_text_recursively(text):
        line = raw_line.strip()
        if not line:
            continue
        for sent in nlp(line).sents:
            print(sent.text)
            sents.append(sent.text)

    return sents
33
+
34
+ # Skill/knowledge extraction
35
+
36
+
37
+
38
+
39
if __name__ == "__main__":
    # Run the demo only when executed as a script, so that importing this
    # module does not read a posting as a side effect.
    path = './job-postings/03-01-2024/2.txt'
    parse_post(path)
tagging.py CHANGED
@@ -13,8 +13,44 @@ from transformers import AutoTokenizer, AutoModelForTokenClassification
13
  import torch
14
  import sys
15
  from tabulate import tabulate
 
 
 
16
  load_dotenv(".env")
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  ### LLM-based tag extraction with few-shot learning
20
 
@@ -88,10 +124,63 @@ def convert(text):
88
 
89
  skill_cls = [mapping[i.item()] for i in skill_cls]
90
  knowledge_cls = [mapping[i.item()] for i in knowledge_cls]
91
- return skill_cls, knowledge_cls
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
 
94
  if __name__ == "__main__":
 
 
 
 
 
95
  text = input('Enter text: ')
96
 
97
  # LLM-based tag extraction
 
13
  import torch
14
  import sys
15
  from tabulate import tabulate
16
+ import spacy
17
+ import re
18
+
19
  load_dotenv(".env")
20
 
21
+ nlp = spacy.load("en_core_web_sm")
22
+
23
def split_text_recursively(text):
    """Split *text* into its lines, using ``'\\n'`` as the only separator.

    The name is kept for backward compatibility, but the implementation is
    no longer recursive: the previous version recursed once per newline and
    therefore raised ``RecursionError`` on inputs with roughly a thousand or
    more lines.

    Args:
        text: The string to split.

    Returns:
        A list of the newline-separated pieces, in order. Empty pieces are
        preserved, and a string without any newline yields ``[text]``.
    """
    # str.split with an explicit separator yields exactly the pieces the
    # recursive head/tail split produced, including empty ones.
    return text.split('\n')
28
+
29
+
30
def tokenize_to_sent(path):
    """Read the text file at *path* and return its sentences.

    The file is split into lines, blank lines are dropped, and each remaining
    line is segmented into sentences with the module-level spaCy pipeline
    ``nlp``.

    Args:
        path: Location of the plain-text file to tokenize.

    Returns:
        A list with the text of every sentence, in document order.
    """
    # Read the file. The encoding is explicit because the postings contain
    # non-ASCII characters and the platform default is not always UTF-8.
    with open(path, 'r', encoding='utf-8') as file:
        text = file.read()

    # Sentence tokenization: one spaCy pass per non-empty, stripped line.
    stripped_lines = (line.strip() for line in split_text_recursively(text))
    sents = []
    for line in stripped_lines:
        if not line:
            continue
        sents.extend(sent.text for sent in nlp(line).sents)

    return sents
53
+
54
 
55
  ### LLM-based tag extraction with few-shot learning
56
 
 
124
 
125
  skill_cls = [mapping[i.item()] for i in skill_cls]
126
  knowledge_cls = [mapping[i.item()] for i in knowledge_cls]
127
+
128
+ if len(decoded_tokens) != len(skill_cls) or len(decoded_tokens) != len(knowledge_cls):
129
+ raise ValueError("Error: Length mismatch")
130
+
131
+ return skill_cls, knowledge_cls, decoded_tokens
132
+
133
+
134
+ from transformers import pipeline
135
+ pipe = pipeline("token-classification", model="jjzha/jobbert_knowledge_extraction")
136
+
137
def convert2(text):
    """Tag *text* with the module-level ``pipe`` token-classification pipeline.

    Args:
        text: The string handed to ``pipe``.

    Returns:
        A ``(skill_cls, knowledge_cls, tokens)`` tuple of parallel lists with
        one entry per item returned by ``pipe``: the item's ``'entity'`` value
        (twice) and its ``'word'`` value.
    """
    # NOTE(review): both label lists are read from the same pipeline output,
    # and ``pipe`` is created from a knowledge-extraction model, so the
    # "skill" labels are always identical to the knowledge labels. Confirm
    # whether a separate skill model was intended here.
    tokens = []
    skill_cls = []
    knowledge_cls = []
    for item in pipe(text):
        tokens.append(item['word'])
        skill_cls.append(item['entity'])
        knowledge_cls.append(item['entity'])

    return skill_cls, knowledge_cls, tokens
144
+
145
+
146
+
147
+
148
def tag_posting(path, llm_extract = True):
    """Tag every sentence of the posting at *path* and print one table each.

    Each sentence is tagged with the pre-trained classifiers via ``convert``.
    When *llm_extract* is true, the sentence is additionally tagged with
    ``extract_tags`` and both label sets are printed side by side.

    The previous version referenced the undefined names ``text`` and
    ``output``, so both branches raised ``NameError``. The work is now done
    per sentence, using only names that are in scope.

    Args:
        path: Location of the plain-text job posting.
        llm_extract: If true, also run the LLM-based extraction and show its
            labels next to the pre-trained predictions.

    Returns:
        None. The tables are written to standard output.
    """
    # Reading & sentence tokenization
    sents = tokenize_to_sent(path)

    for sent in sents:
        # Pre-trained
        skill_cls, knowledge_cls, tokens = convert(sent)

        if llm_extract:
            # LLM-based tag extraction
            # NOTE(review): zip() stops at the shortest column, so rows are
            # silently dropped if the two tokenizations differ - confirm
            # that extract_tags and convert tokenize identically.
            tokens, output = extract_tags(sent, tokenize=True)
            table = zip(tokens, output['skill_labels'], output['knowledge_labels'], skill_cls, knowledge_cls)
            headers = ["Token", "Skill Label", "Knowledge Label", "Pred Skill Label", "Pred Knowledge Label"]
        else:
            # Only pre-trained
            table = zip(tokens, skill_cls, knowledge_cls)
            headers = ["Token", "Skill Label", "Knowledge Label"]

        print(tabulate(table, headers=headers, tablefmt="pretty"))
175
+
176
 
177
 
178
  if __name__ == "__main__":
179
+
180
+ path = './job-postings/03-01-2024/1.txt'
181
+ tag_posting(path, llm_extract = False)
182
+
183
+ quit()
184
  text = input('Enter text: ')
185
 
186
  # LLM-based tag extraction
tags/03-01-2024/1.txt ADDED
File without changes
tags/03-01-2024/2.txt ADDED
File without changes
tags/03-01-2024/3.txt ADDED
File without changes
tags/04-01-2024/1.txt ADDED
File without changes
tags/04-01-2024/2.txt ADDED
File without changes
tags/04-01-2024/3.txt ADDED
File without changes