Aqsa-K committed on
Commit
68403a4
·
2 Parent(s): f8da2f0 95c280d

Merged branch 'main' of https://github.com/iamrobzy/in-demand

Browse files
.github/workflows/scraping.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run LinkedIn Scraping Script
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ schedule:
6
+ # Run weekly on Sunday at 00:00 UTC (the every-30-minutes schedule below is disabled)
7
+ #- cron: '*/30 * * * *'
8
+ - cron: '0 0 * * 0'
9
+
10
+ jobs:
11
+ run-scraper:
12
+ runs-on: ubuntu-latest
13
+
14
+ steps:
15
+ - name: Checkout repository
16
+ uses: actions/checkout@v3
17
+
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v4
20
+ with:
21
+ python-version: '3.11'
22
+
23
+ - name: Install dependencies
24
+ run: |
25
+ python -m pip install --upgrade pip
26
+ pip install -r requirements.txt
27
+
28
+ - name: Run LinkedIn Scraping Script
29
+ env:
30
+ RAPID_API_KEY: ${{ secrets.RAPID_API_KEY }}
31
+ run: |
32
+ python linkedin_scrapping.py
33
+ - name: List job-postings folder
34
+ run: ls -R job-postings || echo "job-postings folder not found"
35
+ - name: Commit and Push Changes
36
+ run: |
37
+ git config --global user.name "github-actions[bot]"
38
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
39
+ git add job-postings
40
+ git commit -m "Add job postings generated by script"
41
+ git push
42
+ env:
43
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44
+
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  .venv/
2
- .env
 
 
1
  .venv/
2
+ .env
3
+ wandb/
app.py CHANGED
@@ -7,7 +7,8 @@ token_knowledge_classifier = pipeline(model="jjzha/jobbert_knowledge_extraction"
7
 
8
  examples = [
9
  "Knowing Python is a plus",
10
- "Recommend changes, develop and implement processes to ensure compliance with IFRS standards"
 
11
  ]
12
 
13
 
 
7
 
8
  examples = [
9
  "Knowing Python is a plus",
10
+ "Recommend changes, develop and implement processes to ensure compliance with IFRS standards",
11
+ "Experience with Unreal and/or Unity and/or native IOS/Android 3D development and/or Web based 3D engines",
12
  ]
13
 
14
 
data/test-medium.json ADDED
The diff for this file is too large to render. See raw diff
 
data/test-short.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"idx": 1, "tokens": ["Full", "Stack", "Software", "Engineer", "-", "Java", "/", "JavaScript"], "tags_skill": ["O", "O", "O", "O", "O", "O", "O", "O"], "tags_knowledge": ["O", "O", "O", "O", "O", "O", "O", "O"], "source": "tech"}
2
+ {"idx": 1, "tokens": ["<ORGANIZATION>", "<ORGANIZATION>", "<ORGANIZATION>", "<ORGANIZATION>", "."], "tags_skill": ["O", "O", "O", "O", "O"], "tags_knowledge": ["O", "O", "O", "O", "O"], "source": "tech"}
3
+ {"idx": 1, "tokens": ["<ADDRESS>", "<ADDRESS>", "<LOCATION>", "-", "<LOCATION>"], "tags_skill": ["O", "O", "O", "O", "O"], "tags_knowledge": ["O", "O", "O", "O", "O"], "source": "tech"}
4
+ {"idx": 1, "tokens": ["Date", "posted:", "2021-03-04"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["O", "O", "O"], "source": "tech"}
5
+ {"idx": 1, "tokens": ["Likes:", "0", "Dislikes:", "0", "Love:", "0"], "tags_skill": ["O", "O", "O", "O", "O", "O"], "tags_knowledge": ["O", "O", "O", "O", "O", "O"], "source": "tech"}
6
+ {"idx": 1, "tokens": ["Salary:", "<SALARY>"], "tags_skill": ["O", "O"], "tags_knowledge": ["O", "O"], "source": "tech"}
7
+ {"idx": 1, "tokens": ["Job", "type:", "FULL_TIME"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["O", "O", "O"], "source": "tech"}
8
+ {"idx": 1, "tokens": ["Experience", "level:", "<EXPERIENCE>"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["O", "O", "O"], "source": "tech"}
9
+ {"idx": 1, "tokens": ["Industry:", "<INDUSTRY>"], "tags_skill": ["O", "O"], "tags_knowledge": ["O", "O"], "source": "tech"}
10
+ {"idx": 1, "tokens": ["Company", "size:", "<SIZE>"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["O", "O", "O"], "source": "tech"}
11
+ {"idx": 1, "tokens": ["Company", "type:", "<COMPANY_TYPE>"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["O", "O", "O"], "source": "tech"}
12
+ {"idx": 1, "tokens": ["Technologies:"], "tags_skill": ["O"], "tags_knowledge": ["O"], "source": "tech"}
13
+ {"idx": 1, "tokens": ["javascript", "reactjs", "java"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["B", "B", "B"], "source": "tech"}
14
+ {"idx": 1, "tokens": ["Job", "description:"], "tags_skill": ["O", "O"], "tags_knowledge": ["O", "O"], "source": "tech"}
15
+ {"idx": 1, "tokens": ["Job", "type:"], "tags_skill": ["O", "O"], "tags_knowledge": ["O", "O"], "source": "tech"}
16
+ {"idx": 1, "tokens": ["Full-time"], "tags_skill": ["O"], "tags_knowledge": ["O"], "source": "tech"}
17
+ {"idx": 1, "tokens": ["Role:"], "tags_skill": ["O"], "tags_knowledge": ["O"], "source": "tech"}
18
+ {"idx": 1, "tokens": ["Full", "Stack", "Developer"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["O", "O", "O"], "source": "tech"}
19
+ {"idx": 1, "tokens": ["Technologies"], "tags_skill": ["O"], "tags_knowledge": ["O"], "source": "tech"}
20
+ {"idx": 1, "tokens": ["javascript", "reactjs", "java"], "tags_skill": ["O", "O", "O"], "tags_knowledge": ["B", "B", "B"], "source": "tech"}
debug.py DELETED
@@ -1,40 +0,0 @@
1
- import spacy
2
- import re
3
-
4
- nlp = spacy.load("en_core_web_sm")
5
-
6
- def split_text_recursively(text):
7
- if '\n' not in text:
8
- return [text]
9
- parts = text.split('\n', 1)
10
- return [parts[0]] + split_text_recursively(parts[1])
11
-
12
- def parse_post(path):
13
-
14
- # Read the file
15
-
16
- with open(path, 'r') as file:
17
- text = file.read()
18
-
19
- # Sentence tokenization
20
-
21
- str_list = split_text_recursively(text)
22
- str_list = [i.strip() for i in str_list]
23
- str_list = list(filter(None, str_list))
24
-
25
- count = 0
26
- sents = []
27
-
28
- for line in str_list:
29
- doc = nlp(line)
30
- for sent in doc.sents:
31
- print(f"{sent.text}")
32
- sents.append(sent.text)
33
-
34
- # Skill/knowledge extraction
35
-
36
-
37
-
38
-
39
- path = './job-postings/03-01-2024/2.txt'
40
- parse_post(path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
debug2.py DELETED
@@ -1 +0,0 @@
1
- deb
 
 
demo-app.py DELETED
@@ -1,56 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
-
4
- token_skill_classifier = pipeline(model="jjzha/jobbert_skill_extraction", aggregation_strategy="first")
5
- token_knowledge_classifier = pipeline(model="jjzha/jobbert_knowledge_extraction", aggregation_strategy="first")
6
-
7
-
8
- examples = [
9
- "Knowing Python is a plus",
10
- "Recommend changes, develop and implement processes to ensure compliance with IFRS standards"
11
- ]
12
-
13
-
14
- def aggregate_span(results):
15
- new_results = []
16
- current_result = results[0]
17
-
18
- for result in results[1:]:
19
- if result["start"] == current_result["end"] + 1:
20
- current_result["word"] += " " + result["word"]
21
- current_result["end"] = result["end"]
22
- else:
23
- new_results.append(current_result)
24
- current_result = result
25
-
26
- new_results.append(current_result)
27
-
28
- return new_results
29
-
30
- def ner(text):
31
- output_skills = token_skill_classifier(text)
32
- for result in output_skills:
33
- if result.get("entity_group"):
34
- result["entity"] = "Skill"
35
- del result["entity_group"]
36
-
37
- output_knowledge = token_knowledge_classifier(text)
38
- for result in output_knowledge:
39
- if result.get("entity_group"):
40
- result["entity"] = "Knowledge"
41
- del result["entity_group"]
42
-
43
- if len(output_skills) > 0:
44
- output_skills = aggregate_span(output_skills)
45
- if len(output_knowledge) > 0:
46
- output_knowledge = aggregate_span(output_knowledge)
47
-
48
- return {"text": text, "entities": output_skills}, {"text": text, "entities": output_knowledge}
49
-
50
-
51
- demo = gr.Interface(fn=ner,
52
- inputs=gr.Textbox(placeholder="Enter sentence here..."),
53
- outputs=["highlight", "highlight"],
54
- examples=examples)
55
-
56
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
env-template.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ OPENAI_API_KEY=<openai api key>
2
+ HF_USERNAME=<hugging face username>
3
+ WANDB_API_KEY=<weights & biases api key>
examples.py → few-shot-extract.py RENAMED
@@ -1,4 +1,6 @@
1
  import requests
 
 
2
 
3
  def show_examples(n = 10):
4
 
@@ -13,11 +15,12 @@ def show_examples(n = 10):
13
  tokens = row['tokens']
14
  skill_labels, knowledge_labels = row['tags_skill'], row['tags_knowledge']
15
 
16
- print(f'Example #{i+1}')
17
- print('Tokens:', tokens)
18
- print('Skill Labels:', skill_labels)
19
- print('Knowledge Labels:', knowledge_labels)
20
- print('')
 
21
 
22
 
23
  show_examples(n=100)
 
1
  import requests
2
+ import os
3
+ repo_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
4
 
5
  def show_examples(n = 10):
6
 
 
15
  tokens = row['tokens']
16
  skill_labels, knowledge_labels = row['tags_skill'], row['tags_knowledge']
17
 
18
+ with open(f"{repo_dir}/examples.txt", 'w') as file:
19
+ file.write(f'Example #{i+1}\n')
20
+ file.write(f'Tokens: {str(tokens)}\n')
21
+ file.write(f'Skill Labels: {str(skill_labels)}\n')
22
+ file.write(f'Knowledge Labels: {str(knowledge_labels)}\n')
23
+ file.write('\n')
24
 
25
 
26
  show_examples(n=100)
job-ad.txt DELETED
@@ -1,40 +0,0 @@
1
- About the job
2
- Grow with us
3
-
4
- About This Opportunity
5
-
6
- Ericsson is a world-leading provider of telecommunications equipment and services to mobile and fixed network operators. Over 1,000 networks in more than 180 countries use Ericsson equipment, and more than 40 percent of the world's mobile traffic passes through Ericsson networks. Using innovation to empower people, business and society, Ericsson is working towards the Networked Society: a world connected in real time that will open opportunities to create freedom, transform society and drive solutions to some of our planet’s greatest challenges.
7
-
8
- Ericsson's 6G vision, first introduced in 2020, remains pivotal for transforming business and society in the 2030s through secure, efficient, and sustainable communication services. As 6G development progresses into a more concrete phase of regulation and standardization we are looking for researchers that would like to join us, co-creating a cyber-physical world
9
-
10
- Within Ericsson, Ericsson Research develops new communication solutions and standards which have made Ericsson the industry leader in defining five generations of mobile communication. As we gear up for the 6th generation, we would like to fully embrace and utilize cloud native principles, hyperscalers and internal cloud infrastructure in our research. We are now looking for a MLOps research engineer to develop and support our workflows.
11
-
12
- In this role, you will
13
-
14
- Contribute to the direction and implementation of ML-based ways of working
15
- Study, design and develop workflows and solutions for AI based R&D
16
- Work across internal compute and external cloud platforms
17
- Working closely with researchers driving 6G standardization
18
-
19
- Join our Team
20
-
21
- Qualifications
22
-
23
- MSc in Data Science or related field, or have equivalent practical experience
24
- Technical skills and/or professional experience, particularly in:
25
- Programming in various languages (Python, Go, etc)
26
- MLOps technologies and tooling (e.g. MLFlow, Kubeflow)
27
- Dispatching and computational Python packages (Hydra, numpy, TensorFlow, etc.)
28
- DevOps and CI/CD experience, runner deployment & management, pipeline creation, testing etc. for validating ML-driven code
29
- Familiarity in the following is a plus:
30
- ML frameworks (PyTorch, TensorFlow, or Jax)
31
- Containers technologies (engines, orchestration tools and frameworks such as Docker, Kaniko, Kubernetes, Helm, etc.)
32
- Cloud ecosystems along with the respective infrastructure, in particular AWS
33
- Infrastructure management (Ansible, Terraform, etc.)
34
- Team skills is a necessity. Daily cross-functional collaboration and interaction with other skilled researchers are the basis for our ways of working.
35
- You should enjoy working with people having diverse backgrounds and competences.
36
- It is important that you have strong personal drive and a strong focus on the tasks at hand.
37
- Ability to translate high-level objectives into detailed tasks and actionable steps.
38
- Location: Luleå, Sweden
39
-
40
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
job-postings/07-01-2025/1.txt CHANGED
@@ -1,52 +1,71 @@
1
- About Agoda
2
 
3
- Agoda is an online travel booking platform for accommodations, flights, and more. We build and deploy cutting-edge technology that connects travelers with a global network of 4.7M hotels and holiday properties worldwide, plus flights, activities, and more . Based in Asia and part of Booking Holdings, our 7,100+ employees representing 95+ nationalities in 27 markets foster a work environment rich in diversity, creativity, and collaboration. We innovate through a culture of experimentation and ownership, enhancing the ability for our customers to experience the world.
4
 
5
- Our Purpose – Bridging the World Through Travel
6
 
7
- We believe travel allows people to enjoy, learn and experience more of the amazing world we live in. It brings individuals and cultures closer together, fostering empathy, understanding and happiness.
8
 
9
- We are a skillful, driven and diverse team from across the globe, united by a passion to make an impact. Harnessing our innovative technologies and strong partnerships, we aim to make travel easy and rewarding for everyone.
10
 
11
- Get to Know our Team
12
 
13
- The Data department oversees all of Agoda’s data-related requirements. Our ultimate goal is to enable and increase the use of data in the company through creative approaches and the implementation of powerful resources such as operational and analytical databases, queue systems, BI tools, and data science technology. We hire the brightest minds from around the world to take on this challenge and equip them with the knowledge and tools that contribute to their personal growth and success while supporting our company’s culture of diversity and experimentation. The role the Data team plays at Agoda is critical as business users, product managers, engineers, and many others rely on us to empower their decision making. We are equally dedicated to our customers by improving their search experience with faster results and protecting them from any fraudulent activities. Data is interesting only when you have enough of it, and we have plenty. This is what drives up the challenge as part of the Data department, but also the reward.
14
 
15
- The Opportunity
16
 
17
- Our mission is to empower Agoda employees with an optimal set of tools that will enable them to make the best decisions and build state of the art features and models for Agoda’s end users by leveraging the data we collect. As an ML Technical Product Manager, you will be responsible for the internal product requirements, analysis, ideation, feature design, roadmap, project management, trainings and the quality of our solutions. You will conceptualize platform capabilities to empower the rest of the organization to build great external customer experiences, continuing our efforts being a data driven company.
18
 
19
- In This Role, You’ll Get to
20
 
21
- Own the product from concept to design, specification, implementation, and analysis
22
- Gather and synthesize requirements and input from multiple stakeholders (internal product teams, engineers, business teams, marketing, finance, etc.)
23
- Lead in-person conversations with internal customers to understand users, priorities, and feature considerations
24
- Demonstrate strong leadership, organizational and execution skills, to drive product development projects from concept to launch, and operate in a fast- paced setting
25
- Excellent leadership and communication skills. Expected to be asking questions, listening, driving team alignment, and influencing without authority across all levels of the organization.
26
- Bridge business and technical worlds very well, a good conceptual problem solver to articulate opportunities and solutions Internal
27
- Technical confidence. You’ll need to work with senior engineers to balance product velocity and technical debt tradeoffs
28
 
29
- What You’ll Need To Succeed
30
 
31
- 5+ years of technical experience in ML engineering, Data Scientist, Data Analytics, or related role
32
- 2+ years of technical program/product management experience in a fast-paced environment.
33
- Excellent interpersonal skills, energetic, and a self-starter.
34
- Excellent presentation skills.
35
- Strong organizational skills along with demonstrated ability to manage multiple tasks simultaneously and able to react to shifting priorities to meet business need
36
- Effective communicator (written and verbal). Able to communicate effectively with both business and technical teams.
37
- Demonstrated analytical and quantitative skills. You use data to make decisions and are comfortable gathering it yourself or working with others to gather it.
38
- Hands-on experience with product management tools (JIRA etc )
39
- A problem-solving mindset
40
- Strong technical background in the Data Science and ML world
41
- The ability and positive mindset to “figure things out.”
42
- This position requires a successful candidate to relocate fully to Bangkok, Thailand, where relocation support is provided.
43
 
44
- #sanfrancisco #sanjose #losangeles #sandiego #oakland #denver #miami #orlando #atlanta #chicago #boston #detroit #newyork #portland #philadelphia #dallas #houston #austin #seattle #washdc #sydney #melbourne #perth #vienna #graz #baku #brussels #rio #toronto #vancouver #montreal #Ostrava #copenhagen #estonia #helsinki #paris #nice #marseille #rouen #lyon #berlin #munich #hamburg #stuttgart #cologne #frankfurt #dusseldorf #dortmund #essen #Bremen #leipzig #dresden #hanover #nuremberg #athens #hongkong #budapest #dublin #telaviv #milan #rome #naples #turin #palermo #venice #bologna #florence #tokyo #osaka #yokohama #nagoya #okinawa #fukuoka #sapporo #luxembourg #kualalumpur #amsterdam #oslo #jerusalem #warsaw #krakow #porto #loures #newdelhi #doha #bucharest #moscow #saintpetersburg #singapore #bratislava #seoul #barcelona #madrid #valencia #seville #stockholm #zurich #geneva #basel #taipei #bangkok #istanbul #dubai #abudhabi #london #manchester #edinburgh #hanoi #IT #4 #5
45
 
46
- Equal Opportunity Employer
 
 
47
 
48
- At Agoda, we pride ourselves on being a company represented by people of all different backgrounds and orientations. We prioritize attracting diverse talent and cultivating an inclusive environment that encourages collaboration and innovation. Employment at Agoda is based solely on a person’s merit and qualifications. We are committed to providing equal employment opportunity regardless of sex, age, race, color, national origin, religion, marital status, pregnancy, sexual orientation, gender identity, disability, citizenship, veteran or military status, and other legally protected characteristics.
49
 
50
- We will keep your application on file so that we can consider you for future vacancies and you can always ask to have your details removed from the file. For more details please read our privacy policy .
51
 
52
- To all recruitment agencies: Agoda does not accept third party resumes. Please do not send resumes to our jobs alias, Agoda employees or any other organization location. Agoda is not responsible for any fees related to unsolicited resumes.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Responsibilities
2
 
3
+ TikTok is the leading destination for short-form mobile video. Our mission is to inspire creativity and bring joy. TikTok has global offices including Los Angeles, New York, London, Paris, Berlin, Dubai, Singapore, Jakarta, Seoul and Tokyo.
4
 
5
+ Why Join Us
6
 
7
+ Creation is the core of TikTok's purpose. Our platform is built to help imaginations thrive. This is doubly true of the teams that make TikTok possible.
8
 
9
+ Together, we inspire creativity and bring joy - a mission we all believe in and aim towards achieving every day.
10
 
11
+ To us, every challenge, no matter how difficult, is an opportunity; to learn, to innovate, and to grow as one team. Status quo? Never. Courage? Always.
12
 
13
+ At TikTok, we create together and grow together. That's how we drive impact - for ourselves, our company, and the communities we serve.
14
 
15
+ Team Introduction
16
 
17
+ E-commerce is a new and fast growing business that aims at connecting all customers to excellent sellers and quality products on TikTok Shop, through E-commerce live-streaming, E-commerce short videos, and commodity recommendation. We are a group of applied machine learning engineers and data scientists that focus on E-commerce recommendations. We are developing innovative algorithms and techniques to improve user engagement and satisfaction, converting creative ideas into business-impacting solutions. We are interested and excited about applying large scale machine learning to solve various real-world problems in E-commerce.
18
 
19
+ We are looking for talented individuals to join us for an internship in 2024. Internships at TikTok aim to offer students industry exposure and hands-on experience. Turn your ambitions into reality as your inspiration brings infinite opportunities at TikTok.
20
 
21
+ This Internship Program runs for 10-24 weeks. Candidates can also apply for both Off-cycle Intern position and Program Intern position.
 
 
 
 
 
 
22
 
23
+ Applications will be reviewed on a rolling basis. We encourage you to apply early. Candidates can apply to a maximum of TWO positions and will be considered for jobs in the order you apply. The application limit is applicable to TikTok and its affiliates' jobs globally.
24
 
25
+ Candidates can apply to a maximum of two positions and will be considered for jobs in the order you apply. The application limit is applicable to TikTok and its affiliates' jobs globally. Applications will be reviewed on a rolling basis - we encourage you to apply early.
 
 
 
 
 
 
 
 
 
 
 
26
 
27
+ Responsibilities
28
 
29
+ Participate in building large-scale (10 million to 100 million) e-commerce recommendation algorithms and systems, including commodity recommendations, live stream recommendations, short video recommendations etc in TikTok.
30
+ Build long and short term user interest models, analyze and extract relevant information from large amounts of various data and design algorithms to explore users' latent interests efficiently.
31
+ Design, develop, evaluate and iterate on predictive models for candidate generation and ranking(eg. Click Through Rate and Conversion Rate prediction) , including, but not limited to building real-time data pipelines, feature engineering, model optimization and innovation.
32
 
33
+ Qualifications
34
 
35
+ Minimum Qualifications:
36
 
37
+ Currently pursuing a Master's degree or Phd's Degree in Software Development, Computer Science, Computer Engineering, or a related technical discipline.
38
+ Solid knowledge in one of the following areas: machine learning, deep learning, data mining, large-scale systems.
39
+ Experience with at least one programming language like C++/Python or equivalent.
40
+ Experience in Deep Learning Tools such as tensorflow/ pytorch.
41
+ Must obtain work authorization in country of employment at the time of hire, and maintain ongoing work authorization during employment; Able to commit to working for 12 weeks starting May 2024
42
+
43
+ Preferred Qualifications:
44
+
45
+ Graduating December 2024 onwards with intent to return to degree-program after the completion of the internship.
46
+ Familiar with one or more of the algorithms such as Collaborative Filtering, Matrix Factorization, Factorization Machines, Word2vec, Logistic Regression, Gradient Boosting Trees, Deep Neural Networks, Wide and Deep etc.
47
+ Publications at KDD, NeurlPS, WWW, SIGIR, WSDM, ICML, IJCAI, AAAI, RECSYS and related conferences/journals, or experience in data mining/machine learning competitions such as Kaggle/KDD-cup etc.
48
+
49
+ TikTok is committed to creating an inclusive space where employees are valued for their skills, experiences, and unique perspectives. Our platform connects people from across the globe and so does our workplace. At TikTok, our mission is to inspire creativity and bring joy. To achieve that goal, we are committed to celebrating our diverse voices and to creating an environment that reflects the many communities we reach. We are passionate about this and hope you are too.
50
+
51
+ TikTok is committed to providing reasonable accommodations in our recruitment processes for candidates with disabilities, pregnancy, sincerely held religious beliefs or other reasons protected by applicable laws. If you need assistance or a reasonable accommodation, please reach out to us at https://shorturl.at/cdpT2
52
+
53
+ By submitting an application for this role, you accept and agree to our global applicant privacy policy, which may be accessed here: https://careers.tiktok.com/legal/privacy.
54
+
55
+ Job Information
56
+
57
+ 【For Pay Transparency】Compensation Description (Hourly) - Campus Intern
58
+
59
+ The hourly rate range for this position in the selected city is $59- $59.
60
+
61
+ Benefits may vary depending on the nature of employment and the country work location. Interns have day one access to health insurance, life insurance, wellbeing benefits and more. Interns also receive 10 paid holidays per year and paid sick time (56 hours if hired in first half of year, 40 if hired in second half of year).
62
+
63
+ The Company reserves the right to modify or change these benefits programs at any time, with or without notice.
64
+
65
+ For Los Angeles County (unincorporated) Candidates:
66
+
67
+ Qualified applicants with arrest or conviction records will be considered for employment in accordance with all federal, state, and local laws including the Los Angeles County Fair Chance Ordinance for Employers and the California Fair Chance Act. Our company believes that criminal history may have a direct, adverse and negative relationship on the following job duties, potentially resulting in the withdrawal of the conditional offer of employment:
68
+
69
+ Interacting and occasionally having unsupervised contact with internal/external clients and/or colleagues;
70
+ Appropriately handling and managing confidential information including proprietary and trade secret information and access to information technology systems; and
71
+ Exercising sound judgment.
job-postings/07-01-2025/10.txt CHANGED
@@ -1,79 +1,62 @@
1
- GEICO's Enterprise Voice team is seeking an experienced Machine Learning Engineer to reimagine our customers’ contact center experience. You will help drive our insurance business transformation as we redefine experience for our customers.
2
 
3
- You will join a team of skilled software engineers as a resident AI/ML expert, supporting multiple AI automation initiatives across the contact center. We are looking for a motivated, high-energy, self-starter and problem solver with a positive attitude, and solid interpersonal and communication skills.
4
 
5
- Position Description
6
 
7
- Our Senior Machine Learning Engineer is a mid-level member of the Voice engineering staff ensuring we provide innovative experiences and operational excellence to our contact center agents, supervisors and customers. Our team thrives and succeeds in delivering high-quality technology products and services in a hyper-growth environment where priorities shift quickly. The ideal candidate has broad and deep technical knowledge in AI language services, Natural Language Processing and Generative AI.
8
 
9
- Responsibilities
10
 
11
- Ability to think creatively to find innovative solutions to complex ill-defined problems
12
- Stay abreast of the latest developments in AI, incorporating new techniques and methodologies into our processes to keep us ahead in the insurance industry
13
- Provides machine learning expertise within a team's functional area
14
- Consistently writes production-ready code with defined standards of readability, maintainability, reliability and testability. Helps junior team members to produce the same.
15
- Participates in the formulation of non-functional requirements
16
- Specifies the design and implementation of software modules based upon system requirements and architectural guidance
17
 
18
- Basic Qualifications:
19
 
20
- Strong foundation in Machine Learning and Artificial Intelligence
21
- Experienced in Natural Language Processing and AI language services
22
- Knowledgeable in Generative AI, Large Language Models, OpenAI APIs, prompt engineering
23
- Familiar with Responsible AI principles, model evaluation and monitoring
24
- Strong Python and SQL programming skills. Experience using Jupyter notebooks or similar tools. Experience with Github or other source code management platforms
25
- Experience building configurable AI/ML data pipelines through the complete ML Ops lifecycle.
26
- Working knowledge of Big Data technologies such as Spark, Data lake, MLflow, Snowflake, Elasticsearch.
27
- Experience with cloud providers, preferably Amazon Web Services and/or Azure
28
- Ability to explain AI/ML concepts to technical and non-technical audience
29
- Ability to coach and guide other engineers in shaping ill-defined data needs into concrete project deliverables
30
- Ability to develop collaborative relationships with multi-functional teams
31
 
32
- Preferred Qualifications
33
 
34
- Generative AI Retrieval Augmented Generation and AI agents
35
- Experience working with chatbots and Conversational AI
36
- Developing omni-channel (voice, chat, SMS) Contact Center solutions including natural language processing and speech-enabled grammar
37
- Experience with Amazon Web Services including Sagemaker, Bedrock, Lambda, S3, Connect, Lex, DynamoDB, API Gateway, CloudWatch
38
- Experience with Machine Learning frameworks and tools like TensorFlow, PyTorch, Scikit-learn.
39
- Familiar with Microservices development, Swagger, Postman
40
- Building test suite and frameworks to automate end to end testing
41
- Experience with open source libraries and frameworks
42
- Experience working in an Agile environment (Scrum, Kanban, SAFe)
43
 
44
- Experience
45
 
46
- 4+ years of hands-on experience in building AI/ML solutions in a production environment
47
- 4+ years of professional software development experience
48
- 2+ years of experience with architecture and design
49
- 2+ years of experience in open source frameworks
50
- 1+ years of experience with AWS, GCP, Azure, or another cloud service
51
- 1+ years of experience in Natural Language Processing, Generative AI or Language services
52
 
53
- Education
54
 
55
- Bachelor's degree in Computer Science, Information Systems, Engineering or equivalent education or work experience. Master’s degree preferred.
 
 
 
 
 
56
 
57
- Annual Salary
58
 
59
- $70,000.00 - $230,000.00
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- The above annual salary range is a general guideline. Multiple factors are taken into consideration to arrive at the final hourly rate/ annual salary to be offered to the selected candidate. Factors include, but are not limited to, the scope and responsibilities of the role, the selected candidate’s work experience, education and training, the work location as well as market and business considerations.
62
 
63
- At this time, GEICO will not sponsor a new applicant for employment authorization for this position.
64
 
65
- Benefits:
66
-
67
- As an Associate, you’ll enjoy our Total Rewards Program* to help secure your financial future and preserve your health and well-being, including:
68
-
69
- Premier Medical, Dental and Vision Insurance with no waiting period**
70
- Paid Vacation, Sick and Parental Leave
71
- 401(k) Plan
72
- Tuition Reimbursement
73
- Paid Training and Licensures
74
- Benefits may be different by location. Benefit eligibility requirements vary and may include length of service.
75
- Coverage begins on the date of hire. Must enroll in New Hire Benefits within 30 days of the date of hire for coverage to take effect.
76
-
77
- The equal employment opportunity policy of the GEICO Companies provides for a fair and equal employment opportunity for all associates and job applicants regardless of race, color, religious creed, national origin, ancestry, age, gender, pregnancy, sexual orientation, gender identity, marital status, familial status, disability or genetic information, in compliance with applicable federal, state and local law. GEICO hires and promotes individuals solely on the basis of their qualifications for the job to be filled.
78
-
79
- GEICO reasonably accommodates qualified individuals with disabilities to enable them to receive equal employment opportunity and/or perform the essential functions of the job, unless the accommodation would impose an undue hardship to the Company. This applies to all applicants and associates. GEICO also provides a work environment in which each associate is able to be productive and work to the best of their ability. We do not condone or tolerate an atmosphere of intimidation or harassment. We expect and require the cooperation of all associates in maintaining an atmosphere free from discrimination and harassment with mutual respect by and for all associates and applicants.
 
1
+ We are on a mission to spark connections and bring people together.
2
 
3
+ Dcard is a social media platform devoted to creating a safe and free environment for ever-flowing ideas and extraordinary stories. Garnering the trust of the younger generation, our service attracts millions of active users and up to 20 million unique visitors per month. We have substantial influence and high penetration amongst the youth of Taiwan, but our ambitions do not stop here.
4
 
5
+ As a strong and emerging international company, we are on a mission to spark connections and bring people together. We continue to make impactful influence in the social media, advertising and e-commerce fields. Continuing our success in the Taiwan market, we are now expanding to Hong Kong, Japan, and the APAC market.
6
 
7
+ As a Senior Machine Learning Engineer at Dcard, you will collaborate closely with product managers and developers to build products that matter and create tools that accelerate growth. Join our team of developers to build the social network of the next generation. We code in a fresh monolithic repository and ship code every few hours, and most importantly, we're never afraid of using new and bold approaches to conquer challenges.
8
 
9
+ If you are ready to take the leap, join us in creating an experience that connects people all around the world!
10
 
11
+ Why should you join Dcard?
 
 
 
 
 
12
 
13
+ Dcard's products have expanded from the card-pairing feature to community, e-commerce, and other services targeting university students and young people. We are building a rapidly growing and continuously expanding organization with a growth mindset. The team focuses on long-term mission vision and strategy, working together to stay focused on goals and continuously break through barriers. We are reaching out to the world, creating more opportunities and development in different fields, and we are not satisfied with the current boundaries. We need you to provide value to our users in more aspects of life!
14
 
15
+ About The Dcard Engineering Team
 
 
 
 
 
 
 
 
 
 
16
 
17
+ As a member of the Dcard Engineering Team, you will not only focus on feature development but also optimize the developer experience and architecture, and evaluate the adoption of new technologies. At Dcard, you will face many interesting challenges, working on high-traffic products, constantly adjusting and improving the existing architecture to provide smooth services to millions of users. We are -
18
 
19
+ Data Driven - Any analysis and decision-making within the team revolve around important metrics, and product development goals are based on OKRs to measure their value, ensuring that everyone is on the same track and moving towards the same goal. We value data-driven thinking over relying on intuition.
20
+ Fast-Paced - Working with a talented team, you will experience significant growth in both technical and collaborative abilities. The team operates at a fast pace, and we expect the product to move forward quickly. Consequently, we face daily challenges such as setting up an ad system to handle high traffic or ensuring real-time and fast data updates.
21
+ Process Optimization - The team pays great attention to the smoothness of processes and continuously thinks about how to collaborate more efficiently. We roll up our sleeves and directly change things that bother us, optimizing the development and life experiences as a whole.
22
+ Continuous Growth - In addition to regular study sessions, we learn about the projects undertaken by team members in different domains through Developer Sessions within the team. We also invite external members to share successful case studies or development processes from other teams.
 
 
 
 
 
23
 
24
+ What you'll do
25
 
26
+ Participate in the development and evolution of machine learning-related products at Dcard, involving tasks such as algorithm development, model training, feature pipeline design, and maintaining the smooth operation of services.
27
+ Collaborate with other Data Component developers to build machine learning-related systems at Dcard.
28
+ Analyze and extract insights from a large volume of user data to iteratively optimize algorithms.
29
+ Design and conduct A/B testing experiments to validate the effectiveness of algorithms.
 
 
30
 
31
+ What We're Looking For
32
 
33
+ Passionate about understanding user needs and transforming algorithms into products.
34
+ Proficient in Python and open to learning new languages.
35
+ Enjoy striving for high-quality code and can propose minimal viable system architectures and understand the tradeoffs involved when facing requirements.
36
+ Possess excellent communication and collaboration skills, able to articulate ideas clearly and work seamlessly with other teams.
37
+ Have a basic understanding of machine learning algorithms and workflows, such as NLP, Deep Learning, Recommendation Systems, and more.
38
+ Demonstrated Competence in Conversational English
39
 
40
+ Bonus Points If You Have
41
 
42
+ Have more than two years of working experience in recommendation systems, search, e-commerce, or advertising systems, with familiarity in relevant application scenarios.
43
+ Proficient in designing distributed systems, capable of handling large-scale data or developing large-scale systems.
44
+ Have experience in NLP and Chinese text analysis.
45
+ Familiar with business applications and system design of machine learning systems.
46
+ Able to address challenges encountered when developing with mainstream ML frameworks and handling massive data.
47
+ Proficient in several of the following technologies:
48
+ PyTorch / Scikit-Learn / XGBoost / Tensorflow
49
+ Airflow
50
+ GCP / Kubernetes
51
+ SQL / NoSQL / Redis
52
+ Linux
53
+ Compensation
54
 
55
+ Negotiable
56
 
57
+ Things to Consider
58
 
59
+ Only shortlisted candidates will be notified.
60
+ The job opening may close ahead of schedule if positions are filled.
61
+ Dcard reserves the right to withdraw a job offer if any false information is discovered during the application process.
62
+ At Dcard, we celebrate diversity and strive to provide an inclusive environment where everyone is respected. We believe that equality and diversity drive innovation and creativity. Dcard is committed to maintaining a non-discriminatory employment environment and providing equal opportunities to all candidates.
 
 
 
 
 
 
 
 
 
 
 
job-postings/07-01-2025/2.txt CHANGED
@@ -1,49 +1,30 @@
1
- Job Description
2
-
3
- We are looking for experienced individuals who are highly independent and passionate about working in dual roles as a full-stack developer as well as a machine learning engineer to support advanced manufacturing capabilities. Innovative thinkers who are highly interested in explorations, pathfinding, and piloting new capabilities using Predictive Modeling, Deep Learning, Computer Vision, Generative AI, Natural Language Processing, ML-based optimization/simulation, and other techniques for Intel Foundry Manufacturing customers. If you're passionate about blending digital innovation with machine learning technicalities and want to be part of Foundry Manufacturing's cutting-edge transformation, we want to hear from you! Apply now, and let's build the future of Emerging Technology Solutions together.
4
-
5
- Responsibilities
6
-
7
- Uses modern software development methodologies and programming languages to solve overarching manufacturing needs.
8
- Familiar with Machine Learning Algorithms and LLM integration.
9
- Develop and maintain technical documentation and gather requirements for new business capabilities.
10
- Collaborate with a dream team of innovators/developers across Agile Release Train (ART) to deliver state-of-the-art software/ML solutions.
11
- Collaborate with cross-functional teams to collect business requirements and convert them into technical specifications.
12
- Design, develop, test, deploy, and maintain Software and Data Platform technology stack
13
- Keeping up to date with modern data engineering technologies through the designing, developing, and validation of ML solutions.
14
-
15
- Qualifications
16
-
17
- Minimum qualifications are required to be initially considered for this position. Preferred qualifications are in addition to the minimum requirements and are considered a plus factor in identifying top candidates.
18
-
19
- Minimum Qualifications
20
-
21
- Bachelor's degree in Computer Science, Engineering degree, or related discipline.
22
- 3+ years of experience designing, building, deploying, and/or maintaining software/ML solutions.
23
- Strong knowledge in building web and mobile applications and/or integrating RESTful APIs.
24
- Strong experience in Angular 16+, .NET Core, C#, Python, SQL Database, HTML and/or JavaScript.
25
- Knowledge of big and fast data technologies (Scala, Spark, Cassandra, Hadoop, etc.) and/or rapid prototyping frameworks (RTOS, ADTF, DDS) including Large Language Models (LLMs).
26
- Excellent verbal and written communication skills
27
- Upper Intermediate to Advanced English level.
28
-
29
- Preferred Qualifications
30
-
31
- Result-oriented team player with strong problem-solving skills, and the ability to work across multiple teams.
32
- Knowledge in building machine learning workflows necessary to productize AI platforms, self-service AI solutions, or AI models and sustain them in production.
33
- Responsible for preparing data for ML models at scale, building appropriate inference interfaces for ML model consumption, and enabling MLOps for continuous delivery platforms, scaled/POR integration, deployment, adoption, and support.
34
-
35
- Inside this Business Group
36
-
37
- Intel makes possible the most amazing experiences of the future. You may know us for our processors. But we do so much more. Intel invents at the boundaries of technology to make amazing experiences possible for business and society, and for every person on Earth. Harnessing the capability of the cloud, the ubiquity of the Internet of Things, the latest advances in memory and programmable solutions, and the promise of always-on 5G connectivity, Intel is disrupting industries and solving global challenges. Leading on policy, diversity, inclusion, education and sustainability, we create value for our stockholders, customers, and society.
38
-
39
- Posting Statement
40
-
41
- All qualified applicants will receive consideration for employment without regard to race, color, religion, religious creed, sex, national origin, ancestry, age, physical or mental disability, medical condition, genetic information, military and veteran status, marital status, pregnancy, gender, gender expression, gender identity, sexual orientation, or any other characteristic protected by local law, regulation, or ordinance.
42
-
43
- Benefits
44
-
45
- We offer a total compensation package that ranks among the best in the industry. It consists of competitive pay, stock, bonuses, as well as, benefit programs which include health, retirement, and vacation. Find more information about all of our Amazing Benefits here.
46
-
47
- Working Model
48
-
49
- This role will be eligible for our hybrid work model which allows employees to split their time between working on-site at their assigned Intel site and off-site. * Job posting details (such as work model, location or time type) are subject to change.
 
1
+ We are looking for a talented Machine Learning Engineer with a strong focus on Deep Learning and MLOps to join our client's engineering team. As an integral part of their MLOps initiatives, you will work on building, deploying, and maintaining deep learning models in production environments, using best practices in model management, automation, and continuous integration. You will leverage cutting-edge deep learning techniques to solve real-world problems while ensuring that these models can be efficiently deployed, monitored, and scaled.
2
+ This is an exciting opportunity for someone who thrives in an entrepreneurial, fast-paced startup environment and is passionate about combining deep learning expertise with MLOps to bring AI to life at scale.
3
+
4
+ Key Responsibilities:
5
+ Deep Learning Model Development: Design, train, and optimize deep learning models (e.g., CNNs, RNNs, Transformers) for various applications like NLP, computer vision, and predictive analytics.
6
+ MLOps Pipeline Development: Build and maintain scalable and automated MLOps pipelines for model training, validation, deployment, and monitoring in production environments.
7
+ Model Deployment & Monitoring: Implement best practices for deploying deep learning models using CI/CD pipelines, ensuring that models are continuously integrated, deployed, and monitored across environments (staging, production, etc.).
8
+ Model Versioning & Management: Implement robust model versioning and lifecycle management practices, ensuring that models can be easily tracked, retrained, and rolled back if necessary.
9
+ Collaboration with Data Scientists: Work closely with data scientists to refine models, integrate new features, and ensure models meet business requirements while maintaining operational scalability.
10
+ Model Performance & Optimization: Monitor and optimize the performance of models in production, adjusting hyperparameters, retraining models, and improving inference speed while maintaining accuracy.
11
+ Automation & Infrastructure: Build automated systems for data preprocessing, model training, evaluation, and deployment. Use technologies like Kubernetes, Docker, and cloud platforms (AWS, Azure, GCP) to ensure model deployment and scaling.
12
+ Cloud Platform Expertise: Deploy deep learning models on cloud platforms using services like AWS SageMaker, Google AI Platform, or Azure Machine Learning, ensuring that solutions are scalable and cost-effective.
13
+ Research & Continuous Improvement: Stay up-to-date with the latest trends in deep learning and MLOps, contributing to the development of new techniques for model deployment, monitoring, and optimization.
14
+ Cross-Functional Collaboration: Collaborate with DevOps engineers, software engineers, and product teams to ensure seamless integration of machine learning solutions into production systems.
15
+
16
+ Required Skills & Experience:
17
+ Experience: 3+ years of hands-on experience in machine learning, with a strong focus on deep learning and MLOps practices.
18
+ Deep Learning Frameworks: Proficiency with deep learning frameworks such as TensorFlow, Keras, or PyTorch for building and optimizing models.
19
+ MLOps Tools & Technologies: Experience in building and managing MLOps pipelines using tools like Kubeflow, MLflow, TFX, Jenkins, Docker, Kubernetes, and Terraform.
20
+ Programming Skills: Strong programming skills in Python and experience with data manipulation libraries such as Pandas, NumPy, and SciPy.
21
+ Cloud Computing: Hands-on experience with cloud platforms (AWS, GCP, or Azure) for deploying machine learning models at scale, including using tools like AWS SageMaker, Google AI Platform, or Azure ML.
22
+
23
+ Preferred Skills:
24
+ AI Specializations: Expertise in specific deep learning domains like NLP, computer vision, or reinforcement learning.
25
+ MLOps Frameworks: Experience with open-source MLOps frameworks such as Kubeflow, MLflow, or TFX for managing the end-to-end machine learning lifecycle.
26
+ Automation: Familiarity with infrastructure as code tools (e.g., Terraform, CloudFormation) for managing MLOps infrastructure.
27
+ Continuous Learning: A passion for staying up-to-date with the latest research in deep learning, MLOps practices, and model deployment strategies.
28
+
29
+ Education:
30
+ Degree Requirements: A Master's or PhD in Computer Science, Data Science, Electrical Engineering, or a related field is preferred but not required.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
job-postings/07-01-2025/3.txt CHANGED
@@ -1,53 +1,42 @@
1
- P-1131
2
 
3
- At Databricks, we are obsessed with enabling data teams to solve the world's toughest problems. We do this by building and running the world's best data and AI infrastructure platform, so our customers can focus on the high value challenges that are central to their own missions. Founded in 2013 by the original creators of Apache Spark™, Databricks has grown from a tiny corner office in Berkeley, California to a global organization with over 1000 employees. Thousands of organizations, from small to Fortune 100, trust Databricks with their mission-critical workloads, making us one of the fastest growing SaaS companies in the world.
 
 
 
4
 
5
- You’ll work with teams across Databricks to conduct foundational research into the feasibility and effectiveness of solutions that help customers analyze data using natural language, and then bring those solutions into our products to make data analysis easier and more approachable for all of our customers. More broadly, our teams work on some of the hardest, most interesting problems facing the business, ranging from designing large-scale distributed AI/ML systems, to optimizing distributed GPU model serving to developing novel modeling methodologies that scale to production use cases.
6
 
7
- The Impact You Will Have
 
 
 
 
 
8
 
9
- Shape the direction of our applied ML areas and intelligence features in our products, helping customers translate unstructured text into structured code, queries and data.
10
- Drive the development and deployment of state-of-the-art AI models and systems that directly impact the capabilities and performance of Databricks' products and services.
11
- Architect and implement robust, scalable ML infrastructure, including data storage, processing, and model serving components, to support seamless integration of AI/ML models into production environments.
12
- Develop novel data collection, fine-tuning, and pre-training strategies that achieve optimal performance on specific tasks and domains.
13
- Design and implement automated ML pipelines for data preprocessing, feature engineering, model training, hyperparameter tuning, and model evaluation, enabling rapid experimentation and iteration.
14
- Implement advanced model compression and optimization techniques to reduce the resource footprint of language models while preserving their performance
15
- Contribute to the broader AI community by publishing research, presenting at conferences, and actively participating in open-source projects, enhancing Databricks' reputation as an industry leader.
16
 
17
- What We Look For
18
 
19
- PhD in Computer Science, strongly preferred, or a related field or equivalent practical experience
20
- 2-5 years of machine learning engineering experience in high-velocity, high-growth companies. Alternatively, a strong background in relevant ML research in academia will be considered as an equivalent qualification.
21
- Experience developing AI/ML systems at scale in production or in high-impact research environments.
22
- Strong track record of working with language modeling technologies. This could include the following: Developing generative and embedding techniques, modern model architectures, fine tuning / pre-training datasets, and evaluation benchmarks.
23
- Strong coding and software engineering skills, and familiarity with software engineering principles around testing, code reviews and deployment.
24
- Experience deploying and scaling language models in production; deep understanding of the unique infrastructure challenges posed by training and serving LLMs.
25
- Strong understanding of computer science fundamentals.
26
- Prior experience with Natural Language Processing and transforming unstructured text into structured code, queries and data is a plus.
27
- Contributions to well-used open-source projects.
28
 
29
- Please note we are open to employees working from our Mountain View, CA office for this position.
30
 
31
- Pay Range Transparency
32
 
33
- Databricks is committed to fair and equitable compensation practices. The pay range(s) for this role is listed below and represents base salary range for non-commissionable roles or on-target earnings for commissionable roles. Actual compensation packages are based on several factors that are unique to each candidate, including but not limited to job-related skills, depth of experience, relevant certifications and training, and specific work location. Based on the factors above, Databricks utilizes the full width of the range. The total compensation package for this position may also include eligibility for annual performance bonus, equity, and the benefits listed above. For more information regarding which range your location is in visit our page here.
34
 
35
- Local Pay Range
36
 
37
- $166,000—$210,250 USD
38
 
39
- About Databricks
40
 
41
- Databricks is the data and AI company. More than 10,000 organizations worldwide — including Comcast, Condé Nast, Grammarly, and over 50% of the Fortune 500 — rely on the Databricks Data Intelligence Platform to unify and democratize data, analytics and AI. Databricks is headquartered in San Francisco, with offices around the globe and was founded by the original creators of Lakehouse, Apache Spark™, Delta Lake and MLflow. To learn more, follow Databricks on Twitter, LinkedIn and Facebook.
 
 
 
 
42
 
43
- Benefits
44
 
45
- At Databricks, we strive to provide comprehensive benefits and perks that meet the needs of all of our employees. For specific details on the benefits offered in your region, please visit https://www.mybenefitsnow.com/databricks.
46
-
47
- Our Commitment to Diversity and Inclusion
48
-
49
- At Databricks, we are committed to fostering a diverse and inclusive culture where everyone can excel. We take great care to ensure that our hiring practices are inclusive and meet equal employment opportunity standards. Individuals looking for employment at Databricks are considered without regard to age, color, disability, ethnicity, family or marital status, gender identity or expression, language, national origin, physical and mental ability, political affiliation, race, religion, sexual orientation, socio-economic status, veteran status, and other protected characteristics.
50
-
51
- Compliance
52
-
53
- If access to export-controlled technology or source code is required for performance of job duties, it is within Employer's discretion whether to apply for a U.S. government license for such positions, and Employer may decline to proceed with an applicant on this basis alone.
 
1
+ Note: By applying to this position you will have an opportunity to share your preferred working location from the following: Mountain View, CA, USA; Seattle, WA, USA; San Francisco, CA, USA. Minimum qualifications:
2
 
3
+ PhD degree in Computer Science, a related field, or equivalent practical experience.
4
+ One or more scientific publication submission(s) for conferences, journals, or public repositories.
5
+ Coding experience in Python, JavaScript, R, Java, or C++.
6
+ Machine Learning experience.
7
 
8
+ Preferred qualifications:
9
 
10
+ 2 years of coding experience in Python, JavaScript, R, Java, or C++.
11
+ 1 year of experience owning and initiating research agendas.
12
+ Experience with automated algorithm discovery methods, learning to learn, or program synthesis.
13
+ Experience with digital hardware or hardware intended for machine learning.
14
+ Knowledge of computational neuroscience.
15
+ Familiarity with non-gradient-based optimization techniques.
16
 
17
+ About The Job
 
 
 
 
 
 
18
 
19
+ As an organization, Google maintains a portfolio of research projects driven by fundamental research, new product innovation, product contribution and infrastructure goals, while providing individuals and teams the freedom to emphasize specific types of work. As a Research Scientist, you'll set up large-scale tests and deploy promising ideas quickly and broadly, managing deadlines and deliverables while applying the latest theories to develop new and improved products, processes, or technologies. From creating experiments and prototyping implementations to designing new architectures, our research scientists work on real-world problems that span the breadth of computer science, such as machine (and deep) learning, data mining, natural language processing, hardware and software performance analysis, improving compilers for mobile platforms, as well as core search and much more.
20
 
21
+ As a Research Scientist, you'll also actively contribute to the wider research community by sharing and publishing your findings, with ideas inspired by internal projects as well as from collaborations with research programs at partner universities and technical institutes all over the world.
 
 
 
 
 
 
 
 
22
 
23
+ To advance the field of artificial intelligence by exploring alternative computational paradigms beyond those currently trending. In particular, our team is interested in the discovery of learning algorithms for experimental, energy efficient hardware paradigms. We use both hand-design and automated discovery methods.
24
 
25
+ Google Research is building the next generation of intelligent systems for all Google products. To achieve this, we’re working on projects that utilize the latest computer science techniques developed by skilled software developers and research scientists. Google Research teams collaborate closely with other teams across Google, maintaining the flexibility and versatility required to adapt new projects and foci that meet the demands of the world's fast-paced business needs.
26
 
27
+ [For US Applicants]
28
 
29
+ The US base salary range for this full-time position is $136,000-$200,000 + bonus + equity + benefits. Our salary ranges are determined by role, level, and location. The range displayed on each job posting reflects the minimum and maximum target salaries for the position across all US locations. Within the range, individual pay is determined by work location and additional factors, including job-related skills, experience, and relevant education or training. Your recruiter can share more about the specific salary range for your preferred location during the hiring process.
30
 
31
+ Please note that the compensation details listed in US role postings reflect the base salary only, and do not include bonus, equity, or benefits. Learn more about benefits at Google.
32
 
33
+ Responsibilities
34
 
35
+ Explore thoroughly into a project for an extended period of time.
36
+ Design, execute, and interpret machine learning experiments, selecting appropriate algorithms, models, and evaluation metrics.
37
+ Review literature, identify key questions, think creatively, iterate on experiments, and employ scientific accuracy.
38
+ Be proficient in one or more modern programming languages (e.g., Python), learn new programming languages. Learn technologies such as large-scale computation methods, be experienced with one or more machine learning libraries (e.g., JAX or PyTorch).
39
+ Write clear academic papers, give formal research talks, and have informal discussions with colleagues.
40
 
 
41
 
42
+ Google is proud to be an equal opportunity workplace and is an affirmative action employer. We are committed to equal employment opportunity regardless of race, color, ancestry, religion, sex, national origin, sexual orientation, age, citizenship, marital status, disability, gender identity or Veteran status. We also consider qualified applicants regardless of criminal histories, consistent with legal requirements. See also Google's EEO Policy and EEO is the Law. If you have a disability or special need that requires accommodation, please let us know by completing our Accommodations for Applicants form.
 
 
 
 
 
 
 
 
job-postings/07-01-2025/4.txt CHANGED
@@ -1,82 +1,42 @@
1
- About The Team
2
 
3
- Come help us build the world's most reliable on-demand, logistics engine for delivery! We're bringing on talented engineers to help us create and maintain a 24x7, no downtime, global infrastructure system that powers DoorDash’s three-sided marketplace of consumers, merchants, and dashers.
 
 
 
4
 
5
- About The Role
6
 
7
- At DoorDash, our Data Scientists and ML Engineers have the opportunity to dive into a wealth of delivery data to improve company-wide ML workflows such as Search & Recommendations, Dasher Assignment, ETA Prediction, and Dasher Capacity Planning. You will join a small team to build systems that empower efficient machine learning at scale. This is a hybrid opportunity in San Francisco, Sunnyvale or Seattle.
 
 
 
 
 
8
 
9
- You’re Excited About This Opportunity Because You Will…
10
 
11
- Build a world-class ML platform where models are developed, trained, and deployed seamlessly
12
- Work closely with Data Scientists and Product Engineers to evolve the ML platform as per their use cases
13
- You will help build high performance and flexible pipelines that can rapidly evolve to handle new technologies, techniques and modeling approaches
14
- You will work on infrastructure designs and solutions to store trillions of feature values and power hundreds of billions of predictions a day
15
- You will help design and drive directions for the centralized machine learning platform that powers all of DoorDash's business.
16
- Improve the reliability, scalability, and observability of our training and inference infrastructure.
17
 
18
- We’re Excited About You Because…
19
 
20
- B.S., M.S., or PhD. in Computer Science or equivalent
21
- Exceptionally strong knowledge of CS fundamental concepts and OOP languages
22
- 6+ years of industry experience in software engineering
23
- Prior experience building machine learning systems in production such as enabling data analytics at scale
24
- Prior experience in machine learning - you've developed and deployed your own models - even if these are simple proof of concepts
25
- Systems Engineering - you've built meaningful pieces of infrastructure in a cloud computing environment. Bonus if those were data processing systems or distributed systems
26
 
27
- Nice To Haves
28
 
29
- Experience with challenges in real-time computing
30
- Experience with large scale distributed systems, data processing pipelines and machine learning training and serving infrastructure
31
- Familiar with Pandas and Python machine learning libraries and deep learning frameworks such as PyTorch and TensorFlow
32
- Familiar with Spark, MLLib, Databricks,MLFlow, Apache Airflow, Dagster and similar related technologies.
33
- Familiar with large language models like GPT, LLAMA, BERT, or Transformer-based architectures
34
- Familiar with a cloud based environment such as AWS
35
 
36
- Notice to Applicants for Jobs Located in NYC or Remote Jobs Associated With Office in NYC Only
37
 
38
- We use Covey as part of our hiring and/or promotional process for jobs in NYC and certain features may qualify it as an AEDT in NYC. As part of the hiring and/or promotion process, we provide Covey with job requirements and candidate submitted applications. We began using Covey Scout for Inbound from August 21, 2023, through December 21, 2023, and resumed using Covey Scout for Inbound again on June 29, 2024.
39
 
40
- The Covey tool has been reviewed by an independent auditor. Results of the audit may be viewed here: Covey
41
 
42
- Compensation
 
 
 
 
43
 
44
- The successful candidate's starting pay will fall within the pay range listed below and is determined based on job-related factors including, but not limited to, skills, experience, qualifications, work location, and market conditions. Base salary is localized according to an employee’s work location. Ranges are market-dependent and may be modified in the future.
45
 
46
- In addition to base salary, the compensation for this role includes opportunities for equity grants. Talk to your recruiter for more information.
47
-
48
- DoorDash cares about you and your overall well-being. That’s why we offer a comprehensive benefits package for all regular employees that includes a 401(k) plan with an employer match, paid time off, paid parental leave, wellness benefits, and several paid holidays. Paid sick leave in compliance with applicable laws (i.e. Colorado Healthy Families and Workplaces Act).
49
-
50
- Additionally, for full-time employees, DoorDash offers medical, dental, and vision benefits, disability and basic life insurance, family-forming assistance, a commuter benefit match, and a mental health program, among others.
51
-
52
- To learn more about our benefits, visit our careers page here.
53
-
54
- The base pay for this position ranges from our lowest geographical market up to our highest geographical market within California, Colorado, District of Columbia, Hawaii, Maryland, New Jersey, New York and Washington.
55
-
56
- I4
57
-
58
- $119,100—$175,100 USD
59
-
60
- I5
61
-
62
- $145,000—$213,200 USD
63
-
64
- I6
65
-
66
- $171,600—$252,400 USD
67
-
68
- About DoorDash
69
-
70
- At DoorDash, our mission to empower local economies shapes how our team members move quickly, learn, and reiterate in order to make impactful decisions that display empathy for our range of users—from Dashers to merchant partners to consumers. We are a technology and logistics company that started with door-to-door delivery, and we are looking for team members who can help us go from a company that is known for delivering food to a company that people turn to for any and all goods.
71
-
72
- DoorDash is growing rapidly and changing constantly, which gives our team members the opportunity to share their unique perspectives, solve new challenges, and own their careers. We're committed to supporting employees’ happiness, healthiness, and overall well-being by providing comprehensive benefits and perks including premium healthcare, wellness expense reimbursement, paid parental leave and more.
73
-
74
- Our Commitment to Diversity and Inclusion
75
-
76
- We’re committed to growing and empowering a more inclusive community within our company, industry, and cities. That’s why we hire and cultivate diverse teams of people from all backgrounds, experiences, and perspectives. We believe that true innovation happens when everyone has room at the table and the tools, resources, and opportunity to excel.
77
-
78
- Statement of Non-Discrimination: In keeping with our beliefs and goals, no employee or applicant will face discrimination or harassment based on: race, color, ancestry, national origin, religion, age, gender, marital/domestic partner status, sexual orientation, gender identity or expression, disability status, or veteran status. Above and beyond discrimination and harassment based on “protected categories,” we also strive to prevent other subtler forms of inappropriate behavior (i.e., stereotyping) from ever gaining a foothold in our office. Whether blatant or hidden, barriers to success have no place at DoorDash. We value a diverse workforce – people who identify as women, non-binary or gender non-conforming, LGBTQIA+, American Indian or Native Alaskan, Black or African American, Hispanic or Latinx, Native Hawaiian or Other Pacific Islander, differently-abled, caretakers and parents, and veterans are strongly encouraged to apply. Thank you to the Level Playing Field Institute for this statement of non-discrimination.
79
-
80
- Pursuant to the San Francisco Fair Chance Ordinance, Los Angeles Fair Chance Initiative for Hiring Ordinance, and any other state or local hiring regulations, we will consider for employment any qualified applicant, including those with arrest and conviction records, in a manner consistent with the applicable regulation.
81
-
82
- If you need any accommodations, please inform your recruiting contact upon initial connection.
 
1
+ Note: By applying to this position you will have an opportunity to share your preferred working location from the following: Mountain View, CA, USA; Seattle, WA, USA; San Francisco, CA, USA. Minimum qualifications:
2
 
3
+ PhD degree in Computer Science, a related field, or equivalent practical experience.
4
+ One or more scientific publication submission(s) for conferences, journals, or public repositories.
5
+ Coding experience in Python, JavaScript, R, Java, or C++.
6
+ Machine Learning experience.
7
 
8
+ Preferred qualifications:
9
 
10
+ 2 years of coding experience in Python, JavaScript, R, Java, or C++.
11
+ 1 year of experience owning and initiating research agendas.
12
+ Experience with automated algorithm discovery methods, learning to learn, or program synthesis.
13
+ Experience with digital hardware or hardware intended for machine learning.
14
+ Knowledge of computational neuroscience.
15
+ Familiarity with non-gradient-based optimization techniques.
16
 
17
+ About The Job
18
 
19
+ As an organization, Google maintains a portfolio of research projects driven by fundamental research, new product innovation, product contribution and infrastructure goals, while providing individuals and teams the freedom to emphasize specific types of work. As a Research Scientist, you'll set up large-scale tests and deploy promising ideas quickly and broadly, managing deadlines and deliverables while applying the latest theories to develop new and improved products, processes, or technologies. From creating experiments and prototyping implementations to designing new architectures, our research scientists work on real-world problems that span the breadth of computer science, such as machine (and deep) learning, data mining, natural language processing, hardware and software performance analysis, improving compilers for mobile platforms, as well as core search and much more.
 
 
 
 
 
20
 
21
+ As a Research Scientist, you'll also actively contribute to the wider research community by sharing and publishing your findings, with ideas inspired by internal projects as well as from collaborations with research programs at partner universities and technical institutes all over the world.
22
 
23
+ To advance the field of artificial intelligence by exploring alternative computational paradigms beyond those currently trending. In particular, our team is interested in the discovery of learning algorithms for experimental, energy efficient hardware paradigms. We use both hand-design and automated discovery methods.
 
 
 
 
 
24
 
25
+ Google Research is building the next generation of intelligent systems for all Google products. To achieve this, we’re working on projects that utilize the latest computer science techniques developed by skilled software developers and research scientists. Google Research teams collaborate closely with other teams across Google, maintaining the flexibility and versatility required to adapt new projects and foci that meet the demands of the world's fast-paced business needs.
26
 
27
+ [For US Applicants]
 
 
 
 
 
28
 
29
+ The US base salary range for this full-time position is $136,000-$200,000 + bonus + equity + benefits. Our salary ranges are determined by role, level, and location. The range displayed on each job posting reflects the minimum and maximum target salaries for the position across all US locations. Within the range, individual pay is determined by work location and additional factors, including job-related skills, experience, and relevant education or training. Your recruiter can share more about the specific salary range for your preferred location during the hiring process.
30
 
31
+ Please note that the compensation details listed in US role postings reflect the base salary only, and do not include bonus, equity, or benefits. Learn more about benefits at Google .
32
 
33
+ Responsibilities
34
 
35
+ Explore thoroughly into a project for an extended period of time.
36
+ Design, execute, and interpret machine learning experiments, selecting appropriate algorithms, models, and evaluation metrics.
37
+ Review literature, identify key questions, think creatively, iterate on experiments, and employ scientific accuracy.
38
+ Be proficient in one or more modern programming languages (e.g., Python), learn new programming languages. Learn technologies such as large-scale computation methods, be experienced with one or more machine learning libraries (e.g., JAX or PyTorch).
39
+ Write clear academic papers, give formal research talks, and have informal discussions with colleagues.
40
 
 
41
 
42
+ Google is proud to be an equal opportunity workplace and is an affirmative action employer. We are committed to equal employment opportunity regardless of race, color, ancestry, religion, sex, national origin, sexual orientation, age, citizenship, marital status, disability, gender identity or Veteran status. We also consider qualified applicants regardless of criminal histories, consistent with legal requirements. See also Google's EEO Policy and EEO is the Law. If you have a disability or special need that requires accommodation, please let us know by completing our Accommodations for Applicants form .
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
job-postings/07-01-2025/5.txt CHANGED
@@ -1,52 +1,42 @@
1
- Affirm is reinventing credit to make it more honest and friendly, giving consumers the flexibility to buy now and pay later without any hidden fees or compounding interest.
2
 
3
- Join Affirm’s ML Fraud team as a Machine Learning Engineer II and advance our capabilities in detecting and preventing online fraud. Leverage cutting-edge algorithms and proprietary data to outsmart fraudsters while ensuring seamless experiences for genuine customers. Your work will directly impact Affirm's financial performance by developing sophisticated models for real time transaction decisions.
 
 
 
4
 
5
- Collaborate with experts to tackle complex challenges, create innovative solutions for emerging fraud patterns, and advance our fraud detection capabilities. If you're passionate about machine learning, thrive on challenges, and want to make a tangible difference in fintech, Affirm is your opportunity. Help us create honest financial products that improve lives while growing your career in a dynamic, impactful environment.
6
 
7
- What You'll Do
 
 
 
 
 
8
 
9
- Use Affirm’s proprietary and other third party data to develop machine learning models that predict the likelihood of fraud. These models will protect victims’ identities from being stolen, prevent Affirm from incurring financial loss, and increase the trust that consumers and partners have in the Affirm ecosystem.
10
- Partner with the ML platform team to build fraud specific ML infrastructure
11
- Research ground breaking solutions and develop prototypes that drive the future of fraud decisioning at Affirm
12
- Implement and scale data pipelines, new features, and algorithms that are essential to our production models
13
- Collaborate with the engineering, fraud, and product teams to define requirements for new products
14
- Develop fraud models to maximize user conversion while minimizing fraud losses and data costs.
15
 
16
- What We Look For
17
 
18
- 2+ years of experience as a machine learning engineer or PhD in a relevant field
19
- Proficiency in machine learning with experience in areas such as gradient boosting, online learning, and deep learning. Domain knowledge in fraud risk is a plus
20
- Strong programming skills in Python
21
- Experience using large scale distributed systems like Spark and Ray
22
- Experience using machine learning frameworks such as scikit-learn, pandas, numpy, xgboost, and pytorch
23
- Excellent written and oral communication skills and the capability to drive cross-functional requirements with product and engineering teams
24
- The ability to present technical concepts and results in an audience-appropriate way
25
- Persistence, patience and a strong sense of responsibility – we build the decision making that enables consumers and partners to place their trust in Affirm!
26
 
27
- Base Pay Grade - L
28
 
29
- Equity Grade - 5
30
 
31
- Employees new to Affirm typically come in at the start of the pay range. Affirm focuses on providing a simple and transparent pay structure which is based on a variety of factors, including location, experience and job-related skills.
32
 
33
- Base pay is part of a total compensation package that may include monthly stipends for health, wellness and tech spending, and benefits (including 100% subsidized medical coverage, dental and vision for you and your dependents). In addition, the employees may be eligible for equity rewards offered by Affirm Holdings, Inc. (parent company).
34
 
35
- CAN base pay range per year: $125,000 - $175,000
36
 
37
- Affirm is proud to be a remote-first company! The majority of our roles are remote and you can work almost anywhere within the country of employment. Affirmers in proximal roles have the flexibility to work remotely, but will occasionally be required to work out of their assigned Affirm office. A limited number of roles remain office-based due to the nature of their job responsibilities.
38
 
39
- Benefits
 
 
 
 
40
 
41
- We’re extremely proud to offer competitive benefits that are anchored to our core value of people come first. Some key highlights of our benefits package include:
42
 
43
- Health care coverage - Affirm covers all premiums for all levels of coverage for you and your dependents
44
- Flexible Spending Wallets - generous stipends for spending on Technology, Food, various Lifestyle needs, and family forming expenses
45
- Time off - competitive vacation and holiday schedules allowing you to take time off to rest and recharge
46
- ESPP - An employee stock purchase plan enabling you to buy shares of Affirm at a discount
47
-
48
- We believe It’s On Us to provide an inclusive interview experience for all, including people with disabilities. We are happy to provide reasonable accommodations to candidates in need of individualized support during the hiring process.
49
-
50
- [For U.S. positions that could be performed in Los Angeles or San Francisco] Pursuant to the San Francisco Fair Chance Ordinance and Los Angeles Fair Chance Initiative for Hiring Ordinance, Affirm will consider for employment qualified applicants with arrest and conviction records.
51
-
52
- By clicking "Submit Application," you acknowledge that you have read Affirm's Global Candidate Privacy Notice and hereby freely and unambiguously give informed consent to the collection, processing, use, and storage of your personal information as described therein.
 
1
+ Note: By applying to this position you will have an opportunity to share your preferred working location from the following: Mountain View, CA, USA; Seattle, WA, USA; San Francisco, CA, USA. Minimum qualifications:
2
 
3
+ PhD degree in Computer Science, a related field, or equivalent practical experience.
4
+ One or more scientific publication submission(s) for conferences, journals, or public repositories.
5
+ Coding experience in Python, JavaScript, R, Java, or C++.
6
+ Machine Learning experience.
7
 
8
+ Preferred qualifications:
9
 
10
+ 2 years of coding experience in Python, JavaScript, R, Java, or C++.
11
+ 1 year of experience owning and initiating research agendas.
12
+ Experience with automated algorithm discovery methods, learning to learn, or program synthesis.
13
+ Experience with digital hardware or hardware intended for machine learning.
14
+ Knowledge of computational neuroscience.
15
+ Familiarity with non-gradient-based optimization techniques.
16
 
17
+ About The Job
 
 
 
 
 
18
 
19
+ As an organization, Google maintains a portfolio of research projects driven by fundamental research, new product innovation, product contribution and infrastructure goals, while providing individuals and teams the freedom to emphasize specific types of work. As a Research Scientist, you'll set up large-scale tests and deploy promising ideas quickly and broadly, managing deadlines and deliverables while applying the latest theories to develop new and improved products, processes, or technologies. From creating experiments and prototyping implementations to designing new architectures, our research scientists work on real-world problems that span the breadth of computer science, such as machine (and deep) learning, data mining, natural language processing, hardware and software performance analysis, improving compilers for mobile platforms, as well as core search and much more.
20
 
21
+ As a Research Scientist, you'll also actively contribute to the wider research community by sharing and publishing your findings, with ideas inspired by internal projects as well as from collaborations with research programs at partner universities and technical institutes all over the world.
 
 
 
 
 
 
 
22
 
23
+ To advance the field of artificial intelligence by exploring alternative computational paradigms beyond those currently trending. In particular, our team is interested in the discovery of learning algorithms for experimental, energy efficient hardware paradigms. We use both hand-design and automated discovery methods.
24
 
25
+ Google Research is building the next generation of intelligent systems for all Google products. To achieve this, we’re working on projects that utilize the latest computer science techniques developed by skilled software developers and research scientists. Google Research teams collaborate closely with other teams across Google, maintaining the flexibility and versatility required to adapt new projects and foci that meet the demands of the world's fast-paced business needs.
26
 
27
+ [For US Applicants]
28
 
29
+ The US base salary range for this full-time position is $136,000-$200,000 + bonus + equity + benefits. Our salary ranges are determined by role, level, and location. The range displayed on each job posting reflects the minimum and maximum target salaries for the position across all US locations. Within the range, individual pay is determined by work location and additional factors, including job-related skills, experience, and relevant education or training. Your recruiter can share more about the specific salary range for your preferred location during the hiring process.
30
 
31
+ Please note that the compensation details listed in US role postings reflect the base salary only, and do not include bonus, equity, or benefits. Learn more about benefits at Google .
32
 
33
+ Responsibilities
34
 
35
+ Explore thoroughly into a project for an extended period of time.
36
+ Design, execute, and interpret machine learning experiments, selecting appropriate algorithms, models, and evaluation metrics.
37
+ Review literature, identify key questions, think creatively, iterate on experiments, and employ scientific accuracy.
38
+ Be proficient in one or more modern programming languages (e.g., Python), learn new programming languages. Learn technologies such as large-scale computation methods, be experienced with one or more machine learning libraries (e.g., JAX or PyTorch).
39
+ Write clear academic papers, give formal research talks, and have informal discussions with colleagues.
40
 
 
41
 
42
+ Google is proud to be an equal opportunity workplace and is an affirmative action employer. We are committed to equal employment opportunity regardless of race, color, ancestry, religion, sex, national origin, sexual orientation, age, citizenship, marital status, disability, gender identity or Veteran status. We also consider qualified applicants regardless of criminal histories, consistent with legal requirements. See also Google's EEO Policy and EEO is the Law. If you have a disability or special need that requires accommodation, please let us know by completing our Accommodations for Applicants form .
 
 
 
 
 
 
 
 
 
job-postings/07-01-2025/6.txt CHANGED
@@ -1,59 +1,47 @@
1
- Overview
2
 
3
- Working at Atlassian
4
 
5
- Atlassians can choose where they work whether in an office, from home, or a combination of the two. That way, Atlassians have more control over supporting their family, personal goals, and other priorities. We can hire people in any country where we have a legal entity. Interviews and onboarding are conducted virtually, a part of being a distributed-first company.
6
 
7
  Responsibilities
8
 
9
- Atlassian is looking for a Principal Data Scientist to be the wizard who will uncover valuable insights from customer friction which will help the business improve our products. We're looking for someone with superb business instincts, comfort with executive stakeholders, proficiency in statistical analyses, expertise in NLP/LLM, experience partnering with cross-functional teams, and mentoring others on the team. This role aims to leverage basic analytics, NLP/LLM, and data science methods to identify areas of customer friction from variety of customer interaction channels and build tools and solutions that will help us identify areas to improve our products. This role is part of Service Enablement Team within Customer Support Organization.
10
 
11
- Responsibilities
12
-
13
- Influence strategy & important decisions around customer friction by surfacing data driven insights.
14
- Define, set and report on department level metrics or KRs to the CSS Executive team
15
- Build and implement measurement frameworks, machine learning models and NLP/LLM tooling to accelerate Atlassian’s growth and improve product quality.
16
- Foster a world-class Data Science culture by leading training on technical concepts, driving continuous learning and mentoring Data Scientists on the team
17
-
18
-
19
- Qualifications
20
-
21
- Experience applying your Data Science skills to identify and lead projects which have had impact on business strategy and performance
22
- 8+ years of experience in Data Science or related fields. (Preferred - 10+ years experience with a post-graduate degree in a quantitative discipline like Statistics, Mathematics, Econometrics, Computer science)
23
- Expertise in applying a broad variety of ML methods including NLP and LLM to solve business problems and a strong sense of when to apply them to the problem at hand
24
- Experience in managing ML projects end-to-end including deployment and monitoring.
25
- Expertise in SQL and a high level of proficiency in another data science programming language (e.g Python, R) with expertise in libraries like Pandas, Numpy, Scikit-learn etc.
26
- A very high bar for output quality, while balancing "having something now" vs. "perfection in the future"
27
- Comfort explaining complex concepts to diverse audiences and creating compelling stories for non-data experts
28
- Proficiency in visualization tools (e.g. Streamlit, Tableau)
29
-
30
-
31
- Qualifications
32
-
33
- Compensation
34
 
35
- At Atlassian, We Strive To Design Equitable, Explainable, And Competitive Compensation Programs. To Support This Goal, The Baseline Of Our Range Is Higher Than That Of The Typical Market Range, But In Turn We Expect To Hire Most Candidates Near This Baseline. Base Pay Within The Range Is Ultimately Determined By a Candidate's Skills, Expertise, Or Experience. In The United States, We Have Three Geographic Pay Zones. For This Role, Our Current Base Pay Ranges For New Hires In Each Zone Are:
 
 
 
 
 
36
 
37
- Zone A: $175,100 - $233,400
38
 
39
- Zone B: $157,500 - $210,100
 
 
 
 
 
40
 
41
- Zone C: $145,300 - $193,700
42
 
43
- This role may also be eligible for benefits, bonuses, commissions, and equity.
44
 
45
- Please visit go.atlassian.com/payzones for more information on which locations are included in each of our geographic pay zones. However, please confirm the zone for your specific location with your recruiter.
46
 
47
- Our Perks & Benefits
48
 
49
- Atlassian offers a variety of perks and benefits to support you, your family and to help you engage with your local community. Our offerings include health coverage, paid volunteer days, wellness resources, and so much more. Visit go.atlassian.com/perksandbenefits to learn more.
50
 
51
- About Atlassian
52
 
53
- At Atlassian, we're motivated by a common goal: to unleash the potential of every team. Our software products help teams all over the planet and our solutions are designed for all types of work. Team collaboration through our tools makes what may be impossible alone, possible together.
54
 
55
- We believe that the unique contributions of all Atlassians create our success. To ensure that our products and culture continue to incorporate everyone's perspectives and experience, we never discriminate based on race, religion, national origin, gender identity or expression, sexual orientation, age, or marital, veteran, or disability status. All your information will be kept confidential according to EEO guidelines.
56
 
57
- To provide you the best experience, we can support with accommodations or adjustments at any stage of the recruitment process. Simply inform our Recruitment team during your conversation with them.
58
 
59
- To learn more about our culture and hiring process, visit go.atlassian.com/crh .
 
1
+ Job Description
2
 
3
+ Arm's Machine Learning Group is seeking highly motivated and creative Software Engineers to join the Cambridge-based ML Content, Algorithms and Tools team!
4
 
5
+ This Machine Learning Engineer role focuses on advancing the field of AI by optimizing and deploying pioneering models, particularly Large Language Models (LLMs) and Generative AI algorithms. This involves deep analysis of neural networks, optimizing software and hardware, developing innovative solutions, and collaborating with teams to build high-performance AI systems.
6
 
7
  Responsibilities
8
 
9
+ Your responsibilities involve working with major ML frameworks (PyTorch, TensorFlow, etc.) to port and develop ML networks, optimize and quantize models for efficient execution on Arm platforms, and help ensure multiple Arm products are designed to perform effectively for machine learning. As an in-depth technical responsibility, you will need to deeply understand the complex applications you analyze and communicate them in their simplest form to contribute to product designs, allowing you to influence both IP and system architecture.
10
 
11
+ Required Skills And Experience
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
+ A background in computer science, software engineering or other comparable skills
14
+ Experience training and debugging neural networks with TensorFlow and PyTorch using Python
15
+ Understanding, deploying, and optimizing Large Language Models (LLMs) and Generative AI algorithms.
16
+ Experience using software development platforms and continuous integration systems
17
+ Familiarity with Linux and cloud services
18
+ Have a strong attention to detail to ensure use cases you investigate are well understood and the critical areas needing improvement are understood
19
 
20
+ Nice To Have Skills And Experience
21
 
22
+ Experience of the inner workings of PyTorch, TensorFlow, ExecuTorch and TensorFlow Lite
23
+ Experience of developing and maintaining CI/testing components to improve automation of model analysis
24
+ Good knowledge of Python for working with ML frameworks
25
+ Good knowledge of C++ for working with optimised ML libraries
26
+ Previous experience of machine learning projects
27
+ Experience with deployment optimizations on machine learning models
28
 
29
+ In Return
30
 
31
+ From research to proof-of-concept development, to deployment on ARM IPs, joining this team would be a phenomenal opportunity to contribute to the full life cycle of machine learning projects and understand how innovative machine learning is used to solve real-world problems.
32
 
33
+ Working closely with experts in ML and software and hardware optimisation - a truly multi-discipline environment - you will have the chance to explore existing or build new machine learning techniques, while helping unpick the complex world of use-cases spanning mobile phones, servers, autonomous driving vehicles, and low-power embedded devices
34
 
35
+ !
36
 
37
+ Accommodations at Arm
38
 
39
+ At Arm, we want our people to Do Great Things. If you need support or an accommodation to Be Your Brilliant Self during the recruitment process, please email [email protected] . To note, by sending us the requested information, you consent to its use by Arm to arrange for appropriate accommodations. All accommodation requests will be treated with confidentiality, and information concerning these requests will only be disclosed as necessary to provide the accommodation. Although this is not an exhaustive list, examples of support include breaks between interviews, having documents read aloud or office accessibility. Please email us about anything we can do to accommodate you during the recruitment process.
40
 
41
+ Hybrid Working at Arm
42
 
43
+ Arm’s approach to hybrid working is designed to create a working environment that supports both high performance and personal wellbeing. We believe in bringing people together face to face to enable us to work at pace, whilst recognizing the value of flexibility. Within that framework, we empower groups/teams to determine their own hybrid working patterns, depending on the work and the team’s needs. Details of what this means for each role will be shared upon application. In some cases, the flexibility we can offer is limited by local legal, regulatory, tax, or other considerations, and where this is the case, we will collaborate with you to find the best solution. Please talk to us to find out more about what this could look like for you.
44
 
45
+ Equal Opportunities at Arm
46
 
47
+ Arm is an equal opportunity employer, committed to providing an environment of mutual respect where equal opportunities are available to all applicants and colleagues. We are a diverse organization of dedicated and innovative individuals, and don’t discriminate on the basis of race, color, religion, sex, sexual orientation, gender identity, national origin, disability, or status as a protected veteran.
job-postings/07-01-2025/7.txt CHANGED
@@ -1,79 +1,78 @@
1
- GEICO's Enterprise Voice team is seeking an experienced Machine Learning Engineer to reimagine our customers’ contact center experience. You will help drive our insurance business transformation as we redefine experience for our customers.
2
 
3
- You will join a team of skilled software engineers as a resident AI/ML expert, supporting multiple AI automation initiatives across the contact center. We are looking for a motivated, high-energy, self-starter and problem solver with a positive attitude, and solid interpersonal and communication skills.
4
 
5
- Position Description
6
 
7
- Our Senior Machine Learning Engineer is a mid-level member of the Voice engineering staff ensuring we provide innovative experiences and operational excellence to our contact center agents, supervisors and customers. Our team thrives and succeeds in delivering high-quality technology products and services in a hyper-growth environment where priorities shift quickly. The ideal candidate has broad and deep technical knowledge in AI language services, Natural Language Processing and Generative AI.
8
 
9
- Responsibilities
10
 
11
- Ability to think creatively to find innovative solutions to complex ill-defined problems
12
- Stay abreast of the latest developments in AI, incorporating new techniques and methodologies into our processes to keep us ahead in the insurance industry
13
- Provides machine learning expertise within a team's functional area
14
- Consistently writes production-ready code with defined standards of readability, maintainability, reliability and testability. Helps junior team members to produce the same.
15
- Participates in the formulation of non-functional requirements
16
- Specifies the design and implementation of software modules based upon system requirements and architectural guidance
17
 
18
- Basic Qualifications:
19
 
20
- Strong foundation in Machine Learning and Artificial Intelligence
21
- Experienced in Natural Language Processing and AI language services
22
- Knowledgeable in Generative AI, Large Language Models, OpenAI APIs, prompt engineering
23
- Familiar with Responsible AI principles, model evaluation and monitoring
24
- Strong Python and SQL programming skills. Experience using Jupyter notebooks or similar tools. Experience with Github or other source code management platforms
25
- Experience building configurable AI/ML data pipelines through the complete ML Ops lifecycle.
26
- Working knowledge of Big Data technologies such as Spark, Data lake, MLflow, Snowflake, Elasticsearch.
27
- Experience with cloud providers, preferably Amazon Web Services and/or Azure
28
- Ability to explain AI/ML concepts to technical and non-technical audience
29
- Ability to coach and guide other engineers in shaping ill-defined data needs into concrete project deliverables
30
- Ability to develop collaborative relationships with multi-functional teams
31
 
32
- Preferred Qualifications
33
 
34
- Generative AI Retrieval Augmented Generation and AI agents
35
- Experience working with chatbots and Conversational AI
36
- Developing omni-channel (voice, chat, SMS) Contact Center solutions including natural language processing and speech-enabled grammar
37
- Experience with Amazon Web Services including Sagemaker, Bedrock, Lambda, S3, Connect, Lex, DynamoDB, API Gateway, CloudWatch
38
- Experience with Machine Learning frameworks and tools like TensorFlow, PyTorch, Scikit-learn.
39
- Familiar with Microservices development, Swagger, Postman
40
- Building test suite and frameworks to automate end to end testing
41
- Experience with open source libraries and frameworks
42
- Experience working in an Agile environment (Scrum, Kanban, SAFe)
43
 
44
- Experience
45
 
46
- 4+ years of hands-on experience in building AI/ML solutions in a production environment
47
- 4+ years of professional software development experience
48
- 2+ years of experience with architecture and design
49
- 2+ years of experience in open source frameworks
50
- 1+ years of experience with AWS, GCP, Azure, or another cloud service
51
- 1+ years of experience in Natural Language Processing, Generative AI or Language services
52
 
53
- Education
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- Bachelor's degree in Computer Science, Information Systems, Engineering or equivalent education or work experience. Master’s degree preferred.
56
 
57
- Annual Salary
 
 
 
 
58
 
59
- $70,000.00 - $230,000.00
60
 
61
- The above annual salary range is a general guideline. Multiple factors are taken into consideration to arrive at the final hourly rate/ annual salary to be offered to the selected candidate. Factors include, but are not limited to, the scope and responsibilities of the role, the selected candidate’s work experience, education and training, the work location as well as market and business considerations.
 
 
62
 
63
- At this time, GEICO will not sponsor a new applicant for employment authorization for this position.
64
 
65
- Benefits:
66
 
67
- As an Associate, you’ll enjoy our Total Rewards Program* to help secure your financial future and preserve your health and well-being, including:
 
 
 
68
 
69
- Premier Medical, Dental and Vision Insurance with no waiting period**
70
- Paid Vacation, Sick and Parental Leave
71
- 401(k) Plan
72
- Tuition Reimbursement
73
- Paid Training and Licensures
74
- Benefits may be different by location. Benefit eligibility requirements vary and may include length of service.
75
- Coverage begins on the date of hire. Must enroll in New Hire Benefits within 30 days of the date of hire for coverage to take effect.
76
 
77
- The equal employment opportunity policy of the GEICO Companies provides for a fair and equal employment opportunity for all associates and job applicants regardless of race, color, religious creed, national origin, ancestry, age, gender, pregnancy, sexual orientation, gender identity, marital status, familial status, disability or genetic information, in compliance with applicable federal, state and local law. GEICO hires and promotes individuals solely on the basis of their qualifications for the job to be filled.
78
 
79
- GEICO reasonably accommodates qualified individuals with disabilities to enable them to receive equal employment opportunity and/or perform the essential functions of the job, unless the accommodation would impose an undue hardship to the Company. This applies to all applicants and associates. GEICO also provides a work environment in which each associate is able to be productive and work to the best of their ability. We do not condone or tolerate an atmosphere of intimidation or harassment. We expect and require the cooperation of all associates in maintaining an atmosphere free from discrimination and harassment with mutual respect by and for all associates and applicants.
 
 
 
 
 
 
 
 
 
1
+ At EY, you’ll have the chance to build a career as unique as you are, with the global scale, support, inclusive culture and technology to become the best version of you. And we’re counting on your unique voice and perspective to help EY become even better. Join us and build an exceptional experience for yourself, and a better working world for all.
2
 
3
+ The exceptional EY experience. It's yours to build.
4
 
5
+ EY focuses on high-ethical standards and integrity among its employees and expects all candidates to demonstrate these qualities.
6
 
7
+ AI/Machine Learning Engineer, Senior Consultant
8
 
9
+ The opportunity
10
 
11
+ Our Artificial Intelligence and Data team helps apply cutting edge technology and techniques to bring solutions to our clients. As part of that, you'll sit side-by-side with clients and diverse teams from EY to create a well-rounded approach to advising and solving challenging problems, some of which have not been solved before. No two days will be the same, and with constant research and development, you'll find yourself building knowledge that can be applied across a wide range of projects now, and in the future. You'll need to have a passion for continuous learning, stay ahead of the trends, and influence new ways of working so you can position solutions in the most relevant and innovative way for our clients. You can expect heavy client interaction in a fast-paced environment and the opportunity to develop your own career path for your unique skills and ambitions.
 
 
 
 
 
12
 
13
+ Your Key Responsibilities
14
 
15
+ You will work with a wide variety of clients to deliver the latest data science and big data technologies. Your teams will design and build scalable solutions that unify, enrich, and derive insights from varied data sources across a broad technology landscape. You will help our clients navigate the complex world of modern data science, analytics, and software engineering. We'll look to you to provide guidance and perform technical development tasks to ensure data science solutions are properly engineered and maintained to support the ongoing business needs of our clients.
 
 
 
 
 
 
 
 
 
 
16
 
17
+ You will be joining a dynamic and interdisciplinary team of scientists and engineers who love to tackle the most challenging computational problems for our clients. We love to think creatively, build applications efficiently, and collaborate in both the ideation of solutions and the pursuit of new opportunities. Many on our team have advanced academic degrees or equivalent experience in industry.
18
 
19
+ Skills And Attributes For Success
 
 
 
 
 
 
 
 
20
 
21
+ This role will work to deliver tech at speed, innovate at scale and put humans at the center. Provide technical guidance and share knowledge with team members with diverse skills and backgrounds. Consistently deliver quality client services focusing on more complex, judgmental and/or specialized issues surrounding emerging technology. Demonstrate technical capabilities and professional knowledge. Learn about EY and its service lines and actively assess and present ways to apply knowledge and services.
22
 
23
+ To qualify for the role you must have
 
 
 
 
 
24
 
25
+ Bachelor's degree and 3-6 years of full-time working experience in AI and/or Machine Learning
26
+ Strong skills in Python.
27
+ Experience using Generative AI models and frameworks e.g. OpenAI family, open source LLMs, Dall-e, LlamaIndex, Langchain, Retrieval Augmented Generation (RAG).
28
+ Experience working with popular ML packages such as scikit-learn, Pytorch and ONNX, or related ML libraries.
29
+ Extensive experience using DevOps tools like GIT, Azure Devops and Agile tools such as Jira to develop and deploy analytical solutions with multiple features, pipelines, and releases.
30
+ A solid understanding of Machine Learning (ML) workflows including ingesting, analysing, transforming data and evaluating results to make meaningful predictions.
31
+ Experience with MLOps methods and platforms such as MLFlow.
32
+ Experience with CI/CD and test-driven development.
33
+ Experience designing, building, and maintaining ML models, frameworks, and pipelines.
34
+ Experience designing and deploying end to end ML workflows on at least one major cloud computing platform.
35
+ Understanding of data structures, data modelling and software engineering best practices.
36
+ Proficiency using data manipulation tools and libraries such as SQL, Pandas, and Spark.
37
+ Clearly communicating findings, recommendations, and opportunities to improve data systems and solutions.
38
+ Experience with containerization and scaling models.
39
+ Integrating models and feedback from downstream consumption systems - reporting and dashboards, AI driven applications.
40
+ Strong mathematical and quantitative skills including calculus, linear algebra, and statistics.
41
+ Willingness to travel to meet client obligations.
42
 
43
+ Ideally, you'll also have
44
 
45
+ A deep understanding of and ability to teach concepts, tools, features, functions, and benefits of different approaches to apply them.
46
+ Master's degree Computer Science, Mathematics, Physical Sciences, or other quantitative field.
47
+ Experience working with diverse teams to deliver complex solutions.
48
+ Strong skills in languages beyond Python: R, JavaScript, Java, C++, C.
49
+ Experience fine-tuning Generative AI models.
50
 
51
+ What We Look For
52
 
53
+ You have an agile, growth-oriented mindset. What you know matters. But the right mindset is just as important in determining success. We're looking for people who are innovative, can work in an agile way and keep pace with a rapidly changing world.
54
+ You are curious and purpose driven. We're looking for people who see opportunities instead of challenges, who ask better questions to seek better answers that build a better working world.
55
+ You are inclusive. We're looking for people who seek out and embrace diverse perspectives, who value differences, and team inclusively to build safety and trust. FY25NATAID
56
 
57
+ What We Offer
58
 
59
+ We offer a comprehensive compensation and benefits package where you’ll be rewarded based on your performance and recognized for the value you bring to the business. The base salary range for this job in all geographic locations in the US is $105,800 to $174,800. The salary range for New York City Metro Area, Washington State and California (excluding Sacramento) is $127,100 to $198,600. Individual salaries within those ranges are determined through a wide variety of factors including but not limited to education, experience, knowledge, skills and geography. In addition, our Total Rewards package includes medical and dental coverage, pension and 401(k) plans, and a wide range of paid time off options. Join us in our team-led and leader-enabled hybrid model. Our expectation is for most people in external, client serving roles to work together in person 40-60% of the time over the course of an engagement, project or year. Under our flexible vacation policy, you’ll decide how much vacation time you need based on your own personal circumstances. You’ll also be granted time off for designated EY Paid Holidays, Winter/Summer breaks, Personal/Family Care, and other leaves of absence when needed to support your physical, financial, and emotional well-being.
60
 
61
+ Continuous learning: You’ll develop the mindset and skills to navigate whatever comes next.
62
+ Success as defined by you: We’ll provide the tools and flexibility, so you can make a meaningful impact, your way.
63
+ Transformative leadership: We’ll give you the insights, coaching and confidence to be the leader the world needs.
64
+ Diverse and inclusive culture: You’ll be embraced for who you are and empowered to use your voice to help others find theirs.
65
 
66
+ EY accepts applications for this position on an on-going basis. If you can demonstrate that you meet the criteria above, please contact us as soon as possible.
 
 
 
 
 
 
67
 
68
+ EY exists to build a better working world, helping to create long-term value for clients, people and society and build trust in the capital markets.
69
 
70
+ Enabled by data and technology, diverse EY teams in over 150 countries provide trust through assurance and help clients grow, transform and operate.
71
+
72
+ Working across assurance, consulting, law, strategy, tax and transactions, EY teams ask better questions to find new answers for the complex issues facing our world today.
73
+
74
+ For those living in California, please click here for additional information.
75
+
76
+ EY is an equal opportunity, affirmative action employer providing equal employment opportunities to applicants and employees without regard to race, color, religion, age, sex, sexual orientation, gender identity/expression, pregnancy, genetic information, national origin, protected veteran status, disability status, or any other legally protected basis, including arrest and conviction records, in accordance with applicable law.
77
+
78
+ EY is committed to providing reasonable accommodation to qualified individuals with disabilities including veterans with disabilities. If you have a disability and either need assistance applying online or need to request an accommodation during any part of the application process, please call 1-800-EY-HELP3, select Option 2 for candidate related inquiries, then select Option 1 for candidate queries and finally select Option 2 for candidates with an inquiry which will route you to EY’s Talent Shared Services Team (TSS) or email the TSS at [email protected]
job-postings/07-01-2025/8.txt CHANGED
@@ -1,79 +1,104 @@
1
- GEICO's Enterprise Voice team is seeking an experienced Machine Learning Engineer to reimagine our customers’ contact center experience. You will help drive our insurance business transformation as we redefine experience for our customers.
2
 
3
- You will join a team of skilled software engineers as a resident AI/ML expert, supporting multiple AI automation initiatives across the contact center. We are looking for a motivated, high-energy, self-starter and problem solver with a positive attitude, and solid interpersonal and communication skills.
4
 
5
- Position Description
6
 
7
- Our Senior Machine Learning Engineer is a mid-level member of the Voice engineering staff ensuring we provide innovative experiences and operational excellence to our contact center agents, supervisors and customers. Our team thrives and succeeds in delivering high-quality technology products and services in a hyper-growth environment where priorities shift quickly. The ideal candidate has broad and deep technical knowledge in AI language services, Natural Language Processing and Generative AI.
8
 
9
- Responsibilities
10
 
11
- Ability to think creatively to find innovative solutions to complex ill-defined problems
12
- Stay abreast of the latest developments in AI, incorporating new techniques and methodologies into our processes to keep us ahead in the insurance industry
13
- Provides machine learning expertise within a team's functional area
14
- Consistently writes production-ready code with defined standards of readability, maintainability, reliability and testability. Helps junior team members to produce the same.
15
- Participates in the formulation of non-functional requirements
16
- Specifies the design and implementation of software modules based upon system requirements and architectural guidance
17
 
18
- Basic Qualifications:
19
 
20
- Strong foundation in Machine Learning and Artificial Intelligence
21
- Experienced in Natural Language Processing and AI language services
22
- Knowledgeable in Generative AI, Large Language Models, OpenAI APIs, prompt engineering
23
- Familiar with Responsible AI principles, model evaluation and monitoring
24
- Strong Python and SQL programming skills. Experience using Jupyter notebooks or similar tools. Experience with Github or other source code management platforms
25
- Experience building configurable AI/ML data pipelines through the complete ML Ops lifecycle.
26
- Working knowledge of Big Data technologies such as Spark, Data lake, MLflow, Snowflake, Elasticsearch.
27
- Experience with cloud providers, preferably Amazon Web Services and/or Azure
28
- Ability to explain AI/ML concepts to technical and non-technical audience
29
- Ability to coach and guide other engineers in shaping ill-defined data needs into concrete project deliverables
30
- Ability to develop collaborative relationships with multi-functional teams
31
 
32
- Preferred Qualifications
 
 
 
 
33
 
34
- Generative AI Retrieval Augmented Generation and AI agents
35
- Experience working with chatbots and Conversational AI
36
- Developing omni-channel (voice, chat, SMS) Contact Center solutions including natural language processing and speech-enabled grammar
37
- Experience with Amazon Web Services including Sagemaker, Bedrock, Lambda, S3, Connect, Lex, DynamoDB, API Gateway, CloudWatch
38
- Experience with Machine Learning frameworks and tools like TensorFlow, PyTorch, Scikit-learn.
39
- Familiar with Microservices development, Swagger, Postman
40
- Building test suite and frameworks to automate end to end testing
41
- Experience with open source libraries and frameworks
42
- Experience working in an Agile environment (Scrum, Kanban, SAFe)
43
 
44
- Experience
 
 
45
 
46
- 4+ years of hands-on experience in building AI/ML solutions in a production environment
47
- 4+ years of professional software development experience
48
- 2+ years of experience with architecture and design
49
- 2+ years of experience in open source frameworks
50
- 1+ years of experience with AWS, GCP, Azure, or another cloud service
51
- 1+ years of experience in Natural Language Processing, Generative AI or Language services
52
 
53
- Education
 
 
 
 
 
 
 
54
 
55
- Bachelor's degree in Computer Science, Information Systems, Engineering or equivalent education or work experience. Master’s degree preferred.
56
 
57
- Annual Salary
 
 
 
 
 
58
 
59
- $70,000.00 - $230,000.00
60
 
61
- The above annual salary range is a general guideline. Multiple factors are taken into consideration to arrive at the final hourly rate/ annual salary to be offered to the selected candidate. Factors include, but are not limited to, the scope and responsibilities of the role, the selected candidate’s work experience, education and training, the work location as well as market and business considerations.
 
 
 
 
62
 
63
- At this time, GEICO will not sponsor a new applicant for employment authorization for this position.
64
 
65
- Benefits:
66
 
67
- As an Associate, you’ll enjoy our Total Rewards Program* to help secure your financial future and preserve your health and well-being, including:
68
 
69
- Premier Medical, Dental and Vision Insurance with no waiting period**
70
- Paid Vacation, Sick and Parental Leave
71
- 401(k) Plan
72
- Tuition Reimbursement
73
- Paid Training and Licensures
74
- Benefits may be different by location. Benefit eligibility requirements vary and may include length of service.
75
- Coverage begins on the date of hire. Must enroll in New Hire Benefits within 30 days of the date of hire for coverage to take effect.
76
 
77
- The equal employment opportunity policy of the GEICO Companies provides for a fair and equal employment opportunity for all associates and job applicants regardless of race, color, religious creed, national origin, ancestry, age, gender, pregnancy, sexual orientation, gender identity, marital status, familial status, disability or genetic information, in compliance with applicable federal, state and local law. GEICO hires and promotes individuals solely on the basis of their qualifications for the job to be filled.
78
 
79
- GEICO reasonably accommodates qualified individuals with disabilities to enable them to receive equal employment opportunity and/or perform the essential functions of the job, unless the accommodation would impose an undue hardship to the Company. This applies to all applicants and associates. GEICO also provides a work environment in which each associate is able to be productive and work to the best of their ability. We do not condone or tolerate an atmosphere of intimidation or harassment. We expect and require the cooperation of all associates in maintaining an atmosphere free from discrimination and harassment with mutual respect by and for all associates and applicants.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ By clicking the “Apply” button, I understand that my employment application process with Takeda will commence and that the information I provide in my application will be processed in line with Takeda’s Privacy Notice and Terms of Use. I further attest that all information I submit in my employment application is true to the best of my knowledge.
2
 
3
+ Job Description
4
 
5
+ Takeda has been translating science into breakthrough medicines for 240 years. Every step of the way, our teams have worked together to tackle some of the most challenging problems in drug discovery and development. Today, we’re a driving force behind innovative therapies that make a lasting difference to millions of patients around the world.
6
 
7
+ In R&D, all of our history and potential comes together in an environment that welcomes diversity of thought and amplifies every voice. Working closely with colleagues, you’ll play a key role in bringing our rich pipeline of products forward to help patients. Come join a team that’s earned trust for more than two centuries, and find out how advancing transformative therapies at Takeda will shape your bright future.
8
 
9
+ The Computational Oncology group within the Precision & Translational Medicine (PTM) function in the Oncology Therapeutic Area Unit (OTAU) at Takeda has the accountability for driving end-to-end computational innovation and excellence from discovery through development, launch, and beyond as needed to advance our pipeline to patients in need. It consists of talented computational biologists who derive actionable scientific insights from large, diverse, and complex biological datasets including clinical trials and external datasets. They partner closely with teams within PTM and across the enterprise, such as Oncology Discovery, the Data Sciences Institute (including Statistics, Global Evidence and Outcomes, Data Architecture), Clinical Pharmacology, Clinical Sciences, as well as with other computational functions at Takeda as needed. Their collaboration guides robust drug target identification and validation, proof-of-concept in the clinic, and the development of pharmacodynamic and predictive markers to inform data-driven decisions. They also propose actionable solutions to be tested in the laboratory and/or the clinic to identify and advance our innovative cancer therapies.
10
 
11
+ Job Description
 
 
 
 
 
12
 
13
+ We are seeking a highly motivated and talented graduate student intern with a background applying convolutional neural networks, autoencoders, or transformer models to solve problems in digital pathology and single cell transcriptomics to join our team. You will work on predicting RNA features from H&E images and fine-tuning single cell foundational models for downstream tasks, contributing to biomarker development, and the advancement of therapies for patients in need. This role includes training deep neural networks, transfer learning and shallow machine learning using H&E images and single cell transcriptomics to understand the tumor microenvironment and predicting therapeutic responses. This internship is designed to immerse you in the forefront of medical research, offering hands-on experience and the opportunity to collaborate with leading industry professionals in a dynamic and collaborative environment.
14
 
15
+ How You Will Contribute
 
 
 
 
 
 
 
 
 
 
16
 
17
+ Collaborate with internal and external teams to build machine learning models using multi-modal data, including single cell transcriptomics and medical images.
18
+ Contribute to the development of innovative quantitative biomarkers related to the tumor microenvironment to help build patient selection strategies.
19
+ Analyze complex data sets to extract actionable insights, inform strategic decisions, and effectively communicate findings to the team and stakeholders.
20
+ Partner with cross-functional teams to develop and implement innovative approaches for data analysis, aiming for continuous research process improvements.
21
+ Help translate preclinical observations into the clinic to benefit patients with unmet need.
22
 
23
+ Internship Development Opportunities
 
 
 
 
 
 
 
 
24
 
25
+ Understanding of drug discovery & development
26
+ Application of AI/ML approaches to real problems in drug discovery & development
27
+ Working collaboratively with cross-functional teams on a common problem
28
 
29
+ Job Requirements
 
 
 
 
 
30
 
31
+ This position will be Hybrid and require 2-3 days in the Cambridge office per week.
32
+ Experience of working in laboratory environment with good safety and practices (Chemistry/Biology/Biochemistry or other related major).
33
+ Basic understanding of computer skills including MS Office (PowerPoint, Word, Excel)
34
+ Internet skills including use of e-mails, group messaging and information gathering
35
+ Highly reliable and a strong team player
36
+ Flexible with an attention to detail
37
+ Strong verbal and written communication skills
38
+ Must be currently enrolled in a PhD program with a focus on quantitative fields such as bioinformatics, biomedical engineering, machine learning, math or statistics or equivalent.
39
 
40
+ Internship Eligibility
41
 
42
+ Must be authorized to work in the U.S. on a permanent basis without requiring sponsorship
43
+ Must be currently enrolled in a degree program graduating December 2025 or later
44
+ The internship program is 10-12 weeks depending on the two start dates (June 2nd - August 29th) or (June 16th - August 22nd)
45
+ The intern must be able to commit to one of these time frames
46
+ Able to work full time 40 hours a week during internship dates
47
+ Takeda does not provide a housing stipend or relocation support for the U.S. Summer Internship Program
48
 
49
+ Program Highlights
50
 
51
+ Hands-on experience with real projects and responsibilities
52
+ Dedicated mentorship program pairing interns with experienced professionals
53
+ Networking opportunities with industry professionals and fellow interns
54
+ Internship events focused on professional and skills development
55
+ Exposure to multiple business areas or departments within a Pharmaceutical Organization
56
 
57
+ Applications will be accepted between January 6th and January 31st
58
 
59
+ Takeda Compensation And Benefits Summary
60
 
61
+ We understand compensation may be an important factor as you consider an internship opportunity. We are committed to equitable pay for all employees, and we strive to be more transparent with our pay practices.
62
 
63
+ For Location
 
 
 
 
 
 
64
 
65
+ Boston, MA
66
 
67
+ U.S. Hourly Wage Range
68
+
69
+ $21.00 - $46.00
70
+
71
+ The estimated hourly range reflects an anticipated range for this position. The actual hourly wage offered will depend on the candidate’s school year/level to be entered following completion of internship. The actual hourly wage offered will be in accordance with state or local minimum wage requirements for the job location.
72
+
73
+ U.S. internship benefits vary by location and may include
74
+
75
+ Paid sick time
76
+ Civic Duty paid time off
77
+ Participation at company volunteer events
78
+ Participation at company sponsored special events
79
+ Access to on-site fitness center (where available)
80
+ Commuter Benefit: To offset your work-commute expenses, Takeda provides U.S. employees with a fixed monthly subsidy to be used for either public transportation (transit) or parking.
81
+
82
+ EEO Statement
83
+
84
+ Takeda is proud in its commitment to creating a diverse workforce and providing equal employment opportunities to all employees and applicants for employment without regard to race, color, religion, sex, sexual orientation, gender identity, gender expression, parental status, national origin, age, disability, citizenship status, genetic information or characteristics, marital status, status as a Vietnam era veteran, special disabled veteran, or other protected veteran in accordance with applicable federal, state and local laws, and any other characteristic protected by law.
85
+
86
+ Locations
87
+
88
+ Boston, MA
89
+
90
+ Worker Type
91
+
92
+ Employee
93
+
94
+ Worker Sub-Type
95
+
96
+ Paid Intern (Fixed Term) (Trainee)
97
+
98
+ Time Type
99
+
100
+ Full time
101
+
102
+ Job Exempt
103
+
104
+ No
job-postings/07-01-2025/9.txt CHANGED
@@ -1,79 +1,15 @@
1
- GEICO's Enterprise Voice team is seeking an experienced Machine Learning Engineer to reimagine our customers’ contact center experience. You will help drive our insurance business transformation as we redefine experience for our customers.
2
 
3
- You will join a team of skilled software engineers as a resident AI/ML expert, supporting multiple AI automation initiatives across the contact center. We are looking for a motivated, high-energy, self-starter and problem solver with a positive attitude, and solid interpersonal and communication skills.
4
 
5
- Position Description
6
 
7
- Our Senior Machine Learning Engineer is a mid-level member of the Voice engineering staff ensuring we provide innovative experiences and operational excellence to our contact center agents, supervisors and customers. Our team thrives and succeeds in delivering high-quality technology products and services in a hyper-growth environment where priorities shift quickly. The ideal candidate has broad and deep technical knowledge in AI language services, Natural Language Processing and Generative AI.
 
8
 
9
- Responsibilities
 
 
 
10
 
11
- Ability to think creatively to find innovative solutions to complex ill-defined problems
12
- Stay abreast of the latest developments in AI, incorporating new techniques and methodologies into our processes to keep us ahead in the insurance industry
13
- Provides machine learning expertise within a team's functional area
14
- Consistently writes production-ready code with defined standards of readability, maintainability, reliability and testability. Helps junior team members to produce the same.
15
- Participates in the formulation of non-functional requirements
16
- Specifies the design and implementation of software modules based upon system requirements and architectural guidance
17
-
18
- Basic Qualifications:
19
-
20
- Strong foundation in Machine Learning and Artificial Intelligence
21
- Experienced in Natural Language Processing and AI language services
22
- Knowledgeable in Generative AI, Large Language Models, OpenAI APIs, prompt engineering
23
- Familiar with Responsible AI principles, model evaluation and monitoring
24
- Strong Python and SQL programming skills. Experience using Jupyter notebooks or similar tools. Experience with Github or other source code management platforms
25
- Experience building configurable AI/ML data pipelines through the complete ML Ops lifecycle.
26
- Working knowledge of Big Data technologies such as Spark, Data lake, MLflow, Snowflake, Elasticsearch.
27
- Experience with cloud providers, preferably Amazon Web Services and/or Azure
28
- Ability to explain AI/ML concepts to technical and non-technical audience
29
- Ability to coach and guide other engineers in shaping ill-defined data needs into concrete project deliverables
30
- Ability to develop collaborative relationships with multi-functional teams
31
-
32
- Preferred Qualifications
33
-
34
- Generative AI Retrieval Augmented Generation and AI agents
35
- Experience working with chatbots and Conversational AI
36
- Developing omni-channel (voice, chat, SMS) Contact Center solutions including natural language processing and speech-enabled grammar
37
- Experience with Amazon Web Services including Sagemaker, Bedrock, Lambda, S3, Connect, Lex, DynamoDB, API Gateway, CloudWatch
38
- Experience with Machine Learning frameworks and tools like TensorFlow, PyTorch, Scikit-learn.
39
- Familiar with Microservices development, Swagger, Postman
40
- Building test suite and frameworks to automate end to end testing
41
- Experience with open source libraries and frameworks
42
- Experience working in an Agile environment (Scrum, Kanban, SAFe)
43
-
44
- Experience
45
-
46
- 4+ years of hands-on experience in building AI/ML solutions in a production environment
47
- 4+ years of professional software development experience
48
- 2+ years of experience with architecture and design
49
- 2+ years of experience in open source frameworks
50
- 1+ years of experience with AWS, GCP, Azure, or another cloud service
51
- 1+ years of experience in Natural Language Processing, Generative AI or Language services
52
-
53
- Education
54
-
55
- Bachelor's degree in Computer Science, Information Systems, Engineering or equivalent education or work experience. Master’s degree preferred.
56
-
57
- Annual Salary
58
-
59
- $70,000.00 - $230,000.00
60
-
61
- The above annual salary range is a general guideline. Multiple factors are taken into consideration to arrive at the final hourly rate/ annual salary to be offered to the selected candidate. Factors include, but are not limited to, the scope and responsibilities of the role, the selected candidate’s work experience, education and training, the work location as well as market and business considerations.
62
-
63
- At this time, GEICO will not sponsor a new applicant for employment authorization for this position.
64
-
65
- Benefits:
66
-
67
- As an Associate, you’ll enjoy our Total Rewards Program* to help secure your financial future and preserve your health and well-being, including:
68
-
69
- Premier Medical, Dental and Vision Insurance with no waiting period**
70
- Paid Vacation, Sick and Parental Leave
71
- 401(k) Plan
72
- Tuition Reimbursement
73
- Paid Training and Licensures
74
- Benefits may be different by location. Benefit eligibility requirements vary and may include length of service.
75
- Coverage begins on the date of hire. Must enroll in New Hire Benefits within 30 days of the date of hire for coverage to take effect.
76
-
77
- The equal employment opportunity policy of the GEICO Companies provides for a fair and equal employment opportunity for all associates and job applicants regardless of race, color, religious creed, national origin, ancestry, age, gender, pregnancy, sexual orientation, gender identity, marital status, familial status, disability or genetic information, in compliance with applicable federal, state and local law. GEICO hires and promotes individuals solely on the basis of their qualifications for the job to be filled.
78
-
79
- GEICO reasonably accommodates qualified individuals with disabilities to enable them to receive equal employment opportunity and/or perform the essential functions of the job, unless the accommodation would impose an undue hardship to the Company. This applies to all applicants and associates. GEICO also provides a work environment in which each associate is able to be productive and work to the best of their ability. We do not condone or tolerate an atmosphere of intimidation or harassment. We expect and require the cooperation of all associates in maintaining an atmosphere free from discrimination and harassment with mutual respect by and for all associates and applicants.
 
1
+ 🚀 Join Us as a Founding Member of Technical Staff (ML Engineering & Research)
2
 
3
+ We’re an open-source platform shaping the future of large language models (LLMs) by transforming production data into smarter, faster, and more cost-efficient solutions. Our platform creates a continuous feedback loop that optimizes LLM applications through smarter inference, real-time observability, and seamless experimentation.
4
 
5
+ You’ll contribute to an open-source project tackling exciting challenges like advanced inference techniques and cutting-edge optimization methods, including reinforcement learning. Your work will span across the stack, providing opportunities to blend ML research with systems engineering.
6
 
7
+ Who We’re Looking For
8
+ We don’t separate “engineers” from “researchers.” Instead, we focus on building a team that thrives on cross-functional collaboration and impactful contributions. If you’re passionate about solving complex technical problems and pushing boundaries, this is the role for you.
9
 
10
+ Key Qualifications:
11
+ Strong technical expertise: You’ve led large-scale projects from ideation to deployment, solving challenging problems along the way.
12
+ Experience with LLMs or RL: You bring technical depth and leadership, ideally having worked at the forefront of these fields.
13
+ Growth-oriented mindset: You’re excited to work in a fast-paced enviro
14
 
15
+ If you’re passionate about the intersection of open source, machine learning, and impactful innovation, this is your opportunity to make a difference.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
linkedin_scrapping.py CHANGED
@@ -1,16 +1,17 @@
1
  import http.client
2
- from config import *
3
  import json
4
  import os
5
  from datetime import datetime
6
 
 
7
 
8
  def scrape_jobs():
9
 
10
  conn = http.client.HTTPSConnection("linkedin-job-search-api.p.rapidapi.com")
11
 
12
  headers = {
13
- 'x-rapidapi-key': RAPID_API_KEY,
14
  'x-rapidapi-host': "linkedin-job-search-api.p.rapidapi.com"
15
  }
16
 
@@ -28,6 +29,7 @@ def extract_job_descriptions(jobs):
28
  # Get the current date in YYYY-MM-DD format and create folder
29
  current_date = datetime.now().strftime('%d-%m-%Y')
30
  folder_path = os.path.join("job-postings", current_date)
 
31
  os.makedirs(folder_path, exist_ok=True)
32
 
33
  for idx, job in enumerate(jobs, start=1):
@@ -42,6 +44,10 @@ def extract_job_descriptions(jobs):
42
  print("Job {} saved".format(str(idx)))
43
  else:
44
  print("Job description not available")
45
-
46
  jobs = scrape_jobs()
47
  extract_job_descriptions(jobs)
 
 
 
 
 
 
1
  import http.client
2
+ # from config import *
3
  import json
4
  import os
5
  from datetime import datetime
6
 
7
+ api_key = os.getenv('RAPID_API_KEY')
8
 
9
  def scrape_jobs():
10
 
11
  conn = http.client.HTTPSConnection("linkedin-job-search-api.p.rapidapi.com")
12
 
13
  headers = {
14
+ 'x-rapidapi-key': api_key,
15
  'x-rapidapi-host': "linkedin-job-search-api.p.rapidapi.com"
16
  }
17
 
 
29
  # Get the current date in YYYY-MM-DD format and create folder
30
  current_date = datetime.now().strftime('%d-%m-%Y')
31
  folder_path = os.path.join("job-postings", current_date)
32
+ print(f"Creating folder at: {folder_path}")
33
  os.makedirs(folder_path, exist_ok=True)
34
 
35
  for idx, job in enumerate(jobs, start=1):
 
44
  print("Job {} saved".format(str(idx)))
45
  else:
46
  print("Job description not available")
 
47
  jobs = scrape_jobs()
48
  extract_job_descriptions(jobs)
49
+
50
+ # current_date = datetime.now().strftime('%d-%m-%Y')
51
+ # folder_path = os.path.join("job-postings", current_date)
52
+ # print(f"Creating folder at: {folder_path}")
53
+ # os.makedirs(folder_path, exist_ok=True)
tagging.py → llm-tagging.py RENAMED
File without changes
tag-posting.py CHANGED
@@ -1,7 +1,12 @@
1
  import spacy
2
  import re
 
 
 
 
3
 
4
- nlp = spacy.load("en_core_web_sm")
 
5
 
6
  def split_text_recursively(text):
7
  if '\n' not in text:
@@ -11,6 +16,8 @@ def split_text_recursively(text):
11
 
12
  def parse_post(path):
13
 
 
 
14
  # Read the file
15
 
16
  with open(path, 'r') as file:
@@ -30,11 +37,191 @@ def parse_post(path):
30
  for sent in doc.sents:
31
  print(f"{sent.text}")
32
  sents.append(sent.text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- # Skill/knowledge extraction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
 
 
 
 
 
 
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
 
39
- path = './job-postings/03-01-2024/2.txt'
40
- parse_post(path)
 
 
 
1
  import spacy
2
  import re
3
+ from transformers import AutoTokenizer, BertForTokenClassification, TrainingArguments, Trainer
4
+ import torch
5
+ from typing import List
6
+ import os
7
 
8
+
9
+ ### Parsing job posting
10
 
11
  def split_text_recursively(text):
12
  if '\n' not in text:
 
16
 
17
  def parse_post(path):
18
 
19
+ nlp = spacy.load("en_core_web_sm")
20
+
21
  # Read the file
22
 
23
  with open(path, 'r') as file:
 
37
  for sent in doc.sents:
38
  print(f"{sent.text}")
39
  sents.append(sent.text)
40
+
41
+ return sents
42
+
43
+
44
+ ### Model inference
45
+
46
+ from torch.utils.data import DataLoader
47
+ import torch.nn as nn
48
+ from transformers import DataCollatorForTokenClassification
49
+ from typing import List, Tuple
50
+
51
+ tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_knowledge_extraction")
52
+ model = BertForTokenClassification.from_pretrained("Robzy/jobbert_knowledge_extraction")
53
+
54
+ id2label = model.config.id2label
55
+ label2id = model.config.label2id
56
+
57
def pad(list_of_lists, pad_value=0):
    """Right-pad variable-length lists to a common length.

    Args:
        list_of_lists: Sequence of lists to pad.
        pad_value: Value appended to the shorter lists (default 0).

    Returns:
        A ``(padded, attention_mask)`` tuple of tensors. The mask holds 1 at
        positions that came from the input and 0 at padded positions. An empty
        input yields two empty tensors instead of raising.
    """
    lengths = [len(lst) for lst in list_of_lists]
    # default=0 keeps an empty batch from raising ValueError in max().
    max_len = max(lengths, default=0)

    # Pad shorter lists with the specified value
    padded_lists = [
        lst + [pad_value] * (max_len - size)
        for lst, size in zip(list_of_lists, lengths)
    ]
    attention_masks = [[1] * size + [0] * (max_len - size) for size in lengths]

    return torch.tensor(padded_lists), torch.tensor(attention_masks)
65
+
66
def collate_fn(batch: List[List[torch.Tensor]]):
    """Collate examples into padded tensors.

    Each item of ``batch`` is indexed by the keys ``'tokens'`` and
    ``'tags_knowledge'``. Tokens are converted to ids with the module-level
    ``tokenizer`` and tags are mapped to ids through ``label2id``; both are
    then padded with ``pad``.

    Returns:
        Dict with ``input_ids``, ``tags_knowledge`` and ``attention_mask``.
    """
    token_id_rows = [
        tokenizer.convert_tokens_to_ids(example['tokens']) for example in batch
    ]
    input_ids, attention_mask = pad(token_id_rows)

    label_id_rows = [
        [label2id[tag] for tag in example['tags_knowledge']] for example in batch
    ]
    # The mask for the labels duplicates the token mask, so it is discarded.
    tags_knowledge, _ = pad(label_id_rows)

    return {
        "input_ids": input_ids,
        "tags_knowledge": tags_knowledge,
        "attention_mask": attention_mask,
    }
71
 
72
def extract_spans(B_mask, I_mask, token_ids, tokenizer):
    """
    Extract text spans for 2D tensors (batch of sequences).

    Args:
        B_mask: 2D tensor; 1 marks a position that begins a span.
        I_mask: 2D tensor; 1 marks a position that continues the open span.
        token_ids: 2D tensor of token ids aligned with the masks.
        tokenizer: Object whose ``decode(ids, skip_special_tokens=True)``
            turns a list of token ids into text.

    Returns:
        One list of decoded span strings per sequence that contains at least
        one span; sequences without spans are omitted.
    """
    all_spans = []

    # Convert once to nested Python lists instead of calling .item() per cell.
    rows = zip(B_mask.tolist(), I_mask.tolist(), token_ids.tolist())

    for b_row, i_row, id_row in rows:
        spans = []
        current_span = []

        for is_begin, is_inside, token_id in zip(b_row, i_row, id_row):
            if is_begin == 1:  # Begin a new span
                if current_span:
                    spans.append(current_span)
                current_span = [token_id]
            elif is_inside == 1 and current_span:  # Continue the current span
                current_span.append(token_id)
            elif current_span:  # Outside any entity: close the open span
                spans.append(current_span)
                current_span = []

        if current_span:  # Save the last span if it exists
            spans.append(current_span)

        # Decode spans for this sequence
        decoded_spans = [
            tokenizer.decode(span, skip_special_tokens=True) for span in spans
        ]

        # Sequences that produced no spans are left out of the result
        if decoded_spans:
            all_spans.append(decoded_spans)

    return all_spans
112
+
113
+
114
def concat_subtokens(tokens):
    """Merge '##'-prefixed continuation tokens into the preceding token.

    Args:
        tokens: Iterable of token strings; a token starting with ``'##'`` is
            treated as the continuation of the previous token.

    Returns:
        List of merged tokens. A continuation with no preceding token (the
        first element starts with ``'##'``) is kept as its own token with the
        prefix stripped, instead of raising ``IndexError``.
    """
    result = []

    for token in tokens:
        if not token.startswith('##'):
            # If it's a new token, add it to result
            result.append(token)
        elif result:
            # Concatenate sub-token to the last token in result
            result[-1] += token[2:]
        else:
            # Nothing to attach to yet: start a token from the bare fragment
            result.append(token[2:])

    return result
126
+
127
def merge_spans(batch_spans, tokenizer):
    """Flatten per-sentence span lists, joining '##' fragments to their predecessor.

    Args:
        batch_spans: List of lists of span strings, one inner list per sentence.
        tokenizer: Unused; kept so existing callers keep working.

    Returns:
        A single flat list with the merged spans of all sentences.
    """
    batch_decoded_spans = []

    for spans in batch_spans:
        # A sentence without spans has nothing to merge (and would otherwise
        # raise IndexError on spans[0]).
        if not spans:
            continue

        # A leading '##' fragment has no earlier span to attach to.
        # NOTE(review): this drops every span of the sentence, not only the
        # fragment -- confirm that this is intended.
        if spans[0].startswith('##'):
            continue

        ## Concatenate subtokens
        merged = []
        for span in spans:
            if span.startswith('##'):
                # Concatenate sub-token to the last span, without the '##'
                merged[-1] += span[2:]
            else:
                # If it's a new span, add it to the result
                merged.append(span)
        ## Concatenation done

        batch_decoded_spans.extend(merged)

    return batch_decoded_spans
153
+
154
+
155
def extract_skills(batch_sentences: List[str]):
    """Run the token-classification model on sentences and return the tagged spans.

    The sentences are tokenized with the module-level ``tokenizer``, passed
    through the module-level ``model`` without gradient tracking, and the
    arg-max class per token is turned into spans via ``extract_spans`` and
    ``merge_spans``.

    Args:
        batch_sentences: Sentences of one job posting.

    Returns:
        Flat list of span strings; an empty list when no sentences are given.
    """
    print('Extracting skills from job posting...')

    # An empty posting yields no sentences; skip tokenization and inference.
    if not batch_sentences:
        return []

    # Tokenize
    batch = tokenizer(batch_sentences, padding=True, truncation=True)
    batch_tokens = torch.tensor(batch['input_ids'])
    batch_attention_masks = torch.tensor(batch['attention_mask'])

    model.eval()
    with torch.no_grad():
        output = model(input_ids=batch_tokens, attention_mask=batch_attention_masks)

    # Post-process
    pred = output.logits.argmax(-1)
    # Padded positions get -100 so they match neither mask below.
    pred = torch.where(batch_attention_masks == 0, torch.tensor(-100), pred)

    # assumes class id 0 is the "begin" tag and 1 the "inside" tag in the
    # model's id2label mapping -- TODO confirm
    b_mask = torch.where(pred == 0, 1, 0)
    i_mask = torch.where(pred == 1, 1, 0)

    spans = extract_spans(b_mask, i_mask, batch_tokens, tokenizer)
    decoded_spans = merge_spans(spans, tokenizer)

    return decoded_spans
181
+
182
def skills_save(path, skills):
    """Write the skills to ``path``, one per line, without a trailing newline.

    Args:
        path: Destination file; overwritten if it exists.
        skills: Iterable of skills; each item is written via ``str()``.
    """
    # Explicit UTF-8 so non-ASCII skills do not depend on the platform default.
    with open(path, 'w', encoding='utf-8') as f:
        f.write("\n".join(str(skill) for skill in skills))
189
+
190
+
191
def backfill():
    """Tag every stored job posting and write the result under ``tags/``.

    Walks ``<cwd>/job-postings/<date>/<file>``, runs ``parse_post`` and
    ``extract_skills`` on each file and saves the skills with ``skills_save``
    to ``<cwd>/tags/<date>/<file>``. Entries under ``job-postings`` that are
    not directories, and entries inside a date folder that are not regular
    files, are skipped.
    """
    job_dir = os.path.join(os.getcwd(), 'job-postings')
    tag_dir = os.path.join(os.getcwd(), 'tags')

    for date in os.listdir(job_dir):
        job_date = os.path.join(job_dir, date)

        # Stray files next to the date folders would make os.listdir() raise.
        if not os.path.isdir(job_date):
            continue

        print(f"Processing date directory: {date}")

        tag_date = os.path.join(tag_dir, date)

        for job in os.listdir(job_date):
            job_path = os.path.join(job_date, job)

            # Only regular files can be parsed as postings.
            if not os.path.isfile(job_path):
                continue

            tag_path = os.path.join(tag_date, job)

            print(f"Processing job file: {job_path}")

            # Created lazily so date folders without postings get no tag folder.
            if not os.path.exists(tag_date):
                os.makedirs(tag_date, exist_ok=True)
                print(f"Created directory: {tag_date}")

            sents = parse_post(job_path)
            skills = extract_skills(sents)
            skills_save(tag_path, skills)

            print(f"Saved skills to: {tag_path}")
217
+
218
if __name__ == '__main__':

    # Script entry point: tag every posting found under job-postings/.
    backfill()

    # Tagging a single posting by hand, kept for reference:
    # path = './job-postings/03-01-2024/2.txt'
    # sents = parse_post(path)
    # skills = extract_skills(sents)
    # skills_save('./tags/03-01-2024/2.txt',skills)
tags/03-01-2024/1.txt CHANGED
@@ -1 +1,34 @@
1
- tags
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ML
2
+ -
3
+ AI based R & D
4
+ MSc in Data Science
5
+ Python
6
+ Go
7
+ MLOps
8
+ MLFlow
9
+ Kubeflow )
10
+ Hydra
11
+ numpy
12
+ TensorFlow
13
+ DevOps
14
+ CI
15
+ /
16
+ CD
17
+ runner deployment & management
18
+ pipeline creation
19
+ testing
20
+ ML
21
+ ML
22
+ PyTorch
23
+ TensorFlow
24
+ Containers
25
+ engines, orchestration tools and
26
+ Docker
27
+ Kaniko
28
+ Kubernetes
29
+ Helm
30
+ Cloud ecosystems
31
+ AWS
32
+ Infrastructure management
33
+ Ansible
34
+ Terraform
tags/03-01-2024/2.txt CHANGED
@@ -1 +1,13 @@
1
- tags
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artificial intelligence
2
+ Automation
3
+ data analysis
4
+ image recognition
5
+ automation
6
+ Artificial Intelligence
7
+ feasibility studies
8
+ data analysis
9
+ Data Science
10
+ degree in software engineering
11
+ Artificial Intelligence
12
+ Vision Systems
13
+ English
tags/03-01-2024/3.txt CHANGED
@@ -1 +1,22 @@
1
- tags
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SQL
2
+ cloud infrastructure
3
+ APIs
4
+ Python
5
+ infra
6
+ database
7
+ Types
8
+ SaaS
9
+ agile development
10
+ sprint planning
11
+ backend development
12
+ python
13
+ SQL
14
+ NoSQL databases
15
+ web scraping
16
+ API development
17
+ containerization
18
+ cloud environments
19
+ Azure
20
+ data processing
21
+ Databricks
22
+ English
tags/04-01-2024/1.txt CHANGED
@@ -1 +1,36 @@
1
- tags
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Defence projects
2
+ machine learning
3
+ artificial intelligence
4
+ AI models
5
+ AI systems
6
+ AI
7
+ Master
8
+ '
9
+ s or Ph. D. in Computer Science
10
+ Machine Learning
11
+ Pattern Recognition
12
+ Neural Networks
13
+ Algorithms
14
+ AI
15
+ /
16
+ ML
17
+ autonomous systems
18
+ radar technologies
19
+ AI
20
+ -
21
+ reliant
22
+ defense
23
+ machine learning frameworks
24
+ TensorFlow
25
+ PyTorch
26
+ Python
27
+ ,
28
+ C
29
+ +
30
+ +
31
+ Java
32
+ secure system design
33
+ cybersecurity principles
34
+ Security certifications
35
+ CISSP
36
+ CEH )
tags/04-01-2024/2.txt CHANGED
@@ -1 +1,36 @@
1
- tags
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Spatial Computing /
2
+ XR Development
3
+ game
4
+ Swedish
5
+ real
6
+ 3D graphics
7
+ Real Time Graphics
8
+ VR
9
+ /
10
+ MR
11
+ /
12
+ AR )
13
+ graphics pipelines
14
+ real
15
+ -
16
+ time 3D environments
17
+ Unreal
18
+ Unity
19
+ native
20
+ IOS
21
+ /
22
+ Android 3D development
23
+ Web based 3D engines
24
+ mobile application development
25
+ deployment
26
+ game
27
+ 3D Graphics
28
+ C
29
+ ,
30
+ C
31
+ #
32
+ Python
33
+ C
34
+ +
35
+ +
36
+ JavaScript
tags/04-01-2024/3.txt CHANGED
@@ -1 +1,44 @@
1
- tags
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ machine
2
+ AI
3
+ SaaS
4
+ AI
5
+ /
6
+ ML
7
+ AI
8
+ /
9
+ ML models
10
+ AI
11
+ AI
12
+ /
13
+ ML pipelines
14
+ deployment infrastructure
15
+ Python
16
+ AI
17
+ /
18
+ ML
19
+ Pytorch
20
+ cloud environment
21
+ Azure
22
+ AWS
23
+ GCP
24
+ AI
25
+ Master
26
+ '
27
+ s degree in engineering
28
+ Cloud Ops
29
+ IaC
30
+ Terraform
31
+ MLOps best practices and tools
32
+ Databricks
33
+ VRDs
34
+ )
35
+ generative AI
36
+ RAG
37
+ LLM evaluation
38
+ API
39
+ -
40
+ driven microservices
41
+ cache management
42
+ production
43
+ -
44
+ level software
tags/07-01-2025/1.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ commodity recommendations
2
+ live stream recommendations
3
+ short video recommendations
4
+ TikTok
5
+ feature engineering
6
+ model optimization
7
+ Master
8
+ '
9
+ s degree
10
+ Phd
11
+ '
12
+ s Degree
13
+ Software Development
14
+ Computer Science
15
+ Computer Engineering
16
+ machine learning
17
+ deep learning
18
+ data mining
19
+ programming language
20
+ C
21
+ +
22
+ +
23
+ /
24
+ Python
25
+ Deep Learning Tools
26
+ tensorflow
27
+ /
28
+ pytorch
29
+ Collaborative Filtering
30
+ Matrix Factorization
31
+ Factorization Machines
32
+ Word2vec
33
+ Logistic Regression
34
+ Gradient Boosting
35
+ Trees
36
+ Deep Neural Networks
37
+ Wide and Deep
38
+ KDD
39
+ NeurlPS
40
+ WWW
41
+ SIGIR
42
+ WSDM
43
+ ICML
44
+ IJCAI
45
+ AAAI
46
+ RECSYS
47
+ data mining
48
+ machine learning
49
+ Kaggle
50
+ /
51
+ KDD
52
+ -
53
+ cup
tags/07-01-2025/10.txt ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature development
2
+ Data Drivens
3
+ machine learning
4
+ algorithm development
5
+ model training
6
+ feature pipeline design
7
+ A
8
+ /
9
+ B testing
10
+ Python
11
+ machine learning algorithms and workflows
12
+ NLP
13
+ Deep Learning
14
+ Recommendation Systems
15
+ Conversational
16
+ English
17
+ recommendation systems
18
+ search
19
+ e
20
+ -
21
+ commerce
22
+ advertising
23
+ NLP
24
+ Chinese text analysis
25
+ business applications
26
+ system design
27
+ machine learning systems
28
+ ML
29
+ Scikit
30
+ -
31
+ Learn
32
+ /
33
+ XGBoost
34
+ /
35
+ Tensorflow
36
+ GCP
37
+ /
38
+ Kubernetes
39
+ SQL
40
+ /
41
+ NoSQL
42
+ /
43
+ Redis
44
+ Linux
tags/07-01-2025/2.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Deep Learning
2
+ MLOps
3
+ production environments
4
+ model management
5
+ automation
6
+ continuous integration
7
+ deep
8
+ MLOps
9
+ Deep
10
+ CNNs
11
+ RNNs
12
+ Transformers
13
+ NLP
14
+ computer vision
15
+ predictive analytics
16
+ MLOps
17
+ Pipeline Development
18
+ M
19
+ model training
20
+ Model De
21
+ CI
22
+ /
23
+ CD
24
+ model versioning
25
+ lifecycle management
26
+ Kubernetes
27
+ Docker
28
+ cloud platforms
29
+ AWS
30
+ ,
31
+ Azure
32
+ GCP
33
+ cloud platforms
34
+ AWS SageMaker
35
+ Google AI Platform
36
+ Azure
37
+ Machine Learning
38
+ Cross
39
+ -
40
+ Functional Collaboration
41
+ machine learning
42
+ deep learning
43
+ MLOps
44
+ TensorFlow
45
+ Keras
46
+ PyTorch
47
+ MLOps
48
+ Kubeflow
49
+ MLflow
50
+ TFX
51
+ Jenkins
52
+ Docker
53
+ Kubernetes
54
+ Terraform
55
+ Python
56
+ data manipulation libraries
57
+ Pandas
58
+ NumPy
59
+ SciPy
60
+ cloud platforms
61
+ AWS
62
+ GCP
63
+ Azure
64
+ machine learning
65
+ AWS
66
+ SageMaker
67
+ Google AI Platform
68
+ Azure
69
+ ML
70
+ NLP
71
+ computer vision
72
+ reinforcement learning
73
+ MLOps
74
+ open
75
+ -
76
+ source
77
+ MLOps
78
+ Kubeflow
79
+ MLflow
80
+ TFX
81
+ end
82
+ machine learning lifecycle
83
+ infrastructure as code tools
84
+ Terraform
85
+ CloudFormation
86
+ MLOps
87
+ Continuous Learning
88
+ deep learning
89
+ MLOps practices
90
+ model deployment strategies
91
+ Master
92
+ '
93
+ s or PhD in
94
+ Computer Science
95
+ Data Science
96
+ Electrical Engineering
tags/07-01-2025/3.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PhD degree in Computer Science
2
+ Python
3
+ JavaScript
4
+ R
5
+ Java
6
+ C
7
+ +
8
+ +
9
+ Machine Learning
10
+ Python
11
+ JavaScript
12
+ R
13
+ Java
14
+ C
15
+ +
16
+ +
17
+ automated algorithm discovery methods
18
+ learning to learn
19
+ program synthesis
20
+ digital hardware
21
+ machine learning
22
+ computational neuroscience
23
+ non
24
+ -
25
+ gradient
26
+ -
27
+ based optimization techniques
28
+ hand
29
+ -
30
+ automated discovery
31
+ machine learning
32
+ modern programming languages
33
+ Python
34
+ computation methods
35
+ machine learning libraries
36
+ JAX
37
+ PyTorch
38
+ )
tags/07-01-2025/4.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PhD degree in Computer Science
2
+ Python
3
+ JavaScript
4
+ R
5
+ Java
6
+ C
7
+ +
8
+ +
9
+ Machine Learning
10
+ Python
11
+ JavaScript
12
+ R
13
+ Java
14
+ C
15
+ +
16
+ +
17
+ automated algorithm discovery methods
18
+ learning to learn
19
+ program synthesis
20
+ digital hardware
21
+ machine learning
22
+ computational neuroscience
23
+ non
24
+ -
25
+ gradient
26
+ -
27
+ based optimization techniques
28
+ hand
29
+ -
30
+ automated discovery
31
+ machine learning
32
+ modern programming languages
33
+ Python
34
+ computation methods
35
+ machine learning libraries
36
+ JAX
37
+ PyTorch
38
+ )
tags/07-01-2025/5.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PhD degree in Computer Science
2
+ Python
3
+ JavaScript
4
+ R
5
+ Java
6
+ C
7
+ +
8
+ +
9
+ Machine Learning
10
+ Python
11
+ JavaScript
12
+ R
13
+ Java
14
+ C
15
+ +
16
+ +
17
+ automated algorithm discovery methods
18
+ learning to learn
19
+ program synthesis
20
+ digital hardware
21
+ machine learning
22
+ computational neuroscience
23
+ non
24
+ -
25
+ gradient
26
+ -
27
+ based optimization techniques
28
+ hand
29
+ -
30
+ automated discovery
31
+ machine learning
32
+ modern programming languages
33
+ Python
34
+ computation methods
35
+ machine learning libraries
36
+ JAX
37
+ PyTorch
38
+ )
tags/07-01-2025/6.txt ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AI
2
+ Large Language Models ( LLMs )
3
+ Generative AI algorithms
4
+ neural networks
5
+ ML
6
+ PyTorch
7
+ TensorFlowL
8
+ complex
9
+ IP
10
+ computer science
11
+ software engineering
12
+ TensorFlow
13
+ PyTorch
14
+ Python
15
+ Large Language Models ( LLMs )
16
+ Generative AI algorithms
17
+ software development platforms
18
+ continuous integration systems
19
+ Linux and cloud services
20
+ Pytorch
21
+ Tensorflow
22
+ Executorch
23
+ Tensorflow Lite
24
+ CI
25
+ /
26
+ testing
27
+ Python
28
+ ML
29
+ C
30
+ +
31
+ +
32
+ optimised
33
+ ML libraries
34
+ machine learning
35
+ machine learning models
36
+ proof -
37
+ ARM IPs
38
+ machine
39
+ ML
tags/07-01-2025/7.txt ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ continuous learning
2
+ modern data science
3
+ analytics
4
+ software engineering
5
+ academic degrees
6
+ Bachelor
7
+ '
8
+ s degree
9
+ AI
10
+ Machine Learning
11
+ Python
12
+ Generative AI models
13
+ OpenAI family
14
+ open source
15
+ LLMs
16
+ Dall
17
+ -
18
+ e
19
+ LlamaIndex
20
+ Langchain
21
+ Retrieval
22
+ Augmented Generation
23
+ RAG )
24
+ ML
25
+ scikit
26
+ -
27
+ learn
28
+ Pytorch
29
+ ONNX
30
+ ML
31
+ DevOps
32
+ GIT
33
+ Azure Devops
34
+ Agile
35
+ Jira
36
+ Machine Learning
37
+ ML ) workflows
38
+ MLOps
39
+ MLFlow
40
+ CI
41
+ /
42
+ CD
43
+ test
44
+ -
45
+ driven development
46
+ ML models
47
+ ML
48
+ data structures
49
+ data modelling
50
+ software engineering best practices
51
+ data manipulation
52
+ SQL
53
+ Pandas
54
+ Spark
55
+ containerization
56
+ scaling models
57
+ AI
58
+ calculus
59
+ linear algebra
60
+ statistics
61
+ Master
62
+ '
63
+ s degree
64
+ Computer Science
65
+ Mathematics
66
+ Physical Sciences
67
+ Python
68
+ R
69
+ JavaScript
70
+ Java
71
+ ,
72
+ C
73
+ +
74
+ +
75
+ C
76
+ Generative AI models
77
+ ale
tags/07-01-2025/8.txt ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ convolutional
2
+ neural networks
3
+ autoencoders
4
+ transformer models
5
+ digital pathology
6
+ single cell transcriptomics
7
+ H
8
+ E
9
+ transfer learning
10
+ shallow machine learning
11
+ H
12
+ &
13
+ E images
14
+ single cell transcriptomics
15
+ multi
16
+ -
17
+ modal
18
+ single cell transcriptomics
19
+ medical images
20
+ tumor microenvironment
21
+ drug discovery & development
22
+ AI
23
+ /
24
+ ML
25
+ Chemistry
26
+ /
27
+ Biology
28
+ /
29
+ Biochemistry
30
+ MS Office
31
+ PowerPoint
32
+ Words
33
+ Excel
34
+ e
35
+ -
36
+ mails
37
+ group messaging
38
+ information gathering
39
+ quantitative
40
+ bioinformatics
41
+ biomedical engineering
42
+ machine learning
43
+ math
44
+ statistics
45
+ real projects
tags/07-01-2025/9.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ML
2
+ LLMs
3
+ RL
4
+ open source
5
+ machine learning
train.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, BertForTokenClassification, TrainingArguments, Trainer
2
+ import torch
3
+ from tabulate import tabulate
4
+ import wandb
5
+
6
+
7
# The tokenizer is taken from the upstream JobBERT checkpoint, while the
# weights to fine-tune are loaded from the project's own Hub repository.
tokenizer = AutoTokenizer.from_pretrained("jjzha/jobbert_knowledge_extraction")
model = BertForTokenClassification.from_pretrained("Robzy/jobbert_knowledge_extraction")

# W&B artifact object; the trained state dict is written into it at the end
# of the script.
artifact = wandb.Artifact(name="jobbert-knowledge-extraction", type="BERT")

# Sample sentence, tokenized without special tokens and returned as PyTorch
# tensors. Only the (currently disabled) inference snippet below consumes it.
text = 'Experience with Unreal and/or Unity and/or native IOS/Android 3D development and/or Web based 3D engines '
inputs = tokenizer(text, add_special_tokens=False, return_tensors="pt")

# Inference (disabled reference snippet)

# with torch.no_grad():
#     output = model(**inputs)

# # Post-process
# predicted_token_class_ids = output.logits.argmax(-1)
# predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
# tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze())

# # Display
# table = zip(tokens, predicted_tokens_classes)
# print(tabulate(table, headers=["Token", "Predicted Class"], tablefmt="pretty"))

# Training

from datasets import load_dataset

dataset = load_dataset("json", data_files="data/test-short.json")

# Convert tokens to ids before training: one id tensor per example of the
# 'train' split.
data = [
    torch.tensor([tokenizer.convert_tokens_to_ids(token) for token in token_seq])
    for token_seq in dataset['train']['tokens']
]


def _with_input_ids(example):
    """Return the extra ``input_ids`` column for one dataset example."""
    token_ids = tokenizer.convert_tokens_to_ids(example["tokens"])
    return {"input_ids": torch.tensor(token_ids)}


dataset = dataset.map(_with_input_ids)
46
+
47
+ # Data preprocessing
48
+
49
+ from torch.utils.data import DataLoader
50
+ import torch.nn as nn
51
+ from transformers import DataCollatorForTokenClassification
52
+ from typing import List, Tuple
53
+
54
def pad(list_of_lists, pad_value=0) -> Tuple[torch.Tensor, torch.Tensor]:
    """Right-pad variable-length sequences to a common length.

    Args:
        list_of_lists: Iterable of sequences (lists, tuples, ...) of values.
            It is materialised once, so a one-shot iterator is accepted.
        pad_value: Value appended to sequences shorter than the longest one.

    Returns:
        A ``(padded, attention_mask)`` pair of tensors with one row per input
        sequence. ``attention_mask`` is an integer tensor holding 1 at the
        positions of original values and 0 at padded positions. For an empty
        input both tensors have shape ``(0, 0)``.
    """
    sequences = [list(seq) for seq in list_of_lists]
    if not sequences:
        # max() over an empty batch raises ValueError; return empty tensors
        # of a well-defined shape and dtype instead.
        return (
            torch.empty((0, 0), dtype=torch.long),
            torch.empty((0, 0), dtype=torch.long),
        )

    max_len = max(len(seq) for seq in sequences)

    padded_lists = []
    attention_masks = []
    for seq in sequences:
        n_missing = max_len - len(seq)
        # Pad shorter lists with the specified value
        padded_lists.append(seq + [pad_value] * n_missing)
        attention_masks.append([1] * len(seq) + [0] * n_missing)

    # The mask dtype is pinned so it stays integral even when every row is empty.
    return torch.tensor(padded_lists), torch.tensor(attention_masks, dtype=torch.long)
62
+
63
+
64
def collate_fn(batch: List[dict]):
    """Collate raw dataset rows into padded tensors for token classification.

    Every row is indexed with the keys ``'tokens'`` and ``'tags_knowledge'``.
    Tokens are converted to ids with the module-level ``tokenizer`` and tag
    names to class ids with the module-level ``label2id`` mapping (looked up
    when the function is called). Both are right-padded with ``pad``.

    Args:
        batch: Rows handed over by the ``DataLoader``.

    Returns:
        Dict with the tensors ``input_ids``, ``tags_knowledge`` and
        ``attention_mask``. Padded label positions hold 0, so consumers must
        use ``attention_mask`` to tell padding from real labels.

    Raises:
        KeyError: If a tag name is missing from ``label2id``.
    """
    token_ids = [tokenizer.convert_tokens_to_ids(row['tokens']) for row in batch]
    tag_ids = [[label2id[tag] for tag in row['tags_knowledge']] for row in batch]

    input_ids, attention_mask = pad(token_ids)
    # The label mask is identical in shape to the token mask and is discarded.
    tags_knowledge, _ = pad(tag_ids)
    return {"input_ids": input_ids, "tags_knowledge": tags_knowledge, "attention_mask": attention_mask}
69
+
70
# Training settings
batch_size = 32
train_dataloader = DataLoader(dataset['train'], shuffle=True, batch_size=batch_size, collate_fn=collate_fn)
# NOTE(review): the evaluation loader reads the same 'train' split as the
# training loader -- confirm whether a held-out split should be used instead.
eval_dataloader = DataLoader(dataset['train'], batch_size=batch_size, collate_fn=collate_fn)

from tqdm.auto import tqdm
from torch.optim import AdamW
from transformers import get_scheduler

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Every batch is moved to `device` in the training loop, so the model has to
# live there too; otherwise the forward pass fails on a CUDA machine. Done
# before the optimizer is constructed from the model parameters.
model.to(device)
model.train()

# Label value skipped by the loss; used to mask padded positions.
IGNORE_INDEX = -100
criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)
id2label = model.config.id2label
label2id = model.config.label2id

lr = 5e-5
optimizer = AdamW(model.parameters(), lr=lr)

num_epochs = 3
# One scheduler step is taken per batch, hence epochs * batches-per-epoch.
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

# Matches the default pad value (0) that `pad` uses for input ids.
model.config.pad_token_id = 0
97
+
98
## Training

import logging
import os
from datetime import datetime

from dotenv import load_dotenv

# Load variables from the local .env file so that WANDB_API_KEY can be
# supplied through it.
load_dotenv(".env")

current_time = datetime.now()

wandb.login(key=os.getenv('WANDB_API_KEY'))

# Hyperparameters and run metadata tracked alongside the run.
_run_config = dict(
    learning_rate=lr,
    architecture="BERT",
    epochs=num_epochs,
    batch_size=batch_size,
    notes="Datetime: " + current_time.strftime("%m/%d/%Y, %H:%M:%S"),
)

# The run is logged to the "in-demand" W&B project.
run = wandb.init(project="in-demand", config=_run_config)

logging.info("Initiating training")
126
+
127
progress_bar = tqdm(range(num_epochs), desc="Epochs")
# Scalar used to overwrite padded label positions; built once, not per batch.
ignore_label = torch.tensor(IGNORE_INDEX, device=device)

for epoch in range(num_epochs):
    logging.info(f"Epoch #{epoch}")
    print(f"Epoch #{epoch}")

    for batch_count, batch in enumerate(train_dataloader):

        logging.info(f"Batch #{batch_count} / {len(train_dataloader)}")
        print(f"Batch #{batch_count} / {len(train_dataloader)}")

        tokens = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        tags_knowledge = batch['tags_knowledge'].to(device)

        outputs = model(tokens, attention_mask=attention_mask)

        # Flatten batch and sequence dimensions: one row of logits per token.
        pred = outputs.logits.reshape(-1, model.config.num_labels)
        # Labels, with padded positions set to IGNORE_INDEX so that both the
        # loss and the accuracy skip them.
        label = torch.where(attention_mask == 0, ignore_label, tags_knowledge).reshape(-1)

        # Compute accuracy ignoring padding idx
        predicted_labels = pred.argmax(dim=1)
        non_pad_elements = label != IGNORE_INDEX
        correct_predictions = (predicted_labels[non_pad_elements] == label[non_pad_elements]).sum().item()
        total_predictions = non_pad_elements.sum().item()
        accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0

        loss = criterion(pred, label)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        # Log a plain Python float rather than the live loss tensor.
        wandb.log({"epoch": epoch, "accuracy": accuracy, "loss": loss.item()})

    progress_bar.update(1)

# Release the progress bar so later output is not drawn over it.
progress_bar.close()
167
+
168
+
169
# Publish the fine-tuned weights back to the Hub repository they were loaded from.
model.push_to_hub("Robzy/jobbert_knowledge_extraction")

# Serialize the weights into the W&B artifact as 'model.pth'.
state_dict = model.state_dict()
with artifact.new_file('model.pth', mode='wb') as weights_file:
    torch.save(state_dict, weights_file)

# Log the artifact to W&B
wandb.log_artifact(artifact)