zayanomar5 commited on
Commit
db7aed9
·
verified ·
1 Parent(s): 62b729a

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +32 -0
  2. README.md +10 -0
  3. gitattributes +35 -0
  4. main.py +74 -0
  5. main.yml +27 -0
  6. requirements.txt +16 -0
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ WORKDIR /code
7
+
8
+ COPY ./requirements.txt /code/requirements.txt
9
+
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ RUN wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
13
+
14
+ RUN git clone "https://huggingface.co/sentence-transformers/paraphrase-TinyBERT-L6-v2"
15
+
16
+ RUN mkdir /.cache
17
+
18
+ RUN chmod 777 /.cache
19
+
20
+ # RUN mkdir /.cache/huggingface
21
+
22
+ # RUN mkdir /.cache/huggingface/hub
23
+
24
+ # RUN mkdir /.cache/huggingface/hub/models--sentence-transformers--paraphrase-TinyBERT-L6-v2
25
+
26
+ # RUN mkdir /.cache/huggingface/hub/models--sentence-transformers--paraphrase-TinyBERT-L6-v2/blobs
27
+
28
+ # RUN mkdir /.cache/huggingface/hub/models--sentence-transformers--paraphrase-TinyBERT-L6-v2/snapshots
29
+
30
+ COPY . .
31
+
32
+ CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "300", "main:app"]
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Cv
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
main.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Flask service that extracts skills from a CV with a local Phi-3 GGUF model
# and scores employee/job skill matches with sentence-transformer embeddings.
from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
from sentence_transformers import SentenceTransformer
from transformers import AutoModel
import torch
from torch.nn.functional import cosine_similarity
import os

app = Flask(__name__)

# llama.cpp settings: 0 GPU layers = CPU-only inference (matches the plain
# python:3.9 Docker image, which has no CUDA).
n_gpu_layers = 0
n_batch = 1024  # prompt-evaluation batch size; trades memory for throughput

llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-q4.gguf",  # path to GGUF file (downloaded in the Dockerfile)
    temperature=0.1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096
)
# NOTE(review): model0 is loaded but never referenced below — presumably a
# leftover; confirm no external caller imports it before removing.
model0 = AutoModel.from_pretrained('sentence-transformers/paraphrase-TinyBERT-L6-v2')

# Embedding model used by /compare to encode skill strings.
model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')

# Startup sanity check: log the GGUF size so a truncated/failed download is visible.
file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
print("model size ====> :", file_size.st_size, "bytes")
@app.route('/cv', methods=['POST'])
def get_skills():
    """Extract the skills mentioned in a CV.

    Expects a JSON body: {"cv_body": "<free-text CV>"}.
    Returns {"skills": <raw model output string>}, or a 400 error when the
    body is missing, not JSON, or cv_body is not a non-empty string.
    """
    # get_json(silent=True) avoids a 500 on a non-JSON body; previously a
    # missing cv_body was interpolated into the prompt as the string "None".
    payload = request.get_json(silent=True) or {}
    cv_body = payload.get('cv_body')
    if not isinstance(cv_body, str) or not cv_body.strip():
        return jsonify({'error': 'cv_body must be a non-empty string'}), 400

    # Phi-3 chat format: the instruction belongs inside the <|user|> turn and
    # the <|assistant|> tag is left open so the model writes its answer after
    # it (the original placed the question after <|assistant|>, asking the
    # model to continue an already-"answered" turn).
    output = llm(
        f"<|user|>\n{cv_body}\nCan you list the skills mentioned in the CV?<|end|>\n<|assistant|>",
        max_tokens=256,  # generate up to 256 tokens
        stop=["<|end|>"],
        echo=True,  # whether to echo the prompt in the output
    )

    return jsonify({'skills': output})
43
+
@app.get('/')
def health():
    """Liveness probe: confirm the service is up and responding."""
    status_payload = {'status': 'Worked'}
    return jsonify(status_payload)
47
+
@app.route('/compare', methods=['POST'])
def compare():
    """Score how well an employee's skills match each job's skill description.

    Expects JSON: {"employee_skills": <string or list of strings>,
                   "jobs_skills": <list of strings>}.
    Returns a list of {"job": <skill string>, "similarity_score": <float>},
    or a 400 error on malformed input.
    """
    payload = request.get_json(silent=True) or {}
    employee_skills = payload.get('employee_skills')
    jobs_skills = payload.get('jobs_skills')

    # Validate both inputs (previously only jobs_skills was checked, and the
    # raised ValueError surfaced to clients as an opaque HTTP 500).
    if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
        return jsonify({'error': 'jobs_skills must be a list of strings'}), 400
    if not (isinstance(employee_skills, str) or
            (isinstance(employee_skills, list) and
             all(isinstance(s, str) for s in employee_skills))):
        return jsonify({'error': 'employee_skills must be a string or a list of strings'}), 400

    # Encode skills into embeddings.
    job_embeddings = model.encode(jobs_skills)
    employee_embeddings = model.encode(employee_skills)

    # A list of employee skills encodes to an (n, dim) matrix; average it into
    # a single profile vector so the per-job cosine similarity is a scalar
    # (.item() previously crashed on the multi-element result). A single
    # string encodes to one (dim,) vector and is unchanged by this branch.
    employee_tensor = torch.from_numpy(employee_embeddings)
    if employee_tensor.dim() == 2:
        employee_tensor = employee_tensor.mean(dim=0)
    employee_tensor = employee_tensor.unsqueeze(0)

    # Cosine similarity between the employee profile and each job skill.
    similarity_scores = []
    for i, job_e in enumerate(job_embeddings):
        job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
        similarity_score = cosine_similarity(employee_tensor, job_e_tensor, dim=1)
        similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.item()})

    return jsonify(similarity_scores)
71
+
72
+
if __name__ == '__main__':
    # Flask development server for local runs only; in the container gunicorn
    # serves main:app instead (see the Dockerfile CMD).
    app.run()
main.yml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python application
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ pull_request:
7
+ branches: [ main ]
8
+
9
+ jobs:
10
+ build:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+
16
+ - name: Set up Python 3.x
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: '3.x'
20
+
21
+ - name: Install dependencies
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ pip install -r requirements.txt
25
+
26
+ - name: Run the app
27
+ run: python main.py
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flask
2
+ langchain
3
+ matplotlib
4
+ numpy
5
+ gensim
6
+ scikit-learn
7
+ llama-cpp-python
8
+ huggingface_hub==0.23.0
9
+ langchain
10
+ langchain-experimental
11
+ scipy==1.10.1
12
+ gunicorn
13
+ langchain-community
14
+ sentence-transformers
15
+ torch
16
+ transformers