Nassiraaa committed on
Commit
e892881
·
verified ·
1 Parent(s): 09edb1a

Update cv_quality.py

Browse files
Files changed (1) hide show
  1. cv_quality.py +19 -32
cv_quality.py CHANGED
@@ -1,51 +1,38 @@
1
- from pdf2image import convert_from_path
2
- import os
3
- from paddleocr import PaddleOCR
4
- from ftlangdetect import detect
5
- import re
6
- from helpers import get_email,get_number
7
- from langchain_community.document_loaders import UnstructuredPDFLoader,CSVLoader,PyPDFium2Loader,PyMuPDFLoader, Docx2txtLoader
8
- from langchain_community.llms import HuggingFaceEndpoint
9
  from dotenv import load_dotenv
10
- from langchain import hub
11
- import json
12
  from yolo_text_extraction import pdf_to_text
13
- from cv_analyser import analyse_cv
14
 
15
  load_dotenv()
16
 
17
-
18
- class CV():
19
-
20
- def __init__(self,file_name):
21
-
22
- self.file_name=file_name
23
-
24
- self.doc_loader = {".csv":CSVLoader(self.file_name),".pdf":PyMuPDFLoader(self.file_name),
25
- ".docx": Docx2txtLoader(file_name)}
26
-
27
 
28
  def get_cv_text(self):
29
-
30
  _, ext = os.path.splitext(self.file_name)
31
  if ext in self.doc_loader.keys():
32
  loader = self.doc_loader[ext]
33
  data = loader.load()
34
  if len(data) != 0:
35
- text = "\n".join([ page.page_content for page in data])
36
- if len(text.split()) ==0 :
37
  text = pdf_to_text(self.file_name)
38
-
39
  return text
40
-
 
 
41
  else:
42
  text = pdf_to_text(self.file_name)
43
-
44
  return text
45
 
46
  def analyse_cv_quality(self):
47
- cv_text=self.get_cv_text()
48
- result=analyse_cv(cv_text)
49
- return result
50
-
51
-
 
1
+ import os
2
+ from langchain_community.document_loaders import CSVLoader, PyMuPDFLoader, Docx2txtLoader
 
 
 
 
 
 
3
  from dotenv import load_dotenv
 
 
4
  from yolo_text_extraction import pdf_to_text
5
+ from cv_analyzer import analyze_cv
6
 
7
  load_dotenv()
8
 
9
+ class CV:
10
+ def __init__(self, file_name):
11
+ self.file_name = file_name
12
+ self.doc_loader = {
13
+ ".csv": CSVLoader(self.file_name),
14
+ ".pdf": PyMuPDFLoader(self.file_name),
15
+ ".docx": Docx2txtLoader(file_name)
16
+ }
 
 
17
 
18
  def get_cv_text(self):
 
19
  _, ext = os.path.splitext(self.file_name)
20
  if ext in self.doc_loader.keys():
21
  loader = self.doc_loader[ext]
22
  data = loader.load()
23
  if len(data) != 0:
24
+ text = "\n".join([page.page_content for page in data])
25
+ if len(text.split()) == 0:
26
  text = pdf_to_text(self.file_name)
 
27
  return text
28
+ else:
29
+ text = pdf_to_text(self.file_name)
30
+ return text
31
  else:
32
  text = pdf_to_text(self.file_name)
 
33
  return text
34
 
35
  def analyse_cv_quality(self):
36
+ cv_text = self.get_cv_text()
37
+ result = analyze_cv(cv_text)
38
+ return result