Spaces:
Sleeping
Sleeping
Commit
·
7d26fee
1
Parent(s):
a4f64ec
test cv extraction
Browse files
app.py
CHANGED
@@ -25,7 +25,6 @@ import huggingface_hub
|
|
25 |
#zero = torch.Tensor([0]).cuda()
|
26 |
load_dotenv()
|
27 |
api_token = os.getenv("HF_TOKEN")
|
28 |
-
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
29 |
|
30 |
#@spaces.GPU
|
31 |
def read_pdf(file_path):
|
@@ -49,15 +48,16 @@ def read_pdf(file_path):
|
|
49 |
img = Image.open(path)
|
50 |
pix = None
|
51 |
output += pytesseract.image_to_string(img, lang='vie') + '\n'
|
|
|
52 |
return output
|
53 |
|
54 |
|
55 |
@spaces.GPU(duration=30)
|
56 |
def LLM_Inference(cv_text):
|
57 |
huggingface_hub.login(token=api_token)
|
58 |
-
device = torch.device('cuda')
|
59 |
-
tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-
|
60 |
-
model = AutoModelForCausalLM.from_pretrained('google/gemma-2-
|
61 |
|
62 |
text = f'''
|
63 |
You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's CV and extract the following details:
|
|
|
25 |
#zero = torch.Tensor([0]).cuda()
|
26 |
load_dotenv()
|
27 |
api_token = os.getenv("HF_TOKEN")
|
|
|
28 |
|
29 |
#@spaces.GPU
|
30 |
def read_pdf(file_path):
|
|
|
48 |
img = Image.open(path)
|
49 |
pix = None
|
50 |
output += pytesseract.image_to_string(img, lang='vie') + '\n'
|
51 |
+
os.remove(path)
|
52 |
return output
|
53 |
|
54 |
|
55 |
@spaces.GPU(duration=30)
|
56 |
def LLM_Inference(cv_text):
|
57 |
huggingface_hub.login(token=api_token)
|
58 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
59 |
+
tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-9b-it')
|
60 |
+
model = AutoModelForCausalLM.from_pretrained('google/gemma-2-9b-it').to(device)
|
61 |
|
62 |
text = f'''
|
63 |
You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's CV and extract the following details:
|