Spaces:
Sleeping
Sleeping
Commit
·
c0cbe6c
1
Parent(s):
d461b3a
test cv extraction
Browse files
app.py
CHANGED
@@ -95,13 +95,15 @@ def LLM_Inference(cv_text):
|
|
95 |
6. Language
|
96 |
- List the languages mentioned in the CV along with proficiency levels (if specified).
|
97 |
|
98 |
-
Do not explain, comment or make up any more information that is not relative to the list of Information extraction. Respond in
|
99 |
'''
|
100 |
-
text = 'who is Lê Duẩn'
|
101 |
inputs = tokenizer(text, return_tensors='pt', max_length=2048,truncation=True).to(device)
|
102 |
with torch.no_grad():
|
103 |
outputs = model.generate(
|
104 |
-
**inputs, max_new_tokens=1024, pad_token_id = tokenizer.eos_token_id
|
|
|
|
|
|
|
105 |
)
|
106 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
107 |
|
|
|
95 |
6. Language
|
96 |
- List the languages mentioned in the CV along with proficiency levels (if specified).
|
97 |
|
98 |
+
Do not explain, comment or make up any more information that is not relative to the list of Information extraction. Respond in the CV language. Let's work this out in a step by step way to ensure the correct answer. Do not repeat the step
|
99 |
'''
|
|
|
100 |
inputs = tokenizer(text, return_tensors='pt', max_length=2048,truncation=True).to(device)
|
101 |
with torch.no_grad():
|
102 |
outputs = model.generate(
|
103 |
+
**inputs, max_new_tokens=1024, pad_token_id = tokenizer.eos_token_id,
|
104 |
+
top_p=0.99, # Nucleus sampling - only consider the top 99% probability mass
|
105 |
+
top_k=1, # Top-k sampling - keep only the single most likely token
|
106 |
+
temperature=0.0
|
107 |
)
|
108 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
109 |
|