khoatran94 committed · Commit c0cbe6c · 1 Parent(s): d461b3a

test cv extraction

Files changed (1): app.py (+5 −3)
app.py CHANGED
@@ -95,13 +95,15 @@ def LLM_Inference(cv_text):
     6. Language
     - List the languages mentioned in the CV along with proficiency levels (if specified).

-    Do not explain, comment, or make up any information that is not relevant to the list of information to extract. Respond in Vietnamese. Let's work this out in a step by step way to ensure the correct answer. [END].
+    Do not explain, comment, or make up any information that is not relevant to the list of information to extract. Respond in the CV language. Let's work this out in a step by step way to ensure the correct answer. Do not repeat the steps.
     '''
-    text = 'who is Lê Duẩn'
     inputs = tokenizer(text, return_tensors='pt', max_length=2048, truncation=True).to(device)
     with torch.no_grad():
         outputs = model.generate(
-            **inputs, max_new_tokens=1024, pad_token_id=tokenizer.eos_token_id
+            **inputs, max_new_tokens=1024, pad_token_id=tokenizer.eos_token_id,
+            top_p=0.99,      # nucleus sampling: keep the smallest token set covering 99% of the probability mass
+            top_k=1,         # restrict candidates to the single most likely token (effectively greedy)
+            temperature=0.0
         )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)

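
For context, the following is a hedged, self-contained sketch of the generation call this commit introduces, not the app's actual code: the "gpt2" checkpoint, the stub prompt, the lowercase function name, and the reduced max_new_tokens are placeholders chosen only to keep the snippet runnable, while the decoding arguments mirror the diff. Note that transformers' generate() defaults to do_sample=False, so top_p, top_k, and temperature are only consulted in sampling mode; as committed, decoding stays greedy, which fits deterministic CV extraction.

# Hedged sketch only: "gpt2" and the stub prompt stand in for app.py's real
# checkpoint and prompt template, which the diff does not show.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")                # placeholder model
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

def llm_inference(cv_text: str) -> str:
    # Stand-in for the extraction prompt built in app.py.
    text = f"Extract the requested fields from this CV:\n{cv_text}"
    inputs = tokenizer(text, return_tensors="pt", max_length=2048, truncation=True).to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,                    # app.py uses 1024; kept small for gpt2's 1,024-token context
            pad_token_id=tokenizer.eos_token_id,
            top_p=0.99,                           # consulted only when do_sample=True
            top_k=1,                              # in sampling mode this keeps just the argmax token
            temperature=0.0,                      # likewise ignored under the default greedy decoding
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print(llm_inference("Jane Doe - Python developer; languages: English (fluent), French (basic)."))

Because top_k=1 already collapses the candidate set to the highest-probability token, passing do_sample=True with a small positive temperature would give essentially the same greedy behaviour while silencing the unused-flag warnings these settings trigger.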