Spaces:

khoatran94
/

cv_ocr_gradio

Sleeping

khoatran94 committed on Nov 21, 2024

Commit

302a390

1 Parent(s): d7b6a67

test cv extraction

Files changed (1) hide show

app.py CHANGED Viewed

@@ -57,7 +57,7 @@ def read_pdf(file_path):
     return output
-@spaces.GPU(duration=15)
 def LLM_Inference(cv_text):
     text = f'''
     You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's CV and extract the following details:
@@ -97,11 +97,10 @@ def LLM_Inference(cv_text):
     Do not explain, comment or make up any more information that is not relative to the list of Information extraction. Respond in Vietnamese. Let's work this out in a step by step way to ensure the correct answer. [END].
     '''
-    text = 'short answer on who Lê Duẩn is'
     inputs = tokenizer(text, return_tensors='pt').to(device)
     with torch.no_grad():
         outputs = model.generate(
-            **inputs, max_new_tokens=1024, pad_token_id = tokenizer.eos_token_id
         )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)

     return output
+@spaces.GPU(duration=30)
 def LLM_Inference(cv_text):
     text = f'''
     You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's CV and extract the following details:
     Do not explain, comment or make up any more information that is not relative to the list of Information extraction. Respond in Vietnamese. Let's work this out in a step by step way to ensure the correct answer. [END].
     '''
     inputs = tokenizer(text, return_tensors='pt').to(device)
     with torch.no_grad():
         outputs = model.generate(
+            **inputs, max_new_tokens=2048, pad_token_id = tokenizer.eos_token_id
         )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)