Spaces:

khoatran94
/

cv_ocr_gradio

Sleeping

khoatran94 committed on Nov 21, 2024

Commit

7d26fee

1 Parent(s): a4f64ec

test cv extraction

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,7 +25,6 @@ import huggingface_hub
 #zero = torch.Tensor([0]).cuda()
 load_dotenv()
 api_token = os.getenv("HF_TOKEN")
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 #@spaces.GPU
 def read_pdf(file_path):
@@ -49,15 +48,16 @@ def read_pdf(file_path):
                 img = Image.open(path)
                 pix = None
                 output += pytesseract.image_to_string(img, lang='vie') + '\n'
     return output
 @spaces.GPU(duration=30)
 def LLM_Inference(cv_text):
     huggingface_hub.login(token=api_token)
-    device = torch.device('cuda')
-    tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b-it')
-    model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b-it').to(device)
     text = f'''
     You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's CV and extract the following details:

 #zero = torch.Tensor([0]).cuda()
 load_dotenv()
 api_token = os.getenv("HF_TOKEN")
 #@spaces.GPU
 def read_pdf(file_path):
                 img = Image.open(path)
                 pix = None
                 output += pytesseract.image_to_string(img, lang='vie') + '\n'
+                os.remove(path)
     return output
 @spaces.GPU(duration=30)
 def LLM_Inference(cv_text):
     huggingface_hub.login(token=api_token)
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-9b-it')
+    model = AutoModelForCausalLM.from_pretrained('google/gemma-2-9b-it').to(device)
     text = f'''
     You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's CV and extract the following details: