Nassiraaa committed on
Commit
dfa2915
·
verified ·
1 Parent(s): ee6a627

Delete spelling_grammar_checker.py

Browse files
Files changed (1) hide show
  1. spelling_grammar_checker.py +0 -96
spelling_grammar_checker.py DELETED
@@ -1,96 +0,0 @@
1
- import json
2
- import logging
3
- from huggingface_hub import hf_hub_download
4
- from llama_cpp import Llama
5
- from cv_prompt import get_spelling_grammar_prompt
6
- from ocr_utils import combine_ocr_results, extract_text_aws, extract_text_doctr, extract_text_easyocr, extract_text_paddleocr, load_models, detect_language
7
- from config import weights
8
-
9
# Set up root logging: INFO level, records formatted as "time - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def load_model():
    """Fetch the GGUF model file and wrap it in a ``Llama`` instance.

    The file is obtained through ``hf_hub_download`` and handed to ``Llama``
    with ``n_ctx=32768`` and ``n_gpu_layers=2``.

    Returns:
        The ``Llama`` object, or ``None`` if the download or the model
        construction raised (the error is logged).
    """
    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
    gguf_name = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    try:
        local_path = hf_hub_download(repo_id, filename=gguf_name)
        model = Llama(model_path=local_path, n_ctx=32768, n_gpu_layers=2)
    except Exception as e:
        logging.error(f"Error loading model: {str(e)}")
        model = None
    return model


# Module-level model handle, created at import time; None when loading failed.
llm = load_model()
21
-
22
def load_error_scoring():
    """Load the error-scoring table from ``error_scoring.json``.

    The path is relative, so the file is resolved against the current
    working directory.

    Returns:
        The parsed JSON content, or ``None`` if the file could not be opened
        or parsed (the failure is logged).
    """
    try:
        # JSON text is UTF-8; being explicit avoids decoding with whatever the
        # platform's locale default happens to be.
        with open('error_scoring.json', 'r', encoding='utf-8') as file:
            return json.load(file)
    except Exception as e:
        logging.error(f"Error loading error scoring: {str(e)}")
        return None


# Scoring table read once at import time; None when loading failed.
error_scoring = load_error_scoring()
31
-
32
def get_error_score(error_percentage):
    """Translate an error percentage into a score using the loaded table.

    Each entry of ``error_scoring['error_scores']`` is checked in order; the
    first one whose half-open range ``min <= error_percentage < max`` contains
    the value supplies the score.

    Args:
        error_percentage: The percentage to look up.

    Returns:
        The matching entry's ``score``; the last entry's ``score`` when no
        range matches (e.g. a percentage of exactly 100); or ``0`` when the
        scoring table was not loaded.
    """
    if error_scoring is None:
        logging.error("Error scoring data not loaded")
        return 0

    bands = error_scoring['error_scores']
    # Lazily yield the score of every band containing the value; only the
    # first one is ever consumed.
    candidates = (
        band['score']
        for band in bands
        if band['min'] <= error_percentage < band['max']
    )
    no_match = object()
    chosen = next(candidates, no_match)
    if chosen is not no_match:
        return chosen
    # Nothing matched: fall back to the final band's score.
    return bands[-1]['score']
41
-
42
def _parse_error_percentage(response_text):
    """Read a percentage out of the model reply; raise ValueError if there is none."""
    import re  # local import: only this helper needs it

    try:
        # Fast path: the reply is already a bare number.
        return float(response_text)
    except ValueError:
        pass

    # Replies are often decorated ("12.5%", "Error percentage: 12.5").
    # Prefer a number directly followed by a percent sign, then fall back to
    # the first number found anywhere in the reply.
    for pattern in (r'(\d+(?:\.\d+)?)\s*%', r'(\d+(?:\.\d+)?)'):
        match = re.search(pattern, response_text)
        if match:
            return float(match.group(1))
    raise ValueError(f"no number found in LLM response: {response_text!r}")


def check_spelling_and_grammar(text):
    """Ask the LLM for the error percentage of *text* and map it to a score.

    The prompt comes from ``get_spelling_grammar_prompt`` and the reply is
    limited to 50 tokens. The percentage is parsed from the reply text and
    passed to ``get_error_score``.

    Args:
        text: The text to evaluate.

    Returns:
        A ``(score, error_percentage)`` tuple. ``(0, 0)`` is returned when the
        model is not loaded, when no number can be read from the reply, or
        when any other error occurs; each case is logged.
    """
    if llm is None:
        logging.error("LLM model not loaded")
        return 0, 0

    prompt = get_spelling_grammar_prompt(text)
    try:
        response = llm(prompt, max_tokens=50)
        response_text = response['choices'][0]['text'].strip()
        error_percentage = _parse_error_percentage(response_text)
        return get_error_score(error_percentage), error_percentage
    except ValueError:
        logging.error("Failed to parse error percentage from LLM response")
        return 0, 0
    except Exception as e:
        logging.error(f"Error in spelling and grammar check: {str(e)}")
        return 0, 0
59
-
60
def evaluate_cv_text(file_path):
    """Run the OCR engines on a CV file and score the combined text.

    The file is read as bytes, its language is detected, the OCR models are
    loaded for that language, and the AWS, docTR, EasyOCR and PaddleOCR
    extractors are invoked in that order. Their outputs are combined with the
    configured ``weights`` and passed to ``check_spelling_and_grammar``.

    Args:
        file_path: Path of the CV file to evaluate.

    Returns:
        ``{"score": ..., "error_percentage": ...}`` on success. If any step
        raises, the error is logged and a dict with both values set to ``0``
        plus an ``"error"`` message is returned instead.
    """
    try:
        # Raw bytes feed language detection and the AWS extractor; the other
        # extractors receive the path itself.
        with open(file_path, 'rb') as cv_file:
            raw_bytes = cv_file.read()

        language = detect_language(raw_bytes)
        doctr_model, easyocr_reader, paddleocr_reader = load_models(language)

        # Call each OCR backend in a fixed order, keyed by engine name.
        ocr_outputs = {}
        ocr_outputs["aws"] = extract_text_aws(raw_bytes)
        ocr_outputs["doctr"] = extract_text_doctr(file_path, doctr_model)
        ocr_outputs["easyocr"] = extract_text_easyocr(file_path, easyocr_reader)
        ocr_outputs["paddleocr"] = extract_text_paddleocr(file_path, paddleocr_reader)

        merged_text = combine_ocr_results(ocr_outputs, weights)

        score, error_percentage = check_spelling_and_grammar(merged_text)
    except Exception as e:
        logging.error(f"Error in CV text evaluation: {str(e)}")
        failure = {"score": 0, "error_percentage": 0}
        failure["error"] = str(e)
        return failure

    outcome = {"score": score}
    outcome["error_percentage"] = error_percentage
    return outcome