Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 15

Commit

fda9c54

verified ·

1 Parent(s): 885b5d3

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -22

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import os
 from flask import Flask, request, jsonify
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from werkzeug.utils import secure_filename
 from pdf2image import convert_from_path
-import pytesseract
 from PIL import Image
 # Initialize Flask app
@@ -14,24 +13,36 @@ UPLOAD_FOLDER = 'uploads'
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-# Load AI Pipelines
-ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed")  # OCR Model
-text_gen_pipeline = pipeline("text-generation", model="gpt2")  # GPT-2 for text generation
-# Function to extract text from a PDF resume
 def extract_text_from_pdf(pdf_path):
     images = convert_from_path(pdf_path)
     extracted_text = ""
     for img in images:
-        text = pytesseract.image_to_string(img)  # OCR extraction
         extracted_text += text + "\n"
     return extracted_text.strip()
-# Route: Upload Resume & Generate Report
 @app.route('/upload', methods=['POST'])
-def upload_resume():
     if 'file' not in request.files:
         return jsonify({"error": "No file uploaded"}), 400
@@ -39,25 +50,21 @@ def upload_resume():
     if file.filename == '':
         return jsonify({"error": "No file selected"}), 400
     # Save uploaded file
     filename = secure_filename(file.filename)
     file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
     file.save(file_path)
-    # Extract text from PDF
-    extracted_text = extract_text_from_pdf(file_path)
-    # Generate AI evaluation
-    prompt = f"Candidate Resume: {extracted_text}\n\nEvaluate the suitability of this candidate for a software engineering role at Google."
-    ai_evaluation = text_gen_pipeline(prompt, max_length=150, num_return_sequences=1)[0]["generated_text"]
-    # Return response
-    response = {
-        "resume_text": extracted_text[:1000],  # Limit to 1000 chars for display
-        "ai_evaluation": ai_evaluation
-    }
-    return jsonify(response)
 # Run Flask App
 if __name__ == '__main__':

 import os
 from flask import Flask, request, jsonify
 from werkzeug.utils import secure_filename
+from transformers import pipeline
 from pdf2image import convert_from_path
 from PIL import Image
 # Initialize Flask app
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+# Allowed file extensions
+ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'pdf'}
+# Load TrOCR Model
+ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-small-printed")
+def allowed_file(filename):
+    """Check if the file has an allowed extension."""
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+def extract_text_from_image(image_path):
+    """Extract text from a single image using TrOCR."""
+    image = Image.open(image_path).convert("RGB")
+    text = ocr_pipeline(image)[0]['generated_text']
+    return text
 def extract_text_from_pdf(pdf_path):
+    """Convert PDF to images and extract text from each page."""
     images = convert_from_path(pdf_path)
     extracted_text = ""
     for img in images:
+        text = extract_text_from_image(img)
         extracted_text += text + "\n"
     return extracted_text.strip()
 @app.route('/upload', methods=['POST'])
+def upload_file():
+    """Handle file upload and text extraction."""
     if 'file' not in request.files:
         return jsonify({"error": "No file uploaded"}), 400
     if file.filename == '':
         return jsonify({"error": "No file selected"}), 400
+    if not allowed_file(file.filename):
+        return jsonify({"error": "Invalid file type. Allowed: PNG, JPG, JPEG, PDF."}), 400
     # Save uploaded file
     filename = secure_filename(file.filename)
     file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
     file.save(file_path)
+    # Process image or PDF
+    if filename.lower().endswith(".pdf"):
+        extracted_text = extract_text_from_pdf(file_path)
+    else:
+        extracted_text = extract_text_from_image(file_path)
+    return jsonify({"extracted_text": extracted_text})
 # Run Flask App
 if __name__ == '__main__':