Spaces:

mgbam
/

builder

Running

App Files Files Community

builder / file_processing.py

mgbam

Upload 6 files

51d8a3f verified about 24 hours ago

raw

history blame contribute delete

3.04 kB

	import os
	import mimetypes
	import PyPDF2
	import docx
	import cv2
	import numpy as np
	from PIL import Image
	import pytesseract

	def process_image_for_model(image):
	    """Encode an image as a base64 PNG ``data:`` URL for model input.

	    ``None`` is passed straight through. A NumPy array is first wrapped with
	    ``Image.fromarray``; any other value is used as-is and must offer a
	    PIL-style ``save(fp, format=...)`` method.

	    Returns:
	        ``None`` when ``image`` is ``None``; otherwise a string of the form
	        ``data:image/png;base64,<payload>``.
	    """
	    if image is None:
	        return None

	    # Imported here, after the early return, so they are only loaded when
	    # there is actually something to encode.
	    import io
	    import base64

	    # Arrays are turned into PIL images; everything else is saved as given.
	    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image

	    png_stream = io.BytesIO()
	    pil_image.save(png_stream, format='PNG')

	    payload = base64.b64encode(png_stream.getvalue()).decode()
	    return "data:image/png;base64," + payload

	def extract_text_from_image(image_path):
	    """Run OCR on an image file and return the recognised text.

	    The file is loaded with ``cv2.imread``, converted BGR -> RGB -> grayscale,
	    binarised with an Otsu threshold, and handed to
	    ``pytesseract.image_to_string`` with ``--psm 6``.

	    Failures are reported through the return value instead of being raised:

	    * the Tesseract version probe raising anything -> "not installed" message
	    * ``cv2.imread`` returning ``None`` -> "Could not read image file"
	    * OCR output that is empty after stripping -> "No text found in image"
	    * any other exception -> "Error extracting text from image: <exc>"
	    """
	    try:
	        # Probe for Tesseract up front so its absence gets a dedicated message
	        # rather than the generic one from the outer handler.
	        try:
	            pytesseract.get_tesseract_version()
	        except Exception:
	            return "Error: Tesseract OCR is not installed. Please install Tesseract to extract text from images. See install_tesseract.md for instructions."

	        loaded = cv2.imread(image_path)
	        if loaded is None:
	            return "Error: Could not read image file"

	        as_rgb = cv2.cvtColor(loaded, cv2.COLOR_BGR2RGB)
	        grayscale = cv2.cvtColor(as_rgb, cv2.COLOR_RGB2GRAY)

	        # cv2.threshold returns (threshold_used, image); only the image is needed.
	        otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
	        binarised = cv2.threshold(grayscale, 0, 255, otsu_flags)[1]

	        recognised = pytesseract.image_to_string(binarised, config='--psm 6').strip()
	        if recognised:
	            return recognised
	        return "No text found in image"

	    except Exception as e:
	        return f"Error extracting text from image: {e}"

	def extract_text_from_file(file_path):
	    """Return the text content of a file, dispatching on its extension.

	    Handled extensions (matched case-insensitively):

	    * ``.pdf`` - text of every page via ``PyPDF2.PdfReader``, joined by newlines
	      (a page with no extractable text contributes an empty string).
	    * ``.txt`` / ``.md`` / ``.csv`` - the file contents decoded as UTF-8.
	    * ``.docx`` - the text of each paragraph via ``docx.Document``, joined by
	      newlines.
	    * ``.jpg`` / ``.jpeg`` / ``.png`` / ``.bmp`` / ``.tiff`` / ``.tif`` /
	      ``.gif`` / ``.webp`` - delegated to ``extract_text_from_image``.

	    Args:
	        file_path: Path of the file to read. A falsy value yields ``""``.

	    Returns:
	        The extracted text; ``""`` for a falsy path or an unsupported
	        extension; or ``"Error extracting text: <exc>"`` if reading or parsing
	        raises.
	    """
	    if not file_path:
	        return ""

	    text_exts = (".txt", ".md", ".csv")
	    image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp")

	    # Lower-cased once here, so the comparisons below need no further folding.
	    ext = os.path.splitext(file_path)[1].lower()
	    try:
	        if ext == ".pdf":
	            with open(file_path, "rb") as f:
	                reader = PyPDF2.PdfReader(f)
	                # Joined inside the ``with`` so the file is still open while
	                # the pages are read.
	                return "\n".join(page.extract_text() or "" for page in reader.pages)
	        if ext in text_exts:
	            # "utf-8-sig" drops a leading byte-order mark if one is present,
	            # and errors="replace" substitutes U+FFFD for undecodable bytes so
	            # a file that is not strictly UTF-8 still yields its readable text
	            # instead of collapsing into an error string.
	            with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
	                return f.read()
	        if ext == ".docx":
	            doc = docx.Document(file_path)
	            return "\n".join(para.text for para in doc.paragraphs)
	        if ext in image_exts:
	            return extract_text_from_image(file_path)
	        return ""
	    except Exception as e:
	        return f"Error extracting text: {e}"

	def create_multimodal_message(text, image=None):
	    """Build a ``user`` chat message from text and an optional image.

	    Without an image the message content is the plain ``text``. With an image
	    the content becomes a two-element list: a ``"text"`` part followed by an
	    ``"image_url"`` part whose URL comes from ``process_image_for_model``.

	    Returns:
	        A dict with the keys ``"role"`` (always ``"user"``) and ``"content"``.
	    """
	    message = {"role": "user", "content": text}

	    if image is not None:
	        text_part = {"type": "text", "text": text}
	        image_part = {
	            "type": "image_url",
	            "image_url": {"url": process_image_for_model(image)},
	        }
	        message["content"] = [text_part, image_part]

	    return message