Spaces:

fajjos
/

pdf_model

Sleeping

App Files Files Community

pdf_model / app.py

fajjos

Update app.py

c4accb1 verified 7 months ago

raw

history blame

2.41 kB

	import os
	from transformers import pipeline
	import streamlit as st
	from PyPDF2 import PdfReader

	# Initialize the Hugging Face model pipeline
	# `st.cache_resource` is Streamlit's cache for shared, unhashable objects such
	# as ML models; it returns the same object on every rerun without hashing it.
	# The previous `st.cache(hash_funcs={pipeline: ...})` was deprecated, and its
	# hash override was keyed on the `pipeline` factory function rather than on
	# the type of the object it returns, so it never took effect.
	@st.cache_resource
	def load_model():
	    """Load the text-classification pipeline, reusing it across reruns.

	    Returns:
	        The object produced by ``pipeline("text-classification", ...)`` for
	        the ``fajjos/pdf_model`` checkpoint.
	    """
	    return pipeline("text-classification", model="fajjos/pdf_model")

	# Extract text from a PDF file
	def extract_text_from_pdf(pdf_path):
	    """Return the concatenated text of every page in a PDF.

	    Pages that yield no text are skipped. If reading fails part-way, the
	    error is shown with ``st.error`` and the text collected before the
	    failure is returned (possibly an empty string).

	    Args:
	        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

	    Returns:
	        The extracted text, or ``""`` when nothing could be read.
	    """
	    page_texts = []
	    try:
	        reader = PdfReader(pdf_path)
	        for page in reader.pages:
	            # Extract once per page and reuse the result instead of running
	            # the extraction a second time just to append it.
	            page_text = page.extract_text()
	            if page_text:  # None or empty when the page has no text
	                page_texts.append(page_text)
	    except Exception as e:
	        # Best-effort: report the problem in the UI and keep what we have.
	        st.error(f"Error reading {pdf_path}: {e}")
	    return "".join(page_texts)

	# Search for the keyword in PDF files
	def search_keyword_in_pdfs(folder_path, keyword, model):
	    """Return the names of PDFs in a folder that match a keyword.

	    A file matches when its extracted text contains ``keyword``
	    (case-insensitively) and at least one ``"label"`` returned by ``model``
	    for that text also contains the keyword. A failure while running the
	    model on a file is shown with ``st.error`` and that file is skipped.

	    Args:
	        folder_path: Directory to scan (not recursive).
	        keyword: Text to look for; compared case-insensitively.
	        model: Callable invoked as ``model(text)``; expected to return an
	            iterable of mappings that each have a ``"label"`` string.

	    Returns:
	        Matching file names (not full paths), in sorted order.
	    """
	    needle = keyword.lower()  # lower-case once instead of once per file

	    # Accept any casing of the extension (".PDF" too), ignore directories that
	    # merely end in ".pdf", and sort because os.listdir order is arbitrary.
	    pdf_files = sorted(
	        name
	        for name in os.listdir(folder_path)
	        if name.lower().endswith(".pdf")
	        and os.path.isfile(os.path.join(folder_path, name))
	    )
	    matched_files = []

	    for pdf_file in pdf_files:
	        text = extract_text_from_pdf(os.path.join(folder_path, pdf_file))
	        if not text or needle not in text.lower():
	            continue

	        # The plain-text hit is then confirmed against the model's labels.
	        try:
	            result = model(text)
	            if any(needle in res["label"].lower() for res in result):
	                matched_files.append(pdf_file)
	        except Exception as e:
	            st.error(f"Error processing {pdf_file} with the model: {e}")
	    return matched_files

	# Streamlit App UI
	st.title("PDF Keyword Search")

	# Collect the search parameters from the user.
	folder_path = st.text_input("Enter the folder path:")
	keyword = st.text_input("Enter the keyword to search:")

	# On click: validate the folder first, then the keyword, then run the search.
	if st.button("Search PDFs"):
	    if not os.path.isdir(folder_path):
	        st.error("Invalid folder path. Please enter a valid path.")
	    elif not keyword:
	        st.error("Please enter a keyword.")
	    else:
	        st.info("Searching... Please wait.")
	        model = load_model()  # cached after the first call
	        matched_files = search_keyword_in_pdfs(folder_path, keyword, model)

	        if not matched_files:
	            st.warning(f"No PDFs found with the keyword '{keyword}'.")
	        else:
	            st.success(f"Found the keyword '{keyword}' in the following PDF(s):")
	            for file in matched_files:
	                st.write(f"- {file}")