Spaces:

Fluospark128
/

Genre_prediction

Sleeping

App Files Files Community

Genre_prediction / app.py

Fluospark128

Update app.py

6c2882b verified 2 months ago

raw

history blame contribute delete

1.78 kB

	# Dependencies: pip install streamlit transformers torch PyPDF2


	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch

	# Load the model and tokenizer
	#tokenizer = AutoTokenizer.from_pretrained("your_huggingface_model_path")
	#model = AutoModelForSequenceClassification.from_pretrained("your_huggingface_model_path")

	# Genre names displayed next to the classifier scores; entry i is shown
	# with score i. NOTE(review): the order presumably has to match the label
	# indices of the fine-tuned checkpoint -- confirm against the model config.
	genre_labels = [
	    "mystery",
	    "sci-fi",
	    "fantasy",
	    "romance",
	    "thriller",
	    "horror",
	    "drama",
	    "comedy",
	    "historical fiction",
	    "adventure",
	    "action",
	    "young adult",
	    "classic",
	    "biography",
	    "non-fiction",
	    "self-help",
	    "children's literature",
	    "poetry",
	    "crime",
	    "dystopian",
	]

	# PDF reading is only needed by this section of the script
	# (pip install PyPDF2).
	from PyPDF2 import PdfReader

	# Checkpoint handed to `from_pretrained`. This is still the placeholder the
	# script shipped with -- TODO: replace with the real Hub id or local path.
	MODEL_PATH = "your_huggingface_model_path"


	@st.cache_resource
	def _load_classifier(model_path):
	    """Load the tokenizer and sequence-classification model for *model_path*.

	    Streamlit re-executes the whole script on every widget interaction, so
	    the pair is cached and loaded only once per process.

	    Returns:
	        A ``(tokenizer, model)`` tuple.

	    Raises:
	        OSError: presumably when the checkpoint cannot be found or
	            downloaded -- see the transformers documentation.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(model_path)
	    model = AutoModelForSequenceClassification.from_pretrained(model_path)
	    return tokenizer, model


	def _extract_pdf_text(pdf_file):
	    """Return the text of every page of *pdf_file* joined into one string."""
	    reader = PdfReader(pdf_file)
	    # `or ""` keeps the join safe if a page yields no text (for example a
	    # scanned image page).
	    return "".join(page.extract_text() or "" for page in reader.pages)


	st.title("Book Genre Classifier")
	st.subheader("PDF Text Extractor")

	# Upload PDF
	uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

	# Defined up front so the "Classify" handler can run (and warn) even when
	# nothing has been uploaded yet.
	all_text = ""
	if uploaded_file:
	    all_text = _extract_pdf_text(uploaded_file)

	    # Display extracted text
	    st.subheader("Extracted Text")
	    st.text_area("PDF Content", all_text, height=300)

	if st.button("Classify"):
	    if not all_text.strip():
	        st.warning("Upload a PDF with extractable text before classifying.")
	    else:
	        try:
	            tokenizer, model = _load_classifier(MODEL_PATH)
	        except (OSError, ValueError) as err:
	            # Report the problem in the UI instead of crashing the app.
	            st.error(f"Could not load model '{MODEL_PATH}': {err}")
	        else:
	            with st.spinner("Classifying..."):
	                inputs = tokenizer(
	                    all_text,
	                    return_tensors="pt",
	                    truncation=True,
	                    padding=True,
	                )
	                # Inference only: no gradients are needed.
	                with torch.no_grad():
	                    outputs = model(**inputs)
	                    scores = torch.softmax(outputs.logits, dim=1).numpy()

	            # Display results
	            st.subheader("Predicted Genres:")
	            # zip() stops at the shorter sequence, so a model with fewer
	            # outputs than `genre_labels` cannot raise IndexError.
	            for label, score in zip(genre_labels, scores[0]):
	                st.write(f"{label}: {score:.2f}")