# Spaces: Sleeping  (page-header residue from the hosting site; not part of the script)
"""Streamlit app: upload a PDF, extract its text, and score it against book genres.

Dependencies are installed from a shell, not from inside this script:

    pip install streamlit transformers torch PyPDF2
"""
import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Placeholder checkpoint identifier; replace it with a real model ID or local path.
MODEL_PATH = "your_huggingface_model_path"

# Define genre labels. The order must match the output indices of the model's
# classification head (NOTE(review): confirm against the checkpoint's config).
genre_labels = [
    "mystery", "sci-fi", "fantasy", "romance", "thriller", "horror", "drama", "comedy",
    "historical fiction", "adventure", "action", "young adult", "classic", "biography",
    "non-fiction", "self-help", "children's literature", "poetry", "crime", "dystopian",
]


@st.cache_resource
def load_classifier(model_path):
    """Load the tokenizer and sequence-classification model for *model_path*.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached to avoid reloading the checkpoint on each rerun.

    Args:
        model_path: Identifier or path passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return loaded_tokenizer, loaded_model


# Load the model and tokenizer
tokenizer, model = load_classifier(MODEL_PATH)

st.title("Book Genre Classifier")

# Streamlit app
st.subheader("PDF Text Extractor")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file:
    # Extract text from the uploaded PDF. `or ""` guards against a page that
    # yields no text, and join avoids repeated string concatenation.
    reader = PdfReader(uploaded_file)
    all_text = "".join(page.extract_text() or "" for page in reader.pages)

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("PDF Content", all_text, height=300)

    # The button lives under the upload check so `all_text` always exists
    # by the time it is read.
    if st.button("Classify"):
        if not all_text.strip():
            # E.g. a scanned, image-only PDF: there is nothing meaningful to score.
            st.warning("No extractable text was found in this PDF, so it cannot be classified.")
        else:
            with st.spinner("Classifying..."):
                # truncation=True cuts the input to the tokenizer's maximum length,
                # so only the beginning of a long document is scored.
                inputs = tokenizer(all_text, return_tensors="pt", truncation=True, padding=True)
                # Inference only: skip autograd bookkeeping.
                with torch.no_grad():
                    outputs = model(**inputs)
                    scores = torch.softmax(outputs.logits, dim=1).numpy()

            # Display results
            st.subheader("Predicted Genres:")
            if scores.shape[1] != len(genre_labels):
                st.warning(
                    f"The model returned {scores.shape[1]} scores but {len(genre_labels)} "
                    "genre labels are defined; labels may not line up with the scores."
                )
            # zip stops at the shorter sequence, so a mismatch cannot raise IndexError.
            for label, score in zip(genre_labels, scores[0]):
                st.write(f"{label}: {score:.2f}")