# Requirements: pip install streamlit transformers torch PyPDF2
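# Streamlit app: upload a PDF, extract its text with PyPDF2, and predict the
# book's genre with a fine-tuned Hugging Face sequence-classification model.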
import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("your_huggingface_model_path")
model = AutoModelForSequenceClassification.from_pretrained("your_huggingface_model_path")
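# "your_huggingface_model_path" is a placeholder; replace it with the repo id of
# the fine-tuned genre-classification checkpoint on the Hugging Face Hub.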
# Define genre labels
genre_labels = ["mystery", "sci-fi", "fantasy", "romance", "thriller", "horror", "drama", "comedy",
                "historical fiction", "adventure", "action", "young adult", "classic", "biography",
                "non-fiction", "self-help", "children's literature", "poetry", "crime", "dystopian"]
st.title("Book Genre Classifier")
# Text input
#file = st.file_uploader("Upload the pdf file")
st.subheader("PDF Text Extractor")
# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file:
    # Extract text from the uploaded PDF
    reader = PdfReader(uploaded_file)
    all_text = ""
    for page in reader.pages:
        # extract_text() can return None for image-only pages
        all_text += page.extract_text() or ""

    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("PDF Content", all_text, height=300)
    #book_text = st.text_area("Enter the book's text or summary:", "")

    if st.button("Classify"):
        with st.spinner("Classifying..."):
            inputs = tokenizer(all_text, return_tensors="pt", truncation=True, padding=True)
            outputs = model(**inputs)
            # softmax yields one probability distribution over the genre labels
            scores = torch.softmax(outputs.logits, dim=1).detach().numpy()

        # Display results
        st.subheader("Predicted Genres:")
        for i, label in enumerate(genre_labels):
            st.write(f"{label}: {scores[0][i]:.2f}")
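# To try the app locally (assuming this file is saved as app.py):
#   streamlit run app.py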