File size: 1,781 Bytes
691ba89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Dependency: install PyPDF2 before running this app (shell: pip install PyPDF2)


import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the model and tokenizer.
# Streamlit re-executes this script on every widget interaction, so the load
# is wrapped in st.cache_resource to reuse a single tokenizer/model pair
# instead of re-reading the weights on each rerun.
MODEL_PATH = "your_huggingface_model_path"


@st.cache_resource
def _load_classifier(model_path):
    """Return the ``(tokenizer, model)`` pair loaded from *model_path*."""
    return (
        AutoTokenizer.from_pretrained(model_path),
        AutoModelForSequenceClassification.from_pretrained(model_path),
    )


tokenizer, model = _load_classifier(MODEL_PATH)

# Genre names shown in the results. Position i in this list is paired with
# the model's i-th output score when predictions are displayed.
genre_labels = [
    "mystery",
    "sci-fi",
    "fantasy",
    "romance",
    "thriller",
    "horror",
    "drama",
    "comedy",
    "historical fiction",
    "adventure",
    "action",
    "young adult",
    "classic",
    "biography",
    "non-fiction",
    "self-help",
    "children's literature",
    "poetry",
    "crime",
    "dystopian",
]

# Render the app's main title.
st.title("Book Genre Classifier")

# Text input
#file = st.file_uploader("Upload the pdf file")


#import streamlit as st
from PyPDF2 import PdfReader

# PDF upload and text extraction section of the app.
st.subheader("PDF Text Extractor")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

# Bind all_text unconditionally so code further down can read it even when
# no file has been uploaded yet.
all_text = ""

if uploaded_file:
    # Extract text from every page of the uploaded PDF.
    reader = PdfReader(uploaded_file)
    # `or ""` guards against a page yielding None instead of a string
    # (NOTE(review): seen with some PyPDF2 releases / image-only pages —
    # harmless when extract_text() always returns str).
    all_text = "".join(page.extract_text() or "" for page in reader.pages)

    # Display the extracted text once, after all pages have been read.
    # Rendering inside the page loop would repeat the widgets per page.
    st.subheader("Extracted Text")
    st.text_area("PDF Content", all_text, height=300)
#book_text = st.text_area("Enter the book's text or summary:", "")

# Run the genre classifier on the extracted PDF text when the button is clicked.
if st.button("Classify"):
    # all_text may not be bound before a PDF is uploaded; the conditional
    # expression only evaluates it when a file is present.
    text_to_classify = all_text if uploaded_file else ""

    if not text_to_classify.strip():
        # Nothing to classify: no upload yet, or the PDF had no extractable text.
        st.warning("Please upload a PDF with extractable text before classifying.")
    else:
        with st.spinner("Classifying..."):
            inputs = tokenizer(
                text_to_classify,
                return_tensors="pt",
                truncation=True,
                padding=True,
            )
            # Inference only: skip autograd bookkeeping for the forward pass.
            with torch.no_grad():
                outputs = model(**inputs)
            # One input text, so take row 0 of the (batch, num_labels) scores.
            scores = torch.softmax(outputs.logits, dim=1)[0].tolist()

            # Display results
            st.subheader("Predicted Genres:")
            # zip stops at the shorter sequence, so a model with fewer outputs
            # than genre_labels does not raise IndexError.
            for label, score in zip(genre_labels, scores):
                st.write(f"{label}: {score:.2f}")