Spaces:
Sleeping
Sleeping
File size: 3,667 Bytes
c0b040c 9f71231 c0b040c ec9702c c0b040c ec9702c c0b040c ec9702c c0b040c ec9702c c0b040c 9f71231 f4d2744 c0b040c f4d2744 c0b040c f4d2744 9f71231 c0b040c 9f71231 f4d2744 c0b040c f4d2744 9f71231 f4d2744 c0b040c 9f71231 c0b040c f4d2744 c0b040c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
from dotenv import load_dotenv
load_dotenv()
import os
from tempfile import NamedTemporaryFile
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.chains.summarize import load_summarize_chain
from transformers import pipeline
import torch
import base64
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Single source of truth for the checkpoint so the tokenizer and the model
# can never drift apart.
_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"

# Both objects are created once at import time and reused by every
# summarization call in this module.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
base_model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
#file loader and processing
def file_preprocessing(file):
    """Load a PDF and return its text as a single string.

    The document is loaded with ``PyPDFLoader``, re-split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=300``,
    ``chunk_overlap=50``) and the resulting chunk texts are concatenated in
    order, with no separator between them.

    Args:
        file: Path of the PDF to read, passed straight to ``PyPDFLoader``.

    Returns:
        str: The concatenated ``page_content`` of every chunk, or ``""`` when
        the splitter produces no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = text_splitter.split_documents(pages)
    # NOTE(review): chunks are produced with a 50-character overlap and then
    # glued back together, so overlapping text may appear twice in the
    # result - confirm this is intended.
    # A single join replaces the per-chunk `+=` (and the debug print that
    # dumped every chunk to the server console).
    return "".join(chunk.page_content for chunk in chunks)
#lm pipeline
def llm_pipleline(filepath):
    """Summarize the PDF stored at ``filepath``.

    Builds a ``summarization`` pipeline around the module-level
    ``base_model`` / ``tokenizer``, extracts the PDF text with
    ``file_preprocessing`` and returns the first generated summary.

    Args:
        filepath: Path of the PDF to summarize.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    length_limits = {"max_length": 500, "min_length": 50}
    summarizer = pipeline(
        "summarization",
        model=base_model,
        tokenizer=tokenizer,
        **length_limits,
    )
    document_text = file_preprocessing(filepath)
    outputs = summarizer(document_text)
    return outputs[0]["summary_text"]
def llm_pipleline1(ans):
    """Summarize a piece of text supplied directly by the caller.

    Builds a ``summarization`` pipeline around the module-level
    ``base_model`` / ``tokenizer`` and returns the first generated summary.

    Args:
        ans: The text to summarize.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    summarizer = pipeline(
        "summarization",
        model=base_model,
        tokenizer=tokenizer,
        min_length=50,
        max_length=500,
    )
    # Concatenating with "" leaves a str unchanged and raises TypeError
    # right here for any non-str input.
    text_to_summarize = "" + ans
    first_output = summarizer(text_to_summarize)[0]
    return first_output["summary_text"]
@st.cache_data
def displayPDF(file):
    """Render the PDF at path ``file`` inline in the Streamlit page.

    The file is read in binary mode, base64-encoded and embedded as a
    ``data:`` URI inside an ``<iframe>`` that is emitted through
    ``st.markdown``. Nothing is returned.

    Args:
        file: Path of the PDF file to display.
    """
    with open(file, "rb") as pdf_handle:
        raw_bytes = pdf_handle.read()

    # base64 yields ASCII bytes; decode them so they can be placed in the HTML.
    encoded_pdf = base64.b64encode(raw_bytes).decode('utf-8')

    iframe_html = f'<iframe src="data:application/pdf;base64,{encoded_pdf}" width="100%" height="600" type="application/pdf"></iframe>'

    # Raw HTML is only rendered when unsafe_allow_html is enabled.
    st.markdown(iframe_html, unsafe_allow_html=True)
# Streamlit page setup: use the wide layout. This runs at module level,
# before main() issues any rendering call.
st.set_page_config(layout='wide')
def main():
    """Render the Content Summarizer page.

    The page offers two independent flows:

    * PDF flow - the user uploads a PDF and presses "Summarize"; the PDF is
      shown in the left column and its summary in the right column.
    * Text flow - pressing "text" reveals a text area; pressing "Enter"
      shows the entered text on the left and its summary on the right.
    """
    st.title('Content Summarizer')

    # ---- PDF flow -------------------------------------------------------
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
    if uploaded_file is None:
        st.warning("Please upload a valid PDF file.")
    elif st.button("Summarize"):
        col1, col2 = st.columns(2)

        # The loader and the viewer both need a real path on disk, so the
        # upload is spilled to a temp file. delete=False keeps it alive after
        # the `with` so it can be reopened by name; it is removed below.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # getvalue() returns the whole buffer regardless of the current
            # read position of the uploaded file object.
            temp_file.write(uploaded_file.getvalue())
            temp_filepath = temp_file.name

        try:
            with col1:
                st.info("Uploaded PDF File")
                displayPDF(temp_filepath)
            with col2:
                st.info("Summarization is below")
                summary = llm_pipleline(temp_filepath)
                st.success(summary)
        finally:
            # Do not leave one orphaned temp file behind per click.
            os.remove(temp_filepath)

    # ---- Text flow ------------------------------------------------------
    # A button is only True on the rerun triggered by its own click, so a
    # button nested inside another button's branch can never fire. Remember
    # that "text" was pressed in session state instead.
    if st.button("text"):
        st.session_state["show_text_input"] = True

    if st.session_state.get("show_text_input", False):
        # A widget replaces the blocking stdin read, which cannot work in a
        # browser-driven app.
        ans = st.text_area("enter your content")
        if st.button("Enter"):
            if not ans.strip():
                st.warning("Please enter some text to summarize.")
            else:
                col1, col2 = st.columns(2)
                with col1:
                    st.info("what you have entered")
                    # Show the text on the page rather than printing it to
                    # the server console.
                    st.write(ans)
                with col2:
                    st.info("Summarization is below")
                    summary1 = llm_pipleline1(ans)
                    st.success(summary1)
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()