import streamlit as st
from PyPDF2 import PdfReader
import textract
from transformers import pipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import random
# Function to create a multi-color line
def multicolor_line():
    """Return an HTML snippet for a thin horizontal multi-color divider.

    The snippet is a single ``<div>`` whose background is a left-to-right
    CSS gradient through the palette below. It is meant to be rendered with
    ``st.markdown(..., unsafe_allow_html=True)``.

    Returns:
        str: The HTML markup for the divider.
    """
    colors = ["#FF5733", "#33FF57", "#3357FF", "#FF33A1", "#FFC300"]
    gradient_stops = ", ".join(colors)
    return (
        '<div style="height: 4px; border: none; margin: 12px 0; '
        f'background: linear-gradient(to right, {gradient_stops});"></div>'
    )
# Summarization model loader (BART)
@st.cache_resource
def load_summarization_model():
    """Create the Hugging Face summarization pipeline.

    The pipeline uses the ``facebook/bart-large-cnn`` checkpoint. The
    function is wrapped in ``st.cache_resource`` so Streamlit can reuse the
    returned pipeline object instead of building it again.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "summarization" task.
    """
    task_name = "summarization"
    checkpoint = "facebook/bart-large-cnn"
    return pipeline(task_name, model=checkpoint)
# Critique model loader (T5)
@st.cache_resource
def load_critique_model():
    """Create the Hugging Face text-to-text pipeline used for critiques.

    The pipeline uses the ``t5-base`` checkpoint. The function is wrapped in
    ``st.cache_resource`` so Streamlit can reuse the returned pipeline object
    instead of building it again.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "text2text-generation" task.
    """
    task_name = "text2text-generation"
    checkpoint = "t5-base"
    return pipeline(task_name, model=checkpoint)
# Module-level pipeline handles, created when this script is executed.
# summarize_text() and refine_summary() call `summarizer`;
# generate_critique() calls `critique_generator`.
summarizer = load_summarization_model()
critique_generator = load_critique_model()
# Function to extract text from PDFs
def extract_text_from_pdf(pdf_file="/content/A_Validation_of_Six_Wearable_Devices_for_Estimatin.pdf"):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source; ``main()``
            passes the object returned by ``st.file_uploader``. The default
            is a fixed path kept only for backward compatibility.

    Returns:
        str: The page texts joined in page order with no separator. Pages
        that yield no text contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # `or ""` keeps the join safe if a page produces a falsy result
    # (for example None on an image-only page).
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to extract text from text files
def extract_text_from_file(txt_file):
    """Return the text content of a plain-text source.

    Args:
        txt_file: Either a filesystem path, or an already-open file-like
            object exposing ``read()``. ``main()`` passes the object returned
            by ``st.file_uploader``, which is not a path, so calling
            ``open()`` on it would fail.

    Returns:
        str: The full contents. Bytes are decoded as UTF-8, with
        undecodable bytes replaced rather than raising.
    """
    if hasattr(txt_file, "read"):
        data = txt_file.read()
        # Binary streams hand back bytes; text streams already return str.
        if isinstance(data, (bytes, bytearray)):
            return data.decode("utf-8", errors="replace")
        return data
    # Explicit encoding so the result does not depend on the platform locale.
    with open(txt_file, "r", encoding="utf-8", errors="replace") as file:
        return file.read()
# Text extraction for scanned PDFs or other formats, via textract
def extract_text_from_scanned_pdf(pdf_file):
    """Extract text from a document using ``textract``.

    Args:
        pdf_file: The argument forwarded unchanged to ``textract.process``.

    Returns:
        str: The output of ``textract.process`` decoded as UTF-8.
    """
    raw_output = textract.process(pdf_file)
    return raw_output.decode("utf-8")
# Function to generate the summary using Hugging Face (BART model)
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    The input is first capped at 1024 whitespace-separated words. The
    pipeline is also called with ``truncation=True``, because the word cap
    alone does not bound the number of model tokens: one word can map to
    several tokens.

    Args:
        text (str): The text to summarize.

    Returns:
        str: The ``summary_text`` of the first pipeline result.

    Raises:
        ValueError: If *text* is empty or whitespace-only, or if it has
            fewer than 50 words after capping.
    """
    max_len = 1024  # Maximum number of input words passed to the summarizer
    min_len = 50  # Minimum number of input words required to summarize
    if not text.strip():
        raise ValueError("Input text is empty, unable to summarize.")
    words = text.split()
    if len(words) > max_len:
        words = words[:max_len]
        text = " ".join(words)
    if len(words) < min_len:
        raise ValueError("Input text is too short for summarization.")
    summary = summarizer(
        text,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
# Critique generation using the Hugging Face T5 pipeline
def generate_critique(summary):
    """Produce a critique of *summary* with ``critique_generator``.

    The summary is prefixed with ``"Critique: "`` and passed to the
    module-level text-to-text pipeline.

    Args:
        summary: The summary text to critique.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    outputs = critique_generator(f"Critique: {summary}")
    first_output = outputs[0]
    return first_output['generated_text']
# Summary refinement driven by the critique
def refine_summary(summary, critique):
    """Re-summarize a prompt that combines the summary and its critique.

    Args:
        summary: The first-pass summary.
        critique: The critique generated for that summary.

    Returns:
        The ``summary_text`` field of the first result returned by the
        module-level ``summarizer`` pipeline.
    """
    prompt = f"Summary: {summary}\n\nCritique: {critique}\n\nRefine this into a cohesive and polished summary:"
    generation_options = {"max_length": 300, "min_length": 100, "do_sample": False}
    results = summarizer(prompt, **generation_options)
    return results[0]['summary_text']
# LangChain integration: a HuggingFaceHub LLM pointing at the
# "facebook/bart-large-cnn" repo, constructed with temperature 0.5.
# NOTE(review): in the visible source, hf_llm is referenced only by
# create_summarization_chain(), which main() never calls. Confirm whether this
# LangChain path is still needed. Presumably constructing the Hub client
# requires an API token to be configured; verify before deploying.
hf_llm = HuggingFaceHub(repo_id="facebook/bart-large-cnn", model_kwargs={"temperature": 0.5} )
# Prompt for the LangChain summarization chain. It declares one input
# variable, "text", which fills the {text} placeholder in the template string.
prompt_template = PromptTemplate(
input_variables=["text"],
template="Summarize the following text:\n{text}"
)
# LangChain chain factory for summarization
def create_summarization_chain():
    """Build an ``LLMChain`` from the module-level LLM and prompt.

    Returns:
        An ``LLMChain`` constructed with ``hf_llm`` as the LLM and
        ``prompt_template`` as the prompt.
    """
    return LLMChain(llm=hf_llm, prompt=prompt_template)
# Streamlit workflow: upload -> extract -> summarize -> critique -> refine
def _run_stage(progress_message, label, key, failure_message, step, *args):
    """Run one pipeline stage and render its result.

    Writes *progress_message*, calls ``step(*args)``, shows the result under
    a "<label>:" heading in a text area, then draws the divider line.

    Args:
        progress_message: Status text written before the stage runs.
        label: Heading and text-area label for the result.
        key: Streamlit widget key for the text area.
        failure_message: Prefix of the error shown if the stage raises.
        step: Callable that performs the stage.
        *args: Positional arguments forwarded to *step*.

    Returns:
        The value returned by *step*, or ``None`` if an exception was raised
        (in which case the error has already been shown with ``st.error``).
    """
    st.write(progress_message)
    try:
        result = step(*args)
        # Rendering stays inside the try so a display failure is reported the
        # same way as a model failure.
        st.write(f"{label}:")
        st.text_area(label, result, height=200, max_chars=2000, key=key, label_visibility="hidden")
    except Exception as e:  # UI boundary: surface any failure to the user
        st.error(f"{failure_message}:\n\n{e}")
        return None
    st.markdown(multicolor_line(), unsafe_allow_html=True)
    return result


def main():
    """Run the Streamlit app.

    Flow: upload a PDF or text file, extract its text, summarize it,
    generate a critique of the summary, then refine the summary using the
    critique. Each step shows its output, and the app stops at the first
    step that fails after displaying an error message.
    """
    st.title("Multi-Agent Research Assistant for Refining Academic Content")
    st.write("Upload a PDF or Text file to start the process.")
    uploaded_file = st.file_uploader("Choose a PDF or Text file", type=["pdf", "txt"])
    if uploaded_file is None:
        return

    # Pick the extractor from the file extension.
    file_extension = uploaded_file.name.split('.')[-1].lower()
    extractors = {
        "pdf": ("Extracting text from PDF...", extract_text_from_pdf),
        "txt": ("Extracting text from Text file...", extract_text_from_file),
    }
    if file_extension not in extractors:
        st.error("Unsupported file type. Please upload a PDF or a Text file.")
        return
    progress_message, extractor = extractors[file_extension]
    st.write(progress_message)
    try:
        text = extractor(uploaded_file)
    except Exception as e:  # UI boundary: a bad file should not crash the app
        st.error(f"Error extracting text:\n\n{e}")
        return
    if not text.strip():
        st.error("No text could be extracted from the file.")
        return

    # Optionally show the extracted text.
    if st.checkbox("Show extracted text"):
        st.text_area("Extracted Text", text, height=200, max_chars=2000, key="extracted_text", label_visibility="hidden")
    st.markdown(multicolor_line(), unsafe_allow_html=True)

    summary = _run_stage(
        "Summarizing the content...", "Summary", "summary",
        "Error generating summary", summarize_text, text,
    )
    if summary is None:
        return

    critique = _run_stage(
        "Generating critique...", "Critique", "critique",
        "Error generating critique", generate_critique, summary,
    )
    if critique is None:
        return

    _run_stage(
        "Refining the summary...", "Refined Summary", "refined_summary",
        "Error refining summary", refine_summary, summary, critique,
    )


if __name__ == "__main__":
    main()