"""Multi-agent research assistant Streamlit app.

Pipeline: upload a PDF/TXT -> extract text -> summarize (BART) ->
critique the summary (T5) -> refine the summary using the critique.
"""

import streamlit as st
from PyPDF2 import PdfReader
import textract
from transformers import pipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import random


def multicolor_line():
    """Return an HTML snippet rendering a horizontal multi-color divider.

    NOTE(review): the original f-string payload was corrupted; reconstructed
    as a CSS linear-gradient <hr> built from the color palette.
    """
    colors = ["#FF5733", "#33FF57", "#3357FF", "#FF33A1", "#FFC300"]
    gradient = ", ".join(colors)
    return (
        f'<hr style="height:4px;border:none;margin:10px 0;'
        f'background:linear-gradient(to right, {gradient});">'
    )


@st.cache_resource
def load_summarization_model():
    """Load and cache the BART summarization pipeline (heavy; load once)."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@st.cache_resource
def load_critique_model():
    """Load and cache the T5 text2text pipeline used for critique generation."""
    return pipeline("text2text-generation", model="t5-base")


summarizer = load_summarization_model()
critique_generator = load_critique_model()


def extract_text_from_pdf(pdf_file="/content/A_Validation_of_Six_Wearable_Devices_for_Estimatin.pdf"):
    """Extract text from a PDF (path or file-like object).

    NOTE(review): the default path is Colab-specific and kept only for
    backward compatibility; callers should always pass a file explicitly.

    Returns:
        str: concatenated text of every page (empty string for pages
        where PyPDF2 cannot extract text).
    """
    pdf_reader = PdfReader(pdf_file)
    # extract_text() can return None for image-only pages; coerce to "".
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def extract_text_from_file(txt_file):
    """Extract text from a plain-text file.

    Accepts either a filesystem path or a file-like object (e.g. a
    Streamlit UploadedFile, which has no path on disk — opening it by
    name would fail, which was the original bug).
    """
    if hasattr(txt_file, "read"):
        data = txt_file.read()
        return data.decode("utf-8") if isinstance(data, bytes) else data
    with open(txt_file, "r", encoding="utf-8") as file:
        return file.read()


def extract_text_from_scanned_pdf(pdf_file):
    """Extract text from scanned PDFs / other formats via textract (OCR path)."""
    return textract.process(pdf_file).decode("utf-8")


def summarize_text(text):
    """Summarize *text* with the BART pipeline.

    Truncates input to 1024 words (model context budget) and rejects
    empty or too-short (<50 words) input.

    Raises:
        ValueError: if the input is empty or too short to summarize.
    """
    max_len = 1024  # max input length (in words) fed to the summarizer
    min_len = 50    # minimum word count required for a meaningful summary
    if not text.strip():
        raise ValueError("Input text is empty, unable to summarize.")
    words = text.split()
    if len(words) > max_len:
        text = " ".join(words[:max_len])
    if len(words) < min_len:
        raise ValueError("Input text is too short for summarization.")
    summary = summarizer(text, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']


def generate_critique(summary):
    """Generate a critique of *summary* using the T5 text2text pipeline."""
    critique_input = f"Critique: {summary}"
    critique = critique_generator(critique_input)
    return critique[0]['generated_text']


def refine_summary(summary, critique):
    """Refine *summary* using *critique* feedback via the summarizer."""
    refinement_input = (
        f"Summary: {summary}\n\nCritique: {critique}\n\n"
        f"Refine this into a cohesive and polished summary:"
    )
    refined_output = summarizer(
        refinement_input, max_length=300, min_length=100, do_sample=False
    )
    return refined_output[0]['summary_text']


# LangChain integration: Hugging Face Hub as the LLM backend.
# NOTE(review): HuggingFaceHub requires HUGGINGFACEHUB_API_TOKEN in the
# environment — confirm it is set before using the chain.
hf_llm = HuggingFaceHub(
    repo_id="facebook/bart-large-cnn",
    model_kwargs={"temperature": 0.5},
)

prompt_template = PromptTemplate(
    input_variables=["text"],
    template="Summarize the following text:\n{text}",
)


def create_summarization_chain():
    """Build the LangChain summarization chain (hf_llm + prompt_template)."""
    return LLMChain(llm=hf_llm, prompt=prompt_template)


def main():
    """Streamlit entry point: upload -> extract -> summarize -> critique -> refine."""
    st.title("Multi-Agent Research Assistant for Refining Academic Content")
    st.write("Upload a PDF or Text file to start the process.")

    uploaded_file = st.file_uploader("Choose a PDF or Text file", type=["pdf", "txt"])
    if uploaded_file is None:
        return

    # --- Extraction -------------------------------------------------------
    file_extension = uploaded_file.name.split('.')[-1].lower()
    if file_extension == 'pdf':
        st.write("Extracting text from PDF...")
        text = extract_text_from_pdf(uploaded_file)
    elif file_extension == 'txt':
        st.write("Extracting text from Text file...")
        text = extract_text_from_file(uploaded_file)
    else:
        st.error("Unsupported file type. Please upload a PDF or a Text file.")
        return

    if text.strip() == "":
        st.error("No text could be extracted from the file.")
        return

    show_text = st.checkbox("Show extracted text")
    if show_text:
        st.text_area("Extracted Text", text, height=200, max_chars=2000,
                     key="extracted_text", label_visibility="hidden")

    st.markdown(multicolor_line(), unsafe_allow_html=True)

    # --- Summarization ----------------------------------------------------
    st.write("Summarizing the content...")
    try:
        summary = summarize_text(text)
        st.write("Summary:")
        st.text_area("Summary", summary, height=200, max_chars=2000,
                     key="summary", label_visibility="hidden")
    except Exception as e:
        st.error(f"Error generating summary:\n\n{e}")
        return

    st.markdown(multicolor_line(), unsafe_allow_html=True)

    # --- Critique ---------------------------------------------------------
    st.write("Generating critique...")
    try:
        critique = generate_critique(summary)
        st.write("Critique:")
        st.text_area("Critique", critique, height=200, max_chars=2000,
                     key="critique", label_visibility="hidden")
    except Exception as e:
        st.error(f"Error generating critique:\n\n{e}")
        return

    st.markdown(multicolor_line(), unsafe_allow_html=True)

    # --- Refinement -------------------------------------------------------
    st.write("Refining the summary...")
    try:
        refined_summary = refine_summary(summary, critique)
        st.write("Refined Summary:")
        st.text_area("Refined Summary", refined_summary, height=200,
                     max_chars=2000, key="refined_summary",
                     label_visibility="hidden")
    except Exception as e:
        st.error(f"Error refining summary:\n\n{e}")
        return

    st.markdown(multicolor_line(), unsafe_allow_html=True)


if __name__ == "__main__":
    main()