Spaces:

prasad6145
/

multi_agent_research_assisant

Sleeping

App Files Files Community

prasad6145 committed on Dec 28, 2024

Commit

afeacc2

verified ·

1 Parent(s): 25192c1

Create app.py

Browse files

Files changed (1) hide show

app.py +167 -0

app.py ADDED Viewed

	@@ -0,0 +1,167 @@

+import streamlit as st
+from PyPDF2 import PdfReader
+import textract
+from transformers import pipeline
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain.llms import HuggingFaceHub
+import random
+# Function to create a multi-color line
+def multicolor_line():
+    """Return an HTML ``<hr>`` rule with a randomly chosen border color.
+
+    The result is a raw HTML snippet; the color is drawn from a fixed
+    five-entry palette on every call.
+    """
+    palette = ("#FF5733", "#33FF57", "#3357FF", "#FF33A1", "#FFC300")
+    border_color = random.choice(palette)
+    return f'<hr style="border: 1px solid {border_color};">'
+# Initialize the Hugging Face model for summarization
+@st.cache_resource
+def load_summarization_model():
+    """Build the ``summarization`` pipeline for ``facebook/bart-large-cnn``.
+
+    Wrapped in ``st.cache_resource`` so the pipeline object is cached by
+    Streamlit instead of being rebuilt on every call.
+    """
+    bart_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
+    return bart_pipeline
+# Initialize the Hugging Face model for critique generation (using T5)
+@st.cache_resource
+def load_critique_model():
+    """Build the ``text2text-generation`` pipeline for ``t5-base``.
+
+    Wrapped in ``st.cache_resource`` so the pipeline object is cached by
+    Streamlit instead of being rebuilt on every call.
+    """
+    t5_pipeline = pipeline("text2text-generation", model="t5-base")
+    return t5_pipeline
+# Module-level pipeline handles used by summarize_text(), refine_summary() and
+# generate_critique(). They are created when the module is imported; both
+# loaders are decorated with st.cache_resource.
+summarizer = load_summarization_model()
+critique_generator = load_critique_model()
+# Function to extract text from PDFs
+def extract_text_from_pdf(pdf_file="/content/A_Validation_of_Six_Wearable_Devices_for_Estimatin.pdf"):
+    """Return the concatenated text of every page of a PDF.
+
+    Args:
+        pdf_file: Whatever ``PdfReader`` accepts; ``main`` passes the
+            uploaded file object.
+            NOTE(review): the default is a notebook-specific path that is
+            kept only so existing callers keep working; always pass the
+            file explicitly.
+
+    Returns:
+        The page texts joined in page order. A page that yields no text
+        contributes an empty string.
+    """
+    pdf_reader = PdfReader(pdf_file)
+    # Coerce a falsy extraction result (e.g. an image-only page) to "" so one
+    # such page cannot abort the whole document, and join once instead of
+    # growing a string page by page.
+    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
+# Function to extract text from text files
+def extract_text_from_file(txt_file):
+    """Return the text content of a plain-text file.
+
+    Args:
+        txt_file: Either a filesystem path or an already-open file-like
+            object (anything exposing ``read()``), such as the upload object
+            that ``main`` passes in.
+
+    Returns:
+        The content as ``str``. Bytes are decoded as UTF-8, with
+        undecodable bytes replaced instead of raising.
+    """
+    # An uploaded file is an in-memory file object, not a path, so handing it
+    # to ``open()`` raises; read it directly instead.
+    if hasattr(txt_file, "read"):
+        data = txt_file.read()
+        if isinstance(data, (bytes, bytearray)):
+            return bytes(data).decode("utf-8", errors="replace")
+        return data
+    # Explicit encoding keeps path reads consistent with the bytes branch
+    # rather than depending on the platform default.
+    with open(txt_file, "r", encoding="utf-8", errors="replace") as file:
+        return file.read()
+# Function to extract text from scanned PDFs or other formats
+def extract_text_from_scanned_pdf(pdf_file):
+    """Extract text from *pdf_file* via ``textract`` and return it as ``str``.
+
+    The bytes returned by ``textract.process`` are decoded as UTF-8.
+    """
+    raw_bytes = textract.process(pdf_file)
+    return raw_bytes.decode("utf-8")
+# Function to generate the summary using Hugging Face (BART model)
+def summarize_text(text):
+    """Summarize *text* with the module-level ``summarizer`` pipeline.
+
+    Args:
+        text: The document text to summarize.
+
+    Returns:
+        The ``summary_text`` of the first result returned by the pipeline.
+
+    Raises:
+        ValueError: If *text* is blank, or has fewer than 50
+            whitespace-separated words after the length cap is applied.
+    """
+    max_len = 1024  # Word cap applied to the input before it reaches the model
+    min_len = 50    # Minimum input word count, also used as the summary min_length
+    if not text.strip():
+        raise ValueError("Input text is empty, unable to summarize.")
+    # Split once and reuse the word list for both the cap and the length check.
+    words = text.split()
+    if len(words) > max_len:
+        words = words[:max_len]
+        text = " ".join(words)
+    if len(words) < min_len:
+        raise ValueError("Input text is too short for summarization.")
+    # The cap above counts words, while the model's input limit is counted in
+    # tokens, so capped text can still be too long; truncation=True lets the
+    # tokenizer clip it instead of the model call failing.
+    summary = summarizer(text, max_length=200, min_length=min_len, do_sample=False, truncation=True)
+    return summary[0]['summary_text']
+# Function to generate critique using the Hugging Face T5 model
+def generate_critique(summary):
+    """Ask the module-level ``critique_generator`` pipeline to critique *summary*.
+
+    The summary is prefixed with ``"Critique: "`` and the ``generated_text``
+    of the first pipeline result is returned.
+    """
+    prompt = f"Critique: {summary}"
+    outputs = critique_generator(prompt)
+    first_output = outputs[0]
+    return first_output['generated_text']
+# Function to refine the summary using critique feedback
+def refine_summary(summary, critique):
+    """Produce a refined summary from *summary* and its *critique*.
+
+    Both texts are combined into one prompt and passed through the
+    module-level ``summarizer`` pipeline; the ``summary_text`` of the first
+    result is returned.
+    """
+    generation_options = {"max_length": 300, "min_length": 100, "do_sample": False}
+    prompt = f"Summary: {summary}\n\nCritique: {critique}\n\nRefine this into a cohesive and polished summary:"
+    results = summarizer(prompt, **generation_options)
+    return results[0]['summary_text']
+# LangChain integration: expose facebook/bart-large-cnn as a LangChain LLM.
+# NOTE(review): this object is constructed when the module is imported, and
+# HuggingFaceHub presumably needs a Hugging Face API token to be configured
+# -- confirm for the deployment environment. main() does not reference it.
+hf_llm = HuggingFaceHub(repo_id="facebook/bart-large-cnn", model_kwargs={"temperature": 0.5} )
+# Prompt with a single {text} placeholder; consumed by create_summarization_chain().
+prompt_template = PromptTemplate(
+    input_variables=["text"],
+    template="Summarize the following text:\n{text}"
+)
+# Define the LangChain chain for summarization
+def create_summarization_chain():
+    """Return a new ``LLMChain`` pairing ``hf_llm`` with ``prompt_template``."""
+    return LLMChain(llm=hf_llm, prompt=prompt_template)
+# Streamlit entry point: upload a file, extract its text, then summarize, critique and refine
+def _show_text_block(label, value, key):
+    """Render *value* in a fixed-height text area whose label is hidden."""
+    st.text_area(label, value, height=200, max_chars=2000, key=key, label_visibility="hidden")
+def _run_stage(status, heading, label, key, error_prefix, step, *args):
+    """Run one pipeline step, display its output, and report failures in the UI.
+
+    Returns the step's result, or ``None`` after calling ``st.error`` when the
+    step (or rendering its output) raised. Every step used here returns a
+    string, so ``None`` unambiguously means "failed".
+    """
+    st.write(status)
+    try:
+        result = step(*args)
+        st.write(heading)
+        _show_text_block(label, result, key)
+    except Exception as e:  # UI boundary: show the failure instead of a traceback
+        st.error(f"{error_prefix}:\n\n{e}")
+        return None
+    return result
+def main():
+    """Streamlit page: upload a PDF/text file, then summarize, critique and refine it.
+
+    The stages run in order (extract, summarize, critique, refine). Any stage
+    that fails reports the error with ``st.error`` and stops the run.
+    """
+    st.title("Multi-Agent Research Assistant for Refining Academic Content")
+    st.write("Upload a PDF or Text file to start the process.")
+    uploaded_file = st.file_uploader("Choose a PDF or Text file", type=["pdf", "txt"])
+    if uploaded_file is None:
+        return
+    # Pick the extractor from the file extension of the upload.
+    file_extension = uploaded_file.name.split('.')[-1].lower()
+    # Extraction can fail on malformed uploads; report that like the later
+    # stages do rather than letting the exception escape.
+    try:
+        if file_extension == 'pdf':
+            st.write("Extracting text from PDF...")
+            text = extract_text_from_pdf(uploaded_file)
+        elif file_extension == 'txt':
+            st.write("Extracting text from Text file...")
+            text = extract_text_from_file(uploaded_file)
+        else:
+            st.error("Unsupported file type. Please upload a PDF or a Text file.")
+            return
+    except Exception as e:
+        st.error(f"Error extracting text:\n\n{e}")
+        return
+    if not text.strip():
+        st.error("No text could be extracted from the file.")
+        return
+    # Show extracted text if checkbox is checked
+    if st.checkbox("Show extracted text"):
+        _show_text_block("Extracted Text", text, "extracted_text")
+    # A colored rule separates each stage from the next.
+    st.markdown(multicolor_line(), unsafe_allow_html=True)
+    # Summarize text using Hugging Face model (BART)
+    summary = _run_stage(
+        "Summarizing the content...", "Summary:", "Summary", "summary",
+        "Error generating summary", summarize_text, text,
+    )
+    if summary is None:
+        return
+    st.markdown(multicolor_line(), unsafe_allow_html=True)
+    # Generate critique based on summary using Hugging Face model (T5)
+    critique = _run_stage(
+        "Generating critique...", "Critique:", "Critique", "critique",
+        "Error generating critique", generate_critique, summary,
+    )
+    if critique is None:
+        return
+    st.markdown(multicolor_line(), unsafe_allow_html=True)
+    # Refine the summary using critique feedback
+    refined_summary = _run_stage(
+        "Refining the summary...", "Refined Summary:", "Refined Summary", "refined_summary",
+        "Error refining summary", refine_summary, summary, critique,
+    )
+    if refined_summary is None:
+        return
+    st.markdown(multicolor_line(), unsafe_allow_html=True)
+# Run the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()