|
import streamlit as st |
|
from PyPDF2 import PdfReader |
|
import textract |
|
from transformers import pipeline |
|
from langchain.chains import LLMChain |
|
from langchain.prompts import PromptTemplate |
|
from langchain.llms import HuggingFaceHub |
|
import random |
|
|
|
|
|
def multicolor_line():
    """Return an HTML ``<hr>`` rule whose border color is picked at random.

    Returns:
        str: Markup intended for ``st.markdown(..., unsafe_allow_html=True)``.
    """
    palette = ["#FF5733", "#33FF57", "#3357FF", "#FF33A1", "#FFC300"]
    picked = random.choice(palette)
    return '<hr style="border: 1px solid ' + picked + ';">'
|
|
|
|
|
@st.cache_resource
def load_summarization_model():
    """Create the ``facebook/bart-large-cnn`` summarization pipeline.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    pipeline instead of rebuilding it on every call.
    """
    summarization_pipeline = pipeline(
        task="summarization",
        model="facebook/bart-large-cnn",
    )
    return summarization_pipeline
|
|
|
|
|
@st.cache_resource
def load_critique_model():
    """Create the ``t5-base`` text2text-generation pipeline used for critiques.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    pipeline instead of rebuilding it on every call.
    """
    critique_pipeline = pipeline(
        task="text2text-generation",
        model="t5-base",
    )
    return critique_pipeline
|
|
|
# Module-level pipeline handles shared by the summarize / critique / refine
# helpers below; both loaders are wrapped in st.cache_resource.
summarizer = load_summarization_model()
critique_generator = load_critique_model()
|
|
|
|
|
def extract_text_from_pdf(pdf_file="/content/A_Validation_of_Six_Wearable_Devices_for_Estimatin.pdf"):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts; this file passes both a
            filesystem path (the default) and an uploaded file object.
            The hard-coded default path is kept only for backward
            compatibility with existing callers.

    Returns:
        str: The page texts joined with no separator. Pages that yield no
        text contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # Build the result with one join instead of repeated `+=`, and use
    # `or ""` so a page whose extraction returns None cannot raise TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
|
|
|
|
def extract_text_from_file(txt_file):
    """Return the full text content of a plain-text file.

    Args:
        txt_file: Either a filesystem path, or an already-open file-like
            object exposing ``read()`` (for example the upload object that
            ``main`` passes in). Passing such an object to ``open()``
            raises ``TypeError``, so it is read directly instead.

    Returns:
        str: The decoded text. Bytes are decoded as UTF-8, with undecodable
        bytes replaced rather than raising.
    """
    if hasattr(txt_file, "read"):
        data = txt_file.read()
        # Binary streams hand back bytes; text streams already give str.
        if isinstance(data, (bytes, bytearray)):
            return bytes(data).decode("utf-8", errors="replace")
        return data

    # Path-like input: decode the same way as the in-memory branch so both
    # entry points agree regardless of the platform's default encoding.
    with open(txt_file, "r", encoding="utf-8", errors="replace") as file:
        return file.read()
|
|
|
|
|
def extract_text_from_scanned_pdf(pdf_file):
    """Extract text from ``pdf_file`` with ``textract`` and decode it as UTF-8.

    Args:
        pdf_file: Value forwarded unchanged to ``textract.process``.

    Returns:
        str: The UTF-8 decoded output of ``textract.process``.
    """
    raw_bytes = textract.process(pdf_file)
    return raw_bytes.decode("utf-8")
|
|
|
|
|
def summarize_text(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    The input is pre-trimmed to its first 1024 whitespace-separated words.
    That cap counts words, not model tokens, so the pipeline is also asked
    to truncate at the tokenizer level to keep long inputs within the
    model's limit.

    Args:
        text: The document text to summarize.

    Returns:
        str: The ``summary_text`` of the first pipeline result.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only, or has fewer than
            50 words after trimming.
    """
    max_len = 1024
    min_len = 50

    # Split once and reuse the word list for every length check.
    words = text.split()
    if not words:
        raise ValueError("Input text is empty, unable to summarize.")

    if len(words) > max_len:
        words = words[:max_len]
        text = " ".join(words)

    if len(words) < min_len:
        raise ValueError("Input text is too short for summarization.")

    summary = summarizer(
        text,
        max_length=200,
        min_length=min_len,
        do_sample=False,
        # The word cap above is only a rough pre-trim; this lets the
        # tokenizer enforce the model's real input limit.
        truncation=True,
    )
    return summary[0]['summary_text']
|
|
|
|
|
def generate_critique(summary):
    """Generate a critique of ``summary`` with the ``critique_generator`` pipeline.

    Args:
        summary: Summary text; it is prefixed with ``"Critique: "`` to form
            the prompt.

    Returns:
        str: The ``generated_text`` of the first pipeline result.
    """
    prompt = f"Critique: {summary}"
    outputs = critique_generator(prompt)
    first_output = outputs[0]
    return first_output['generated_text']
|
|
|
|
|
def refine_summary(summary, critique):
    """Produce a refined summary from a draft summary and its critique.

    The two inputs and a refinement instruction are combined into a single
    prompt, which is passed through the module-level ``summarizer``.

    Args:
        summary: The draft summary text.
        critique: The critique text for that summary.

    Returns:
        str: The ``summary_text`` of the first pipeline result.
    """
    prompt_sections = (
        f"Summary: {summary}",
        f"Critique: {critique}",
        "Refine this into a cohesive and polished summary:",
    )
    prompt = "\n\n".join(prompt_sections)
    results = summarizer(prompt, max_length=300, min_length=100, do_sample=False)
    return results[0]['summary_text']
|
|
|
|
|
# LangChain LLM wrapper consumed by create_summarization_chain().
# NOTE(review): this is constructed at import time; presumably it requires
# Hugging Face Hub credentials to be configured - confirm before deploying.
hf_llm = HuggingFaceHub(
    repo_id="facebook/bart-large-cnn",
    model_kwargs={"temperature": 0.5},
)
|
|
|
|
|
# Prompt with a single ``{text}`` placeholder, used by
# create_summarization_chain().
prompt_template = PromptTemplate(
    template="Summarize the following text:\n{text}",
    input_variables=["text"],
)
|
|
|
|
|
def create_summarization_chain():
    """Return a new ``LLMChain`` built from ``hf_llm`` and ``prompt_template``."""
    return LLMChain(llm=hf_llm, prompt=prompt_template)
|
|
|
|
|
def _run_stage(status, heading, key, error_prefix, producer, *args):
    """Run one pipeline stage and render its result (or its error) in the UI.

    Args:
        status: Progress message written before the stage runs.
        heading: Title of the output; shown as ``"<heading>:"`` and used as
            the (hidden) text-area label.
        key: Streamlit widget key for the output text area.
        error_prefix: Leading text of the error message shown on failure.
        producer: Callable that computes the stage's text.
        *args: Positional arguments forwarded to ``producer``.

    Returns:
        The text returned by ``producer``, or ``None`` if the stage raised;
        in that case the error has already been displayed with ``st.error``.
    """
    st.write(status)
    try:
        result = producer(*args)
        st.write(f"{heading}:")
        st.text_area(heading, result, height=200, max_chars=2000, key=key, label_visibility="hidden")
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        st.error(f"{error_prefix}:\n\n{e}")
        return None
    return result


def main():
    """Streamlit entry point: upload a file, then summarize, critique, refine.

    The uploaded PDF or text file is converted to text, optionally shown,
    and passed through three stages (summary, critique, refined summary).
    Each stage's output is displayed; the first failure is reported with
    ``st.error`` and stops the run.
    """
    st.title("Multi-Agent Research Assistant for Refining Academic Content")
    st.write("Upload a PDF or Text file to start the process.")

    uploaded_file = st.file_uploader("Choose a PDF or Text file", type=["pdf", "txt"])
    if uploaded_file is None:
        return

    file_extension = uploaded_file.name.split('.')[-1].lower()
    if file_extension == 'pdf':
        st.write("Extracting text from PDF...")
        extractor = extract_text_from_pdf
    elif file_extension == 'txt':
        st.write("Extracting text from Text file...")
        extractor = extract_text_from_file
    else:
        st.error("Unsupported file type. Please upload a PDF or a Text file.")
        return

    # Extraction previously ran unguarded, so an unreadable upload crashed
    # the app; report it the same way the later stages report failures.
    try:
        text = extractor(uploaded_file)
    except Exception as e:
        st.error(f"Error extracting text from the file:\n\n{e}")
        return

    if not text.strip():
        st.error("No text could be extracted from the file.")
        return

    if st.checkbox("Show extracted text"):
        st.text_area("Extracted Text", text, height=200, max_chars=2000, key="extracted_text", label_visibility="hidden")

    st.markdown(multicolor_line(), unsafe_allow_html=True)

    summary = _run_stage(
        "Summarizing the content...",
        "Summary",
        "summary",
        "Error generating summary",
        summarize_text,
        text,
    )
    if summary is None:
        return

    st.markdown(multicolor_line(), unsafe_allow_html=True)

    critique = _run_stage(
        "Generating critique...",
        "Critique",
        "critique",
        "Error generating critique",
        generate_critique,
        summary,
    )
    if critique is None:
        return

    st.markdown(multicolor_line(), unsafe_allow_html=True)

    refined_summary = _run_stage(
        "Refining the summary...",
        "Refined Summary",
        "refined_summary",
        "Error refining summary",
        refine_summary,
        summary,
        critique,
    )
    if refined_summary is None:
        return

    st.markdown(multicolor_line(), unsafe_allow_html=True)
|
|
|
# Script entry point: run the app only when this file is executed directly.
if __name__ == "__main__":
    main()