Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PyPDF2 import PdfReader
|
3 |
+
import textract
|
4 |
+
from transformers import pipeline
|
5 |
+
from langchain.chains import LLMChain
|
6 |
+
from langchain.prompts import PromptTemplate
|
7 |
+
from langchain.llms import HuggingFaceHub
|
8 |
+
import random
|
9 |
+
|
10 |
+
# Function to create a multi-color line
|
11 |
+
def multicolor_line():
    """Return an HTML ``<hr>`` rule with a randomly chosen border color.

    The color is drawn from a fixed five-entry palette on every call, so
    consecutive separators on the page may differ in color.
    """
    palette = ("#FF5733", "#33FF57", "#3357FF", "#FF33A1", "#FFC300")
    picked = random.choice(palette)
    return f'<hr style="border: 1px solid {picked};">'
|
14 |
+
|
15 |
+
# Initialize the Hugging Face model for summarization
|
16 |
+
@st.cache_resource
def load_summarization_model():
    """Build the Hugging Face summarization pipeline (BART large CNN).

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    pipeline instead of rebuilding it on every script rerun.
    """
    model_name = "facebook/bart-large-cnn"
    return pipeline("summarization", model=model_name)
|
19 |
+
|
20 |
+
# Initialize the Hugging Face model for critique generation (using T5)
|
21 |
+
@st.cache_resource
def load_critique_model():
    """Build the Hugging Face text2text-generation pipeline (T5 base).

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    pipeline instead of rebuilding it on every script rerun.
    """
    model_name = "t5-base"
    return pipeline("text2text-generation", model=model_name)
|
24 |
+
|
25 |
+
# Module-level pipeline instances, created when the script is loaded.
# `summarizer` is read by summarize_text() and refine_summary();
# `critique_generator` is read by generate_critique().
summarizer = load_summarization_model()
critique_generator = load_critique_model()
|
27 |
+
|
28 |
+
# Function to extract text from PDFs
|
29 |
+
def extract_text_from_pdf(pdf_file="/content/A_Validation_of_Six_Wearable_Devices_for_Estimatin.pdf"):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Source handed straight to ``PdfReader``; ``main()`` passes
            the Streamlit upload object. The default path is retained only
            so existing zero-argument callers keep working.

    Returns:
        The page texts joined with no separator. Pages that yield no text
        contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # Coerce a falsy extraction result (e.g. a page with no text layer) to ""
    # so concatenation cannot fail, and join once instead of growing a string
    # page by page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
35 |
+
|
36 |
+
# Function to extract text from text files
|
37 |
+
def extract_text_from_file(txt_file):
    """Return the full text content of a plain-text source.

    Args:
        txt_file: Either a filesystem path, or an already-open file-like
            object exposing ``read()`` (such as the upload object that
            ``main()`` passes in).

    Returns:
        The content as ``str``. Bytes are decoded as UTF-8, with undecodable
        sequences replaced rather than raising.
    """
    # File-like objects cannot be given to open(); read them directly.
    if hasattr(txt_file, "read"):
        data = txt_file.read()
        if isinstance(data, (bytes, bytearray)):
            return bytes(data).decode("utf-8", errors="replace")
        return data

    # Explicit encoding so the result does not depend on the platform locale.
    with open(txt_file, "r", encoding="utf-8", errors="replace") as file:
        return file.read()
|
41 |
+
|
42 |
+
# Function to extract text from scanned PDFs or other formats
|
43 |
+
def extract_text_from_scanned_pdf(pdf_file):
    """Extract text from ``pdf_file`` with textract and decode it as UTF-8.

    Args:
        pdf_file: Passed unchanged to ``textract.process``.

    Returns:
        The processed output decoded to ``str``.
    """
    raw_bytes = textract.process(pdf_file)
    return raw_bytes.decode("utf-8")
|
46 |
+
|
47 |
+
# Function to generate the summary using Hugging Face (BART model)
|
48 |
+
def summarize_text(text):
    """Summarize ``text`` with the module-level BART ``summarizer`` pipeline.

    The input is capped at ``max_len`` whitespace-separated words before it
    is sent to the model, and inputs shorter than ``min_len`` words are
    rejected.

    Args:
        text: The text to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only, or has fewer than
            ``min_len`` words.
    """
    max_len = 1024  # maximum number of input words forwarded to the summarizer
    min_len = 50  # minimum input word count, also used as the minimum summary length

    # Split once and reuse the word list for every length check.
    words = text.split()
    if not words:
        raise ValueError("Input text is empty, unable to summarize.")

    if len(words) > max_len:
        words = words[:max_len]
        text = " ".join(words)

    if len(words) < min_len:
        raise ValueError("Input text is too short for summarization.")

    # A word cap does not bound the token count, so also let the tokenizer
    # truncate to the model's maximum input length.
    summary = summarizer(
        text,
        max_length=200,
        min_length=min_len,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
|
63 |
+
|
64 |
+
# Function to generate critique using the Hugging Face T5 model
|
65 |
+
def generate_critique(summary):
    """Generate a critique of ``summary`` with the module-level T5 pipeline.

    Args:
        summary: Text to critique; it is embedded after a ``Critique:`` prefix.

    Returns:
        The ``generated_text`` of the first pipeline result.
    """
    prompt = f"Critique: {summary}"
    outputs = critique_generator(prompt)
    first_result = outputs[0]
    return first_result['generated_text']
|
69 |
+
|
70 |
+
# Function to refine the summary using critique feedback
|
71 |
+
def refine_summary(summary, critique):
    """Produce a refined summary from an earlier summary and its critique.

    Both texts are combined into one prompt and passed through the
    module-level ``summarizer`` pipeline again.

    Args:
        summary: The first-pass summary.
        critique: The critique generated for that summary.

    Returns:
        The ``summary_text`` of the first pipeline result.
    """
    prompt = f"Summary: {summary}\n\nCritique: {critique}\n\nRefine this into a cohesive and polished summary:"
    outputs = summarizer(prompt, max_length=300, min_length=100, do_sample=False)
    first_result = outputs[0]
    return first_result['summary_text']
|
75 |
+
|
76 |
+
# LangChain Integration: Set up Hugging Face as the LLM for LangChain
|
77 |
+
hf_llm = HuggingFaceHub(
    repo_id="facebook/bart-large-cnn",
    model_kwargs={"temperature": 0.5},
)

# Create a PromptTemplate for summarization
prompt_template = PromptTemplate(
    template="Summarize the following text:\n{text}",
    input_variables=["text"],
)
|
84 |
+
|
85 |
+
# Define the LangChain chain for summarization
|
86 |
+
def create_summarization_chain():
    """Return a new ``LLMChain`` wiring ``hf_llm`` to ``prompt_template``."""
    return LLMChain(llm=hf_llm, prompt=prompt_template)
|
89 |
+
|
90 |
+
# Update the Streamlit workflow
|
91 |
+
def main():
    """Streamlit entry point: upload a file, then summarize, critique and refine.

    After text extraction the page runs three stages in order (summary,
    critique, refined summary). Each stage shows its result in a text area
    followed by a colored separator. An exception in any stage is reported
    with ``st.error`` and ends the run.
    """
    st.title("Multi-Agent Research Assistant for Refining Academic Content")
    st.write("Upload a PDF or Text file to start the process.")

    uploaded_file = st.file_uploader("Choose a PDF or Text file", type=["pdf", "txt"])

    # Nothing to do until the user has picked a file.
    if uploaded_file is None:
        return

    # Choose the extractor from the lower-cased file name suffix.
    suffix = uploaded_file.name.rsplit('.', 1)[-1].lower()
    if suffix == 'pdf':
        st.write("Extracting text from PDF...")
        text = extract_text_from_pdf(uploaded_file)
    elif suffix == 'txt':
        st.write("Extracting text from Text file...")
        # NOTE(review): this hands over the upload object, not a path;
        # confirm extract_text_from_file accepts file-like input.
        text = extract_text_from_file(uploaded_file)
    else:
        st.error("Unsupported file type. Please upload a PDF or a Text file.")
        return

    if not text.strip():
        st.error("No text could be extracted from the file.")
        return

    # Optionally display the extracted text.
    if st.checkbox("Show extracted text"):
        st.text_area("Extracted Text", text, height=200, max_chars=2000, key="extracted_text", label_visibility="hidden")

    # Separator after text extraction.
    st.markdown(multicolor_line(), unsafe_allow_html=True)

    # Stage 1: summarize the extracted text (BART).
    st.write("Summarizing the content...")
    try:
        summary = summarize_text(text)
        st.write("Summary:")
        st.text_area("Summary", summary, height=200, max_chars=2000, key="summary", label_visibility="hidden")
    except Exception as err:
        st.error(f"Error generating summary:\n\n{err}")
        return

    # Separator after summarization.
    st.markdown(multicolor_line(), unsafe_allow_html=True)

    # Stage 2: critique the summary (T5).
    st.write("Generating critique...")
    try:
        critique = generate_critique(summary)
        st.write("Critique:")
        st.text_area("Critique", critique, height=200, max_chars=2000, key="critique", label_visibility="hidden")
    except Exception as err:
        st.error(f"Error generating critique:\n\n{err}")
        return

    # Separator after critique generation.
    st.markdown(multicolor_line(), unsafe_allow_html=True)

    # Stage 3: refine the summary using the critique.
    st.write("Refining the summary...")
    try:
        refined_summary = refine_summary(summary, critique)
        st.write("Refined Summary:")
        st.text_area("Refined Summary", refined_summary, height=200, max_chars=2000, key="refined_summary", label_visibility="hidden")
    except Exception as err:
        st.error(f"Error refining summary:\n\n{err}")
        return

    # Separator after refinement.
    st.markdown(multicolor_line(), unsafe_allow_html=True)
|
165 |
+
|
166 |
+
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|