prasad6145 committed on
Commit
afeacc2
·
verified ·
1 Parent(s): 25192c1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -0
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ import textract
4
+ from transformers import pipeline
5
+ from langchain.chains import LLMChain
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain.llms import HuggingFaceHub
8
+ import random
9
+
10
+ # Function to create a multi-color line
11
def multicolor_line():
    """Return an HTML ``<hr>`` separator with a randomly chosen border colour.

    The colour is picked from a fixed five-entry palette on each call, so
    consecutive separators may differ.
    """
    palette = ("#FF5733", "#33FF57", "#3357FF", "#FF33A1", "#FFC300")
    chosen = random.choice(palette)
    return '<hr style="border: 1px solid ' + chosen + ';">'
14
+
15
+ # Initialize the Hugging Face model for summarization
16
@st.cache_resource
def load_summarization_model():
    """Build and return the ``facebook/bart-large-cnn`` summarization pipeline.

    Decorated with ``st.cache_resource`` so the pipeline object can be reused
    rather than rebuilt each time the script body runs.
    """
    summarization_pipeline = pipeline(
        task="summarization",
        model="facebook/bart-large-cnn",
    )
    return summarization_pipeline
19
+
20
+ # Initialize the Hugging Face model for critique generation (using T5)
21
@st.cache_resource
def load_critique_model():
    """Build and return the ``t5-base`` text2text-generation pipeline.

    Decorated with ``st.cache_resource`` so the pipeline object can be reused
    rather than rebuilt each time the script body runs.
    """
    critique_pipeline = pipeline(
        task="text2text-generation",
        model="t5-base",
    )
    return critique_pipeline
24
+
25
+ # Module-level pipeline handles used by summarize_text, refine_summary and generate_critique.
+ # Both loader functions are wrapped in st.cache_resource, so these calls run at import time.
+ summarizer = load_summarization_model()
26
+ critique_generator = load_critique_model()
27
+
28
+ # Function to extract text from PDFs
29
def extract_text_from_pdf(pdf_file="/content/A_Validation_of_Six_Wearable_Devices_for_Estimatin.pdf"):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts (a path or a binary
            file-like object such as an uploaded file). The default path is
            kept only for backward compatibility; callers should pass the
            file explicitly.

    Returns:
        The text of all pages joined in page order. Pages for which no text
        can be extracted contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # ``or ""`` guards against pages that yield None/empty text (e.g. pages
    # with no text layer), which would otherwise break string concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
35
+
36
+ # Function to extract text from text files
37
def extract_text_from_file(txt_file):
    """Return the text content of a plain-text file.

    Args:
        txt_file: Either a filesystem path, or an already-open file-like
            object exposing ``read()`` (for example the object returned by
            ``st.file_uploader``, which cannot be passed to ``open()``).

    Returns:
        The file content as ``str``. Binary content is decoded as UTF-8;
        undecodable bytes are replaced rather than raising.
    """
    if hasattr(txt_file, "read"):
        # File-like object (e.g. an upload): read it directly instead of
        # handing it to open(), which only accepts paths/descriptors.
        data = txt_file.read()
        if isinstance(data, (bytes, bytearray)):
            return bytes(data).decode("utf-8", errors="replace")
        return data

    # Explicit encoding so the result does not depend on the host locale.
    with open(txt_file, "r", encoding="utf-8", errors="replace") as file:
        return file.read()
41
+
42
+ # Function to extract text from scanned PDFs or other formats
43
def extract_text_from_scanned_pdf(pdf_file):
    """Extract text via ``textract`` and return it as a string.

    Passes ``pdf_file`` to ``textract.process`` and decodes the returned
    bytes as UTF-8.
    """
    raw_bytes = textract.process(pdf_file)
    return raw_bytes.decode("utf-8")
46
+
47
+ # Function to generate the summary using Hugging Face (BART model)
48
def summarize_text(text):
    """Summarize ``text`` with the module-level BART ``summarizer`` pipeline.

    Args:
        text: The document text to summarize.

    Returns:
        The generated summary string.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only, or contains fewer
            than 50 whitespace-separated words.
    """
    max_len = 1024  # Word cap applied before the text reaches the model
    min_len = 50    # Minimum word count of the input and minimum summary length

    if not text or not text.strip():
        raise ValueError("Input text is empty, unable to summarize.")

    words = text.split()  # split once; reused for both length checks

    if len(words) < min_len:
        raise ValueError("Input text is too short for summarization.")

    if len(words) > max_len:
        text = " ".join(words[:max_len])

    # The cap above counts words, but the model limit is in tokens, and a
    # word can map to several tokens. truncation=True lets the tokenizer
    # clip the input to the model's maximum instead of failing on overflow.
    summary = summarizer(
        text,
        max_length=200,
        min_length=min_len,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
63
+
64
+ # Function to generate critique using the Hugging Face T5 model
65
def generate_critique(summary):
    """Run the module-level T5 ``critique_generator`` on a summary.

    The summary is prefixed with ``"Critique: "`` before being sent to the
    pipeline; the ``generated_text`` of the first result is returned.
    """
    prompt = "Critique: {}".format(summary)
    outputs = critique_generator(prompt)
    first_result = outputs[0]
    return first_result['generated_text']
69
+
70
+ # Function to refine the summary using critique feedback
71
def refine_summary(summary, critique):
    """Produce a refined summary from an initial summary and its critique.

    Both inputs are combined into one prompt and passed back through the
    module-level ``summarizer`` pipeline (100-300 length bounds, no sampling).
    The ``summary_text`` of the first result is returned.
    """
    prompt_sections = [
        f"Summary: {summary}",
        f"Critique: {critique}",
        "Refine this into a cohesive and polished summary:",
    ]
    prompt = "\n\n".join(prompt_sections)
    results = summarizer(prompt, max_length=300, min_length=100, do_sample=False)
    return results[0]['summary_text']
75
+
76
+ # LangChain Integration: Set up Hugging Face as the LLM for LangChain
77
# LangChain LLM wrapper around the hosted facebook/bart-large-cnn model.
# NOTE(review): this is constructed at import time and presumably needs a
# Hugging Face API token available in the environment - confirm the
# deployment provides one.
hf_llm = HuggingFaceHub(
    repo_id="facebook/bart-large-cnn",
    model_kwargs={"temperature": 0.5},
)

# Prompt with a single ``text`` placeholder asking the model for a summary.
prompt_template = PromptTemplate(
    template="Summarize the following text:\n{text}",
    input_variables=["text"],
)
84
+
85
+ # Define the LangChain chain for summarization
86
def create_summarization_chain():
    """Return a new ``LLMChain`` that pairs ``hf_llm`` with ``prompt_template``."""
    return LLMChain(prompt=prompt_template, llm=hf_llm)
89
+
90
+ # Update the Streamlit workflow
91
def _show_stage(status, label, key, error_prefix, produce):
    """Run one pipeline stage and render its result in the Streamlit page.

    Args:
        status: Progress message written before the stage runs.
        label: Text-area label; ``"<label>:"`` is written as the heading.
        key: Streamlit widget key for the text area.
        error_prefix: Leading text of the error shown if the stage fails.
        produce: Zero-argument callable that computes the stage output.

    Returns:
        ``(True, result)`` on success, ``(False, None)`` if anything raised
        (the error has already been shown to the user).
    """
    st.write(status)
    try:
        result = produce()
        st.write(f"{label}:")
        st.text_area(label, result, height=200, max_chars=2000, key=key, label_visibility="hidden")
    except Exception as e:
        st.error(f"{error_prefix}:\n\n{e}")
        return False, None

    # Coloured separator after each successfully rendered stage.
    st.markdown(multicolor_line(), unsafe_allow_html=True)
    return True, result


def main():
    """Streamlit entry point: upload a file, then summarize, critique and refine.

    The flow stops early, after showing an error message, when the file type
    is unsupported, text extraction fails or yields nothing, or any of the
    three model stages raises.
    """
    st.title("Multi-Agent Research Assistant for Refining Academic Content")
    st.write("Upload a PDF or Text file to start the process.")

    uploaded_file = st.file_uploader("Choose a PDF or Text file", type=["pdf", "txt"])
    if uploaded_file is None:
        return

    file_extension = uploaded_file.name.split('.')[-1].lower()

    # Extraction is guarded like the model stages below, so an unreadable or
    # corrupt upload produces an error message instead of a raw traceback.
    try:
        if file_extension == 'pdf':
            st.write("Extracting text from PDF...")
            text = extract_text_from_pdf(uploaded_file)
        elif file_extension == 'txt':
            st.write("Extracting text from Text file...")
            text = extract_text_from_file(uploaded_file)
        else:
            st.error("Unsupported file type. Please upload a PDF or a Text file.")
            return
    except Exception as e:
        st.error(f"Error extracting text:\n\n{e}")
        return

    if not text.strip():
        st.error("No text could be extracted from the file.")
        return

    # Optionally show the raw extracted text.
    if st.checkbox("Show extracted text"):
        st.text_area("Extracted Text", text, height=200, max_chars=2000, key="extracted_text", label_visibility="hidden")

    st.markdown(multicolor_line(), unsafe_allow_html=True)

    # Stage 1: summary of the extracted text (BART).
    ok, summary = _show_stage(
        "Summarizing the content...",
        "Summary",
        "summary",
        "Error generating summary",
        lambda: summarize_text(text),
    )
    if not ok:
        return

    # Stage 2: critique of that summary (T5).
    ok, critique = _show_stage(
        "Generating critique...",
        "Critique",
        "critique",
        "Error generating critique",
        lambda: generate_critique(summary),
    )
    if not ok:
        return

    # Stage 3: refined summary built from the summary and its critique.
    _show_stage(
        "Refining the summary...",
        "Refined Summary",
        "refined_summary",
        "Error refining summary",
        lambda: refine_summary(summary, critique),
    )
165
+
166
+ # Script entry point: build the Streamlit page only when this file is run directly.
+ if __name__ == "__main__":
167
+ main()