umangchaudhry committed on
Commit
318c146
·
verified ·
1 Parent(s): d094df2

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +82 -27
  2. summary_tool_system_prompt.md +1 -0
app.py CHANGED
@@ -1,5 +1,5 @@
1
-
2
  import os
 
3
  import streamlit as st
4
  from io import BytesIO
5
  from tempfile import NamedTemporaryFile
@@ -8,11 +8,11 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
8
  from langchain_core.prompts import ChatPromptTemplate
9
  from langchain_openai import ChatOpenAI
10
  from langchain_community.document_loaders import PyPDFLoader
11
- from langchain.vectorstores import FAISS
12
  from langchain_openai import OpenAIEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
- from reportlab.lib.pagesizes import letter
15
- from reportlab.pdfgen import canvas
16
 
17
  # Function to process PDF, run Q&A, and return results
18
  def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
@@ -70,7 +70,11 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
70
  for question in questions:
71
  result = rag_chain.invoke({"input": question})
72
  answer = result["answer"]
73
- qa_text = f"### Question: {question}\n**Answer:** {answer}\n"
 
 
 
 
74
  qa_results.append(qa_text)
75
  # Update the placeholder with each new Q&A pair
76
  display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
@@ -80,24 +84,62 @@ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_pla
80
 
81
  return qa_results
82
 
83
- # Function to create a PDF using reportlab
84
def create_pdf(content):
    """Render plain text into a multi-page PDF using reportlab.

    Each line of ``content`` (split on newlines) is written in 10pt Helvetica
    starting at x=40, y=750 on a letter-sized page. When the text cursor drops
    below the bottom margin, the current page is flushed and a new one is
    started, so long content is no longer drawn off the bottom of a single
    page.

    Args:
        content: The text to render; lines are separated by ``"\n"``.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PDF data.
    """
    left_margin = 40
    top_y = 750
    bottom_y = 40

    buffer = BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=letter)

    def start_page_text():
        # showPage() resets the canvas graphics state, so the font has to be
        # re-applied before every new text object is created.
        pdf.setFont("Helvetica", 10)
        return pdf.beginText(left_margin, top_y)

    text = start_page_text()

    for line in content.split("\n"):
        if text.getY() < bottom_y:
            # Page is full: emit what we have and continue on a fresh page.
            pdf.drawText(text)
            pdf.showPage()
            text = start_page_text()
        text.textLine(line)

    pdf.drawText(text)
    pdf.showPage()
    pdf.save()

    # Rewind so callers can read the PDF from the beginning.
    buffer.seek(0)
    return buffer
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  # Streamlit app layout
103
  st.title("Climate Policy Summary Tool")
@@ -122,12 +164,25 @@ if st.button("Generate") and api_key and uploaded_file:
122
  results = process_pdf(api_key, uploaded_file, questions_file_path, prompt_file_path, display_placeholder)
123
 
124
  # Allow the user to download the results as a Markdown file
125
- markdown_output = "\n".join(results)
126
- st.download_button("Download as Markdown", markdown_output, file_name="results.md")
127
-
128
- # Create a PDF file for the user to download
129
- pdf_output = create_pdf(markdown_output)
130
- st.download_button("Download as PDF", data=pdf_output, file_name="results.pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  except Exception as e:
133
  st.error(f"An error occurred: {e}")
 
 
1
  import os
2
+ import re
3
  import streamlit as st
4
  from io import BytesIO
5
  from tempfile import NamedTemporaryFile
 
8
  from langchain_core.prompts import ChatPromptTemplate
9
  from langchain_openai import ChatOpenAI
10
  from langchain_community.document_loaders import PyPDFLoader
11
+ from langchain_community.vectorstores import FAISS
12
  from langchain_openai import OpenAIEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
+ from xhtml2pdf import pisa
15
+ from markdown import markdown
16
 
17
  # Function to process PDF, run Q&A, and return results
18
  def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
 
70
  for question in questions:
71
  result = rag_chain.invoke({"input": question})
72
  answer = result["answer"]
73
+
74
+ # Clean up the answer
75
+ answer = clean_answer(answer)
76
+
77
+ qa_text = f"### Question: {question}\n**Answer:**\n\n{answer}\n"
78
  qa_results.append(qa_text)
79
  # Update the placeholder with each new Q&A pair
80
  display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
 
84
 
85
  return qa_results
86
 
87
+ # Function to clean up the AI's answer
88
def clean_answer(answer):
    """Strip formatting residue that the model sometimes prepends to an answer.

    Handles two shapes of residue:

    * an answer wrapped in a fenced block tagged ``markdown``/``md``
      (```` ```markdown ... ``` ````) -- the fence is removed so the content
      renders as Markdown instead of as a code block;
    * a bare leading ``markdown`` tag (any letter case), optionally followed
      by a colon.

    The bare tag is only removed when it is a standalone token (followed by
    whitespace, a colon, or the end of the text), so words that merely start
    with "markdown" are left intact.

    Args:
        answer: Raw answer text; ``None`` or empty input yields ``""``.

    Returns:
        The cleaned answer with surrounding whitespace removed.
    """
    if not answer:
        return ''

    answer = answer.strip()

    # Unwrap a ```markdown ... ``` fence around the whole answer.
    if answer.startswith('```'):
        tag, _, fenced_body = answer[3:].partition('\n')
        if tag.strip().lower() in ('markdown', 'md'):
            answer = fenced_body.strip()
            if answer.endswith('```'):
                answer = answer[:-3].strip()

    # Remove a stray leading 'markdown' tag, but only as a whole token.
    prefix = 'markdown'
    if answer.lower().startswith(prefix):
        remainder = answer[len(prefix):]
        if not remainder or remainder[0].isspace() or remainder[0] == ':':
            answer = remainder.lstrip(':').strip()

    return answer
97
+
98
+ # Function to convert markdown text to PDF with table support
99
def md_to_pdf(md_text):
    """Convert Markdown text (tables included) into PDF bytes.

    The Markdown is rendered to HTML5 with the ``tables`` extension, embedded
    in a small HTML document that carries table-oriented CSS, and passed to
    ``pisa.CreatePDF``.

    Args:
        md_text: Markdown source to render.

    Returns:
        The generated PDF as ``bytes``, or ``None`` if pisa reports an error.
    """
    body_html = markdown(md_text, output_format='html5', extensions=['tables'])

    # Stylesheet giving tables visible borders, padding and a shaded header.
    style_block = '''
<style>
body {
font-family: Arial, sans-serif;
font-size: 12pt;
}
table {
border-collapse: collapse;
width: 100%;
}
th, td {
border: 1px solid black;
padding: 8px;
text-align: left;
}
th {
background-color: #f2f2f2;
}
</style>
'''

    # Full document: stylesheet in <head>, rendered Markdown in <body>.
    document = f'''
<html>
<head>
{style_block}
</head>
<body>
{body_html}
</body>
</html>
'''

    # Write the PDF into memory; a truthy `err` on the status means failure.
    sink = BytesIO()
    outcome = pisa.CreatePDF(document, dest=sink)
    return None if outcome.err else sink.getvalue()
143
 
144
  # Streamlit app layout
145
  st.title("Climate Policy Summary Tool")
 
164
  results = process_pdf(api_key, uploaded_file, questions_file_path, prompt_file_path, display_placeholder)
165
 
166
  # Allow the user to download the results as a Markdown file
167
+ markdown_text = "\n".join(results)
168
+ st.download_button(
169
+ label="Download Results as Markdown",
170
+ data=markdown_text,
171
+ file_name="qa_results.md",
172
+ mime="text/markdown"
173
+ )
174
+
175
+ # Convert markdown to PDF
176
+ pdf_bytes = md_to_pdf(markdown_text)
177
+ if pdf_bytes:
178
+ st.download_button(
179
+ label="Download Results as PDF",
180
+ data=pdf_bytes,
181
+ file_name="qa_results.pdf",
182
+ mime="application/pdf"
183
+ )
184
+ else:
185
+ st.error("Error generating PDF")
186
 
187
  except Exception as e:
188
  st.error(f"An error occurred: {e}")
summary_tool_system_prompt.md CHANGED
@@ -6,6 +6,7 @@ Your task is to analyze the plan and answer a consistent set of questions based
6
  - **Include direct quotations formatted with citations** in the format *(Chapter name, Section header, Page number etc. if applicable)*.
7
  - **Format all responses using Markdown syntax.**
8
  - **Responses should be well formatted. Use bold, italics, and bullet points where appropriate.**
 
9
 
10
  ### **Definitions**
11
 
 
6
  - **Include direct quotations formatted with citations** in the format *(Chapter name, Section header, Page number etc. if applicable)*.
7
  - **Format all responses using Markdown syntax.**
8
  - **Responses should be well formatted. Use bold, italics, and bullet points where appropriate.**
9
+ - **Respond directly to the questions asked. Do not include any other text or comments.**
10
 
11
  ### **Definitions**
12