Prat0 committed on
Commit
6389da4
·
verified ·
1 Parent(s): ae92059

Update pages/Report_Writer.py

Browse files
Files changed (1) hide show
  1. pages/Report_Writer.py +35 -82
pages/Report_Writer.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  import streamlit as st
4
  from llama_index.core import Settings
@@ -7,75 +6,28 @@ from llama_index.embeddings.gemini import GeminiEmbedding
7
  from llama_index.llms.gemini import Gemini
8
  from llama_index.core import DocumentSummaryIndex
9
  import google.generativeai as genai
10
- import os
11
  import PyPDF2
12
  import streamlit_analytics2 as streamlit_analytics
13
  from llama_index.embeddings.fastembed import FastEmbedEmbedding
14
-
15
  # Set up Google API key
16
 
17
  # Configure Google Gemini
18
- #Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
19
  Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
20
  Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
21
  llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
22
 
23
  # Load and index the input data
24
  def load_data(document_text):
25
- document = [Document(text=doc_text) for doc_text in document_text]
 
 
26
 
27
- #index = VectorStoreIndex.from_documents([document])
28
- index = DocumentSummaryIndex.from_documents(document)
 
29
  return index
30
 
31
- # Default report format template
32
- DEFAULT_REPORT_FORMAT = """
33
- Title Page
34
-
35
- Includes the report title, author's name, and date.
36
-
37
- Abstract
38
-
39
- A concise summary of the report, covering the background, objectives, methodology, key findings, and conclusions.
40
-
41
- Table of Contents
42
-
43
- Lists sections and subsections with corresponding page numbers for easy navigation.
44
-
45
- Introduction
46
-
47
- Provides background information, defines the scope of the report, and states the objectives.
48
-
49
- Literature Review
50
-
51
- Reviews relevant literature and previous research related to the report topic.
52
-
53
- Methodology/Approach
54
-
55
- Details the methods used to gather data or conduct experiments, including design and analytical techniques.
56
-
57
- Results and Discussion
58
-
59
- Presents findings in a clear format, often using tables, figures, and charts, followed by a discussion interpreting these results.
60
-
61
- Conclusions
62
-
63
- Summarizes the main findings and their implications, often linking back to the report's objectives.
64
-
65
- Recommendations
66
-
67
- Suggests actions based on the findings, highlighting potential future work or improvements.
68
-
69
- References
70
-
71
- Lists all sources cited in the report, adhering to a specific referencing style.
72
-
73
- Appendices
74
-
75
- Contains supplementary material that supports the main text, such as raw data, detailed calculations, or additional figures.
76
-
77
- """
78
-
79
  # Generate report
80
  def generate_report(index, report_format, additional_info):
81
  query_engine = index.as_query_engine()
@@ -84,30 +36,30 @@ def generate_report(index, report_format, additional_info):
84
  report_format = DEFAULT_REPORT_FORMAT
85
  st.info("Using default report format.")
86
 
87
- response = query_engine.query(f"""
88
- You are a professional report writer. Your task is to create a comprehensive report based on the entire document provided.
89
-
90
- First, thoroughly analyze and summarize the entire document. Then, use the input text to create a well-structured report following the format below:
91
-
92
- Report Format:
93
- {report_format}
 
94
 
95
- Additional Information:
96
- {additional_info}
97
-
98
- Even if the input is shallow, generate a report
99
- Guidelines:
100
- 1. Ensure you comprehend and summarize the entire document before starting the report.
101
- 2. The report should be comprehensive, covering all major points from the document.
102
- 3. Adapt the provided format as necessary to best fit the content and context of the document.
103
- 4. Incorporate any additional information provided into the relevant sections of the report.
104
- 5. Use clear, professional language throughout the report.
105
- 6. Provide specific examples or data from the document to support your analysis and conclusions.
106
- 7. If the document contains technical information, explain it in a way that's accessible to a general audience.
 
107
 
108
- Generate a thorough, well-structured report that captures the essence of the entire document.
109
- """)
110
- return response.response
111
 
112
  # Streamlit app
113
  def main():
@@ -115,8 +67,7 @@ def main():
115
  st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")
116
 
117
  with streamlit_analytics.track():
118
-
119
- # File uploader
120
  uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])
121
 
122
  # Report format input
@@ -140,9 +91,11 @@ def main():
140
  st.write("Analyzing document and generating report...")
141
 
142
  # Load data and generate report
143
- doc_list = document_text.split(".")
144
- index = load_data(doc_list)
145
- report = generate_report(index, report_format, additional_info)
 
 
146
 
147
  st.write("## Generated Report")
148
  st.write(report)
 
 
1
  import os
2
  import streamlit as st
3
  from llama_index.core import Settings
 
6
  from llama_index.llms.gemini import Gemini
7
  from llama_index.core import DocumentSummaryIndex
8
  import google.generativeai as genai
 
9
  import PyPDF2
10
  import streamlit_analytics2 as streamlit_analytics
11
  from llama_index.embeddings.fastembed import FastEmbedEmbedding
12
+ from llama_index.core.node_parser import TokenTextSplitter
13
  # Set up Google API key
14
 
15
  # Configure Google Gemini
 
16
  Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
17
  Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
18
  llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
19
 
20
  # Load and index the input data
21
  def load_data(document_text):
22
+ # Use a text splitter to break the document into smaller chunks
23
+ text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=200)
24
+ texts = text_splitter.split_text(document_text)
25
 
26
+ documents = [Document(text=t) for t in texts]
27
+
28
+ index = DocumentSummaryIndex.from_documents(documents)
29
  return index
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Generate report
32
  def generate_report(index, report_format, additional_info):
33
  query_engine = index.as_query_engine()
 
36
  report_format = DEFAULT_REPORT_FORMAT
37
  st.info("Using default report format.")
38
 
39
+ # Break down the report generation into smaller queries
40
+ sections = [
41
+ "Title and Abstract",
42
+ "Introduction and Literature Review",
43
+ "Methodology and Results",
44
+ "Discussion and Conclusion",
45
+ "Recommendations and References"
46
+ ]
47
 
48
+ full_report = ""
49
+ for section in sections:
50
+ response = query_engine.query(f"""
51
+ Generate the {section} section of the report based on the provided document.
52
+ Use the following format guidelines:
53
+ {report_format}
54
+
55
+ Additional Information:
56
+ {additional_info}
57
+
58
+ Focus on creating a comprehensive and well-structured section.
59
+ """)
60
+ full_report += response.response + "\n\n"
61
 
62
+ return full_report
 
 
63
 
64
  # Streamlit app
65
  def main():
 
67
  st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")
68
 
69
  with streamlit_analytics.track():
70
+ # File uploader
 
71
  uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])
72
 
73
  # Report format input
 
91
  st.write("Analyzing document and generating report...")
92
 
93
  # Load data and generate report
94
+ with st.spinner("Indexing document..."):
95
+ index = load_data(document_text)
96
+
97
+ with st.spinner("Generating report..."):
98
+ report = generate_report(index, report_format, additional_info)
99
 
100
  st.write("## Generated Report")
101
  st.write(report)