Spaces:

Prat0
/

ClarifyAI

Runtime error

App Files Community

Prat0 committed on Jul 28, 2024

Commit

6389da4

verified ·

1 Parent(s): ae92059

Update pages/Report_Writer.py

Browse files

Files changed (1) hide show

pages/Report_Writer.py +35 -82

pages/Report_Writer.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import os
 import streamlit as st
 from llama_index.core import Settings
@@ -7,75 +6,28 @@ from llama_index.embeddings.gemini import GeminiEmbedding
 from llama_index.llms.gemini import Gemini
 from llama_index.core import DocumentSummaryIndex
 import google.generativeai as genai
-import os
 import PyPDF2
 import streamlit_analytics2 as streamlit_analytics
 from llama_index.embeddings.fastembed import FastEmbedEmbedding
 # Set up Google API key
 # Configure Google Gemini
-#Settings.embed_model = GeminiEmbedding(api_key=os.getenv("GOOGLE_API_KEY"), model_name="models/embedding-001")
 Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
 Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
 llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
 # Load and index the input data
 def load_data(document_text):
-    document = [Document(text=doc_text) for doc_text in document_text]
-    #index = VectorStoreIndex.from_documents([document])
-    index = DocumentSummaryIndex.from_documents(document)
     return index
-# Default report format template
-DEFAULT_REPORT_FORMAT = """
-    Title Page
-        Includes the report title, author's name, and date.
-    Abstract
-        A concise summary of the report, covering the background, objectives, methodology, key findings, and conclusions.
-    Table of Contents
-        Lists sections and subsections with corresponding page numbers for easy navigation.
-    Introduction
-        Provides background information, defines the scope of the report, and states the objectives.
-    Literature Review
-        Reviews relevant literature and previous research related to the report topic.
-    Methodology/Approach
-        Details the methods used to gather data or conduct experiments, including design and analytical techniques.
-    Results and Discussion
-        Presents findings in a clear format, often using tables, figures, and charts, followed by a discussion interpreting these results.
-    Conclusions
-        Summarizes the main findings and their implications, often linking back to the report's objectives.
-    Recommendations
-        Suggests actions based on the findings, highlighting potential future work or improvements.
-    References
-        Lists all sources cited in the report, adhering to a specific referencing style.
-    Appendices
-        Contains supplementary material that supports the main text, such as raw data, detailed calculations, or additional figures.
-"""
 # Generate report
 def generate_report(index, report_format, additional_info):
     query_engine = index.as_query_engine()
@@ -84,30 +36,30 @@ def generate_report(index, report_format, additional_info):
         report_format = DEFAULT_REPORT_FORMAT
         st.info("Using default report format.")
-    response = query_engine.query(f"""
-    You are a professional report writer. Your task is to create a comprehensive report based on the entire document provided.
-    First, thoroughly analyze and summarize the entire document. Then, use the input text to create a well-structured report following the format below:
-    Report Format:
-    {report_format}
-    Additional Information:
-    {additional_info}
-    Even if the input is shallow, generate a report
-    Guidelines:
-    1. Ensure you comprehend and summarize the entire document before starting the report.
-    2. The report should be comprehensive, covering all major points from the document.
-    3. Adapt the provided format as necessary to best fit the content and context of the document.
-    4. Incorporate any additional information provided into the relevant sections of the report.
-    5. Use clear, professional language throughout the report.
-    6. Provide specific examples or data from the document to support your analysis and conclusions.
-    7. If the document contains technical information, explain it in a way that's accessible to a general audience.
-    Generate a thorough, well-structured report that captures the essence of the entire document.
-    """)
-    return response.response
 # Streamlit app
 def main():
@@ -115,8 +67,7 @@ def main():
     st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")
     with streamlit_analytics.track():
-    # File uploader
         uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])
         # Report format input
@@ -140,9 +91,11 @@ def main():
                 st.write("Analyzing document and generating report...")
                 # Load data and generate report
-                doc_list = document_text.split(".")
-                index = load_data(doc_list)
-                report = generate_report(index, report_format, additional_info)
                 st.write("## Generated Report")
                 st.write(report)

 import os
 import streamlit as st
 from llama_index.core import Settings
 from llama_index.llms.gemini import Gemini
 from llama_index.core import DocumentSummaryIndex
 import google.generativeai as genai
 import PyPDF2
 import streamlit_analytics2 as streamlit_analytics
 from llama_index.embeddings.fastembed import FastEmbedEmbedding
+from llama_index.core.node_parser import TokenTextSplitter
 # Set up Google API key
 # Configure Google Gemini
 Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
 Settings.llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.8, model_name="models/gemini-pro")
 llm = Gemini(api_key=os.getenv("GOOGLE_API_KEY"), temperature=0.1, model_name="models/gemini-pro")
 # Load and index the input data
 def load_data(document_text):
+    # Use a text splitter to break the document into smaller chunks
+    text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=200)
+    texts = text_splitter.split_text(document_text)
+    documents = [Document(text=t) for t in texts]
+    index = DocumentSummaryIndex.from_documents(documents)
     return index
 # Generate report
 def generate_report(index, report_format, additional_info):
     query_engine = index.as_query_engine()
         report_format = DEFAULT_REPORT_FORMAT
         st.info("Using default report format.")
+    # Break down the report generation into smaller queries
+    sections = [
+        "Title and Abstract",
+        "Introduction and Literature Review",
+        "Methodology and Results",
+        "Discussion and Conclusion",
+        "Recommendations and References"
+    ]
+    full_report = ""
+    for section in sections:
+        response = query_engine.query(f"""
+        Generate the {section} section of the report based on the provided document.
+        Use the following format guidelines:
+        {report_format}
+        Additional Information:
+        {additional_info}
+        Focus on creating a comprehensive and well-structured section.
+        """)
+        full_report += response.response + "\n\n"
+    return full_report
 # Streamlit app
 def main():
     st.write("Upload your document and our AI will generate a comprehensive report based on its contents!")
     with streamlit_analytics.track():
+        # File uploader
         uploaded_file = st.file_uploader("Choose a file (PDF or TXT)", type=["txt", "pdf"])
         # Report format input
                 st.write("Analyzing document and generating report...")
                 # Load data and generate report
+                with st.spinner("Indexing document..."):
+                    index = load_data(document_text)
+                with st.spinner("Generating report..."):
+                    report = generate_report(index, report_format, additional_info)
                 st.write("## Generated Report")
                 st.write(report)