raannakasturi committed on
Commit
b35a32c
·
verified ·
1 Parent(s): 2dfac3d

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +60 -58
  2. generate_markdown.py +64 -95
  3. generate_mindmap.py +59 -3
  4. requirements.txt +4 -0
app.py CHANGED
@@ -1,58 +1,60 @@
1
- import os
2
- import sys
3
- from generate_markdown import load_llm_model, generate_markdown
4
- from generate_mindmap import generate_mindmap_svg
5
- import gradio as gr
6
- import subprocess
7
-
8
-
9
- def generate(file):
10
- summary = "This is a summary of the research paper"
11
- mindmap_markdown = generate_markdown(llm, file)
12
- mindmap_svg = generate_mindmap_svg(mindmap_markdown)
13
- return summary, mindmap_markdown, mindmap_svg
14
-
15
- theme = gr.themes.Soft(
16
- primary_hue="purple",
17
- secondary_hue="cyan",
18
- neutral_hue="slate",
19
- font=[gr.themes.GoogleFont('Syne'), gr.themes.GoogleFont('poppins'), gr.themes.GoogleFont('poppins'), gr.themes.GoogleFont('poppins')],
20
- )
21
-
22
- with gr.Blocks(theme=theme, title="Binary Biology") as app:
23
- file = gr.File(file_count='single', label='Upload Research Paper PDF file')
24
- summary = gr.TextArea(label='Summary', lines=5, interactive=False, show_copy_button=True)
25
- markdown_mindmap = gr.Textbox(label='Mindmap', lines=5, interactive=False, show_copy_button=True)
26
- graphical_mindmap = gr.Image(label='Graphical Mindmap', interactive=False, show_download_button=True)
27
- submit = gr.Button(value='Submit')
28
-
29
- submit.click(generate,
30
- inputs=[file],
31
- outputs=[summary, markdown_mindmap, graphical_mindmap],
32
- scroll_to_output=True,
33
- show_progress=True,
34
- queue=True,
35
- )
36
-
37
- try:
38
- env = os.environ.copy()
39
- env["CMAKE_ARGS"] = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
40
- cmd = ["pip", "install", "llama-cpp-python"]
41
- subprocess.run(cmd, env=env)
42
- except:
43
- cmd = ["pip", "install", "llama-cpp-python"]
44
- subprocess.run(cmd)
45
- try:
46
- try:
47
- subprocess.run(['apt', 'install', '-y', 'graphviz'])
48
- print("Graphviz installed successfully")
49
- except:
50
- subprocess.run(['sudo', 'apt', 'install', '-y', 'graphviz'])
51
- print("Graphviz installed successfully using sudo")
52
- except:
53
- print("Graphviz installation failed")
54
- sys.exit(1)
55
- print("Graphviz loaded successfully")
56
- llm = load_llm_model()
57
- print("Model loaded successfully")
58
- app.queue(default_concurrency_limit=5).launch(show_error=True)
 
 
 
1
+ import os
2
+ import sys
3
+ from generate_markdown import load_llm_model, generate_markdown
4
+ from generate_mindmap import generate_mindmap_svg
5
+ import gradio as gr
6
+ import subprocess
7
+
8
def generate(file):
    """Produce (summary, mindmap markdown, mindmap SVG path) for an uploaded PDF.

    Uses the module-level `llm` loaded at startup. The summary is currently a
    fixed placeholder string.
    """
    print(f"Generating mindmap for {file}")
    # NOTE(review): summary is a hard-coded placeholder — confirm this is intended.
    summary = "This is a summary of the research paper"
    markdown_text = generate_markdown(llm, file)
    svg_path = generate_mindmap_svg(markdown_text)
    print("Mindmap generated successfully")
    return summary, markdown_text, svg_path
15
+
16
# Gradio Soft theme: purple/cyan palette on a slate neutral base.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    # NOTE(review): 'poppins' is listed three times — likely only one entry is
    # needed; kept as-is to preserve behavior.
    font=[
        gr.themes.GoogleFont('Syne'),
        gr.themes.GoogleFont('poppins'),
        gr.themes.GoogleFont('poppins'),
        gr.themes.GoogleFont('poppins'),
    ],
)
22
+
23
# Application layout: one PDF upload input, three read-only outputs, one button.
with gr.Blocks(theme=theme, title="Binary Biology") as app:
    file = gr.File(file_count='single', label='Upload Research Paper PDF file', file_types=['.pdf'])
    summary = gr.TextArea(label='Summary', lines=5, interactive=False, show_copy_button=True)
    markdown_mindmap = gr.Textbox(label='Mindmap', lines=5, interactive=False, show_copy_button=True)
    graphical_mindmap = gr.Image(label='Graphical Mindmap', interactive=False, show_download_button=True, format='svg')
    submit = gr.Button(value='Submit')

    # Wire the button to the pipeline; queued so concurrent users are serialized.
    submit.click(
        generate,
        inputs=[file],
        outputs=[summary, markdown_mindmap, graphical_mindmap],
        scroll_to_output=True,
        show_progress=True,
        queue=True,
    )
37
+
38
if __name__ == "__main__":
    # Install llama-cpp-python, preferring an OpenBLAS-accelerated build.
    # FIX: subprocess.run does NOT raise on a non-zero exit status by itself,
    # so without check=True the fallback branch below was dead code. Also
    # replaced the bare `except:` clauses (which swallow SystemExit/
    # KeyboardInterrupt) with `except Exception`.
    try:
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
        subprocess.run(["pip", "install", "llama-cpp-python"], env=env, check=True)
    except Exception:
        # Fall back to a plain (non-BLAS) build.
        subprocess.run(["pip", "install", "llama-cpp-python"])
    # Install graphviz (required by generate_mindmap): try plain apt first,
    # then retry with sudo; abort the app if both fail.
    try:
        try:
            subprocess.run(['apt', 'install', '-y', 'graphviz'], check=True)
            print("Graphviz installed successfully")
        except Exception:
            subprocess.run(['sudo', 'apt', 'install', '-y', 'graphviz'], check=True)
            print("Graphviz installed successfully using sudo")
    except Exception:
        print("Graphviz installation failed")
        sys.exit(1)
    print("Graphviz loaded successfully")
    # Module-level `llm` is read by generate(); load it before serving.
    llm = load_llm_model()
    print("Model loaded successfully")
    app.queue(default_concurrency_limit=1).launch(show_error=True)
generate_markdown.py CHANGED
@@ -1,95 +1,64 @@
1
- from langchain.text_splitter import RecursiveCharacterTextSplitter
2
- from langchain_community.document_loaders import PyPDFLoader
3
- from llama_cpp import Llama
4
- import llama_cpp
5
-
6
- def load_llm_model():
7
- llm = Llama(
8
- model_path="Llama-3.2-3B-Instruct-Q8_0.gguf",
9
- # n_gpu_layers = 20, # Uncomment for GPU
10
- n_ctx=50000,
11
- n_threads=16,
12
- n_batch=512,
13
- split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
14
- pooling_type=llama_cpp.LLAMA_POOLING_TYPE_RANK,
15
- rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_LINEAR,
16
- # main_gpu=0 # Uncomment for GPU
17
- )
18
- return llm
19
-
20
- def get_text_from_pdf(file):
21
- loader = PyPDFLoader(file)
22
- pages = loader.load_and_split()
23
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
24
- texts = text_splitter.split_documents(pages)
25
- final_text = ""
26
- for text in texts:
27
- final_text = final_text + text.page_content
28
- print(f"Length of final text: {len(final_text)}")
29
- with open("final_text.txt", "w") as f:
30
- f.write(final_text)
31
- research_paper = ""
32
- for line in final_text.split("\n"):
33
- if line.startswith("REFERENCES"):
34
- break
35
- else:
36
- research_paper = research_paper + line + " "
37
- with open("research_paper.txt", "w") as f:
38
- f.write(research_paper)
39
- print(f"Length of research paper: {len(research_paper)}")
40
- return research_paper
41
-
42
- def generate_prompt(final_text):
43
- prompt = f'''
44
- You have been provided with a research paper in text format. Your task is to generate a mindmap structure in markdown format that summarizes the research paper.
45
- Your output should use the language \"en\" 0.3 times the length of the original research paper. Do not include anything in the response, that is not the part of mindmap and use the following template (any node in the mindmap should not exceed 10-12 words, also generate additional headings that aren't present in document if required for elaborative explaination):
46
- # {{Title}} (should be the title of the research paper)
47
- ## {{Subtitle01}} (as required and as many as required in markdown format)
48
- - {{Emoji01}} Bulletpoint01 (as required and as many as required in markdown format)
49
- - {{Emoji01.1}} Bulletpoint01.1 (as required and as many as sub levels required in markdown format)
50
- - {{Emoji01.1.1}} Bulletpoint01.1.1 (as required and as many as sub levels required in markdown format)
51
- - {{Emoji01.1.2}} Bulletpoint01.1.2 (as required and as many as sub levels required in markdown format)
52
- - {{Emoji01.2}} Bulletpoint01.2 (as required and as many as sub levels required in markdown format)
53
- - {{Emoji02}} Bulletpoint02 (as required and as many as required in markdown format)
54
- - {{Emoji02.1}} Bulletpoint02.1 (as required and as many as sub levels required in markdown format)
55
- - {{Emoji02.2}} Bulletpoint02.2 (as required and as many as sub levels required in markdown format)
56
- - {{Emoji02.2.1}} Bulletpoint02.2.1 (as required and as many as sub levels required in markdown format)
57
- - {{Emoji02.2.2}} Bulletpoint02.2.2 (as required and as many as sub levels required in markdown format)
58
- - {{Emoji02.2.3}} Bulletpoint02.2.3 (as required and as many as sub levels required in markdown format)
59
- - {{Emoji02.2.4}} Bulletpoint02.2.4 (as required and as many as sub levels required in markdown format)
60
- ## {{Subtitle02}} (as required and as many as required in markdown format)
61
- - {{Emoji03}} Bulletpoint03 (as required and as many as required in markdown format)
62
- - {{Emoji03.1}} Bulletpoint03.1 (as required and as many as sub levels required in markdown format)
63
- - {{Emoji03.2}} Bulletpoint03.2 (as required and as many as sub levels required in markdown format)
64
- - {{Emoji03.2.1}} Bulletpoint03.2.1 (as required and as many as sub levels required in markdown format)
65
- - {{Emoji03.2.2}} Bulletpoint03.2.2 (as required and as many as sub levels required in markdown format)
66
- - {{Emoji04}} Bulletpoint04 (as required and as many as required in markdown format)
67
- - {{Emoji04.1}} Bulletpoint04.1 (as required and as many as sub levels required in markdown format)
68
- - {{Emoji04.1.1}} Bulletpoint04.1.1 (as required and as many as sub levels required in markdown format)
69
- - {{Emoji04.1.2}} Bulletpoint04.1.2 (as required and as many as sub levels required in markdown format)
70
- - {{Emoji04.2}} Bulletpoint04.2 (as required and as many as sub levels required in markdown format)
71
- - {{Emoji04.2.1}} Bulletpoint04.2.1 (as required and as many as sub levels required in markdown format)
72
- - {{Emoji04.2.2}} Bulletpoint04.2.2 (as required and as many as sub levels required in markdown format)
73
- Summarize the text \"{final_text}\" to generate a elaborated hierarchical mindmap structure (any node in the mindmap should not exceed 10-12 words, also generate additional headings that aren't present in document if required for elaborative explaination) markdown using the \"en\" language 0.3 times the length of the original research paper. Do not include anything in the response, that is not the part of mindmap
74
- '''
75
- return prompt
76
-
77
- def generate_mindmap_structure(llm, prompt):
78
- response = llm.create_chat_completion(
79
- messages = [
80
- {'role':'user',
81
- 'content': prompt}
82
- ],
83
- temperature=0.7,
84
- top_k=200,
85
- top_p=3.0,
86
- )
87
- mindmap_data = response['choices'][0]['message']['content']
88
- print(mindmap_data)
89
- return mindmap_data
90
-
91
- def generate_markdown(llm, file):
92
- final_text = get_text_from_pdf(file)
93
- prompt = generate_prompt(final_text)
94
- mindmap_markdown = generate_mindmap_structure(llm, prompt)
95
- return mindmap_markdown
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from llama_cpp import Llama
4
+
5
def load_llm_model():
    """Load the local GGUF Llama model from the working directory.

    Returns the Llama instance; logs and re-raises any loading error.
    """
    try:
        model = Llama(
            model_path="Llama-3.2-1B-Instruct-Q8_0.gguf",
            # n_gpu_layers = 40,
            n_ctx=130000,  # NOTE(review): very large context — confirm RAM fits
            n_batch=1024,
            # main_gpu=0
        )
        print("LLM model loaded successfully")
        return model
    except Exception as e:
        print(f"Error loading LLM model: {e}")
        raise
19
+
20
def get_text_from_pdf(file):
    """Extract the paper body from a PDF, stopping at the REFERENCES section.

    Loads the PDF, splits it into ~250-char chunks, concatenates chunk text
    until a chunk beginning with "REFERENCES", and caps the result at 100k
    characters (keeps the prompt within the model context window).
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
    texts = text_splitter.split_documents(pages)
    parts = []
    for chunk in texts:
        # Drop everything from the bibliography onwards.
        if chunk.page_content.startswith("REFERENCES"):
            break
        parts.append(chunk.page_content)
    final_text = "".join(parts)
    # FIX: the original second loop iterated final_text character-by-character;
    # a single character can never start with "REFERENCES", so the loop was an
    # O(n^2) identity copy. The result is final_text itself, truncated.
    return final_text[:100000]
38
+
39
def generate_prompt(research_paper):
    # Builds the instruction prompt sent to the LLM: embeds the extracted paper
    # text and pins the exact markdown template the model must follow.
    # NOTE(review): the escape sequences in this literal are part of the model
    # contract — do not reflow or "fix" them without checking model output.
    prompt = f'''
    As a text script expert, please help me to write a short text script with the topic \\"{research_paper}\\".Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown.\\n Do not include anything in the response, that is not the part of mindmap.\\n Importantly your output must use language \\"English\\""
    '''
    return prompt
44
+
45
def generate_mindmap_structure(llm, prompt):
    """Run one chat completion and return the model's mindmap markdown text."""
    chat_messages = [
        {'role': 'system',
         'content': 'You are a helpful research assistant for generating well-formatted mindmaps in MarkDown format from scientific research papers.'},
        {'role': 'user',
         'content': prompt},
    ]
    # NOTE(review): top_p is a probability in [0, 1]; 3.0 effectively disables
    # nucleus sampling — confirm whether e.g. 0.95 was intended. Kept as-is.
    response = llm.create_chat_completion(
        messages=chat_messages,
        temperature=0.7,
        top_k=200,
        top_p=3.0,
    )
    return response['choices'][0]['message']['content']
59
+
60
def generate_markdown(llm, file):
    """End-to-end pipeline: PDF path -> extracted text -> prompt -> mindmap markdown."""
    paper_text = get_text_from_pdf(file)
    return generate_mindmap_structure(llm, generate_prompt(paper_text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generate_mindmap.py CHANGED
@@ -93,8 +93,7 @@ def generate_mindmap_svg(md_text):
93
  mindmap_dict = parse_markdown_to_dict(md_text)
94
  root_title = mindmap_dict.get('title', 'Mindmap')
95
  sanitized_title = re.sub(r'[^a-zA-Z0-9_\-]', '', root_title.replace(" ", ""))
96
- if output_filename is None:
97
- output_filename = sanitized_title
98
  graph = Digraph(format='svg')
99
  graph.attr(rankdir='LR', size='10,10!', pad="0.5", margin="0.2", ratio="auto")
100
  graph.attr('node', fontname="Arial", fontsize="9")
@@ -105,4 +104,61 @@ def generate_mindmap_svg(md_text):
105
  # Save the modified SVG content to a file
106
  with open(f'{output_filename}.svg', 'w') as f:
107
  f.write(svg_content)
108
- return f"{output_filename}".svg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  mindmap_dict = parse_markdown_to_dict(md_text)
94
  root_title = mindmap_dict.get('title', 'Mindmap')
95
  sanitized_title = re.sub(r'[^a-zA-Z0-9_\-]', '', root_title.replace(" ", ""))
96
+ output_filename = f"{sanitized_title}_mindmap.svg"
 
97
  graph = Digraph(format='svg')
98
  graph.attr(rankdir='LR', size='10,10!', pad="0.5", margin="0.2", ratio="auto")
99
  graph.attr('node', fontname="Arial", fontsize="9")
 
104
  # Save the modified SVG content to a file
105
  with open(f'{output_filename}.svg', 'w') as f:
106
  f.write(svg_content)
107
+ return f"{output_filename}"
108
+
109
+
110
+ # md = '''
111
+ # Here is a mind map summarizing the topic of combining machine learning (ML) and computational chemistry (CompChem) for predictive insights into chemical systems:
112
+
113
+ # **I. Introduction**
114
+
115
+ # * Machine learning (ML) poised to transform chemical sciences
116
+ # * Combining ML and CompChem for predictive insights
117
+
118
+ # **II. Computational Chemistry (CompChem)**
119
+
120
+ # * Computational quantum chemistry (CQChem)
121
+ # * Methods for generating data sets (e.g., wavefunction theory, correlated wavefunction methods, density functional theory)
122
+ # * Representations of systems (e.g., simple, complex, ambiguous)
123
+
124
+ # **III. Wavefunction Theory Methods**
125
+
126
+ # * Nonrelativistic time-independent Schrödinger equation
127
+ # * Electronic Schrödinger equation
128
+ # * Hartree-Fock (HF) approach
129
+ # * Correlated wavefunction methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
130
+
131
+ # **IV. Density Functional Theory (DFT)**
132
+
133
+ # * Kinetic energy (KE-) or orbital-free (OF-) DFT
134
+ # * Exchange-correlation functional (EC)
135
+ # * Kohn-Sham (KS-) DFT
136
+ # * Semiempirical methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
137
+
138
+ # **V. Semiempirical Methods**
139
+
140
+ # * Extended Hückel theory
141
+ # * Neglect of diatomic differential overlap
142
+ # * Semiempirical bond-order potentials (BOPs)
143
+ # * Semiempirical nuclear quantum effects (NQEs)
144
+
145
+ # **VI. Response Properties**
146
+
147
+ # * Nuclear forces (e.g., F = -Π)
148
+ # * Hessian calculations (e.g., second derivative of energy with respect to nuclear positions)
149
+ # * Energy conserving forces (e.g., dipole moments)
150
+
151
+ # **VII. Applications of ML in CompChem**
152
+
153
+ # * Predicting molecular and material properties
154
+ # * Predicting chemical reactions and processes
155
+ # * Predicting materials properties (e.g., conductivity, optical properties)
156
+ # * Predicting drug design and development
157
+
158
+ # **VIII. Future Directions**
159
+
160
+ # * Developing more accurate ML models for CompChem
161
+ # * Improving the transferability of ML models between different systems
162
+ # * Using ML to accelerate and improve the discovery of new materials and compounds
163
+ # '''
164
+ # generate_mindmap_svg(md)
requirements.txt CHANGED
@@ -4,3 +4,7 @@ langchain-community==0.3.7
4
  graphviz==0.20.3
5
  llama-cpp-python==0.3.1
6
  pypdf==5.1.0
 
 
 
 
 
4
  graphviz==0.20.3
5
  llama-cpp-python==0.3.1
6
  pypdf==5.1.0
7
+ llama-cpp-agent==0.2.35
8
+ huggingface-hub==0.26.2
9
+ spaces==0.30.4
10
+ cairosvg==2.7.1