raannakasturi committed on
Commit
b35a32c
·
verified ·
1 Parent(s): 2dfac3d

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +60 -58
  2. generate_markdown.py +64 -95
  3. generate_mindmap.py +59 -3
  4. requirements.txt +4 -0
app.py CHANGED
@@ -1,58 +1,60 @@
1
- import os
2
- import sys
3
- from generate_markdown import load_llm_model, generate_markdown
4
- from generate_mindmap import generate_mindmap_svg
5
- import gradio as gr
6
- import subprocess
7
-
8
-
9
- def generate(file):
10
- summary = "This is a summary of the research paper"
11
- mindmap_markdown = generate_markdown(llm, file)
12
- mindmap_svg = generate_mindmap_svg(mindmap_markdown)
13
- return summary, mindmap_markdown, mindmap_svg
14
-
15
- theme = gr.themes.Soft(
16
- primary_hue="purple",
17
- secondary_hue="cyan",
18
- neutral_hue="slate",
19
- font=[gr.themes.GoogleFont('Syne'), gr.themes.GoogleFont('poppins'), gr.themes.GoogleFont('poppins'), gr.themes.GoogleFont('poppins')],
20
- )
21
-
22
- with gr.Blocks(theme=theme, title="Binary Biology") as app:
23
- file = gr.File(file_count='single', label='Upload Research Paper PDF file')
24
- summary = gr.TextArea(label='Summary', lines=5, interactive=False, show_copy_button=True)
25
- markdown_mindmap = gr.Textbox(label='Mindmap', lines=5, interactive=False, show_copy_button=True)
26
- graphical_mindmap = gr.Image(label='Graphical Mindmap', interactive=False, show_download_button=True)
27
- submit = gr.Button(value='Submit')
28
-
29
- submit.click(generate,
30
- inputs=[file],
31
- outputs=[summary, markdown_mindmap, graphical_mindmap],
32
- scroll_to_output=True,
33
- show_progress=True,
34
- queue=True,
35
- )
36
-
37
- try:
38
- env = os.environ.copy()
39
- env["CMAKE_ARGS"] = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
40
- cmd = ["pip", "install", "llama-cpp-python"]
41
- subprocess.run(cmd, env=env)
42
- except:
43
- cmd = ["pip", "install", "llama-cpp-python"]
44
- subprocess.run(cmd)
45
- try:
46
- try:
47
- subprocess.run(['apt', 'install', '-y', 'graphviz'])
48
- print("Graphviz installed successfully")
49
- except:
50
- subprocess.run(['sudo', 'apt', 'install', '-y', 'graphviz'])
51
- print("Graphviz installed successfully using sudo")
52
- except:
53
- print("Graphviz installation failed")
54
- sys.exit(1)
55
- print("Graphviz loaded successfully")
56
- llm = load_llm_model()
57
- print("Model loaded successfully")
58
- app.queue(default_concurrency_limit=5).launch(show_error=True)
 
 
 
1
+ import os
2
+ import sys
3
+ from generate_markdown import load_llm_model, generate_markdown
4
+ from generate_mindmap import generate_mindmap_svg
5
+ import gradio as gr
6
+ import subprocess
7
+
8
def generate(file):
    """Produce (summary, mindmap markdown, mindmap SVG path) for an uploaded PDF.

    Uses the module-level `llm` loaded at startup. The summary is currently a
    fixed placeholder string.
    """
    print(f"Generating mindmap for {file}")
    # NOTE(review): summary is a hard-coded placeholder — confirm this is intended.
    summary = "This is a summary of the research paper"
    markdown_text = generate_markdown(llm, file)
    svg_path = generate_mindmap_svg(markdown_text)
    print("Mindmap generated successfully")
    return summary, markdown_text, svg_path
15
+
16
# Gradio Soft theme: purple/cyan palette on a slate neutral base.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    # NOTE(review): 'poppins' is listed three times — likely only one entry is
    # needed; kept as-is to preserve behavior.
    font=[
        gr.themes.GoogleFont('Syne'),
        gr.themes.GoogleFont('poppins'),
        gr.themes.GoogleFont('poppins'),
        gr.themes.GoogleFont('poppins'),
    ],
)
22
+
23
# Application layout: one PDF upload input, three read-only outputs, one button.
with gr.Blocks(theme=theme, title="Binary Biology") as app:
    file = gr.File(file_count='single', label='Upload Research Paper PDF file', file_types=['.pdf'])
    summary = gr.TextArea(label='Summary', lines=5, interactive=False, show_copy_button=True)
    markdown_mindmap = gr.Textbox(label='Mindmap', lines=5, interactive=False, show_copy_button=True)
    graphical_mindmap = gr.Image(label='Graphical Mindmap', interactive=False, show_download_button=True, format='svg')
    submit = gr.Button(value='Submit')

    # Wire the button to the pipeline; queued so concurrent users are serialized.
    submit.click(
        generate,
        inputs=[file],
        outputs=[summary, markdown_mindmap, graphical_mindmap],
        scroll_to_output=True,
        show_progress=True,
        queue=True,
    )
37
+
38
if __name__ == "__main__":
    # Install llama-cpp-python, preferring an OpenBLAS-accelerated build.
    # FIX: subprocess.run does NOT raise on a non-zero exit status by itself,
    # so without check=True the fallback branch below was dead code. Also
    # replaced the bare `except:` clauses (which swallow SystemExit/
    # KeyboardInterrupt) with `except Exception`.
    try:
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
        subprocess.run(["pip", "install", "llama-cpp-python"], env=env, check=True)
    except Exception:
        # Fall back to a plain (non-BLAS) build.
        subprocess.run(["pip", "install", "llama-cpp-python"])
    # Install graphviz (required by generate_mindmap): try plain apt first,
    # then retry with sudo; abort the app if both fail.
    try:
        try:
            subprocess.run(['apt', 'install', '-y', 'graphviz'], check=True)
            print("Graphviz installed successfully")
        except Exception:
            subprocess.run(['sudo', 'apt', 'install', '-y', 'graphviz'], check=True)
            print("Graphviz installed successfully using sudo")
    except Exception:
        print("Graphviz installation failed")
        sys.exit(1)
    print("Graphviz loaded successfully")
    # Module-level `llm` is read by generate(); load it before serving.
    llm = load_llm_model()
    print("Model loaded successfully")
    app.queue(default_concurrency_limit=1).launch(show_error=True)
generate_markdown.py CHANGED
@@ -1,95 +1,64 @@
1
- from langchain.text_splitter import RecursiveCharacterTextSplitter
2
- from langchain_community.document_loaders import PyPDFLoader
3
- from llama_cpp import Llama
4
- import llama_cpp
5
-
6
- def load_llm_model():
7
- llm = Llama(
8
- model_path="Llama-3.2-3B-Instruct-Q8_0.gguf",
9
- # n_gpu_layers = 20, # Uncomment for GPU
10
- n_ctx=50000,
11
- n_threads=16,
12
- n_batch=512,
13
- split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
14
- pooling_type=llama_cpp.LLAMA_POOLING_TYPE_RANK,
15
- rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_TYPE_LINEAR,
16
- # main_gpu=0 # Uncomment for GPU
17
- )
18
- return llm
19
-
20
- def get_text_from_pdf(file):
21
- loader = PyPDFLoader(file)
22
- pages = loader.load_and_split()
23
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
24
- texts = text_splitter.split_documents(pages)
25
- final_text = ""
26
- for text in texts:
27
- final_text = final_text + text.page_content
28
- print(f"Length of final text: {len(final_text)}")
29
- with open("final_text.txt", "w") as f:
30
- f.write(final_text)
31
- research_paper = ""
32
- for line in final_text.split("\n"):
33
- if line.startswith("REFERENCES"):
34
- break
35
- else:
36
- research_paper = research_paper + line + " "
37
- with open("research_paper.txt", "w") as f:
38
- f.write(research_paper)
39
- print(f"Length of research paper: {len(research_paper)}")
40
- return research_paper
41
-
42
- def generate_prompt(final_text):
43
- prompt = f'''
44
- You have been provided with a research paper in text format. Your task is to generate a mindmap structure in markdown format that summarizes the research paper.
45
- Your output should use the language \"en\" 0.3 times the length of the original research paper. Do not include anything in the response, that is not the part of mindmap and use the following template (any node in the mindmap should not exceed 10-12 words, also generate additional headings that aren't present in document if required for elaborative explaination):
46
- # {{Title}} (should be the title of the research paper)
47
- ## {{Subtitle01}} (as required and as many as required in markdown format)
48
- - {{Emoji01}} Bulletpoint01 (as required and as many as required in markdown format)
49
- - {{Emoji01.1}} Bulletpoint01.1 (as required and as many as sub levels required in markdown format)
50
- - {{Emoji01.1.1}} Bulletpoint01.1.1 (as required and as many as sub levels required in markdown format)
51
- - {{Emoji01.1.2}} Bulletpoint01.1.2 (as required and as many as sub levels required in markdown format)
52
- - {{Emoji01.2}} Bulletpoint01.2 (as required and as many as sub levels required in markdown format)
53
- - {{Emoji02}} Bulletpoint02 (as required and as many as required in markdown format)
54
- - {{Emoji02.1}} Bulletpoint02.1 (as required and as many as sub levels required in markdown format)
55
- - {{Emoji02.2}} Bulletpoint02.2 (as required and as many as sub levels required in markdown format)
56
- - {{Emoji02.2.1}} Bulletpoint02.2.1 (as required and as many as sub levels required in markdown format)
57
- - {{Emoji02.2.2}} Bulletpoint02.2.2 (as required and as many as sub levels required in markdown format)
58
- - {{Emoji02.2.3}} Bulletpoint02.2.3 (as required and as many as sub levels required in markdown format)
59
- - {{Emoji02.2.4}} Bulletpoint02.2.4 (as required and as many as sub levels required in markdown format)
60
- ## {{Subtitle02}} (as required and as many as required in markdown format)
61
- - {{Emoji03}} Bulletpoint03 (as required and as many as required in markdown format)
62
- - {{Emoji03.1}} Bulletpoint03.1 (as required and as many as sub levels required in markdown format)
63
- - {{Emoji03.2}} Bulletpoint03.2 (as required and as many as sub levels required in markdown format)
64
- - {{Emoji03.2.1}} Bulletpoint03.2.1 (as required and as many as sub levels required in markdown format)
65
- - {{Emoji03.2.2}} Bulletpoint03.2.2 (as required and as many as sub levels required in markdown format)
66
- - {{Emoji04}} Bulletpoint04 (as required and as many as required in markdown format)
67
- - {{Emoji04.1}} Bulletpoint04.1 (as required and as many as sub levels required in markdown format)
68
- - {{Emoji04.1.1}} Bulletpoint04.1.1 (as required and as many as sub levels required in markdown format)
69
- - {{Emoji04.1.2}} Bulletpoint04.1.2 (as required and as many as sub levels required in markdown format)
70
- - {{Emoji04.2}} Bulletpoint04.2 (as required and as many as sub levels required in markdown format)
71
- - {{Emoji04.2.1}} Bulletpoint04.2.1 (as required and as many as sub levels required in markdown format)
72
- - {{Emoji04.2.2}} Bulletpoint04.2.2 (as required and as many as sub levels required in markdown format)
73
- Summarize the text \"{final_text}\" to generate a elaborated hierarchical mindmap structure (any node in the mindmap should not exceed 10-12 words, also generate additional headings that aren't present in document if required for elaborative explaination) markdown using the \"en\" language 0.3 times the length of the original research paper. Do not include anything in the response, that is not the part of mindmap
74
- '''
75
- return prompt
76
-
77
- def generate_mindmap_structure(llm, prompt):
78
- response = llm.create_chat_completion(
79
- messages = [
80
- {'role':'user',
81
- 'content': prompt}
82
- ],
83
- temperature=0.7,
84
- top_k=200,
85
- top_p=3.0,
86
- )
87
- mindmap_data = response['choices'][0]['message']['content']
88
- print(mindmap_data)
89
- return mindmap_data
90
-
91
- def generate_markdown(llm, file):
92
- final_text = get_text_from_pdf(file)
93
- prompt = generate_prompt(final_text)
94
- mindmap_markdown = generate_mindmap_structure(llm, prompt)
95
- return mindmap_markdown
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from llama_cpp import Llama
4
+
5
def load_llm_model():
    """Load the local GGUF Llama model from the working directory.

    Returns the Llama instance; logs and re-raises any loading error.
    """
    try:
        model = Llama(
            model_path="Llama-3.2-1B-Instruct-Q8_0.gguf",
            # n_gpu_layers = 40,
            n_ctx=130000,  # NOTE(review): very large context — confirm RAM fits
            n_batch=1024,
            # main_gpu=0
        )
        print("LLM model loaded successfully")
        return model
    except Exception as e:
        print(f"Error loading LLM model: {e}")
        raise
19
+
20
def get_text_from_pdf(file):
    """Extract the paper body from a PDF, stopping at the REFERENCES section.

    Loads the PDF, splits it into ~250-char chunks, concatenates chunk text
    until a chunk beginning with "REFERENCES", and caps the result at 100k
    characters (keeps the prompt within the model context window).
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
    texts = text_splitter.split_documents(pages)
    parts = []
    for chunk in texts:
        # Drop everything from the bibliography onwards.
        if chunk.page_content.startswith("REFERENCES"):
            break
        parts.append(chunk.page_content)
    final_text = "".join(parts)
    # FIX: the original second loop iterated final_text character-by-character;
    # a single character can never start with "REFERENCES", so the loop was an
    # O(n^2) identity copy. The result is final_text itself, truncated.
    return final_text[:100000]
38
+
39
def generate_prompt(research_paper):
    # Builds the instruction prompt sent to the LLM: embeds the extracted paper
    # text and pins the exact markdown template the model must follow.
    # NOTE(review): the escape sequences in this literal are part of the model
    # contract — do not reflow or "fix" them without checking model output.
    prompt = f'''
    As a text script expert, please help me to write a short text script with the topic \\"{research_paper}\\".Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown.\\n Do not include anything in the response, that is not the part of mindmap.\\n Importantly your output must use language \\"English\\""
    '''
    return prompt
44
+
45
def generate_mindmap_structure(llm, prompt):
    """Run one chat completion and return the model's mindmap markdown text."""
    chat_messages = [
        {'role': 'system',
         'content': 'You are a helpful research assistant for generating well-formatted mindmaps in MarkDown format from scientific research papers.'},
        {'role': 'user',
         'content': prompt},
    ]
    # NOTE(review): top_p is a probability in [0, 1]; 3.0 effectively disables
    # nucleus sampling — confirm whether e.g. 0.95 was intended. Kept as-is.
    response = llm.create_chat_completion(
        messages=chat_messages,
        temperature=0.7,
        top_k=200,
        top_p=3.0,
    )
    return response['choices'][0]['message']['content']
59
+
60
def generate_markdown(llm, file):
    """End-to-end pipeline: PDF path -> extracted text -> prompt -> mindmap markdown."""
    paper_text = get_text_from_pdf(file)
    return generate_mindmap_structure(llm, generate_prompt(paper_text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generate_mindmap.py CHANGED
@@ -93,8 +93,7 @@ def generate_mindmap_svg(md_text):
93
  mindmap_dict = parse_markdown_to_dict(md_text)
94
  root_title = mindmap_dict.get('title', 'Mindmap')
95
  sanitized_title = re.sub(r'[^a-zA-Z0-9_\-]', '', root_title.replace(" ", ""))
96
- if output_filename is None:
97
- output_filename = sanitized_title
98
  graph = Digraph(format='svg')
99
  graph.attr(rankdir='LR', size='10,10!', pad="0.5", margin="0.2", ratio="auto")
100
  graph.attr('node', fontname="Arial", fontsize="9")
@@ -105,4 +104,61 @@ def generate_mindmap_svg(md_text):
105
  # Save the modified SVG content to a file
106
  with open(f'{output_filename}.svg', 'w') as f:
107
  f.write(svg_content)
108
- return f"{output_filename}".svg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  mindmap_dict = parse_markdown_to_dict(md_text)
94
  root_title = mindmap_dict.get('title', 'Mindmap')
95
  sanitized_title = re.sub(r'[^a-zA-Z0-9_\-]', '', root_title.replace(" ", ""))
96
+ output_filename = f"{sanitized_title}_mindmap.svg"
 
97
  graph = Digraph(format='svg')
98
  graph.attr(rankdir='LR', size='10,10!', pad="0.5", margin="0.2", ratio="auto")
99
  graph.attr('node', fontname="Arial", fontsize="9")
 
104
  # Save the modified SVG content to a file
105
  with open(f'{output_filename}.svg', 'w') as f:
106
  f.write(svg_content)
107
+ return f"{output_filename}"
108
+
109
+
110
+ # md = '''
111
+ # Here is a mind map summarizing the topic of combining machine learning (ML) and computational chemistry (CompChem) for predictive insights into chemical systems:
112
+
113
+ # **I. Introduction**
114
+
115
+ # * Machine learning (ML) poised to transform chemical sciences
116
+ # * Combining ML and CompChem for predictive insights
117
+
118
+ # **II. Computational Chemistry (CompChem)**
119
+
120
+ # * Computational quantum chemistry (CQChem)
121
+ # * Methods for generating data sets (e.g., wavefunction theory, correlated wavefunction methods, density functional theory)
122
+ # * Representations of systems (e.g., simple, complex, ambiguous)
123
+
124
+ # **III. Wavefunction Theory Methods**
125
+
126
+ # * Nonrelativistic time-independent Schrödinger equation
127
+ # * Electronic Schrödinger equation
128
+ # * Hartree-Fock (HF) approach
129
+ # * Correlated wavefunction methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
130
+
131
+ # **IV. Density Functional Theory (DFT)**
132
+
133
+ # * Kinetic energy (KE-) or orbital-free (OF-) DFT
134
+ # * Exchange-correlation functional (EC)
135
+ # * Kohn-Sham (KS-) DFT
136
+ # * Semiempirical methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
137
+
138
+ # **V. Semiempirical Methods**
139
+
140
+ # * Extended Hückel theory
141
+ # * Neglect of diatomic differential overlap
142
+ # * Semiempirical bond-order potentials (BOPs)
143
+ # * Semiempirical nuclear quantum effects (NQEs)
144
+
145
+ # **VI. Response Properties**
146
+
147
+ # * Nuclear forces (e.g., F = -Π)
148
+ # * Hessian calculations (e.g., second derivative of energy with respect to nuclear positions)
149
+ # * Energy conserving forces (e.g., dipole moments)
150
+
151
+ # **VII. Applications of ML in CompChem**
152
+
153
+ # * Predicting molecular and material properties
154
+ # * Predicting chemical reactions and processes
155
+ # * Predicting materials properties (e.g., conductivity, optical properties)
156
+ # * Predicting drug design and development
157
+
158
+ # **VIII. Future Directions**
159
+
160
+ # * Developing more accurate ML models for CompChem
161
+ # * Improving the transferability of ML models between different systems
162
+ # * Using ML to accelerate and improve the discovery of new materials and compounds
163
+ # '''
164
+ # generate_mindmap_svg(md)
requirements.txt CHANGED
@@ -4,3 +4,7 @@ langchain-community==0.3.7
4
  graphviz==0.20.3
5
  llama-cpp-python==0.3.1
6
  pypdf==5.1.0
 
 
 
 
 
4
  graphviz==0.20.3
5
  llama-cpp-python==0.3.1
6
  pypdf==5.1.0
7
+ llama-cpp-agent==0.2.35
8
+ huggingface-hub==0.26.2
9
+ spaces==0.30.4
10
+ cairosvg==2.7.1