Spaces:
Running
Running
Upload 4 files
Browse files- app.py +60 -58
- generate_markdown.py +64 -95
- generate_mindmap.py +59 -3
- requirements.txt +4 -0
app.py
CHANGED
@@ -1,58 +1,60 @@
|
|
1 |
-
import os
|
2 |
-
import sys
|
3 |
-
from generate_markdown import load_llm_model, generate_markdown
|
4 |
-
from generate_mindmap import generate_mindmap_svg
|
5 |
-
import gradio as gr
|
6 |
-
import subprocess
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
summary = "This is a summary of the research paper"
|
11 |
-
mindmap_markdown = generate_markdown(llm, file)
|
12 |
-
mindmap_svg = generate_mindmap_svg(mindmap_markdown)
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
)
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
print("Graphviz
|
56 |
-
|
57 |
-
print("
|
58 |
-
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
from generate_markdown import load_llm_model, generate_markdown
|
4 |
+
from generate_mindmap import generate_mindmap_svg
|
5 |
+
import gradio as gr
|
6 |
+
import subprocess
|
7 |
+
|
8 |
+
def generate(file):
    """Run the full pipeline for one uploaded research-paper PDF.

    Uses the module-level ``llm`` that is loaded in the ``__main__`` guard.

    Args:
        file: Path of the uploaded PDF (as delivered by the gr.File input).

    Returns:
        tuple[str, str, str]: (placeholder summary, markdown mindmap,
        path of the rendered SVG mindmap).
    """
    print(f"Generating mindmap for {file}")
    # NOTE(review): the summary is a hard-coded placeholder in this version.
    paper_summary = "This is a summary of the research paper"
    md_text = generate_markdown(llm, file)
    svg_path = generate_mindmap_svg(md_text)
    print("Mindmap generated successfully")
    return paper_summary, md_text, svg_path
|
15 |
+
|
16 |
+
# Shared Gradio theme for the app UI: purple/cyan palette, Syne headings with
# a Poppins fallback.  The original listed GoogleFont('poppins') three times;
# duplicate entries in a font-fallback list are no-ops, so they are dropped.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Syne'), gr.themes.GoogleFont('poppins')],
)
|
22 |
+
|
23 |
+
with gr.Blocks(theme=theme, title="Binary Biology") as app:
    # Input: a single research-paper PDF.
    pdf_input = gr.File(file_count='single', label='Upload Research Paper PDF file', file_types=['.pdf'])
    # Outputs: plain-text summary, markdown mindmap source, rendered SVG.
    summary_box = gr.TextArea(label='Summary', lines=5, interactive=False, show_copy_button=True)
    mindmap_box = gr.Textbox(label='Mindmap', lines=5, interactive=False, show_copy_button=True)
    mindmap_image = gr.Image(label='Graphical Mindmap', interactive=False, show_download_button=True, format='svg')
    run_button = gr.Button(value='Submit')

    # Wire the button to the pipeline; queued so only one job runs at a time
    # (see app.queue(default_concurrency_limit=1) in the __main__ guard).
    run_button.click(
        generate,
        inputs=[pdf_input],
        outputs=[summary_box, mindmap_box, mindmap_image],
        scroll_to_output=True,
        show_progress=True,
        queue=True,
    )
|
37 |
+
|
38 |
+
if __name__ == "__main__":
    # Build llama-cpp-python with OpenBLAS when possible, else plain install.
    # check=True is essential: subprocess.run() does NOT raise on a non-zero
    # exit status by itself, so without it the except/fallback branches below
    # could never trigger.  Exceptions are narrowed from the original bare
    # `except:` to the two things that can actually go wrong here.
    try:
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
        cmd = ["pip", "install", "llama-cpp-python"]
        subprocess.run(cmd, env=env, check=True)
    except (subprocess.CalledProcessError, OSError):
        cmd = ["pip", "install", "llama-cpp-python"]
        subprocess.run(cmd, check=True)
    # Graphviz binaries are required by generate_mindmap_svg.  Try a plain
    # apt install first (Spaces containers run as root), then with sudo.
    try:
        try:
            subprocess.run(['apt', 'install', '-y', 'graphviz'], check=True)
            print("Graphviz installed successfully")
        except (subprocess.CalledProcessError, OSError):
            subprocess.run(['sudo', 'apt', 'install', '-y', 'graphviz'], check=True)
            print("Graphviz installed successfully using sudo")
    except (subprocess.CalledProcessError, OSError):
        print("Graphviz installation failed")
        sys.exit(1)
    print("Graphviz loaded successfully")
    # Module-level llm: read by generate() on every request.
    llm = load_llm_model()
    print("Model loaded successfully")
    app.queue(default_concurrency_limit=1).launch(show_error=True)
|
generate_markdown.py
CHANGED
@@ -1,95 +1,64 @@
|
|
1 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
2 |
-
from langchain_community.document_loaders import PyPDFLoader
|
3 |
-
from llama_cpp import Llama
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
def get_text_from_pdf(file):
|
21 |
-
loader = PyPDFLoader(file)
|
22 |
-
pages = loader.load_and_split()
|
23 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
|
24 |
-
texts = text_splitter.split_documents(pages)
|
25 |
-
final_text = ""
|
26 |
-
for text in texts:
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
research_paper = ""
|
32 |
-
for
|
33 |
-
if
|
34 |
-
break
|
35 |
-
else:
|
36 |
-
research_paper = research_paper +
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
prompt
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
- {{Emoji03.2.2}} Bulletpoint03.2.2 (as required and as many as sub levels required in markdown format)
|
66 |
-
- {{Emoji04}} Bulletpoint04 (as required and as many as required in markdown format)
|
67 |
-
- {{Emoji04.1}} Bulletpoint04.1 (as required and as many as sub levels required in markdown format)
|
68 |
-
- {{Emoji04.1.1}} Bulletpoint04.1.1 (as required and as many as sub levels required in markdown format)
|
69 |
-
- {{Emoji04.1.2}} Bulletpoint04.1.2 (as required and as many as sub levels required in markdown format)
|
70 |
-
- {{Emoji04.2}} Bulletpoint04.2 (as required and as many as sub levels required in markdown format)
|
71 |
-
- {{Emoji04.2.1}} Bulletpoint04.2.1 (as required and as many as sub levels required in markdown format)
|
72 |
-
- {{Emoji04.2.2}} Bulletpoint04.2.2 (as required and as many as sub levels required in markdown format)
|
73 |
-
Summarize the text \"{final_text}\" to generate a elaborated hierarchical mindmap structure (any node in the mindmap should not exceed 10-12 words, also generate additional headings that aren't present in document if required for elaborative explaination) markdown using the \"en\" language 0.3 times the length of the original research paper. Do not include anything in the response, that is not the part of mindmap
|
74 |
-
'''
|
75 |
-
return prompt
|
76 |
-
|
77 |
-
def generate_mindmap_structure(llm, prompt):
|
78 |
-
response = llm.create_chat_completion(
|
79 |
-
messages = [
|
80 |
-
{'role':'user',
|
81 |
-
'content': prompt}
|
82 |
-
],
|
83 |
-
temperature=0.7,
|
84 |
-
top_k=200,
|
85 |
-
top_p=3.0,
|
86 |
-
)
|
87 |
-
mindmap_data = response['choices'][0]['message']['content']
|
88 |
-
print(mindmap_data)
|
89 |
-
return mindmap_data
|
90 |
-
|
91 |
-
def generate_markdown(llm, file):
|
92 |
-
final_text = get_text_from_pdf(file)
|
93 |
-
prompt = generate_prompt(final_text)
|
94 |
-
mindmap_markdown = generate_mindmap_structure(llm, prompt)
|
95 |
-
return mindmap_markdown
|
|
|
1 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
2 |
+
from langchain_community.document_loaders import PyPDFLoader
|
3 |
+
from llama_cpp import Llama
|
4 |
+
|
5 |
+
def load_llm_model():
    """Load the local GGUF Llama model and return the ``Llama`` handle.

    Returns:
        Llama: The loaded llama-cpp model.

    Raises:
        Exception: Re-raises whatever llama-cpp raised (e.g. a missing or
            corrupt model file) after logging it.
    """
    try:
        model = Llama(
            model_path="Llama-3.2-1B-Instruct-Q8_0.gguf",
            # n_gpu_layers = 40,
            n_ctx=130000,  # NOTE(review): very large context window -- confirm RAM budget
            n_batch=1024,
            # main_gpu=0
        )
    except Exception as e:
        print(f"Error loading LLM model: {e}")
        raise
    print("LLM model loaded successfully")
    return model
|
19 |
+
|
20 |
+
def get_text_from_pdf(file):
    """Extract a paper's body text from a PDF, stopping at REFERENCES.

    Loads the PDF, splits it into small overlapping chunks, and concatenates
    chunk text until a chunk that starts with "REFERENCES" is seen.  The
    result is capped at 100,000 characters to bound the LLM prompt size.

    Args:
        file: Path to the PDF file.

    Returns:
        str: Up to 100,000 characters of extracted text.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
    texts = text_splitter.split_documents(pages)
    final_text = ""
    for text in texts:
        if text.page_content.startswith("REFERENCES"):
            break
        final_text = final_text + text.page_content
    # The original version re-scanned `final_text` one *character* at a time,
    # testing startswith("REFERENCES") on single characters -- that test can
    # never match a multi-character prefix, so the loop was just a
    # quadratic-time identity copy.  Truncating directly is equivalent.
    return final_text[:100000]
|
38 |
+
|
39 |
+
def generate_prompt(research_paper):
    """Build the single-shot mindmap-generation prompt for the LLM.

    Embeds the (already truncated) paper text and constrains the model to a
    strict markdown mindmap template: one title, subtitles, emoji bullets.

    Args:
        research_paper: Plain text of the paper, as returned by
            ``get_text_from_pdf`` (truncated upstream to 100k characters).

    Returns:
        str: The formatted prompt string.
    """
    # NOTE(review): the escape sequences below are reproduced from the diff
    # view verbatim -- confirm the intended \n / \" escaping against the repo.
    prompt = f'''
    As a text script expert, please help me to write a short text script with the topic \\"{research_paper}\\".Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown.\\n Do not include anything in the response, that is not the part of mindmap.\\n Importantly your output must use language \\"English\\""
    '''
    return prompt
|
44 |
+
|
45 |
+
def generate_mindmap_structure(llm, prompt):
    """Ask the LLM for a markdown mindmap and return the raw completion text.

    Args:
        llm: A llama-cpp ``Llama`` instance (anything exposing
            ``create_chat_completion``).
        prompt: Fully formatted user prompt from ``generate_prompt``.

    Returns:
        str: The assistant message content (markdown mindmap source).
    """
    chat = [
        {'role': 'system',
         'content': 'You are a helpful research assistant for generating well-formatted mindmaps in MarkDown format from scientific research papers.'},
        {'role': 'user', 'content': prompt},
    ]
    # NOTE(review): top_p=3.0 lies outside the usual (0, 1] nucleus-sampling
    # range -- kept as-is to preserve behaviour; confirm whether 0.3 was meant.
    response = llm.create_chat_completion(
        messages=chat,
        temperature=0.7,
        top_k=200,
        top_p=3.0,
    )
    return response['choices'][0]['message']['content']
|
59 |
+
|
60 |
+
def generate_markdown(llm, file):
    """End-to-end pipeline: PDF path -> markdown mindmap string.

    Args:
        llm: Loaded llama-cpp model (from ``load_llm_model``).
        file: Path to the research-paper PDF.

    Returns:
        str: Markdown mindmap produced by the LLM.
    """
    paper_text = get_text_from_pdf(file)
    mindmap_prompt = generate_prompt(paper_text)
    return generate_mindmap_structure(llm, mindmap_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generate_mindmap.py
CHANGED
@@ -93,8 +93,7 @@ def generate_mindmap_svg(md_text):
|
|
93 |
mindmap_dict = parse_markdown_to_dict(md_text)
|
94 |
root_title = mindmap_dict.get('title', 'Mindmap')
|
95 |
sanitized_title = re.sub(r'[^a-zA-Z0-9_\-]', '', root_title.replace(" ", ""))
|
96 |
-
|
97 |
-
output_filename = sanitized_title
|
98 |
graph = Digraph(format='svg')
|
99 |
graph.attr(rankdir='LR', size='10,10!', pad="0.5", margin="0.2", ratio="auto")
|
100 |
graph.attr('node', fontname="Arial", fontsize="9")
|
@@ -105,4 +104,61 @@ def generate_mindmap_svg(md_text):
|
|
105 |
# Save the modified SVG content to a file
|
106 |
with open(f'{output_filename}.svg', 'w') as f:
|
107 |
f.write(svg_content)
|
108 |
-
return f"{output_filename}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
mindmap_dict = parse_markdown_to_dict(md_text)
|
94 |
root_title = mindmap_dict.get('title', 'Mindmap')
|
95 |
sanitized_title = re.sub(r'[^a-zA-Z0-9_\-]', '', root_title.replace(" ", ""))
|
96 |
+
output_filename = f"{sanitized_title}_mindmap.svg"
|
|
|
97 |
graph = Digraph(format='svg')
|
98 |
graph.attr(rankdir='LR', size='10,10!', pad="0.5", margin="0.2", ratio="auto")
|
99 |
graph.attr('node', fontname="Arial", fontsize="9")
|
|
|
104 |
# Save the modified SVG content to a file
|
105 |
with open(f'{output_filename}.svg', 'w') as f:
|
106 |
f.write(svg_content)
|
107 |
+
return f"{output_filename}"
|
108 |
+
|
109 |
+
|
110 |
+
# md = '''
|
111 |
+
# Here is a mind map summarizing the topic of combining machine learning (ML) and computational chemistry (CompChem) for predictive insights into chemical systems:
|
112 |
+
|
113 |
+
# **I. Introduction**
|
114 |
+
|
115 |
+
# * Machine learning (ML) poised to transform chemical sciences
|
116 |
+
# * Combining ML and CompChem for predictive insights
|
117 |
+
|
118 |
+
# **II. Computational Chemistry (CompChem)**
|
119 |
+
|
120 |
+
# * Computational quantum chemistry (CQChem)
|
121 |
+
# * Methods for generating data sets (e.g., wavefunction theory, correlated wavefunction methods, density functional theory)
|
122 |
+
# * Representations of systems (e.g., simple, complex, ambiguous)
|
123 |
+
|
124 |
+
# **III. Wavefunction Theory Methods**
|
125 |
+
|
126 |
+
# * Nonrelativistic time-independent Schrödinger equation
|
127 |
+
# * Electronic Schrödinger equation
|
128 |
+
# * Hartree-Fock (HF) approach
|
129 |
+
# * Correlated wavefunction methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
|
130 |
+
|
131 |
+
# **IV. Density Functional Theory (DFT)**
|
132 |
+
|
133 |
+
# * Kinetic energy (KE-) or orbital-free (OF-) DFT
|
134 |
+
# * Exchange-correlation functional (EC)
|
135 |
+
# * Kohn-Sham (KS-) DFT
|
136 |
+
# * Semiempirical methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
|
137 |
+
|
138 |
+
# **V. Semiempirical Methods**
|
139 |
+
|
140 |
+
# * Extended Hückel theory
|
141 |
+
# * Neglect of diatomic differential overlap
|
142 |
+
# * Semiempirical bond-order potentials (BOPs)
|
143 |
+
# * Semiempirical nuclear quantum effects (NQEs)
|
144 |
+
|
145 |
+
# **VI. Response Properties**
|
146 |
+
|
147 |
+
# * Nuclear forces (e.g., F = -Π)
|
148 |
+
# * Hessian calculations (e.g., second derivative of energy with respect to nuclear positions)
|
149 |
+
# * Energy conserving forces (e.g., dipole moments)
|
150 |
+
|
151 |
+
# **VII. Applications of ML in CompChem**
|
152 |
+
|
153 |
+
# * Predicting molecular and material properties
|
154 |
+
# * Predicting chemical reactions and processes
|
155 |
+
# * Predicting materials properties (e.g., conductivity, optical properties)
|
156 |
+
# * Predicting drug design and development
|
157 |
+
|
158 |
+
# **VIII. Future Directions**
|
159 |
+
|
160 |
+
# * Developing more accurate ML models for CompChem
|
161 |
+
# * Improving the transferability of ML models between different systems
|
162 |
+
# * Using ML to accelerate and improve the discovery of new materials and compounds
|
163 |
+
# '''
|
164 |
+
# generate_mindmap_svg(md)
|
requirements.txt
CHANGED
@@ -4,3 +4,7 @@ langchain-community==0.3.7
|
|
4 |
graphviz==0.20.3
|
5 |
llama-cpp-python==0.3.1
|
6 |
pypdf==5.1.0
|
|
|
|
|
|
|
|
|
|
4 |
graphviz==0.20.3
|
5 |
llama-cpp-python==0.3.1
|
6 |
pypdf==5.1.0
|
7 |
+
llama-cpp-agent==0.2.35
|
8 |
+
huggingface-hub==0.26.2
|
9 |
+
spaces==0.30.4
|
10 |
+
cairosvg==2.7.1
|