Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
c39d6e0
1
Parent(s):
1e07fc0
Refactor response handling in generate function and add generate_swanson_style_prompt for bridge detection
Browse files- app.py +14 -176
- swanson_style_prompt.py +133 -0
app.py
CHANGED
@@ -9,6 +9,7 @@ import gradio as gr
|
|
9 |
import pandas as pd
|
10 |
import spaces
|
11 |
import torch
|
|
|
12 |
from huggingface_hub import InferenceClient
|
13 |
from sentence_transformers import SentenceTransformer
|
14 |
|
@@ -144,87 +145,7 @@ def generate(messages: list[dict[str, str]]) -> str:
|
|
144 |
|
145 |
messages.insert(0, system_message)
|
146 |
|
147 |
-
response_schema =
|
148 |
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
149 |
-
"title": "Generated schema for Root",
|
150 |
-
"type": "object",
|
151 |
-
"properties": {
|
152 |
-
"reasoning": {
|
153 |
-
"type": "string"
|
154 |
-
},
|
155 |
-
"key_connections": {
|
156 |
-
"type": "array",
|
157 |
-
"items": {
|
158 |
-
"type": "object",
|
159 |
-
"properties": {
|
160 |
-
"connection": {
|
161 |
-
"type": "string"
|
162 |
-
},
|
163 |
-
"description": {
|
164 |
-
"type": "string"
|
165 |
-
}
|
166 |
-
},
|
167 |
-
"required": [
|
168 |
-
"connection",
|
169 |
-
"description"
|
170 |
-
]
|
171 |
-
}
|
172 |
-
},
|
173 |
-
"synergies_and_complementarities": {
|
174 |
-
"type": "array",
|
175 |
-
"items": {
|
176 |
-
"type": "object",
|
177 |
-
"properties": {
|
178 |
-
"type": {
|
179 |
-
"type": "array",
|
180 |
-
"items": {
|
181 |
-
"type": "string"
|
182 |
-
}
|
183 |
-
},
|
184 |
-
"description": {
|
185 |
-
"type": "string"
|
186 |
-
}
|
187 |
-
},
|
188 |
-
"required": [
|
189 |
-
"type",
|
190 |
-
"description"
|
191 |
-
]
|
192 |
-
}
|
193 |
-
},
|
194 |
-
"research_potential": {
|
195 |
-
"type": "array",
|
196 |
-
"items": {
|
197 |
-
"type": "object",
|
198 |
-
"properties": {
|
199 |
-
"potential": {
|
200 |
-
"type": "string"
|
201 |
-
},
|
202 |
-
"description": {
|
203 |
-
"type": "string"
|
204 |
-
}
|
205 |
-
},
|
206 |
-
"required": [
|
207 |
-
"potential",
|
208 |
-
"description"
|
209 |
-
]
|
210 |
-
}
|
211 |
-
},
|
212 |
-
"rating": {
|
213 |
-
"type": "number"
|
214 |
-
},
|
215 |
-
"confidence": {
|
216 |
-
"type": "number"
|
217 |
-
}
|
218 |
-
},
|
219 |
-
"required": [
|
220 |
-
"reasoning",
|
221 |
-
"key_connections",
|
222 |
-
"synergies_and_complementarities",
|
223 |
-
"research_potential",
|
224 |
-
"rating",
|
225 |
-
"confidence"
|
226 |
-
]
|
227 |
-
}"""
|
228 |
|
229 |
response_format = {
|
230 |
"type": "json",
|
@@ -265,70 +186,7 @@ def analyse_abstracts(query_abstract: str, compare_abstract: dict) -> str:
|
|
265 |
if not compare_abstract["abstract"].strip():
|
266 |
return "Invalid compare_abstract format. Expected a non-empty string."
|
267 |
|
268 |
-
messages = [
|
269 |
-
{
|
270 |
-
"role": "user",
|
271 |
-
"content": f"""You are trained in evaluating conceptual and methodological connections between research papers. Please **identify and analyze the reasoning-based links** between the following two papers:
|
272 |
-
|
273 |
-
Paper 1 Abstract:
|
274 |
-
{query_abstract}
|
275 |
-
|
276 |
-
Paper 2 Abstract:
|
277 |
-
{compare_abstract["abstract"]}
|
278 |
-
|
279 |
-
In your evaluation, consider the following dimensions:
|
280 |
-
|
281 |
-
* **Methodological Cross-Pollination**: Do the methods or approaches from one paper **directly inform, enhance, or contrast with** the other?
|
282 |
-
* **Principle or Mechanism Extension**: Do the papers **share core principles, mechanisms, or assumptions** that could be **combined or extended** to generate new understanding or tools?
|
283 |
-
* **Interdisciplinary Bridges**: Are there clear opportunities for **knowledge transfer or collaboration** across fields or problem domains?
|
284 |
-
* **Solution or Application Overlap**: Can the solutions, frameworks, or applications in one paper be **adapted or repurposed** to benefit the work in the other, leading to **tangible, novel outcomes**?
|
285 |
-
|
286 |
-
Assess these connections in both directions (Paper 1 → Paper 2 and Paper 2 → Paper 1). Focus on **relevant and practically meaningful links** — especially those that might be **missed in practice** due to the sheer volume of publications or the separation between research communities. These are often connections that would be **immediately apparent to an expert** familiar with both papers, but easily overlooked otherwise.
|
287 |
-
|
288 |
-
Return a valid JSON object in the following structure:
|
289 |
-
{{
|
290 |
-
"reasoning": "Step-by-step conceptual analysis of how the papers relate, highlighting **key connections**, complementary methods, or shared ideas. Emphasize the most **relevant, practically useful takeaways**, and use markdown bold to highlight major points.",
|
291 |
-
|
292 |
-
"key_connections": [
|
293 |
-
{{
|
294 |
-
"connection": "connection 1",
|
295 |
-
"description": "1–2 sentence explanation of the **main conceptual or methodological link**, emphasizing its practical or theoretical relevance."
|
296 |
-
}},
|
297 |
-
...
|
298 |
-
],
|
299 |
-
|
300 |
-
"complementarities": [
|
301 |
-
{{
|
302 |
-
"type": ["Methodological Cross-Pollination", "Principle or Mechanism Extension", "Interdisciplinary Bridges", "Solution or Application Overlap"], # Use only the most relevant label per entry
|
303 |
-
"description": "A concise explanation (1–2 sentences) of the **identified complementarity** or **productive relationship**, including a specific example or outcome it could enable."
|
304 |
-
}},
|
305 |
-
...
|
306 |
-
],
|
307 |
-
|
308 |
-
"research_potential": [
|
309 |
-
{{
|
310 |
-
"potential": "Potential application or outcome 1",
|
311 |
-
"description": "1–2 sentence explanation of the **concrete potential impact**, framed in terms of a **realistic scenario or use case**."
|
312 |
-
}},
|
313 |
-
...
|
314 |
-
],
|
315 |
-
|
316 |
-
"rating": 1-5, # Overall strength of the connection:
|
317 |
-
# 1 = No meaningful connection
|
318 |
-
# 2 = Weak or speculative connection
|
319 |
-
# 3 = Plausible but unproven connection
|
320 |
-
# 4 = Solid connection with future potential
|
321 |
-
# 5 = Strong, well-aligned connection with immediate, valuable implications
|
322 |
-
|
323 |
-
"confidence": 0.0-1.0 # Confidence score in your assessment (e.g., 0.85 for high confidence, 1.0 for absolute certainty)
|
324 |
-
# Note: The confidence score should reflect your level of certainty in the analysis, not the strength of the connection itself.
|
325 |
-
# A score of 0.0 indicates no confidence in the analysis, while 1.0 indicates absolute certainty.
|
326 |
-
}}
|
327 |
-
|
328 |
-
Return only the JSON object. All key names and string values must be in double quotes.
|
329 |
-
""",
|
330 |
-
},
|
331 |
-
]
|
332 |
|
333 |
# Generate analysis
|
334 |
try:
|
@@ -342,38 +200,18 @@ Return only the JSON object. All key names and string values must be in double q
|
|
342 |
except Exception as e:
|
343 |
return f"Error: {e}"
|
344 |
|
345 |
-
# Format the output as markdown
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
if "research_potential" in output:
|
358 |
-
for potential in output["research_potential"]:
|
359 |
-
research_potential += f"- {potential['potential']}: {potential['description']}\n"
|
360 |
|
361 |
-
formatted_output = f"""## Synergy Analysis
|
362 |
-
|
363 |
-
**Rating**: {'★' * output['rating']}{'☆' * (5-output['rating'])} **Confidence**: {'★' * round(output['confidence'] * 5)}{'☆' * round((1-output['confidence']) * 5)}
|
364 |
-
|
365 |
-
### Key Connections
|
366 |
-
{key_connections}
|
367 |
-
|
368 |
-
### Synergies and Complementarities
|
369 |
-
{synergies_and_complementarities}
|
370 |
-
|
371 |
-
### Research Potential
|
372 |
-
{research_potential}
|
373 |
-
|
374 |
-
### Reasoning
|
375 |
-
{output['reasoning']}
|
376 |
-
"""
|
377 |
return formatted_output
|
378 |
# return '```"""\n' + output + '\n"""```'
|
379 |
|
|
|
9 |
import pandas as pd
|
10 |
import spaces
|
11 |
import torch
|
12 |
+
from swanson_style_prompt import generate_swanson_style_prompt, get_json_schema
|
13 |
from huggingface_hub import InferenceClient
|
14 |
from sentence_transformers import SentenceTransformer
|
15 |
|
|
|
145 |
|
146 |
messages.insert(0, system_message)
|
147 |
|
148 |
+
response_schema = get_json_schema()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
response_format = {
|
151 |
"type": "json",
|
|
|
186 |
if not compare_abstract["abstract"].strip():
|
187 |
return "Invalid compare_abstract format. Expected a non-empty string."
|
188 |
|
189 |
+
messages = generate_swanson_style_prompt(query_abstract, compare_abstract["abstract"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
# Generate analysis
|
192 |
try:
|
|
|
200 |
except Exception as e:
|
201 |
return f"Error: {e}"
|
202 |
|
203 |
+
# Format the output as markdown
|
204 |
+
formatted_output = "# Connection Analysis\n"
|
205 |
+
if "bridge_exists" in output and output["bridge_exists"] is False:
|
206 |
+
formatted_output += "There is no bridge between the two papers."
|
207 |
+
formatted_output += "## Explanation\n" + output.get("bridge_explanation", "No explanation provided.")
|
208 |
+
elif "bridge_exists" in output and output["bridge_exists"] is True:
|
209 |
+
formatted_output += "## Bridge Concept\n" + output.get("bridge_concept", "Unknown")
|
210 |
+
formatted_output += "\n## Explanation\n" + output.get("bridge_explanation", "No explanation provided.")
|
211 |
+
formatted_output += "\n## Hypothesis\n" + output.get("hypothesis", "No hypothesis provided.")
|
212 |
+
else:
|
213 |
+
formatted_output = "Invalid output format. Please check the model's response: " + output
|
|
|
|
|
|
|
|
|
214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
return formatted_output
|
216 |
# return '```"""\n' + output + '\n"""```'
|
217 |
|
swanson_style_prompt.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def generate_swanson_style_prompt(abstract_1: str, abstract_2: str) -> list[dict[str, str]]:
    """Build the chat messages that ask a model for a Swanson-style bridge.

    The prompt explains what a Swanson-style bridge is, gives the fish-oil /
    Raynaud's example, lists the acceptance rules, embeds both abstracts and
    ends with two JSON example replies (bridge found / no bridge).

    Args:
        abstract_1: Abstract of the first paper; inserted verbatim.
        abstract_2: Abstract of the second paper; inserted verbatim.

    Returns:
        A list holding a single message dict with ``"role": "user"`` and the
        rendered prompt under ``"content"``.
    """
    # The literal braces of the JSON examples are doubled ({{ / }}) because
    # this is an f-string; only {abstract_1} and {abstract_2} are substituted.
    prompt = f"""
You are trained to detect **Swanson-style bridges** — **intermediate concepts** that connect two papers *without direct citation links*. These bridges are:
- **Non-obvious** (not explicitly stated in either paper),
- **Interdisciplinary** (linking distinct domains),
- **Logically inferable** (i.e., can be *reasoned from context* in both papers),
- **Specific and inferential** (not vague or tautological).

---

### 🔍 What Makes a Valid Bridge Concept?

In 1986, Don Swanson discovered a bridge between **fish oil** and **Raynaud’s phenomenon** using only literature analysis. Neither paper mentioned "blood viscosity" directly, but both *implied* it:
- **Paper A**: Fish oil reduces blood thickness.
- **Paper B**: Raynaud’s involves poor blood flow.
- **Bridge Concept**: Blood viscosity (inferred from both papers).

Swanson's success was due to:
1. **Inference**: He inferred a shared mechanism not directly mentioned.
2. **Interdisciplinarity**: He linked nutrition and vascular medicine.
3. **Actionable Logic**: He created a testable hypothesis.

---

### 🧠 Critical Instructions

- **Allow inference**: If the papers *together imply* a concept — even if **neither explicitly states it** — it can serve as a bridge.
- **Reject tautological bridges**: If the only shared concept is the general domain (e.g., "Time Series Classification", "Language Models", "Signal Processing"), and no specific, inferred mechanism connects the papers, respond with [NO BRIDGE].
- **Prioritize inferential logic**: A bridge can be derived from the implications of each paper, not just what’s stated.
- **Mechanistic**: A bridge concept must connect the papers through a specific, inferred mechanism, not just a shared problem domain or methodology class.

---

### 🧪 Your Task

Consider these two papers:

**Paper 1 Abstract**:
{abstract_1}

**Paper 2 Abstract**:
{abstract_2}

**Step-by-Step Instructions**
1. **Bridge Detection**:
   - Look for a **plausible, inferable concept** that connects the papers.
   - The bridge should:
     - Be **logically implied** by both papers.
     - Be **specific** and **actionable** (e.g., a biological mechanism, a process, or a regulatory system).
     - Be **inferable from context**, not just present as a keyword.
     - Be **more specific than the domain** and **logically chainable between the papers**.

2. **Bridge Concept**:
   - Name the inferred concept (e.g., "circadian rhythm regulation").
   - Must be **specific** (not a domain like "immunology" or "neurology").

3. **Bridge Explanation**:
   - Use Swanson-style reasoning:
     - How does **Paper 1** → **Bridge Concept** logically follow?
     - How does **Bridge Concept** → **Paper 2** logically follow?

4. **Hypothesis**:
   - Propose a **testable hypothesis** based on the bridge (e.g., "Melatonin may modulate autoimmune response via circadian rhythm regulation.").

---

### ✅ Example Output (Bridge Exists)

```json
{{
  "bridge_exists": true,
  "bridge_concept": "Circadian Rhythm Regulation",
  "bridge_explanation": "Paper 1 discusses melatonin's role in regulating sleep through circadian rhythm pathways, such as BMAL1/CLOCK signaling. Paper 2 explores how immune cell differentiation (e.g., Th17 and Treg cells) is influenced by circadian rhythm disruptions. Circadian rhythm regulation serves as the inferred bridge, linking melatonin's effects on sleep to immune modulation in autoimmune disease.",
  "hypothesis": "Melatonin supplementation may reduce autoimmune activity in multiple sclerosis by modulating circadian rhythm regulation, which influences T-cell differentiation."
}}
```

---

### ❌ Example Output (No Bridge Exists)

```json
{{
  "bridge_exists": false,
  "bridge_concept": "[NO BRIDGE]",
  "bridge_explanation": "The only shared concept between the papers is a general term ('regulation'), which is not specific or inferable as a causal mechanism. No intermediate concept connects the melatonin-based sleep regulation in Paper 1 to the immune modulation in Paper 2 in a logically chainable way.",
  "hypothesis": "[NO BRIDGE]"
}}
```"""

    messages = [
        {
            "role": "user",
            "content": prompt,
        },
    ]

    return messages
|
101 |
+
|
102 |
+
|
103 |
+
def get_json_schema() -> str:
    """Return the JSON Schema (draft-07) for the bridge-detection reply.

    The schema describes an object with four required properties:
    ``bridge_exists`` (boolean) and ``bridge_concept``,
    ``bridge_explanation`` and ``hypothesis`` (strings).

    Returns:
        The schema serialized as a JSON string (not a parsed dict).
    """
    # Kept as a literal so the exact schema text is visible in one place;
    # the raw-string prefix guards against accidental escape processing.
    response_schema = r"""{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Generated schema for Root",
  "type": "object",
  "properties": {
    "bridge_exists": {
      "type": "boolean"
    },
    "bridge_concept": {
      "type": "string"
    },
    "bridge_explanation": {
      "type": "string"
    },
    "hypothesis": {
      "type": "string"
    }
  },
  "required": [
    "bridge_exists",
    "bridge_concept",
    "bridge_explanation",
    "hypothesis"
  ]
}"""

    return response_schema
|