nomadicsynth committed on
Commit
c39d6e0
·
1 Parent(s): 1e07fc0

Refactor response handling in generate function and add generate_swanson_style_prompt for bridge detection

Browse files
Files changed (2) hide show
  1. app.py +14 -176
  2. swanson_style_prompt.py +133 -0
app.py CHANGED
@@ -9,6 +9,7 @@ import gradio as gr
9
  import pandas as pd
10
  import spaces
11
  import torch
 
12
  from huggingface_hub import InferenceClient
13
  from sentence_transformers import SentenceTransformer
14
 
@@ -144,87 +145,7 @@ def generate(messages: list[dict[str, str]]) -> str:
144
 
145
  messages.insert(0, system_message)
146
 
147
- response_schema = r"""{
148
- "$schema": "http://json-schema.org/draft-07/schema#",
149
- "title": "Generated schema for Root",
150
- "type": "object",
151
- "properties": {
152
- "reasoning": {
153
- "type": "string"
154
- },
155
- "key_connections": {
156
- "type": "array",
157
- "items": {
158
- "type": "object",
159
- "properties": {
160
- "connection": {
161
- "type": "string"
162
- },
163
- "description": {
164
- "type": "string"
165
- }
166
- },
167
- "required": [
168
- "connection",
169
- "description"
170
- ]
171
- }
172
- },
173
- "synergies_and_complementarities": {
174
- "type": "array",
175
- "items": {
176
- "type": "object",
177
- "properties": {
178
- "type": {
179
- "type": "array",
180
- "items": {
181
- "type": "string"
182
- }
183
- },
184
- "description": {
185
- "type": "string"
186
- }
187
- },
188
- "required": [
189
- "type",
190
- "description"
191
- ]
192
- }
193
- },
194
- "research_potential": {
195
- "type": "array",
196
- "items": {
197
- "type": "object",
198
- "properties": {
199
- "potential": {
200
- "type": "string"
201
- },
202
- "description": {
203
- "type": "string"
204
- }
205
- },
206
- "required": [
207
- "potential",
208
- "description"
209
- ]
210
- }
211
- },
212
- "rating": {
213
- "type": "number"
214
- },
215
- "confidence": {
216
- "type": "number"
217
- }
218
- },
219
- "required": [
220
- "reasoning",
221
- "key_connections",
222
- "synergies_and_complementarities",
223
- "research_potential",
224
- "rating",
225
- "confidence"
226
- ]
227
- }"""
228
 
229
  response_format = {
230
  "type": "json",
@@ -265,70 +186,7 @@ def analyse_abstracts(query_abstract: str, compare_abstract: dict) -> str:
265
  if not compare_abstract["abstract"].strip():
266
  return "Invalid compare_abstract format. Expected a non-empty string."
267
 
268
- messages = [
269
- {
270
- "role": "user",
271
- "content": f"""You are trained in evaluating conceptual and methodological connections between research papers. Please **identify and analyze the reasoning-based links** between the following two papers:
272
-
273
- Paper 1 Abstract:
274
- {query_abstract}
275
-
276
- Paper 2 Abstract:
277
- {compare_abstract["abstract"]}
278
-
279
- In your evaluation, consider the following dimensions:
280
-
281
- * **Methodological Cross-Pollination**: Do the methods or approaches from one paper **directly inform, enhance, or contrast with** the other?
282
- * **Principle or Mechanism Extension**: Do the papers **share core principles, mechanisms, or assumptions** that could be **combined or extended** to generate new understanding or tools?
283
- * **Interdisciplinary Bridges**: Are there clear opportunities for **knowledge transfer or collaboration** across fields or problem domains?
284
- * **Solution or Application Overlap**: Can the solutions, frameworks, or applications in one paper be **adapted or repurposed** to benefit the work in the other, leading to **tangible, novel outcomes**?
285
-
286
- Assess these connections in both directions (Paper 1 → Paper 2 and Paper 2 → Paper 1). Focus on **relevant and practically meaningful links** — especially those that might be **missed in practice** due to the sheer volume of publications or the separation between research communities. These are often connections that would be **immediately apparent to an expert** familiar with both papers, but easily overlooked otherwise.
287
-
288
- Return a valid JSON object in the following structure:
289
- {{
290
- "reasoning": "Step-by-step conceptual analysis of how the papers relate, highlighting **key connections**, complementary methods, or shared ideas. Emphasize the most **relevant, practically useful takeaways**, and use markdown bold to highlight major points.",
291
-
292
- "key_connections": [
293
- {{
294
- "connection": "connection 1",
295
- "description": "1–2 sentence explanation of the **main conceptual or methodological link**, emphasizing its practical or theoretical relevance."
296
- }},
297
- ...
298
- ],
299
-
300
- "complementarities": [
301
- {{
302
- "type": ["Methodological Cross-Pollination", "Principle or Mechanism Extension", "Interdisciplinary Bridges", "Solution or Application Overlap"], # Use only the most relevant label per entry
303
- "description": "A concise explanation (1–2 sentences) of the **identified complementarity** or **productive relationship**, including a specific example or outcome it could enable."
304
- }},
305
- ...
306
- ],
307
-
308
- "research_potential": [
309
- {{
310
- "potential": "Potential application or outcome 1",
311
- "description": "1–2 sentence explanation of the **concrete potential impact**, framed in terms of a **realistic scenario or use case**."
312
- }},
313
- ...
314
- ],
315
-
316
- "rating": 1-5, # Overall strength of the connection:
317
- # 1 = No meaningful connection
318
- # 2 = Weak or speculative connection
319
- # 3 = Plausible but unproven connection
320
- # 4 = Solid connection with future potential
321
- # 5 = Strong, well-aligned connection with immediate, valuable implications
322
-
323
- "confidence": 0.0-1.0 # Confidence score in your assessment (e.g., 0.85 for high confidence, 1.0 for absolute certainty)
324
- # Note: The confidence score should reflect your level of certainty in the analysis, not the strength of the connection itself.
325
- # A score of 0.0 indicates no confidence in the analysis, while 1.0 indicates absolute certainty.
326
- }}
327
-
328
- Return only the JSON object. All key names and string values must be in double quotes.
329
- """,
330
- },
331
- ]
332
 
333
  # Generate analysis
334
  try:
@@ -342,38 +200,18 @@ Return only the JSON object. All key names and string values must be in double q
342
  except Exception as e:
343
  return f"Error: {e}"
344
 
345
- # Format the output as markdown for better display
346
- key_connections = ""
347
- synergies_and_complementarities = ""
348
- research_potential = ""
349
- if "key_connections" in output:
350
- for connection in output["key_connections"]:
351
- key_connections += f"- {connection['connection']}: {connection['description']}\n"
352
-
353
- if "synergies_and_complementarities" in output:
354
- for synergy in output["synergies_and_complementarities"]:
355
- synergies_and_complementarities += f"- {', '.join(synergy['type'])}: {synergy['description']}\n"
356
-
357
- if "research_potential" in output:
358
- for potential in output["research_potential"]:
359
- research_potential += f"- {potential['potential']}: {potential['description']}\n"
360
 
361
- formatted_output = f"""## Synergy Analysis
362
-
363
- **Rating**: {'★' * output['rating']}{'☆' * (5-output['rating'])} **Confidence**: {'★' * round(output['confidence'] * 5)}{'☆' * round((1-output['confidence']) * 5)}
364
-
365
- ### Key Connections
366
- {key_connections}
367
-
368
- ### Synergies and Complementarities
369
- {synergies_and_complementarities}
370
-
371
- ### Research Potential
372
- {research_potential}
373
-
374
- ### Reasoning
375
- {output['reasoning']}
376
- """
377
  return formatted_output
378
  # return '```"""\n' + output + '\n"""```'
379
 
 
9
  import pandas as pd
10
  import spaces
11
  import torch
12
+ from swanson_style_prompt import generate_swanson_style_prompt, get_json_schema
13
  from huggingface_hub import InferenceClient
14
  from sentence_transformers import SentenceTransformer
15
 
 
145
 
146
  messages.insert(0, system_message)
147
 
148
+ response_schema = get_json_schema()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  response_format = {
151
  "type": "json",
 
186
  if not compare_abstract["abstract"].strip():
187
  return "Invalid compare_abstract format. Expected a non-empty string."
188
 
189
+ messages = generate_swanson_style_prompt(query_abstract, compare_abstract["abstract"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
  # Generate analysis
192
  try:
 
200
  except Exception as e:
201
  return f"Error: {e}"
202
 
203
+ # Format the output as markdown
204
+ formatted_output = "# Connection Analysis\n"
205
+ if "bridge_exists" in output and output["bridge_exists"] is False:
206
+ formatted_output += "There is no bridge between the two papers."
207
+ formatted_output += "## Explanation\n" + output.get("bridge_explanation", "No explanation provided.")
208
+ elif "bridge_exists" in output and output["bridge_exists"] is True:
209
+ formatted_output += "## Bridge Concept\n" + output.get("bridge_concept", "Unknown")
210
+ formatted_output += "\n## Explanation\n" + output.get("bridge_explanation", "No explanation provided.")
211
+ formatted_output += "\n## Hypothesis\n" + output.get("hypothesis", "No hypothesis provided.")
212
+ else:
213
+ formatted_output = "Invalid output format. Please check the model's response: " + output
 
 
 
 
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  return formatted_output
216
  # return '```"""\n' + output + '\n"""```'
217
 
swanson_style_prompt.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def generate_swanson_style_prompt(abstract_1: str, abstract_2: str) -> list[dict[str, str]]:
    """
    Build the chat messages asking a model to find a Swanson-style bridge.

    The prompt explains what a Swanson-style bridge concept is, embeds both
    abstracts, and shows two example JSON answers whose keys are
    ``bridge_exists``, ``bridge_concept``, ``bridge_explanation`` and
    ``hypothesis``.

    Args:
        abstract_1: Abstract of the first paper ("Paper 1" in the prompt).
        abstract_2: Abstract of the second paper ("Paper 2" in the prompt).

    Returns:
        A list containing a single ``{"role": "user", "content": ...}``
        message.
    """
    # Surrounding whitespace in an abstract would otherwise add stray blank
    # lines between the "Paper N Abstract" heading and the text that follows.
    paper_1 = abstract_1.strip()
    paper_2 = abstract_2.strip()

    # Literal braces of the JSON examples are doubled because this is an
    # f-string; only the two abstracts are interpolated.
    prompt = f"""
You are trained to detect **Swanson-style bridges** — **intermediate concepts** that connect two papers *without direct citation links*. These bridges are:
- **Non-obvious** (not explicitly stated in either paper),
- **Interdisciplinary** (linking distinct domains),
- **Logically inferable** (i.e., can be *reasoned from context* in both papers),
- **Specific and inferential** (not vague or tautological).

---

### 🔍 What Makes a Valid Bridge Concept?

In 1986, Don Swanson discovered a bridge between **fish oil** and **Raynaud’s phenomenon** using only literature analysis. Neither paper mentioned "blood viscosity" directly, but both *implied* it:
- **Paper A**: Fish oil reduces blood thickness.
- **Paper B**: Raynaud’s involves poor blood flow.
- **Bridge Concept**: Blood viscosity (inferred from both papers).

Swanson's success was due to:
1. **Inference**: He inferred a shared mechanism not directly mentioned.
2. **Interdisciplinarity**: He linked nutrition and vascular medicine.
3. **Actionable Logic**: He created a testable hypothesis.

---

### 🧠 Critical Instructions

- **Allow inference**: If the papers *together imply* a concept — even if **neither explicitly states it** — it can serve as a bridge.
- **Reject tautological bridges**: If the only shared concept is the general domain (e.g., "Time Series Classification", "Language Models", "Signal Processing"), and no specific, inferred mechanism connects the papers, respond with [NO BRIDGE].
- **Prioritize inferential logic**: A bridge can be derived from the implications of each paper, not just what’s stated.
- **Mechanistic**: A bridge concept must connect the papers through a specific, inferred mechanism, not just a shared problem domain or methodology class.

---

### 🧪 Your Task

Consider these two papers:

**Paper 1 Abstract**:
{paper_1}

**Paper 2 Abstract**:
{paper_2}

**Step-by-Step Instructions**
1. **Bridge Detection**:
- Look for a **plausible, inferable concept** that connects the papers.
- The bridge should:
- Be **logically implied** by both papers.
- Be **specific** and **actionable** (e.g., a biological mechanism, a process, or a regulatory system).
- Be **inferable from context**, not just present as a keyword.
- Be **more specific than the domain** and **logically chainable between the papers**.

2. **Bridge Concept**:
- Name the inferred concept (e.g., "circadian rhythm regulation").
- Must be **specific** (not a domain like "immunology" or "neurology").

3. **Bridge Explanation**:
- Use Swanson-style reasoning:
- How does **Paper 1** → **Bridge Concept** logically follow?
- How does **Bridge Concept** → **Paper 2** logically follow?

4. **Hypothesis**:
- Propose a **testable hypothesis** based on the bridge (e.g., "Melatonin may modulate autoimmune response via circadian rhythm regulation.").

---

### ✅ Example Output (Bridge Exists)

```json
{{
"bridge_exists": true,
"bridge_concept": "Circadian Rhythm Regulation",
"bridge_explanation": "Paper 1 discusses melatonin's role in regulating sleep through circadian rhythm pathways, such as BMAL1/CLOCK signaling. Paper 2 explores how immune cell differentiation (e.g., Th17 and Treg cells) is influenced by circadian rhythm disruptions. Circadian rhythm regulation serves as the inferred bridge, linking melatonin's effects on sleep to immune modulation in autoimmune disease.",
"hypothesis": "Melatonin supplementation may reduce autoimmune activity in multiple sclerosis by modulating circadian rhythm regulation, which influences T-cell differentiation."
}}
```

---

### ❌ Example Output (No Bridge Exists)

```json
{{
"bridge_exists": false,
"bridge_concept": "[NO BRIDGE]",
"bridge_explanation": "The only shared concept between the papers is a general term ('regulation'), which is not specific or inferable as a causal mechanism. No intermediate concept connects the melatonin-based sleep regulation in Paper 1 to the immune modulation in Paper 2 in a logically chainable way.",
"hypothesis": "[NO BRIDGE]"
}}
```"""

    return [{"role": "user", "content": prompt}]
101
+
102
+
103
def get_json_schema() -> str:
    """
    Return the JSON Schema (draft-07) text for the bridge-detection answer.

    The schema describes an object with four required properties:
    ``bridge_exists`` (boolean) and ``bridge_concept``,
    ``bridge_explanation`` and ``hypothesis`` (strings).

    Returns:
        The schema serialized as a JSON string.
    """
    return r"""{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "Generated schema for Root",
    "type": "object",
    "properties": {
        "bridge_exists": {
            "type": "boolean"
        },
        "bridge_concept": {
            "type": "string"
        },
        "bridge_explanation": {
            "type": "string"
        },
        "hypothesis": {
            "type": "string"
        }
    },
    "required": [
        "bridge_exists",
        "bridge_concept",
        "bridge_explanation",
        "hypothesis"
    ]
}"""