milwright committed on
Commit
b3be50c
·
1 Parent(s): f60970a

improve url grounding by including source urls in context and citation instructions

Browse files
Files changed (3) hide show
  1. academic_templates.json +1 -1
  2. app.py +3 -2
  3. space_template.py +5 -4
academic_templates.json CHANGED
@@ -49,7 +49,7 @@
49
  "name": "Course Assistant Example",
50
  "description": "Python support for cultural analytics students",
51
  "theme": "Monochrome",
52
- "system_prompt": "You're a Python guide for CCNY's CSC 10800 where September covers foundations (command line, Jupyter, script anatomy), October builds programming basics (data types through functions) with Activities 1-2, and November-December advances to pandas, network analysis, and data collection with Activities 3-5, culminating in a Social Coding Portfolio. Support diverse learners by first assessing their comfort level and adapt your explanations accordingly. Always provide multiple entry points to concepts: start with the simplest working example that accomplishes the goal, then show incremental improvements and allow students to work and learn at their comfort level while, giving advanced learners paths to explore new concept and expand their programming repertoire.",
53
  "examples": [
54
  "How do I set up a interactive development environment?",
55
  "Where can I find the course schedule and syllabus?",
 
49
  "name": "Course Assistant Example",
50
  "description": "Python support for cultural analytics students",
51
  "theme": "Monochrome",
52
+ "system_prompt": "You're a Python guide for CCNY's CSC 10800 where September covers foundations (command line, Jupyter, script anatomy), October builds programming basics (data types through functions) with Activities 1-2, and November-December advances to pandas, network analysis, and data collection with Activities 3-5, culminating in a Social Coding Portfolio. Support diverse learners by first assessing their comfort level and adapt your explanations accordingly. Always provide multiple entry points to concepts: start with the simplest working example that accomplishes the goal, then show incremental improvements and allow students to work and learn at their comfort level while, giving advanced learners paths to explore new concept and expand their programming repertoire. Expect to complete all responses in under 750 tokens.",
53
  "examples": [
54
  "How do I set up a interactive development environment?",
55
  "Where can I find the course schedule and syllabus?",
app.py CHANGED
@@ -551,7 +551,7 @@ class SpaceGenerator:
551
  try:
552
  content = fetch_url_content(url, max_chars=8000)
553
  if not content.startswith("❌") and not content.startswith("⏱️"):
554
- grounding_context += f"\n**Primary Source {i}:** {content}\n"
555
  except:
556
  pass
557
 
@@ -563,7 +563,7 @@ class SpaceGenerator:
563
  try:
564
  content = fetch_url_content(url, max_chars=2500)
565
  if not content.startswith("❌") and not content.startswith("⏱️"):
566
- grounding_context += f"\n**Secondary Source {i}:** {content}\n"
567
  except:
568
  pass
569
 
@@ -576,6 +576,7 @@ class SpaceGenerator:
576
  system_content += f"\n\nIMPORTANT: You must respond EXCLUSIVELY in {language}. All your responses should be written entirely in {language}, even when user input is in a different language, particularly English."
577
 
578
  if grounding_context:
 
579
  system_content = f"{system_content}\n\n{grounding_context}"
580
 
581
  messages = [{"role": "system", "content": system_content}]
 
551
  try:
552
  content = fetch_url_content(url, max_chars=8000)
553
  if not content.startswith("❌") and not content.startswith("⏱️"):
554
+ grounding_context += f"\n**Primary Source {i} - {url}:**\n{content}\n"
555
  except:
556
  pass
557
 
 
563
  try:
564
  content = fetch_url_content(url, max_chars=2500)
565
  if not content.startswith("❌") and not content.startswith("⏱️"):
566
+ grounding_context += f"\n**Secondary Source {i} - {url}:**\n{content}\n"
567
  except:
568
  pass
569
 
 
576
  system_content += f"\n\nIMPORTANT: You must respond EXCLUSIVELY in {language}. All your responses should be written entirely in {language}, even when user input is in a different language, particularly English."
577
 
578
  if grounding_context:
579
+ system_content += "\n\nIMPORTANT: When providing information from the reference sources below, please cite the specific URL(s) where the information can be found."
580
  system_content = f"{system_content}\n\n{grounding_context}"
581
 
582
  messages = [{"role": "system", "content": system_content}]
space_template.py CHANGED
@@ -213,13 +213,13 @@ def fetch_url_content(url: str, max_length: int = 3000) -> str:
213
  if len(text) > max_length:
214
  text = text[:max_length] + "... [truncated]"
215
 
216
- return f"📄 Content from {{url}}:\\n{{text}}\\n"
217
 
218
  elif any(ct in content_type for ct in ['text/plain', 'application/json']):
219
  text = response.text
220
  if len(text) > max_length:
221
  text = text[:max_length] + "... [truncated]"
222
- return f"📄 Content from {{url}}:\\n{{text}}\\n"
223
 
224
  else:
225
  return f"⚠️ Unsupported content type at {{url}}: {{content_type}}"
@@ -313,7 +313,7 @@ def get_grounding_context() -> str:
313
  _url_content_cache[url] = content
314
 
315
  if not content.startswith("❌") and not content.startswith("⏱️"):
316
- context_parts.append(f"\\n**Primary Source {{i}}:** {{content}}")
317
 
318
  # Process secondary sources (URLs 3+ with 2500 char limit)
319
  secondary_urls = urls[2:]
@@ -327,7 +327,7 @@ def get_grounding_context() -> str:
327
  _url_content_cache[url] = content
328
 
329
  if not content.startswith("❌") and not content.startswith("⏱️"):
330
- context_parts.append(f"\\n**Secondary Source {{i}}:** {{content}}")
331
 
332
  if len(context_parts) > 0:
333
  return "\\n".join(context_parts)
@@ -428,6 +428,7 @@ Get your API key at: https://openrouter.ai/keys"""
428
  system_content += f"\\n\\nIMPORTANT: You must respond EXCLUSIVELY in {{LANGUAGE}}. All your responses should be written entirely in {{LANGUAGE}}, even when user input is in a different language, particularly English."
429
 
430
  if grounding_context:
 
431
  system_content = f"{{system_content}}\\n\\n{{grounding_context}}"
432
  if file_context:
433
  system_content = f"{{system_content}}\\n\\n{{file_context}}"
 
213
  if len(text) > max_length:
214
  text = text[:max_length] + "... [truncated]"
215
 
216
+ return f"📄 **Content from:** {{url}}\\n\\n{{text}}\\n"
217
 
218
  elif any(ct in content_type for ct in ['text/plain', 'application/json']):
219
  text = response.text
220
  if len(text) > max_length:
221
  text = text[:max_length] + "... [truncated]"
222
+ return f"📄 **Content from:** {{url}}\\n\\n{{text}}\\n"
223
 
224
  else:
225
  return f"⚠️ Unsupported content type at {{url}}: {{content_type}}"
 
313
  _url_content_cache[url] = content
314
 
315
  if not content.startswith("❌") and not content.startswith("⏱️"):
316
+ context_parts.append(f"\\n**Primary Source {{i}} - {{url}}:**\\n{{content}}")
317
 
318
  # Process secondary sources (URLs 3+ with 2500 char limit)
319
  secondary_urls = urls[2:]
 
327
  _url_content_cache[url] = content
328
 
329
  if not content.startswith("❌") and not content.startswith("⏱️"):
330
+ context_parts.append(f"\\n**Secondary Source {{i}} - {{url}}:**\\n{{content}}")
331
 
332
  if len(context_parts) > 0:
333
  return "\\n".join(context_parts)
 
428
  system_content += f"\\n\\nIMPORTANT: You must respond EXCLUSIVELY in {{LANGUAGE}}. All your responses should be written entirely in {{LANGUAGE}}, even when user input is in a different language, particularly English."
429
 
430
  if grounding_context:
431
+ system_content += "\\n\\nIMPORTANT: When providing information from the reference sources below, please cite the specific URL(s) where the information can be found."
432
  system_content = f"{{system_content}}\\n\\n{{grounding_context}}"
433
  if file_context:
434
  system_content = f"{{system_content}}\\n\\n{{file_context}}"