Spaces:
Running
Running
import os | |
import re | |
import requests | |
import json | |
from typing import Tuple, List | |
from omegaconf import OmegaConf | |
from typing import Optional | |
from pydantic import Field, BaseModel | |
from vectara_agentic.agent import Agent | |
from vectara_agentic.tools import ToolsFactory, VectaraToolFactory | |
from vectara_agentic.tools_catalog import summarize_text | |
from dotenv import load_dotenv | |
load_dotenv(override=True) | |
citation_description = ''' | |
The citation for a particular case. | |
Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136. | |
''' | |
def extract_components_from_citation(citation: str) -> dict: | |
citation_components = citation.split(' ') | |
volume_num = citation_components[0] | |
reporter = '-'.join(citation_components[1:-1]).replace('.', '').lower() | |
first_page = citation_components[-1] | |
if not volume_num.isdigit(): | |
return {} | |
if not first_page.isdigit(): | |
return {} | |
return {'volume': int(volume_num), 'reporter': reporter, 'first_page': int(first_page)} | |
def create_assistant_tools(cfg): | |
def get_opinion_text( | |
case_citation: str = Field(description = citation_description), | |
summarize: bool = Field(default=True, description="if True returns case summary, otherwise the full text of the case") | |
) -> str: | |
""" | |
Returns the full opinion/ruling text of the case, or the summary if summarize=True. | |
If there is more than one opinion for the case, the type of each opinion is returned with the text, | |
and the opinions (or their summaries) are separated by semicolons (;) | |
Args | |
case_citation (str): the citation for a particular case. Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136. | |
summarize (bool): True to return just a summary of the case, False to return full case text. | |
""" | |
citation_dict = extract_components_from_citation(case_citation) | |
if not citation_dict: | |
return f"Citation is invalid: {case_citation}." | |
reporter = citation_dict['reporter'] | |
volume_num = citation_dict['volume'] | |
first_page = citation_dict['first_page'] | |
response = requests.get(f"https://static.case.law/{reporter}/{volume_num}/cases/{first_page:04d}-01.json") | |
if response.status_code != 200: | |
return f"Case not found; please check the citation {case_citation}." | |
res = json.loads(response.text) | |
if len(res["casebody"]["opinions"]) == 1: | |
text = res["casebody"]["opinions"][0]["text"] | |
output = text if not summarize else summarize_text(text, "law") | |
else: | |
output = "" | |
for opinion in res["casebody"]["opinions"]: | |
text = opinion["text"] if not summarize else summarize_text(opinion["text"], "law") | |
output += f"Opinion type: {opinion['type']}, text: {text};" | |
return output | |
def get_case_document_pdf( | |
case_citation = Field(description = citation_description) | |
) -> str: | |
""" | |
Given a case citation, returns a valid web url to a pdf of the case record | |
""" | |
citation_dict = extract_components_from_citation(case_citation) | |
if not citation_dict: | |
return f"Citation is invalid: {case_citation}." | |
reporter = citation_dict['reporter'] | |
volume_num = citation_dict['volume'] | |
first_page = citation_dict['first_page'] | |
response = requests.get(f"https://static.case.law/{reporter}/{volume_num}/cases/{first_page:04d}-01.json") | |
if response.status_code != 200: | |
return f"Case not found; please check the citation {case_citation}." | |
res = json.loads(response.text) | |
page_number = res["first_page_order"] | |
return f"https://static.case.law/{reporter}/{volume_num}.pdf#page={page_number}" | |
def get_case_document_page( | |
case_citation = Field(description = citation_description) | |
) -> str: | |
""" | |
Given a case citation, returns a valid web url to a page with information about the case. | |
""" | |
citation_dict = extract_components_from_citation(case_citation) | |
if not citation_dict: | |
return f"Citation is invalid: {case_citation}." | |
reporter = citation_dict['reporter'] | |
volume_num = citation_dict['volume'] | |
first_page = citation_dict['first_page'] | |
url = f"https://case.law/caselaw/?reporter={reporter}&volume={volume_num}&case={first_page:04d}-01" | |
response = requests.get(url) | |
if response.status_code != 200: | |
return "Case not found; please check the citation." | |
return url | |
def get_case_name( | |
case_citation = Field(description = citation_description) | |
) -> Tuple[str, str]: | |
""" | |
Given a case citation, returns its name and name abbreviation. | |
""" | |
citation_dict = extract_components_from_citation(case_citation) | |
if not citation_dict: | |
return f"Citation is invalid: {case_citation}.", f"Citation is invalid: {case_citation}." | |
reporter = citation_dict['reporter'] | |
volume_num = citation_dict['volume'] | |
first_page = citation_dict['first_page'] | |
response = requests.get(f"https://static.case.law/{reporter}/{volume_num}/cases/{first_page:04d}-01.json") | |
if response.status_code != 200: | |
return "Case not found", "Case not found" | |
res = json.loads(response.text) | |
return res["name"], res["name_abbreviation"] | |
def get_cited_cases( | |
case_citation = Field(description = citation_description) | |
) -> List[dict]: | |
""" | |
Given a case citation, returns a list of cases that are cited by the opinion of this case. | |
The output is a list of cases, each a dict with the citation, name and name_abbreviation of the case. | |
""" | |
citation_dict = extract_components_from_citation(case_citation) | |
if not citation_dict: | |
return [f"Citation is invalid: {case_citation}."] | |
reporter = citation_dict['reporter'] | |
volume_num = citation_dict['volume'] | |
first_page = citation_dict['first_page'] | |
response = requests.get(f"https://static.case.law/{reporter}/{volume_num}/cases/{first_page:04d}-01.json") | |
if response.status_code != 200: | |
return "Case not found; please check the citation." | |
res = json.loads(response.text) | |
citations = res["cites_to"] | |
res = [] | |
for citation in citations[:10]: | |
name, name_abbreviation = get_case_name(citation["cite"]) | |
res.append({ | |
"citation": citation["cite"], | |
"name": name, | |
"name_abbreviation": name_abbreviation | |
}) | |
return res | |
def validate_url( | |
url = Field(description = "A web url pointing to case-law document") | |
) -> str: | |
""" | |
Given a link, returns whether or not the link is valid. | |
If it is not valid, it should not be used in any output. | |
""" | |
pdf_pattern = re.compile(r'^https://static.case.law/.*') | |
document_pattern = re.compile(r'^https://case.law/caselaw/?reporter=.*') | |
return "URL is valid" if bool(pdf_pattern.match(url)) | bool(document_pattern.match(url)) else "URL is bad" | |
class QueryCaselawArgs(BaseModel): | |
query: str = Field(..., description="The user query.") | |
vec_factory = VectaraToolFactory(vectara_api_key=cfg.api_key, | |
vectara_customer_id=cfg.customer_id, | |
vectara_corpus_id=cfg.corpus_id) | |
summarizer = 'vectara-experimental-summary-ext-2023-12-11-med-omni' | |
ask_caselaw = vec_factory.create_rag_tool( | |
tool_name = "ask_caselaw", | |
tool_description = "A tool for asking questions about case law in Alaska.", | |
tool_args_schema = QueryCaselawArgs, | |
reranker = "chain", rerank_k = 100, | |
rerank_chain = [ | |
{ | |
"type": "slingshot", | |
"cutoff": 0.2 | |
}, | |
{ | |
"type": "mmr", | |
"diversity_bias": 0.1 | |
}, | |
{ | |
"type": "udf", | |
"user_function": "max(1000 * get('$.score') - hours(seconds(to_unix_timestamp(now()) - to_unix_timestamp(datetime_parse(get('$.document_metadata.decision_date'), 'yyyy-MM-dd')))) / 24 / 365, 0)" | |
} | |
], | |
n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005, | |
summary_num_results = 15, | |
vectara_summarizer = summarizer, | |
include_citations = False, | |
) | |
tools_factory = ToolsFactory() | |
return ( | |
[ask_caselaw] + | |
[tools_factory.create_tool(tool) for tool in [ | |
get_opinion_text, | |
get_case_document_pdf, | |
get_case_document_page, | |
get_cited_cases, | |
get_case_name, | |
validate_url | |
]] | |
) | |
def get_agent_config() -> OmegaConf: | |
cfg = OmegaConf.create({ | |
'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']), | |
'corpus_id': str(os.environ['VECTARA_CORPUS_ID']), | |
'corpus_key': str(os.environ['VECTARA_CORPUS_KEY']), | |
'api_key': str(os.environ['VECTARA_API_KEY']), | |
'examples': os.environ.get('QUERY_EXAMPLES', None), | |
'demo_name': "legal-agent", | |
'demo_welcome': "Welcome to the Legal Assistant demo.", | |
'demo_description': "This demo can help you prepare for a court case by providing you information about past court cases in Alaska.", | |
}) | |
return cfg | |
def initialize_agent(_cfg, agent_progress_callback=None): | |
legal_assistant_instructions = """ | |
- You are a helpful legal assistant, with expertise in case law for the state of Alaska. | |
- The ask_caselaw tool is your primary tools for finding information about cases. | |
Do not use your own knowledge to answer questions. | |
- If the ask_caselaw tool responds that it does not have enough information to answer the query, | |
try to rephrase the query and call the tool again. | |
- When presenting the output from ask_caselaw tool, use the metadata provided in the tool's response (references). | |
For example you can include citations, decision date, or case name. | |
- Citations include 3 components: volume number, reporter, and first page. | |
Here are some examples: '253 P.2d 136', '10 Alaska 11', '6 C.M.A. 3' | |
- Never use your internal knowledge to guess citations. Only use citations information provided by a tool or the user. | |
- If two cases have conflicting rulings, assume that the case with the more current ruling date is correct. | |
- If the response is based on cases that are older than 5 years, make sure to inform the user that the information may be outdated, | |
since some case opinions may no longer apply in law. | |
- To summarize the case, use the get_opinion_text with summarize=True. | |
- Use get_opinion_text with summarize=False only when full text is needed. Consider summarizing the text when possible to make things run faster. | |
- If a user wants to learn more about a case, you can call the get_case_document_pdf tool with the citation to get a valid URL. | |
If this is unsuccessful, call the get_case_document_page tool instead. | |
The text displayed with this URL should be the name_abbreviation of the case (DON'T just say the info can be found here). | |
Don't call the get_case_document_page tool until after you have tried the get_case_document_pdf tool. | |
Don't provide URLs from any other tools. Do not generate URLs yourself. | |
- When presenting a URL in your response, use the validate_url tool. | |
- If a user wants to test their argument, use the ask_caselaw tool to gather information about cases related to their argument | |
and the critique_as_judge tool to determine whether their argument is sound or has issues that must be corrected. | |
- Never discuss politics, and always respond politely. | |
""" | |
agent = Agent( | |
tools=create_assistant_tools(_cfg), | |
topic="Case law in Alaska", | |
custom_instructions=legal_assistant_instructions, | |
agent_progress_callback=agent_progress_callback, | |
) | |
return agent | |