Johan713 committed on
Commit bf9cb63 · verified · 1 Parent(s): 0c84c50

Update app2.py

Files changed (1)
  1. app2.py +20 -29
app2.py CHANGED
@@ -10,6 +10,7 @@ import PyPDF2
  import io
  import random
  import docx
+ import os
  from docx import Document
  from docx.shared import Inches
  from datetime import datetime
@@ -47,8 +48,7 @@ except ImportError:
  st.error("Missing dependency: streamlit_lottie. Please install it using 'pip install streamlit-lottie'")
  st.stop()

- # Constants
- AI71_API_KEY = "api71-api-92fc2ef9-9f3c-47e5-a019-18e257b04af2"
+ AI71_API_KEY = os.getenv('AI71_API_KEY')

  # Initialize AI71 client
  try:
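Note on the hunk above: the commit drops the hardcoded AI71 key in favor of an environment lookup via os.getenv. A minimal sketch of how that lookup could be guarded before the client is created (the guard itself is not part of this commit; it simply reuses the st.error/st.stop pattern the file already applies to missing dependencies):

import os
import streamlit as st

# Hypothetical guard, not in the commit: fail fast when the key is absent.
AI71_API_KEY = os.getenv("AI71_API_KEY")
if not AI71_API_KEY:
    st.error("AI71_API_KEY environment variable is not set.")
    st.stop()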
@@ -57,7 +57,6 @@ except Exception as e:
  st.error(f"Failed to initialize AI71 client: {str(e)}")
  st.stop()

- # Initialize chat history and other session state variables
  if "chat_history" not in st.session_state:
  st.session_state.chat_history = []
  if "uploaded_documents" not in st.session_state:
@@ -114,7 +113,7 @@ def get_ai_response(prompt: str) -> str:
  except Exception as e:
  print(f"Streaming failed, falling back to non-streaming request. Error: {e}")
  try:
- # Fall back to non-streaming request
+ # makes it fall back to non-streaming request
  completion = ai71.chat.completions.create(
  model="tiiuae/falcon-180b-chat",
  messages=messages,
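The comment change above sits inside get_ai_response's retry logic. A sketch of the surrounding fallback pattern, assuming the AI71 client exposes an OpenAI-style chat API (the non-streaming call is taken from the hunk; the streaming form and chunk fields are assumptions):

def get_ai_response_sketch(ai71, messages):
    try:
        # Assumed streaming form; the real SDK's chunk structure may differ.
        response = ""
        for chunk in ai71.chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=messages,
            stream=True,
        ):
            response += chunk.choices[0].delta.content or ""
        return response
    except Exception as e:
        print(f"Streaming failed, falling back to non-streaming request. Error: {e}")
        # Non-streaming fallback, matching the call shown in the hunk.
        completion = ai71.chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=messages,
        )
        return completion.choices[0].message.content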
@@ -177,7 +176,7 @@ def search_web(query: str, num_results: int = 3) -> List[Dict[str, str]]:
  results = []
  if "items" in res:
  for item in res["items"]:
- # Check if the result is relevant (you may need to adjust these conditions)
+ # Check if the result is relevant
  if any(keyword in item["title"].lower() or keyword in item["snippet"].lower()
  for keyword in ["law", "legal", "court", "case", "attorney", "lawyer"]):
  result = {
@@ -229,7 +228,7 @@ def perform_web_search(query: str) -> List[Dict[str, Any]]:
  "cost_estimates": cost_estimates
  })

- return results[:3] # Return top 3 results with cost estimates
+ return results[:3] # Return top 3 results with their cost estimates

  def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
  """Performs a comprehensive analysis of the document, including web and Wikipedia searches."""
@@ -246,7 +245,7 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:

  return {
  "document_analysis": document_analysis,
- "related_articles": web_results or [], # Ensure this is always a list
+ "related_articles": web_results or [], # Ensure that this this is always a list
  "wikipedia_summary": wiki_results
  }
  except Exception as e:
@@ -259,7 +258,7 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:

  def search_wikipedia(query: str, sentences: int = 2) -> Dict[str, str]:
  try:
- # Ensure query is a string before slicing
+ # Ensures that the query is a string before slicing
  truncated_query = str(query)[:300]

  # Search Wikipedia
@@ -276,7 +275,6 @@ def search_wikipedia(query: str, sentences: int = 2) -> Dict[str, str]:
  summary = wikipedia.summary(page.title, sentences=sentences, auto_suggest=False)
  return {"summary": summary, "url": page.url, "title": page.title}
  except wikipedia.exceptions.DisambiguationError as e:
- # If it's a disambiguation page, choose the first option
  try:
  page = wikipedia.page(e.options[0], auto_suggest=False)
  summary = wikipedia.summary(page.title, sentences=sentences, auto_suggest=False)
@@ -454,7 +452,7 @@ def query_public_case_law(query: str) -> List[Dict[str, Any]]:

  justia_results = justia_soup.find_all('div', class_='g')

- for result in justia_results[:5]: # Limit to top 5 results
+ for result in justia_results[:5]: # Limits it to top 5 results
  title_elem = result.find('h3')
  link_elem = result.find('a')
  snippet_elem = result.find('div', class_='VwiC3b')
@@ -464,7 +462,7 @@ def query_public_case_law(query: str) -> List[Dict[str, Any]]:
  link = link_elem['href']
  snippet = snippet_elem.text

- # Extract case name and citation from the title
+ # it extract case name and citation from the title
  case_info = title.split(' - ')
  if len(case_info) >= 2:
  case_name = case_info[0]
@@ -518,7 +516,6 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:
  analysis_prompt = f"Analyze the following legal document and provide a summary, potential issues, and key clauses:\n\n{content}"
  document_analysis = get_ai_response(analysis_prompt)

- # Extract main topics or keywords from the document
  topic_extraction_prompt = f"Extract the main topics or keywords from the following document summary:\n\n{document_analysis}"
  topics = get_ai_response(topic_extraction_prompt)

@@ -527,7 +524,7 @@ def comprehensive_document_analysis(content: str) -> Dict[str, Any]:

  return {
  "document_analysis": document_analysis,
- "related_articles": web_results or [], # Ensure this is always a list
+ "related_articles": web_results or [],
  "wikipedia_summary": wiki_results
  }
  except Exception as e:
@@ -647,7 +644,6 @@ def search_web_duckduckgo(query: str, num_results: int = 3, max_retries: int = 3

  service = build("customsearch", "v1", developerKey=api_key)

- # Execute the search request
  res = service.cse().list(q=query, cx=cse_id, num=num_results).execute()

  results = []
@@ -963,7 +959,7 @@ def case_trend_visualizer_ui():
  # Display the Plotly chart
  st.plotly_chart(fig, use_container_width=True)

- # --- Display Statistics ---
+ # Display Statistics
  st.subheader("Case Statistics")
  total_cases = df['Number of Cases'].sum()
  avg_cases = df['Number of Cases'].mean()
@@ -975,13 +971,13 @@ def case_trend_visualizer_ui():
  col2.metric("Average Cases per Year", f"{avg_cases:,.0f}")
  col3.metric("Peak Year", f"{max_year}")

- # --- Trend Description ---
+ # Trend Description
  st.write("Trend Description:", get_trend_description(df))

  if st.session_state.current_data is not None:
  df = st.session_state.current_data

- # --- Interactive Analysis Section ---
+ # Interactive Analysis Section
  st.subheader("Interactive Analysis")

  # Year-over-Year Change
@@ -998,11 +994,11 @@ def case_trend_visualizer_ui():
  ma_fig = px.line(df, x='Year', y=['Number of Cases', 'Moving Average'], title=f"{window}-Year Moving Average")
  st.plotly_chart(ma_fig, use_container_width=True)

- # --- Raw Data ---
+ # Raw Data
  st.subheader("Raw Data")
  st.dataframe(df)

- # --- Download Options ---
+ # Download Options
  csv = df.to_csv(index=False)
  st.download_button(
  label="Download data as CSV",
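For context on the chart in the hunk above: the 'Moving Average' column plotted alongside 'Number of Cases' is presumably derived from the yearly counts with a rolling window. A minimal sketch of that step (the column names and px.line call come from the hunk; the rolling computation and the toy numbers are illustrative assumptions, not part of the commit):

import pandas as pd
import plotly.express as px

# Toy data purely for illustration.
df = pd.DataFrame({
    "Year": list(range(2015, 2025)),
    "Number of Cases": [120, 135, 150, 160, 155, 170, 180, 175, 190, 200],
})
window = 3
# Assumed derivation of the 'Moving Average' column used by the chart.
df["Moving Average"] = df["Number of Cases"].rolling(window=window).mean()
ma_fig = px.line(df, x="Year", y=["Number of Cases", "Moving Average"],
                 title=f"{window}-Year Moving Average")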
@@ -1011,7 +1007,7 @@ def case_trend_visualizer_ui():
  mime="text/csv",
  )

- # --- Additional Information & Data Sources ---
+ # Additional Information & Data Sources
  st.subheader("Additional Information")
  info = get_additional_info(case_type)
  st.markdown(info)
@@ -1346,7 +1342,7 @@ class LegalDataRetriever:
  if author_elem:
  author = author_elem.text.strip()
  elif judges and judges[0] != "Not available":
- author = judges[0] # Assume the first judge is the author if not explicitly stated
+ author = judges[0]

  if author == "Not available":
  self.logger.warning("No author found in the HTML structure, searching in text content")
@@ -1365,24 +1361,21 @@
  return author

  def extract_court_opinion(self, soup):
- # Target the article tag with class col-sm-9 first
  article_div = soup.find('article', class_='col-sm-9')
  if not article_div:
  self.logger.error("Could not find the main article div (col-sm-9).")
  return "Case details not available (main article div not found)."

- # Find the tab-content div within the article div
  opinion_div = article_div.find('div', class_='tab-content')
  if not opinion_div:
  self.logger.error("Could not find the case details content (tab-content div).")
  return "Case details not available (tab-content div not found)."

- # Extract all text from the tab-content div
  case_details = opinion_div.get_text(separator='\n', strip=True)

  # Clean up the text
- case_details = re.sub(r'\n+', '\n', case_details) # Remove multiple newlines
- case_details = re.sub(r'\s+', ' ', case_details) # Remove extra whitespace
+ case_details = re.sub(r'\n+', '\n', case_details)
+ case_details = re.sub(r'\s+', ' ', case_details)

  return case_details

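A quick, self-contained illustration of the two substitutions kept in the hunk above (note that the second pattern, \s+, also collapses the newlines the first one preserves, so the cleaned text ends up on a single line):

import re

raw = "SUPREME COURT\n\n\nOpinion of the Court.\n   Argued   March 1."
case_details = re.sub(r'\n+', '\n', raw)           # collapse runs of blank lines
case_details = re.sub(r'\s+', ' ', case_details)   # collapse all whitespace, including newlines
print(case_details)  # -> "SUPREME COURT Opinion of the Court. Argued March 1."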
@@ -1563,7 +1556,6 @@ def automated_legal_brief_generation_ui():
  mime="text/plain"
  )
  # --- Streamlit App ---
- # Custom CSS to improve the overall look
  st.markdown("""
  <style>
  .reportview-container {
@@ -1776,8 +1768,7 @@ elif feature == "Case Information Retrieval":

  elif feature == "Automated Legal Brief Generation":
  automated_legal_brief_generation_ui()
- # Add a footer with a disclaimer
- # Footer
+
  st.markdown("---")
  st.markdown(
  """
 