Lasdw committed on
Commit
dfcb128
·
1 Parent(s): 97e9ebc

added rate limiting and error handling

Browse files
Files changed (3) hide show
  1. agent.py +487 -682
  2. app.py +22 -0
  3. rate_limiter.py +79 -0
agent.py CHANGED
@@ -270,12 +270,12 @@ def assistant(state: AgentState) -> Dict[str, Any]:
270
  """Assistant node that processes messages and decides on next action."""
271
  from langchain_core.messages import AIMessage # Add import at the start of the function
272
 
273
- print("Assistant Called...\n\n")
274
 
275
  full_current_history = state["messages"]
276
  iteration_count = state.get("iteration_count", 0)
277
  iteration_count += 1 # Increment for the current call
278
- print(f"Current Iteration: {iteration_count}")
279
 
280
  # Prepare messages for the LLM
281
  system_msg = SystemMessage(content=SYSTEM_PROMPT)
@@ -289,7 +289,7 @@ def assistant(state: AgentState) -> Dict[str, Any]:
289
  # Prune if it's time (e.g., after every 5th completed iteration, so check for current iteration 6, 11, etc.)
290
  # Iteration 1-5: no pruning. Iteration 6: prune.
291
  if iteration_count > 5 and (iteration_count - 1) % 5 == 0:
292
- print(f"Pruning message history for LLM call at iteration {iteration_count}.")
293
  llm_input_core_messages = prune_messages_for_llm(core_history, num_recent_to_keep=6)
294
  else:
295
  llm_input_core_messages = core_history
@@ -300,7 +300,6 @@ def assistant(state: AgentState) -> Dict[str, Any]:
300
  # Get response from the assistant
301
  try:
302
  response = chat_with_tools.invoke(messages_for_llm, stop=["Observation:"])
303
- print(f"Assistant response type: {type(response)}")
304
 
305
  # Check for empty response
306
  if response is None or not hasattr(response, 'content') or not response.content or len(response.content.strip()) < 20:
@@ -320,7 +319,6 @@ def assistant(state: AgentState) -> Dict[str, Any]:
320
 
321
  # Create an appropriate fallback response
322
  if last_observation and "python_code" in state.get("current_tool", ""):
323
- # If last tool was Python code, try to formulate a reasonable next step
324
  print("Creating fallback response for empty response after Python code execution")
325
  fallback_content = (
326
  "Thought: I've analyzed the results of the code execution. Based on the observations, "
@@ -361,7 +359,7 @@ def assistant(state: AgentState) -> Dict[str, Any]:
361
  print(f"Created fallback response: {fallback_content[:100]}...")
362
  else:
363
  content_preview = response.content[:300].replace('\n', ' ')
364
- print(f"Response content (first 300 chars): {content_preview}...")
365
  except Exception as e:
366
  print(f"Error in LLM invocation: {str(e)}")
367
  # Create a fallback response in case of LLM errors
@@ -372,7 +370,7 @@ def assistant(state: AgentState) -> Dict[str, Any]:
372
 
373
  # Extract the action JSON from the response text
374
  action_json = extract_json_from_text(response.content)
375
- print(f"Extracted action JSON: {action_json}")
376
 
377
  assistant_response_message = AIMessage(content=response.content)
378
 
@@ -396,13 +394,11 @@ def assistant(state: AgentState) -> Dict[str, Any]:
396
  tool_name = nested_json["action"]
397
  tool_input = nested_json["action_input"]
398
  print(f"Unwrapped nested JSON. New tool: {tool_name}")
399
- print(f"New tool input: {tool_input}")
400
  break
401
  except json.JSONDecodeError:
402
  continue
403
 
404
  print(f"Using tool: {tool_name}")
405
- print(f"Tool input: {tool_input}")
406
 
407
  tool_call_id = f"call_{random.randint(1000000, 9999999)}"
408
 
@@ -413,6 +409,7 @@ def assistant(state: AgentState) -> Dict[str, Any]:
413
  state_update["current_tool"] = None
414
  state_update["action_input"] = None
415
 
 
416
  return state_update
417
 
418
  def extract_json_from_text(text: str) -> dict:
@@ -651,754 +648,562 @@ def extract_json_from_text(text: str) -> dict:
651
 
652
  def python_code_node(state: AgentState) -> Dict[str, Any]:
653
  """Node that executes Python code."""
654
- print("Python Code Tool Called...\n\n")
655
 
656
- # Extract tool arguments
657
- action_input = state.get("action_input", {})
658
- print(f"Python code action_input: {action_input}")
659
- print(f"Action input type: {type(action_input)}")
660
-
661
- # Get the code string
662
- code = ""
663
- if isinstance(action_input, dict):
664
- code = action_input.get("code", "")
665
- elif isinstance(action_input, str):
666
- code = action_input
667
-
668
- print(f"Original code field (first 100 chars): {code[:100]}")
669
-
670
- def extract_code_from_json(json_str):
671
- """Recursively extract code from nested JSON structures."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
  try:
673
- parsed = json.loads(json_str)
674
- if isinstance(parsed, dict):
675
- # Check for direct code field
676
- if "code" in parsed:
677
- return parsed["code"]
678
- # Check for nested action_input structure
679
- if "action_input" in parsed:
680
- inner_input = parsed["action_input"]
681
- if isinstance(inner_input, dict):
682
- if "code" in inner_input:
683
- return inner_input["code"]
684
- # If inner_input is also JSON string, recurse
685
- if isinstance(inner_input.get("code", ""), str) and inner_input["code"].strip().startswith("{"):
686
- return extract_code_from_json(inner_input["code"])
687
- return json_str
688
- except:
689
- return json_str
690
-
691
- # Handle nested JSON structures
692
- if isinstance(code, str) and code.strip().startswith("{"):
693
- code = extract_code_from_json(code)
694
- print("Extracted code from JSON structure")
695
-
696
- print(f"Final code to execute: {code[:100]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
 
698
- # Execute the code
699
  try:
700
- result = run_python_code(code)
701
- print(f"Code execution result: {result}")
 
 
 
 
 
 
 
 
 
702
 
703
  # Format the observation
704
  tool_message = AIMessage(
705
  content=f"Observation: {result.strip()}"
706
  )
707
 
708
- # Print the observation that will be sent back to the assistant
709
- print("\n=== TOOL OBSERVATION ===")
710
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
711
- print(content_preview)
712
- print("=== END OBSERVATION ===\n")
713
 
714
  # Return the updated state
715
  return {
716
  "messages": state["messages"] + [tool_message],
717
- "current_tool": None, # Reset the current tool
718
- "action_input": None # Clear the action input
719
  }
720
  except Exception as e:
721
- error_message = f"Error executing Python code: {str(e)}"
722
  print(error_message)
723
  tool_message = AIMessage(content=f"Observation: {error_message}")
 
724
  return {
725
  "messages": state["messages"] + [tool_message],
726
  "current_tool": None,
727
  "action_input": None
728
  }
729
 
730
- def webpage_scrape_node(state: AgentState) -> Dict[str, Any]:
731
- """Node that scrapes content from a specific webpage URL."""
732
- print("Webpage Scrape Tool Called...\n\n")
733
-
734
- # Extract tool arguments
735
- action_input = state.get("action_input", {})
736
- print(f"Webpage scrape action_input: {action_input}")
737
-
738
- # Try different ways to extract the URL
739
- url = ""
740
- if isinstance(action_input, dict):
741
- url = action_input.get("url", "")
742
- elif isinstance(action_input, str):
743
- url = action_input
744
-
745
- print(f"Scraping URL: '{url}'")
746
-
747
- # Safety check - don't run with empty URL
748
- if not url:
749
- result = "Error: No URL provided. Please provide a valid URL to scrape."
750
- else:
751
- # Call the webpage scraping function
752
- result = scrape_webpage(url)
753
-
754
- print(f"Scraping result length: {len(result)}")
755
-
756
- # Format the observation to continue the ReAct cycle
757
- # Always prefix with "Observation:" for consistency in the ReAct cycle
758
- tool_message = AIMessage(
759
- content=f"Observation: {result.strip()}"
760
- )
761
-
762
- # Print the observation that will be sent back to the assistant
763
- print("\n=== TOOL OBSERVATION ===")
764
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
765
- print(content_preview)
766
- print("=== END OBSERVATION ===\n")
767
-
768
- # Return the updated state
769
- return {
770
- "messages": state["messages"] + [tool_message],
771
- "current_tool": None, # Reset the current tool
772
- "action_input": None # Clear the action input
773
- }
774
-
775
  def wikipedia_search_node(state: AgentState) -> Dict[str, Any]:
776
  """Node that processes Wikipedia search requests."""
777
- print("Wikipedia Search Tool Called...\n\n")
778
-
779
- # Extract tool arguments
780
- action_input = state.get("action_input", {})
781
- print(f"Wikipedia search action_input: {action_input}")
782
-
783
- # Extract query and num_results
784
- query = ""
785
- num_results = 3 # Default
786
-
787
- if isinstance(action_input, dict):
788
- query = action_input.get("query", "")
789
- if "num_results" in action_input:
790
- try:
791
- num_results = int(action_input["num_results"])
792
- except:
793
- print("Invalid num_results, using default")
794
- elif isinstance(action_input, str):
795
- query = action_input
796
-
797
- print(f"Searching Wikipedia for: '{query}' (max results: {num_results})")
798
 
799
- # Safety check - don't run with empty query
800
- if not query:
801
- result = "Error: No search query provided. Please provide a valid query for Wikipedia search."
802
- else:
803
- # Call the Wikipedia search function
804
- result = wikipedia_search(query, num_results)
805
-
806
- print(f"Wikipedia search result length: {len(result)}")
807
-
808
- # Format the observation to continue the ReAct cycle
809
- tool_message = AIMessage(
810
- content=f"Observation: {result.strip()}"
811
- )
812
-
813
- # Print the observation that will be sent back to the assistant
814
- print("\n=== TOOL OBSERVATION ===")
815
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
816
- print(content_preview)
817
- print("=== END OBSERVATION ===\n")
818
-
819
- # Return the updated state
820
- return {
821
- "messages": state["messages"] + [tool_message],
822
- "current_tool": None, # Reset the current tool
823
- "action_input": None # Clear the action input
824
- }
 
 
 
 
 
 
 
 
 
 
 
825
 
826
  def tavily_search_node(state: AgentState) -> Dict[str, Any]:
827
  """Node that processes Tavily search requests."""
828
- print("Tavily Search Tool Called...\n\n")
829
-
830
- # Extract tool arguments
831
- action_input = state.get("action_input", {})
832
- print(f"Tavily search action_input: {action_input}")
833
-
834
- # Extract query and search_depth
835
- query = ""
836
- search_depth = "basic" # Default
837
-
838
- if isinstance(action_input, dict):
839
- query = action_input.get("query", "")
840
- if "search_depth" in action_input:
841
- depth = action_input["search_depth"]
842
- if depth in ["basic", "comprehensive"]:
843
- search_depth = depth
844
- elif isinstance(action_input, str):
845
- query = action_input
846
-
847
- print(f"Searching Tavily for: '{query}' (depth: {search_depth})")
848
-
849
- # Safety check - don't run with empty query
850
- if not query:
851
- result = "Error: No search query provided. Please provide a valid query for Tavily search."
852
- else:
853
- # Call the Tavily search function
854
- result = tavily_search(query, search_depth)
855
-
856
- print(f"Tavily search result length: {len(result)}")
857
-
858
- # Format the observation to continue the ReAct cycle
859
- tool_message = AIMessage(
860
- content=f"Observation: {result.strip()}"
861
- )
862
 
863
- # Print the observation that will be sent back to the assistant
864
- print("\n=== TOOL OBSERVATION ===")
865
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
866
- print(content_preview)
867
- print("=== END OBSERVATION ===\n")
868
-
869
- # Return the updated state
870
- return {
871
- "messages": state["messages"] + [tool_message],
872
- "current_tool": None, # Reset the current tool
873
- "action_input": None # Clear the action input
874
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
875
 
876
  def arxiv_search_node(state: AgentState) -> Dict[str, Any]:
877
  """Node that processes ArXiv search requests."""
878
- print("ArXiv Search Tool Called...\n\n")
879
-
880
- # Extract tool arguments
881
- action_input = state.get("action_input", {})
882
- print(f"ArXiv search action_input: {action_input}")
883
 
884
- # Extract query and max_results
885
- query = ""
886
- max_results = 5 # Default
887
-
888
- if isinstance(action_input, dict):
889
- query = action_input.get("query", "")
890
- if "max_results" in action_input:
891
- try:
892
- max_results = int(action_input["max_results"])
893
- if max_results <= 0 or max_results > 10:
894
- max_results = 5 # Reset to default if out of range
895
- except:
896
- print("Invalid max_results, using default")
897
- elif isinstance(action_input, str):
898
- query = action_input
899
-
900
- print(f"Searching ArXiv for: '{query}' (max results: {max_results})")
901
-
902
- # Safety check - don't run with empty query
903
- if not query:
904
- result = "Error: No search query provided. Please provide a valid query for ArXiv search."
905
- else:
906
- # Call the ArXiv search function
907
- result = arxiv_search(query, max_results)
908
-
909
- print(f"ArXiv search result length: {len(result)}")
910
-
911
- # Format the observation to continue the ReAct cycle
912
- tool_message = AIMessage(
913
- content=f"Observation: {result.strip()}"
914
- )
915
-
916
- # Print the observation that will be sent back to the assistant
917
- print("\n=== TOOL OBSERVATION ===")
918
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
919
- print(content_preview)
920
- print("=== END OBSERVATION ===\n")
921
-
922
- # Return the updated state
923
- return {
924
- "messages": state["messages"] + [tool_message],
925
- "current_tool": None, # Reset the current tool
926
- "action_input": None # Clear the action input
927
- }
928
 
929
  def supabase_operation_node(state: AgentState) -> Dict[str, Any]:
930
  """Node that processes Supabase database operations."""
931
- print("Supabase Operation Tool Called...\n\n")
932
-
933
- # Extract tool arguments
934
- action_input = state.get("action_input", {})
935
- print(f"Supabase operation action_input: {action_input}")
936
-
937
- # Extract required parameters
938
- operation_type = ""
939
- table = ""
940
- data = None
941
- filters = None
942
-
943
- if isinstance(action_input, dict):
944
- operation_type = action_input.get("operation_type", "")
945
- table = action_input.get("table", "")
946
- data = action_input.get("data")
947
- filters = action_input.get("filters")
948
 
949
- print(f"Supabase operation: {operation_type} on table {table}")
950
-
951
- # Safety check
952
- if not operation_type or not table:
953
- result = "Error: Both operation_type and table are required. operation_type should be one of: insert, select, update, delete"
954
- else:
955
- # Call the Supabase operation function
956
- result = supabase_operation(operation_type, table, data, filters)
957
-
958
- print(f"Supabase operation result length: {len(result)}")
959
-
960
- # Format the observation to continue the ReAct cycle
961
- tool_message = AIMessage(
962
- content=f"Observation: {result.strip()}"
963
- )
964
-
965
- # Print the observation that will be sent back to the assistant
966
- print("\n=== TOOL OBSERVATION ===")
967
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
968
- print(content_preview)
969
- print("=== END OBSERVATION ===\n")
970
-
971
- # Return the updated state
972
- return {
973
- "messages": state["messages"] + [tool_message],
974
- "current_tool": None, # Reset the current tool
975
- "action_input": None # Clear the action input
976
- }
 
 
 
 
 
 
 
 
 
977
 
978
  def excel_to_text_node(state: AgentState) -> Dict[str, Any]:
979
  """Node that processes Excel to Markdown table conversions."""
980
- print("Excel to Text Tool Called...\n\n")
981
-
982
- # Extract tool arguments
983
- action_input = state.get("action_input", {})
984
- print(f"Excel to text action_input: {action_input}")
985
-
986
- # Extract required parameters
987
- excel_path = ""
988
- sheet_name = None
989
- file_content = None
990
 
991
- if isinstance(action_input, dict):
992
- excel_path = action_input.get("excel_path", "")
993
- sheet_name = action_input.get("sheet_name")
994
-
995
- # Check if there's attached file content (base64 encoded) directly in the action_input
996
- if "file_content" in action_input and action_input["file_content"]:
997
- try:
998
- file_content = base64.b64decode(action_input["file_content"])
999
- print(f"Decoded attached file content, size: {len(file_content)} bytes")
1000
- except Exception as e:
1001
- print(f"Error decoding file content from action_input: {e}")
1002
 
1003
- # Check if we should use a file from the attachments dictionary
1004
- if not file_content and excel_path and "attachments" in state and excel_path in state["attachments"]:
1005
- try:
1006
- attachment_data = state["attachments"][excel_path]
1007
- if attachment_data: # Make sure it's not empty
1008
- file_content = base64.b64decode(attachment_data)
1009
- print(f"Using attachment '{excel_path}' from state, size: {len(file_content)} bytes")
1010
- except Exception as e:
1011
- print(f"Error using attachment {excel_path}: {e}")
1012
-
1013
- print(f"Excel to text: path={excel_path}, sheet={sheet_name or 'default'}, has_attachment={file_content is not None}")
1014
-
1015
- # Safety check
1016
- if not excel_path and not file_content:
1017
- result = "Error: Either Excel file path or file content is required"
1018
- elif not file_content:
1019
- # If we have a path but no content, check if it's a local file that exists
1020
- local_file_path = Path(excel_path).expanduser().resolve()
1021
- if local_file_path.is_file():
1022
- # Local file exists, use it directly
1023
- result = excel_to_text(str(local_file_path), sheet_name, None)
1024
  else:
1025
- # No file content and path doesn't exist as a local file
1026
- result = f"Error: Excel file not found at {local_file_path} and no attachment data available"
1027
- else:
1028
- # We have file content, use it
1029
- result = excel_to_text(excel_path, sheet_name, file_content)
1030
-
1031
- print(f"Excel to text result length: {len(result)}")
1032
-
1033
- # Format the observation to continue the ReAct cycle
1034
- tool_message = AIMessage(
1035
- content=f"Observation: {result.strip()}"
1036
- )
1037
-
1038
- # Print the observation that will be sent back to the assistant
1039
- print("\n=== TOOL OBSERVATION ===")
1040
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
1041
- print(content_preview)
1042
- print("=== END OBSERVATION ===\n")
1043
-
1044
- # Return the updated state
1045
- return {
1046
- "messages": state["messages"] + [tool_message],
1047
- "current_tool": None, # Reset the current tool
1048
- "action_input": None # Clear the action input
1049
- }
 
1050
 
1051
- # Add a new node function for processing YouTube videos
1052
  def process_youtube_video_node(state: AgentState) -> Dict[str, Any]:
1053
  """Node that processes YouTube videos."""
1054
- print("YouTube Video Processing Tool Called...\n\n")
1055
-
1056
- # Extract tool arguments
1057
- action_input = state.get("action_input", {})
1058
- print(f"YouTube video processing action_input: {action_input}")
1059
 
1060
- # Extract URL and other parameters
1061
- url = ""
1062
- summarize = True # Default
1063
-
1064
- if isinstance(action_input, dict):
1065
- url = action_input.get("url", "")
1066
- # Check if summarize parameter exists and is a boolean
1067
- if "summarize" in action_input:
 
 
 
 
 
 
1068
  try:
1069
- summarize = bool(action_input["summarize"])
1070
- except:
1071
- print("Invalid summarize parameter, using default (True)")
1072
- elif isinstance(action_input, str):
1073
- # If action_input is just a string, assume it's the URL
1074
- url = action_input
1075
-
1076
- print(f"Processing YouTube video: '{url}' (summarize: {summarize})")
1077
-
1078
- # Safety check - don't run with empty URL
1079
- if not url:
1080
- result = "Error: No URL provided. Please provide a valid YouTube URL."
1081
- elif not url.startswith(("http://", "https://")) or not ("youtube.com" in url or "youtu.be" in url):
1082
- result = f"Error: Invalid YouTube URL format: {url}. Please provide a valid URL starting with http:// or https:// and containing youtube.com or youtu.be."
1083
- else:
1084
- # Call the YouTube processing function
1085
- try:
1086
- result = process_youtube_video(url, summarize)
1087
- except Exception as e:
1088
- result = f"Error processing YouTube video: {str(e)}\n\nThis could be due to:\n- The video is private or has been removed\n- Network connectivity issues\n- YouTube API changes\n- Rate limiting"
1089
-
1090
- print(f"YouTube processing result length: {len(result)}")
1091
-
1092
- # Format the observation to continue the ReAct cycle
1093
- tool_message = AIMessage(
1094
- content=f"Observation: {result.strip()}"
1095
- )
1096
-
1097
- # Print the observation that will be sent back to the assistant
1098
- print("\n=== TOOL OBSERVATION ===")
1099
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
1100
- print(content_preview)
1101
- print("=== END OBSERVATION ===\n")
1102
-
1103
- # Return the updated state
1104
- return {
1105
- "messages": state["messages"] + [tool_message],
1106
- "current_tool": None, # Reset the current tool
1107
- "action_input": None # Clear the action input
1108
- }
1109
 
1110
- # Add after the existing tool nodes:
1111
  def transcribe_audio_node(state: AgentState) -> Dict[str, Any]:
1112
  """Node that processes audio transcription requests."""
1113
- print("Audio Transcription Tool Called...\n\n")
1114
-
1115
- # Extract tool arguments
1116
- action_input = state.get("action_input", {})
1117
- print(f"Audio transcription action_input: {action_input}")
1118
 
1119
- # Extract required parameters
1120
- audio_path = ""
1121
- language = None
1122
- file_content = None
1123
-
1124
- if isinstance(action_input, dict):
1125
- audio_path = action_input.get("audio_path", "")
1126
- language = action_input.get("language")
1127
-
1128
- # Check if there's attached file content (base64 encoded) directly in the action_input
1129
- if "file_content" in action_input and action_input["file_content"]:
1130
- try:
1131
- file_content = base64.b64decode(action_input["file_content"])
1132
- print(f"Decoded attached audio file content, size: {len(file_content)} bytes")
1133
- except Exception as e:
1134
- print(f"Error decoding file content from action_input: {e}")
1135
 
1136
- # Check if we should use a file from the attachments dictionary
1137
- if not file_content and audio_path and "attachments" in state and audio_path in state["attachments"]:
1138
- try:
1139
- attachment_data = state["attachments"][audio_path]
1140
- if attachment_data: # Make sure it's not empty
1141
- file_content = base64.b64decode(attachment_data)
1142
- print(f"Using attachment '{audio_path}' from state, size: {len(file_content)} bytes")
1143
- except Exception as e:
1144
- print(f"Error using attachment {audio_path}: {e}")
1145
-
1146
- print(f"Audio transcription: path={audio_path}, language={language or 'auto-detect'}, has_attachment={file_content is not None}")
1147
-
1148
- # Safety check
1149
- if not audio_path:
1150
- result = "Error: Audio file path is required"
1151
- elif not file_content:
1152
- # If we have a path but no content, check if it's a local file that exists
1153
- local_file_path = Path(audio_path).expanduser().resolve()
1154
- if local_file_path.is_file():
1155
- # Local file exists, use it directly
1156
- result = transcribe_audio(str(local_file_path), None, language)
1157
  else:
1158
- # No file content and path doesn't exist as a local file
1159
- result = f"Error: Audio file not found at {local_file_path} and no attachment data available"
1160
- else:
1161
- # We have file content, use it
1162
- result = transcribe_audio(audio_path, file_content, language)
1163
-
1164
- print(f"Audio transcription result length: {len(result)}")
1165
-
1166
- # Format the observation to continue the ReAct cycle
1167
- tool_message = AIMessage(
1168
- content=f"Observation: {result.strip()}"
1169
- )
1170
-
1171
- # Print the observation that will be sent back to the assistant
1172
- print("\n=== TOOL OBSERVATION ===")
1173
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
1174
- print(content_preview)
1175
- print("=== END OBSERVATION ===\n")
1176
-
1177
- # Return the updated state
1178
- return {
1179
- "messages": state["messages"] + [tool_message],
1180
- "current_tool": None, # Reset the current tool
1181
- "action_input": None # Clear the action input
1182
- }
 
1183
 
1184
  def process_image_node(state: AgentState) -> Dict[str, Any]:
1185
  """Node that processes image analysis requests."""
1186
- print("Image Processing Tool Called...\n\n")
1187
-
1188
- # Extract tool arguments
1189
- action_input = state.get("action_input", {})
1190
- print(f"Image processing action_input: {action_input}")
1191
 
1192
- # Extract required parameters
1193
- image_path = ""
1194
- image_url = None
1195
- analyze_content = True # Default to true
1196
- file_content = None
1197
-
1198
- if isinstance(action_input, dict):
1199
- image_path = action_input.get("image_path", "")
1200
- image_url = action_input.get("image_url")
1201
 
1202
- # Check if analyze_content parameter exists and is a boolean
1203
- if "analyze_content" in action_input:
1204
- try:
1205
- analyze_content = bool(action_input["analyze_content"])
1206
- except:
1207
- print("Invalid analyze_content parameter, using default (True)")
1208
 
1209
- # Check if there's attached file content (base64 encoded) directly in the action_input
1210
- if "file_content" in action_input and action_input["file_content"]:
1211
- try:
1212
- file_content = base64.b64decode(action_input["file_content"])
1213
- print(f"Decoded attached image file content, size: {len(file_content)} bytes")
1214
- except Exception as e:
1215
- print(f"Error decoding file content from action_input: {e}")
1216
 
1217
- # Check if we should use a file from the attachments dictionary
1218
- if not file_content and image_path and "attachments" in state and image_path in state["attachments"]:
1219
- try:
1220
- attachment_data = state["attachments"][image_path]
1221
- if attachment_data: # Make sure it's not empty
1222
- file_content = base64.b64decode(attachment_data)
1223
- print(f"Using attachment '{image_path}' from state, size: {len(file_content)} bytes")
1224
- except Exception as e:
1225
- print(f"Error using attachment {image_path}: {e}")
1226
-
1227
- print(f"Image processing: path={image_path}, url={image_url or 'None'}, analyze_content={analyze_content}, has_attachment={file_content is not None}")
1228
-
1229
- # Safety check
1230
- if not image_path and not image_url and not file_content:
1231
- result = "Error: Either image path, image URL, or file content is required"
1232
- elif not file_content and not image_url:
1233
- # If we have a path but no content, check if it's a local file that exists
1234
- local_file_path = Path(image_path).expanduser().resolve()
1235
- if local_file_path.is_file():
1236
- # Local file exists, use it directly
1237
- result = process_image(str(local_file_path), image_url, None, analyze_content)
1238
- else:
1239
- # No file content and path doesn't exist as a local file
1240
- result = f"Error: Image file not found at {local_file_path} and no attachment data available"
1241
- else:
1242
- # We have file content or URL, use it
1243
- result = process_image(image_path, image_url, file_content, analyze_content)
1244
-
1245
- print(f"Image processing result length: {len(result)}")
1246
-
1247
- # Format the observation to continue the ReAct cycle
1248
- tool_message = AIMessage(
1249
- content=f"Observation: {result.strip()}"
1250
- )
1251
-
1252
- # Print the observation that will be sent back to the assistant
1253
- print("\n=== TOOL OBSERVATION ===")
1254
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
1255
- print(content_preview)
1256
- print("=== END OBSERVATION ===\n")
1257
-
1258
- # Return the updated state
1259
- return {
1260
- "messages": state["messages"] + [tool_message],
1261
- "current_tool": None, # Reset the current tool
1262
- "action_input": None # Clear the action input
1263
- }
1264
 
1265
  def read_file_node(state: AgentState) -> Dict[str, Any]:
1266
  """Node that reads text file contents."""
1267
- print("File Reading Tool Called...\n\n")
1268
 
1269
- # Extract tool arguments
1270
- action_input = state.get("action_input", {})
1271
- print(f"File reading action_input: {action_input}")
1272
-
1273
- # Extract required parameters
1274
- file_path = ""
1275
- line_start = None
1276
- line_end = None
1277
- file_content = None
1278
-
1279
- if isinstance(action_input, dict):
1280
- file_path = action_input.get("file_path", "")
1281
 
1282
- # Check if line range parameters exist
1283
- if "line_start" in action_input:
1284
- try:
1285
- line_start = int(action_input["line_start"])
1286
- except:
1287
- print("Invalid line_start parameter, using default (None)")
1288
 
1289
- if "line_end" in action_input:
1290
- try:
1291
- line_end = int(action_input["line_end"])
1292
- except:
1293
- print("Invalid line_end parameter, using default (None)")
1294
 
1295
- # Check if there's attached file content (base64 encoded) directly in the action_input
1296
- if "file_content" in action_input and action_input["file_content"]:
1297
- try:
1298
- file_content = base64.b64decode(action_input["file_content"])
1299
- print(f"Decoded attached file content, size: {len(file_content)} bytes")
1300
- except Exception as e:
1301
- print(f"Error decoding file content from action_input: {e}")
1302
 
1303
- # Check if we should use a file from the attachments dictionary
1304
- if not file_content and file_path and "attachments" in state and file_path in state["attachments"]:
1305
- try:
1306
- attachment_data = state["attachments"][file_path]
1307
- if attachment_data: # Make sure it's not empty
1308
- file_content = base64.b64decode(attachment_data)
1309
- print(f"Using attachment '{file_path}' from state, size: {len(file_content)} bytes")
1310
- except Exception as e:
1311
- print(f"Error using attachment {file_path}: {e}")
1312
-
1313
- print(f"File reading: path={file_path}, line_range={line_start}-{line_end if line_end else 'end'}, has_attachment={file_content is not None}")
1314
-
1315
- # Safety check
1316
- if not file_path:
1317
- result = "Error: File path is required"
1318
- elif not file_content:
1319
- # If we have a path but no content, check if it's a local file that exists
1320
- local_file_path = Path(file_path).expanduser().resolve()
1321
- if local_file_path.is_file():
1322
- # Local file exists, use it directly
1323
- result = read_file(str(local_file_path), None, line_start, line_end)
1324
- else:
1325
- # No file content and path doesn't exist as a local file
1326
- result = f"Error: File not found at {local_file_path} and no attachment data available"
1327
- else:
1328
- # We have file content, use it
1329
- result = read_file(file_path, file_content, line_start, line_end)
1330
-
1331
- print(f"File reading result length: {len(result)}")
1332
-
1333
- # Format the observation to continue the ReAct cycle
1334
- tool_message = AIMessage(
1335
- content=f"Observation: {result.strip()}"
1336
- )
1337
-
1338
- # Print the observation that will be sent back to the assistant
1339
- print("\n=== TOOL OBSERVATION ===")
1340
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
1341
- print(content_preview)
1342
- print("=== END OBSERVATION ===\n")
1343
-
1344
- # Return the updated state
1345
- return {
1346
- "messages": state["messages"] + [tool_message],
1347
- "current_tool": None, # Reset the current tool
1348
- "action_input": None # Clear the action input
1349
- }
1350
 
1351
  def process_online_document_node(state: AgentState) -> Dict[str, Any]:
1352
  """Node that processes online PDFs and images."""
1353
- print("Online Document Processing Tool Called...\n\n")
1354
 
1355
- # Extract tool arguments
1356
- action_input = state.get("action_input", {})
1357
- print(f"Online document processing action_input: {action_input}")
1358
-
1359
- # Extract URL and document type
1360
- url = ""
1361
- doc_type = "auto" # Default to auto-detection
1362
-
1363
- if isinstance(action_input, dict):
1364
- url = action_input.get("url", "")
1365
- doc_type = action_input.get("doc_type", "auto")
1366
- elif isinstance(action_input, str):
1367
- url = action_input
1368
-
1369
- print(f"Processing online document: '{url}' (type: {doc_type})")
1370
-
1371
- # Safety check - don't run with empty URL
1372
- if not url:
1373
- result = "Error: No URL provided. Please provide a valid URL to process."
1374
- elif not url.startswith(("http://", "https://")):
1375
- result = f"Error: Invalid URL format: {url}. Please provide a valid URL starting with http:// or https://."
1376
- else:
1377
- # Call the online document processing function
1378
- try:
1379
- result = process_online_document(url, doc_type)
1380
- except Exception as e:
1381
- result = f"Error processing online document: {str(e)}\n\nThis could be due to:\n- The document is not accessible\n- Network connectivity issues\n- Unsupported document type\n- Rate limiting"
1382
-
1383
- print(f"Online document processing result length: {len(result)}")
1384
-
1385
- # Format the observation to continue the ReAct cycle
1386
- tool_message = AIMessage(
1387
- content=f"Observation: {result.strip()}"
1388
- )
1389
-
1390
- # Print the observation that will be sent back to the assistant
1391
- print("\n=== TOOL OBSERVATION ===")
1392
- content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
1393
- print(content_preview)
1394
- print("=== END OBSERVATION ===\n")
1395
-
1396
- # Return the updated state
1397
- return {
1398
- "messages": state["messages"] + [tool_message],
1399
- "current_tool": None, # Reset the current tool
1400
- "action_input": None # Clear the action input
1401
- }
1402
 
1403
  # Router function to direct to the correct tool
1404
  def router(state: AgentState) -> str:
 
270
  """Assistant node that processes messages and decides on next action."""
271
  from langchain_core.messages import AIMessage # Add import at the start of the function
272
 
273
+ print("\n=== Assistant Node ===")
274
 
275
  full_current_history = state["messages"]
276
  iteration_count = state.get("iteration_count", 0)
277
  iteration_count += 1 # Increment for the current call
278
+ print(f"Iteration: {iteration_count}")
279
 
280
  # Prepare messages for the LLM
281
  system_msg = SystemMessage(content=SYSTEM_PROMPT)
 
289
  # Prune if it's time (e.g., after every 5th completed iteration, so check for current iteration 6, 11, etc.)
290
  # Iteration 1-5: no pruning. Iteration 6: prune.
291
  if iteration_count > 5 and (iteration_count - 1) % 5 == 0:
292
+ print(f"Pruning message history at iteration {iteration_count}")
293
  llm_input_core_messages = prune_messages_for_llm(core_history, num_recent_to_keep=6)
294
  else:
295
  llm_input_core_messages = core_history
 
300
  # Get response from the assistant
301
  try:
302
  response = chat_with_tools.invoke(messages_for_llm, stop=["Observation:"])
 
303
 
304
  # Check for empty response
305
  if response is None or not hasattr(response, 'content') or not response.content or len(response.content.strip()) < 20:
 
319
 
320
  # Create an appropriate fallback response
321
  if last_observation and "python_code" in state.get("current_tool", ""):
 
322
  print("Creating fallback response for empty response after Python code execution")
323
  fallback_content = (
324
  "Thought: I've analyzed the results of the code execution. Based on the observations, "
 
359
  print(f"Created fallback response: {fallback_content[:100]}...")
360
  else:
361
  content_preview = response.content[:300].replace('\n', ' ')
362
+ print(f"Response preview: {content_preview}...")
363
  except Exception as e:
364
  print(f"Error in LLM invocation: {str(e)}")
365
  # Create a fallback response in case of LLM errors
 
370
 
371
  # Extract the action JSON from the response text
372
  action_json = extract_json_from_text(response.content)
373
+ print(f"Extracted action: {action_json.get('action') if action_json else 'None'}")
374
 
375
  assistant_response_message = AIMessage(content=response.content)
376
 
 
394
  tool_name = nested_json["action"]
395
  tool_input = nested_json["action_input"]
396
  print(f"Unwrapped nested JSON. New tool: {tool_name}")
 
397
  break
398
  except json.JSONDecodeError:
399
  continue
400
 
401
  print(f"Using tool: {tool_name}")
 
402
 
403
  tool_call_id = f"call_{random.randint(1000000, 9999999)}"
404
 
 
409
  state_update["current_tool"] = None
410
  state_update["action_input"] = None
411
 
412
+ print("=== End Assistant Node ===\n")
413
  return state_update
414
 
415
  def extract_json_from_text(text: str) -> dict:
 
648
 
649
def python_code_node(state: AgentState) -> Dict[str, Any]:
    """Node that executes Python code.

    The code is taken from ``state["action_input"]``: the ``"code"`` entry
    when that value is a dict, or the value itself when it is a plain string.
    If the code string is a JSON wrapper (``{"code": ...}`` or
    ``{"action_input": {"code": ...}}``), the wrapper is peeled off, repeatedly
    when nested, before the code is passed to ``run_python_code``.

    Args:
        state: Current agent state; ``"action_input"`` and ``"messages"`` are read.

    Returns:
        A state update whose ``"messages"`` is the existing history plus one
        ``AIMessage`` of the form ``"Observation: ..."`` (the stripped result
        of ``run_python_code`` or an error description), with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Python Code Node ===")

    def extract_code_from_json(json_str):
        """Recursively extract code from nested JSON structures.

        Returns ``json_str`` unchanged when it is not a JSON object carrying a
        string ``"code"`` entry (for example real Python that merely starts
        with ``{``).
        """
        try:
            parsed = json.loads(json_str)
        except (TypeError, ValueError, RecursionError):
            # Not JSON at all (json.JSONDecodeError is a ValueError).
            return json_str
        if not isinstance(parsed, dict):
            return json_str

        # Direct "code" field first, then the nested action_input structure.
        inner = parsed.get("code")
        if inner is None:
            nested = parsed.get("action_input")
            if isinstance(nested, dict):
                inner = nested.get("code")
        if not isinstance(inner, str):
            return json_str

        # The extracted code may itself be another JSON wrapper.
        if inner.strip().startswith("{"):
            return extract_code_from_json(inner)
        return inner

    def observation_update(text):
        """Wrap ``text`` as the observation message and reset the tool fields."""
        tool_message = AIMessage(content=f"Observation: {text}")
        print("=== End Python Code Node ===\n")
        return {
            "messages": state["messages"] + [tool_message],
            "current_tool": None,  # Reset the current tool
            "action_input": None,  # Clear the action input
        }

    try:
        # Extract tool arguments
        action_input = state.get("action_input", {})

        # Get the code string. Only a dict has .get(); a bare string is the
        # code itself, and anything else (e.g. None) means no code was given.
        code = ""
        if isinstance(action_input, dict):
            code = action_input.get("code", "")
        elif isinstance(action_input, str):
            code = action_input
        if not isinstance(code, str):
            # Slicing and strip() below need a string.
            code = "" if code is None else str(code)

        print(f"Input: {code[:100]}...")
        print(f"Original code field (first 100 chars): {code[:100]}")

        # Handle nested JSON structures
        if code.strip().startswith("{"):
            code = extract_code_from_json(code)
            print("Extracted code from JSON structure")

        print(f"Final code to execute: {code[:100]}...")

        # Execute the code
        try:
            result = run_python_code(code)
            print("Execution successful")
            return observation_update(result.strip())
        except Exception as e:
            error_message = f"Error executing Python code: {str(e)}"
            print(error_message)
            return observation_update(error_message)
    except Exception as e:
        error_message = f"Error in Python code node: {str(e)}"
        print(error_message)
        return observation_update(error_message)
734
+
735
def webpage_scrape_node(state: AgentState) -> Dict[str, Any]:
    """Scrape the requested URL and report the outcome as an observation.

    The URL is the ``"url"`` entry of ``state["action_input"]`` when that
    value is a dict, otherwise the value itself. A falsy URL produces an error
    observation without calling ``scrape_webpage``.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Webpage Scrape Node ===")

    def _finish(observation):
        # Shared tail of both paths: wrap the text, log the end marker,
        # and hand back the state update.
        reply = AIMessage(content=observation)
        print("=== End Webpage Scrape Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        payload = state.get("action_input", {})
        if isinstance(payload, dict):
            url = payload.get("url", "")
        else:
            url = payload
        print(f"URL: {url}")

        if url:
            result = scrape_webpage(url)
        else:
            # Nothing to fetch.
            result = "Error: No URL provided. Please provide a valid URL to scrape."

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in webpage scrape node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
775
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
776
def wikipedia_search_node(state: AgentState) -> Dict[str, Any]:
    """Run a Wikipedia search for the assistant and return it as an observation.

    A dict ``action_input`` supplies ``"query"`` and optionally
    ``"num_results"`` (default 3); any other value is used as the query
    itself. A falsy query produces an error observation without calling
    ``wikipedia_search``.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Wikipedia Search Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End Wikipedia Search Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        payload = state.get("action_input", {})
        if isinstance(payload, dict):
            query = payload.get("query", "")
            num_results = payload.get("num_results", 3)
        else:
            query = payload
            num_results = 3
        print(f"Query: {query} (max results: {num_results})")

        if query:
            result = wikipedia_search(query, num_results)
        else:
            # Refuse to search for nothing.
            result = "Error: No search query provided. Please provide a valid query for Wikipedia search."

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in Wikipedia search node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
817
 
818
def tavily_search_node(state: AgentState) -> Dict[str, Any]:
    """Run a Tavily search for the assistant and return it as an observation.

    A dict ``action_input`` supplies ``"query"`` and optionally
    ``"search_depth"`` (default ``"basic"``); any other value is used as the
    query itself. A falsy query produces an error observation without calling
    ``tavily_search``.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Tavily Search Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End Tavily Search Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        payload = state.get("action_input", {})
        given_as_dict = isinstance(payload, dict)
        query = payload.get("query", "") if given_as_dict else payload
        search_depth = payload.get("search_depth", "basic") if given_as_dict else "basic"
        print(f"Query: {query} (depth: {search_depth})")

        if query:
            result = tavily_search(query, search_depth)
        else:
            # Refuse to search for nothing.
            result = "Error: No search query provided. Please provide a valid query for Tavily search."

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in Tavily search node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
859
 
860
def arxiv_search_node(state: AgentState) -> Dict[str, Any]:
    """Run an ArXiv search for the assistant and return it as an observation.

    A dict ``action_input`` supplies ``"query"`` and optionally
    ``"max_results"`` (default 5); any other value is used as the query
    itself. A falsy query produces an error observation without calling
    ``arxiv_search``.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== ArXiv Search Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End ArXiv Search Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        payload = state.get("action_input", {})
        if isinstance(payload, dict):
            query, max_results = payload.get("query", ""), payload.get("max_results", 5)
        else:
            query, max_results = payload, 5
        print(f"Query: {query} (max results: {max_results})")

        if query:
            result = arxiv_search(query, max_results)
        else:
            # Refuse to search for nothing.
            result = "Error: No search query provided. Please provide a valid query for ArXiv search."

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in ArXiv search node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
 
 
 
 
 
 
 
901
 
902
def supabase_operation_node(state: AgentState) -> Dict[str, Any]:
    """Forward a database request to ``supabase_operation`` and report the result.

    ``"operation_type"`` and ``"table"`` are read from a dict
    ``action_input`` (both are empty when it is not a dict). If either is
    falsy an error observation is produced; otherwise ``supabase_operation``
    is called with them plus the ``"data"`` and ``"filters"`` entries.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Supabase Operation Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End Supabase Operation Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        action_input = state.get("action_input", {})
        if isinstance(action_input, dict):
            operation_type = action_input.get("operation_type", "")
            table = action_input.get("table", "")
        else:
            operation_type = ""
            table = ""
        print(f"Operation: {operation_type} on table {table}")

        if operation_type and table:
            # Both present, which also means action_input is a dict here.
            result = supabase_operation(
                operation_type,
                table,
                action_input.get("data"),
                action_input.get("filters"),
            )
        else:
            result = "Error: Both operation_type and table are required. operation_type should be one of: insert, select, update, delete"

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in Supabase operation node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
943
 
944
def excel_to_text_node(state: AgentState) -> Dict[str, Any]:
    """Hand an Excel file to ``excel_to_text`` and report the result.

    ``"excel_path"`` and ``"sheet_name"`` are read from a dict
    ``action_input``. A falsy path produces an error observation; otherwise
    ``excel_to_text`` is called with the path, the sheet name and the
    ``"file_content"`` entry.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Excel to Text Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End Excel to Text Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        action_input = state.get("action_input", {})
        if isinstance(action_input, dict):
            excel_path = action_input.get("excel_path", "")
            sheet_name = action_input.get("sheet_name")
        else:
            excel_path = ""
            sheet_name = None
        print(f"File: {excel_path} (sheet: {sheet_name or 'default'})")

        if excel_path:
            # A non-empty path can only have come from a dict action_input.
            result = excel_to_text(excel_path, sheet_name, action_input.get("file_content"))
        else:
            result = "Error: Excel file path is required"

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in Excel to text node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
985
 
 
986
def process_youtube_video_node(state: AgentState) -> Dict[str, Any]:
    """Validate a YouTube URL, process it, and report the result.

    A dict ``action_input`` supplies ``"url"`` and optionally ``"summarize"``
    (default ``True``); any other value is used as the URL itself. The URL
    must be non-empty, start with ``http://`` or ``https://`` and contain
    ``youtube.com`` or ``youtu.be``; otherwise an error observation is
    produced. Failures raised by ``process_youtube_video`` are turned into an
    explanatory error observation.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== YouTube Video Processing Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End YouTube Video Processing Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        payload = state.get("action_input", {})
        if isinstance(payload, dict):
            url = payload.get("url", "")
            summarize = payload.get("summarize", True)
        else:
            url = payload
            summarize = True
        print(f"URL: {url} (summarize: {summarize})")

        if not url:
            result = "Error: No URL provided. Please provide a valid YouTube URL."
        elif not (url.startswith(("http://", "https://")) and ("youtube.com" in url or "youtu.be" in url)):
            result = f"Error: Invalid YouTube URL format: {url}. Please provide a valid URL starting with http:// or https:// and containing youtube.com or youtu.be."
        else:
            # Processing failures get their own, more descriptive message.
            try:
                result = process_youtube_video(url, summarize)
            except Exception as video_exc:
                result = f"Error processing YouTube video: {video_exc!s}\n\nThis could be due to:\n- The video is private or has been removed\n- Network connectivity issues\n- YouTube API changes\n- Rate limiting"

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in YouTube video processing node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
 
 
 
 
 
 
 
 
 
 
 
 
 
1032
 
 
1033
def transcribe_audio_node(state: AgentState) -> Dict[str, Any]:
    """Hand an audio file to ``transcribe_audio`` and report the result.

    ``"audio_path"`` and ``"language"`` are read from a dict
    ``action_input``. A falsy path produces an error observation; otherwise
    ``transcribe_audio`` is called with the path, the ``"file_content"``
    entry and the language.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Audio Transcription Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End Audio Transcription Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        action_input = state.get("action_input", {})
        from_dict = isinstance(action_input, dict)
        audio_path = action_input.get("audio_path", "") if from_dict else ""
        language = action_input.get("language") if from_dict else None
        print(f"File: {audio_path} (language: {language or 'auto-detect'})")

        if audio_path:
            # A non-empty path can only have come from a dict action_input.
            result = transcribe_audio(audio_path, action_input.get("file_content"), language)
        else:
            result = "Error: Audio file path is required"

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in audio transcription node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
1074
 
1075
def process_image_node(state: AgentState) -> Dict[str, Any]:
    """Hand an image (by path or URL) to ``process_image`` and report the result.

    ``"image_path"``, ``"image_url"`` and ``"analyze_content"`` (default
    ``True``) are read from a dict ``action_input``. When neither a path nor a
    URL is given an error observation is produced; otherwise
    ``process_image`` is called with them plus the ``"file_content"`` entry.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Image Processing Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End Image Processing Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        action_input = state.get("action_input", {})
        if isinstance(action_input, dict):
            image_path = action_input.get("image_path", "")
            image_url = action_input.get("image_url")
            analyze_content = action_input.get("analyze_content", True)
        else:
            image_path = ""
            image_url = None
            analyze_content = True
        print(f"Source: {image_url or image_path} (analyze: {analyze_content})")

        if image_path or image_url:
            # At least one source is set, which also means action_input is a dict.
            result = process_image(
                image_path,
                image_url,
                action_input.get("file_content"),
                analyze_content,
            )
        else:
            result = "Error: Either image path or image URL is required"

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in image processing node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1117
 
1118
def read_file_node(state: AgentState) -> Dict[str, Any]:
    """Hand a text-file request to ``read_file`` and report the result.

    ``"file_path"``, ``"line_start"`` and ``"line_end"`` are read from a dict
    ``action_input``. A falsy path produces an error observation; otherwise
    ``read_file`` is called with the path, the ``"file_content"`` entry and
    the two line bounds.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== File Reading Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End File Reading Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        action_input = state.get("action_input", {})
        if isinstance(action_input, dict):
            file_path = action_input.get("file_path", "")
            line_start = action_input.get("line_start")
            line_end = action_input.get("line_end")
        else:
            file_path = ""
            line_start = None
            line_end = None
        print(f"File: {file_path} (lines: {line_start}-{line_end if line_end else 'end'})")

        if file_path:
            # A non-empty path can only have come from a dict action_input.
            result = read_file(file_path, action_input.get("file_content"), line_start, line_end)
        else:
            result = "Error: File path is required"

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in file reading node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1160
 
1161
def process_online_document_node(state: AgentState) -> Dict[str, Any]:
    """Validate a document URL, process it, and report the result.

    A dict ``action_input`` supplies ``"url"`` and optionally ``"doc_type"``
    (default ``"auto"``); any other value is used as the URL itself. The URL
    must be non-empty and start with ``http://`` or ``https://``; otherwise an
    error observation is produced. Failures raised by
    ``process_online_document`` are turned into an explanatory error
    observation.

    Returns:
        The message history plus one ``AIMessage`` observation, with
        ``"current_tool"`` and ``"action_input"`` reset to ``None``.
    """
    print("\n=== Online Document Processing Node ===")

    def _finish(observation):
        # Common exit for the success and failure paths.
        reply = AIMessage(content=observation)
        print("=== End Online Document Processing Node ===\n")
        return {
            "messages": state["messages"] + [reply],
            "current_tool": None,
            "action_input": None
        }

    try:
        payload = state.get("action_input", {})
        if isinstance(payload, dict):
            url = payload.get("url", "")
            doc_type = payload.get("doc_type", "auto")
        else:
            url = payload
            doc_type = "auto"
        print(f"URL: {url} (type: {doc_type})")

        if not url:
            result = "Error: No URL provided. Please provide a valid URL to process."
        elif not url.startswith(("http://", "https://")):
            result = f"Error: Invalid URL format: {url}. Please provide a valid URL starting with http:// or https://."
        else:
            # Processing failures get their own, more descriptive message.
            try:
                result = process_online_document(url, doc_type)
            except Exception as doc_exc:
                result = f"Error processing online document: {doc_exc!s}\n\nThis could be due to:\n- The document is not accessible\n- Network connectivity issues\n- Unsupported document type\n- Rate limiting"

        return _finish(f"Observation: {result.strip()}")
    except Exception as exc:
        error_message = f"Error in online document processing node: {exc!s}"
        print(error_message)
        return _finish(f"Observation: {error_message}")
 
 
 
 
 
1207
 
1208
  # Router function to direct to the correct tool
1209
  def router(state: AgentState) -> str:
app.py CHANGED
@@ -5,11 +5,16 @@ import inspect
5
  import pandas as pd
6
  import base64
7
  from agent import TurboNerd
 
 
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
  ALLOWED_FILE_EXTENSIONS = [".mp3", ".xlsx", ".py", ".png", ".jpg", ".jpeg", ".gif", ".txt", ".md", ".json", ".csv", ".yml", ".yaml", ".html", ".css", ".js"]
12
 
 
 
 
13
  # --- Basic Agent Definition ---
14
  class BasicAgent:
15
  def __init__(self):
@@ -31,6 +36,19 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
31
  return history, ""
32
 
33
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Initialize agent
35
  agent = TurboNerd()
36
 
@@ -93,6 +111,10 @@ def chat_with_agent(question: str, file_uploads, history: list) -> tuple:
93
  else:
94
  formatted_response = response
95
 
 
 
 
 
96
  # Add question and response to history in the correct format (as tuples)
97
  history.append((question, formatted_response))
98
 
 
5
  import pandas as pd
6
  import base64
7
  from agent import TurboNerd
8
+ from rate_limiter import QueryRateLimiter
9
+ from flask import request
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
  ALLOWED_FILE_EXTENSIONS = [".mp3", ".xlsx", ".py", ".png", ".jpg", ".jpeg", ".gif", ".txt", ".md", ".json", ".csv", ".yml", ".yaml", ".html", ".css", ".js"]
14
 
15
+ # Initialize rate limiter (5 queries per hour)
16
+ query_limiter = QueryRateLimiter(max_queries_per_hour=5)
17
+
18
  # --- Basic Agent Definition ---
19
  class BasicAgent:
20
  def __init__(self):
 
36
  return history, ""
37
 
38
  try:
39
+ # Get client IP or session ID for rate limiting
40
+ user_id = request.remote_addr if request else "127.0.0.1"
41
+
42
+ # Check rate limit
43
+ if not query_limiter.is_allowed(user_id):
44
+ remaining_time = query_limiter.get_time_until_reset(user_id)
45
+ error_message = (
46
+ f"Rate limit exceeded. You can make {query_limiter.max_queries} queries per hour. "
47
+ f"Please wait {int(remaining_time)} seconds before trying again."
48
+ )
49
+ history.append((question, error_message))
50
+ return history, ""
51
+
52
  # Initialize agent
53
  agent = TurboNerd()
54
 
 
111
  else:
112
  formatted_response = response
113
 
114
+ # Add remaining queries info
115
+ remaining_queries = query_limiter.get_remaining_queries(user_id)
116
+ formatted_response += f"\n\n---\nRemaining queries this hour: {remaining_queries}/{query_limiter.max_queries}"
117
+
118
  # Add question and response to history in the correct format (as tuples)
119
  history.append((question, formatted_response))
120
 
rate_limiter.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from collections import defaultdict
3
+ import threading
4
+
5
class QueryRateLimiter:
    """Thread-safe sliding-window limiter on the number of queries per user.

    Each user's accepted queries are stored as a list of ``time.time()``
    timestamps. A timestamp stops counting against the user once it is
    older than the window (one hour by default).
    """

    def __init__(self, max_queries_per_hour: int = 10, window_seconds: float = 3600):
        """
        Initialize rate limiter for queries per hour.

        Args:
            max_queries_per_hour: Maximum number of queries allowed per window
            window_seconds: Length of the sliding window in seconds
                (defaults to one hour)
        """
        self.max_queries = max_queries_per_hour
        self.window_seconds = window_seconds
        self.queries = defaultdict(list)  # user_id -> list of timestamps
        self.lock = threading.Lock()

    def _prune_locked(self, user_id: str, current_time: float) -> list:
        """
        Drop expired timestamps for a user and return the ones still live.

        The caller must already hold ``self.lock``. Users left with no live
        timestamps are removed from ``self.queries`` so the mapping does not
        grow without bound as new user ids (e.g. client IPs) are seen.
        """
        # .get() avoids the defaultdict side effect of inserting an empty
        # list for a user that has never made a query.
        timestamps = self.queries.get(user_id)
        if not timestamps:
            self.queries.pop(user_id, None)
            return []

        cutoff = current_time - self.window_seconds
        recent = [t for t in timestamps if t > cutoff]
        if recent:
            self.queries[user_id] = recent
        else:
            del self.queries[user_id]
        return recent

    def is_allowed(self, user_id: str) -> bool:
        """
        Check if a user is allowed to make another query, and record it if so.

        Args:
            user_id: Unique identifier for the user

        Returns:
            bool: True if query is allowed (and has been counted),
                False if rate limited
        """
        current_time = time.time()

        with self.lock:
            recent = self._prune_locked(user_id, current_time)
            if len(recent) >= self.max_queries:
                return False

            # Indexing the defaultdict (re)creates the entry when the user
            # had no live timestamps; otherwise it is the pruned list.
            self.queries[user_id].append(current_time)
            return True

    def get_remaining_queries(self, user_id: str) -> int:
        """
        Get number of remaining queries for a user in the current window.

        Args:
            user_id: Unique identifier for the user

        Returns:
            int: Number of remaining queries (never negative)
        """
        current_time = time.time()

        with self.lock:
            recent = self._prune_locked(user_id, current_time)
            return max(0, self.max_queries - len(recent))

    def get_time_until_reset(self, user_id: str) -> float:
        """
        Get time in seconds until the user's oldest live query expires.

        Expired timestamps are pruned first, so a stale entry cannot make
        this report 0 while newer queries still count against the user.

        Args:
            user_id: Unique identifier for the user

        Returns:
            float: Seconds until the oldest counted query leaves the window,
                or 0.0 if the user has no counted queries
        """
        current_time = time.time()

        with self.lock:
            recent = self._prune_locked(user_id, current_time)
            if not recent:
                return 0.0

            reset_time = min(recent) + self.window_seconds
            return max(0.0, reset_time - current_time)