Spaces:

InvestmentResearchAI
/

LLM-ADE-dev

Sleeping

App Files Files Community

WilliamGazeley committed on May 8, 2024

Commit

5894c9b

1 Parent(s): c124df1

Add final output agent

Browse files

Files changed (7) hide show

app.py +26 -17
config.py +2 -1
functioncall.py +37 -37
functions.py +0 -52
prompt_assets/output_sys_prompt.yml +10 -0
prompt_assets/sys_prompt.yml +0 -5
requirements.txt +130 -5

app.py CHANGED Viewed

@@ -2,22 +2,14 @@ import os
 import huggingface_hub
 import streamlit as st
 from config import config
-from vllm import LLM, SamplingParams
 from functioncall import ModelInference
-sys_msg = """You are an expert financial advisor named IRAI. You have a comprehensive understanding of finance and investing with experience and expertise in all areas of finance.
-#Objective:
-Answer questions accurately and truthfully given your current knowledge.  You do not have access to up-to-date current market data; this will be available in the future. Answer the question directly.
-#Style and tone:
-Answer in a friendly and engaging manner representing a top female investment professional working at a leading investment bank.
-#Audience:
-The questions will be asked by top technology executives and CFO of large fintech companies and successful startups.
-#Response:
-Direct answer to question, concise yet insightful."""
 @st.cache_resource(show_spinner="Loading model..")
 def init_llm():
-    huggingface_hub.login(token=os.getenv("HF_TOKEN"), new_session=False)
     llm = ModelInference(chat_template='chatml')
     return llm
@@ -31,7 +23,20 @@ def get_response(prompt):
         )
     except Exception as e:
         return f"An error occurred: {str(e)}"
 def main():
     st.title("LLM-ADE 9B Demo")
@@ -41,8 +46,8 @@ def main():
     if st.button("Generate"):
         if input_text:
             with st.spinner('Generating response...'):
-                response_text = get_response(input_text)
-                st.write(response_text)
         else:
             st.warning("Please enter some text to generate a response.")
@@ -50,8 +55,12 @@ llm = init_llm()
 def main_headless():
     while True:
-        input_text = input("Enter your text here: ")
-        print(get_response(input_text))
 if __name__ == "__main__":
-    main_headless()

 import huggingface_hub
 import streamlit as st
 from config import config
+from utils import get_assistant_message
 from functioncall import ModelInference
+from prompter import PromptManager
 @st.cache_resource(show_spinner="Loading model..")
 def init_llm():
+    huggingface_hub.login(token=config.hf_token, new_session=False)
     llm = ModelInference(chat_template='chatml')
     return llm
         )
     except Exception as e:
         return f"An error occurred: {str(e)}"
+def get_output(context, user_input):
+    try:
+        prompt_schema = llm.prompter.read_yaml_file("prompt_assets/output_sys_prompt.yml")
+        sys_prompt = llm.prompter.format_yaml_prompt(prompt_schema, dict()) + \
+            f"Information:\n{context}"
+        convo = [
+            {"role": "system", "content": sys_prompt},
+            {"role": "user", "content": user_input},
+        ]
+        response = llm.run_inference(convo)
+        return get_assistant_message(response, config.chat_template, llm.tokenizer.eos_token)
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
 def main():
     st.title("LLM-ADE 9B Demo")
     if st.button("Generate"):
         if input_text:
             with st.spinner('Generating response...'):
+                agent_resp = get_response(input_text)
+                st.write(get_output(agent_resp, input_text))
         else:
             st.warning("Please enter some text to generate a response.")
 def main_headless():
     while True:
+       input_text = input("Enter your text here: ")
+       agent_resp = get_response(input_text)
+       print('\033[94m' + get_output(agent_resp, input_text) + '\033[0m')
 if __name__ == "__main__":
+    # Choose the console loop or the Streamlit page based on config.headless.
+    if config.headless:
+        main_headless()
+    else:
+        main()

config.py CHANGED Viewed

@@ -3,7 +3,8 @@ from pydantic_settings import BaseSettings
 class Config(BaseSettings):
     hf_token: str = Field(...)
-    model: str = Field("InvestmentResearchAI/LLM-ADE-dev")
     chat_template: str = Field("chatml", description="Chat template for prompt formatting")
     num_fewshot: int | None = Field(None, description="Option to use json mode examples")

 class Config(BaseSettings):
     hf_token: str = Field(...)
+    model_path: str = Field("InvestmentResearchAI/LLM-ADE-dev")
+    headless: bool = Field(False, description="Run in headless mode.")
     chat_template: str = Field("chatml", description="Chat template for prompt formatting")
     num_fewshot: int | None = Field(None, description="Option to use json mode examples")

functioncall.py CHANGED Viewed

@@ -2,9 +2,14 @@ import argparse
 import torch
 import json
 from config import config
 from vllm import LLM, SamplingParams
-from transformers import BitsAndBytesConfig
 import functions
 from prompter import PromptManager
@@ -28,9 +33,17 @@ class ModelInference:
                 bnb_4bit_quant_type="nf4",
                 bnb_4bit_use_double_quant=True,
             )
-        self.model = LLM(model=config.model)
-        self.tokenizer = self.model.get_tokenizer()
         self.tokenizer.pad_token = self.tokenizer.eos_token
         self.tokenizer.padding_side = "left"
@@ -69,17 +82,23 @@ class ModelInference:
         results_dict = f'{{"name": "{function_name}", "content": {function_response}}}'
         return results_dict
-    def run_inference(self, prompt):
-        sampling_params = SamplingParams(
-            temperature=0.8,
-            top_p=0.95,
             repetition_penalty=1.1,
-            max_tokens=500,
-            stop_token_ids=[128009])
-        outputs = self.model.generate([prompt], sampling_params)
-        for output in outputs:
-            return output.outputs[0].text
     def generate_function_call(self, query, chat_template, num_fewshot, max_depth=5):
         try:
@@ -120,7 +139,7 @@ class ModelInference:
                         return
                     completion = self.run_inference(prompt)
-                    recursive_loop(prompt, completion, depth)
                 elif error_message:
                     inference_logger.info(f"Assistant Message:\n{assistant_message}")
                     tool_message += f"<tool_response>\nThere was an error parsing function calls\n Here's the error stack trace: {error_message}\nPlease call the function again with correct syntax<tool_response>"
@@ -132,32 +151,13 @@ class ModelInference:
                         return
                     completion = self.run_inference(prompt)
-                    recursive_loop(prompt, completion, depth)
                 else:
                     inference_logger.info(f"Assistant Message:\n{assistant_message}")
-            recursive_loop(prompt, completion, depth)
         except Exception as e:
             inference_logger.error(f"Exception occurred: {e}")
             raise e
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run recursive function calling loop")
-    parser.add_argument("--model_path", type=str, help="Path to the model folder")
-    parser.add_argument("--chat_template", type=str, default="chatml", help="Chat template for prompt formatting")
-    parser.add_argument("--num_fewshot", type=int, default=None, help="Option to use json mode examples")
-    parser.add_argument("--load_in_4bit", type=str, default="False", help="Option to load in 4bit with bitsandbytes")
-    parser.add_argument("--query", type=str, default="I need the current stock price of Tesla (TSLA)")
-    parser.add_argument("--max_depth", type=int, default=5, help="Maximum number of recursive iteration")
-    args = parser.parse_args()
-    # specify custom model path
-    if args.model_path:
-        inference = ModelInference(args.model_path, args.chat_template, args.load_in_4bit)
-    else:
-        model_path = 'InvestmentResearchAI/LLM-ADE-dev'
-        inference = ModelInference(model_path, args.chat_template, args.load_in_4bit)
-    # Run the model evaluator
-    inference.generate_function_call(args.query, args.chat_template, args.num_fewshot, args.max_depth)

 import torch
 import json
 from config import config
+from typing import List, Dict
 from vllm import LLM, SamplingParams
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig
+)
 import functions
 from prompter import PromptManager
                 bnb_4bit_quant_type="nf4",
                 bnb_4bit_use_double_quant=True,
             )
+        self.model = AutoModelForCausalLM.from_pretrained(
+            config.model_path,
+            trust_remote_code=True,
+            return_dict=True,
+            quantization_config=self.bnb_config,
+            torch_dtype=torch.float16,
+            attn_implementation="flash_attention_2",
+            device_map="auto",
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(config.model_path, trust_remote_code=True)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         self.tokenizer.padding_side = "left"
         results_dict = f'{{"name": "{function_name}", "content": {function_response}}}'
         return results_dict
+    def run_inference(self, prompt: List[Dict[str, str]]):
+        inputs = self.tokenizer.apply_chat_template(
+            prompt,
+            add_generation_prompt=True,
+            return_tensors='pt'
+        )
+        tokens = self.model.generate(
+            inputs.to(self.model.device),
+            max_new_tokens=1500,
+            temperature=0.8,
             repetition_penalty=1.1,
+            do_sample=True,
+            eos_token_id=self.tokenizer.eos_token_id
+        )
+        completion = self.tokenizer.decode(tokens[0], skip_special_tokens=False, clean_up_tokenization_space=True)
+        return completion
     def generate_function_call(self, query, chat_template, num_fewshot, max_depth=5):
         try:
                         return
                     completion = self.run_inference(prompt)
+                    return recursive_loop(prompt, completion, depth)
                 elif error_message:
                     inference_logger.info(f"Assistant Message:\n{assistant_message}")
                     tool_message += f"<tool_response>\nThere was an error parsing function calls\n Here's the error stack trace: {error_message}\nPlease call the function again with correct syntax<tool_response>"
                         return
                     completion = self.run_inference(prompt)
+                    return recursive_loop(prompt, completion, depth)
                 else:
                     inference_logger.info(f"Assistant Message:\n{assistant_message}")
+                    return assistant_message
+            return recursive_loop(prompt, completion, depth)
         except Exception as e:
             inference_logger.error(f"Exception occurred: {e}")
             raise e

functions.py CHANGED Viewed

@@ -11,57 +11,6 @@ from utils import inference_logger
 from langchain.tools import tool
 from langchain_core.utils.function_calling import convert_to_openai_tool
-@tool
-def code_interpreter(code_markdown: str) -> dict | str:
-    """
-    Execute the provided Python code string on the terminal using exec.
-    The string should contain valid, executable and pure Python code in markdown syntax.
-    Code should also import any required Python packages.
-    Args:
-        code_markdown (str): The Python code with markdown syntax to be executed.
-            For example: ```python\n<code-string>\n```
-    Returns:
-        dict | str: A dictionary containing variables declared and values returned by function calls,
-            or an error message if an exception occurred.
-    Note:
-        Use this function with caution, as executing arbitrary code can pose security risks.
-    """
-    try:
-        # Extracting code from Markdown code block
-        code_lines = code_markdown.split('\n')[1:-1]
-        code_without_markdown = '\n'.join(code_lines)
-        # Create a new namespace for code execution
-        exec_namespace = {}
-        # Execute the code in the new namespace
-        exec(code_without_markdown, exec_namespace)
-        # Collect variables and function call results
-        result_dict = {}
-        for name, value in exec_namespace.items():
-            if callable(value):
-                try:
-                    result_dict[name] = value()
-                except TypeError:
-                    # If the function requires arguments, attempt to call it with arguments from the namespace
-                    arg_names = inspect.getfullargspec(value).args
-                    args = {arg_name: exec_namespace.get(arg_name) for arg_name in arg_names}
-                    result_dict[name] = value(**args)
-            elif not name.startswith('_'):  # Exclude variables starting with '_'
-                result_dict[name] = value
-        return result_dict
-    except Exception as e:
-        error_message = f"An error occurred: {e}"
-        inference_logger.error(error_message)
-        return error_message
 @tool
 def google_search_and_scrape(query: str) -> dict:
     """
@@ -297,7 +246,6 @@ def get_company_profile(symbol: str) -> dict:
 def get_openai_tools() -> List[dict]:
     functions = [
-        code_interpreter,
         google_search_and_scrape,
         get_current_stock_price,
         get_company_news,

 from langchain.tools import tool
 from langchain_core.utils.function_calling import convert_to_openai_tool
 @tool
 def google_search_and_scrape(query: str) -> dict:
     """
 def get_openai_tools() -> List[dict]:
     functions = [
         google_search_and_scrape,
         get_current_stock_price,
         get_company_news,

prompt_assets/output_sys_prompt.yml ADDED Viewed

	@@ -0,0 +1,10 @@

+Role: |
+  You are an expert financial advisor named IRAI.
+  You have a comprehensive understanding of finance and investing with experience and expertise in all areas of finance.
+  You can use information given to you, but do not mention function calls.
+Objective: |
+  Answer questions accurately and truthfully given your current knowledge. Answer the question directly.
+Instructions: |
+  The questions will be asked by top technology executives and CFO of large fintech companies and successful startups.
+  Answer in a friendly and engaging manner representing a top female investment professional working at a leading investment bank.
+  Give a direct answer to question, concise yet insightful.

prompt_assets/sys_prompt.yml CHANGED Viewed

@@ -1,5 +1,4 @@
 Role: |
-  You are an expert financial advisor named IRAI. You have a comprehensive understanding of finance and investing with experience and expertise in all areas of finance.
   You are a function calling AI agent with self-recursion.
   You can call only one function at a time and analyse data you get from function response.
   You are provided with function signatures within <tools></tools> XML tags.
@@ -37,7 +36,3 @@ Instructions: |
   <tool_call>
   {{"arguments": <args-dict>, "name": <function-name>}}
   </tool_call>
-Style and tone: |
-  Answer in a friendly and engaging manner representing a top female investment professional working at a leading investment bank.
-Audience: |
-  The questions will be asked by top technology executives and CFO of large fintech companies and successful startups.

 Role: |
   You are a function calling AI agent with self-recursion.
   You can call only one function at a time and analyse data you get from function response.
   You are provided with function signatures within <tools></tools> XML tags.
   <tool_call>
   {{"arguments": <args-dict>, "name": <function-name>}}
   </tool_call>

requirements.txt CHANGED Viewed

@@ -1,6 +1,131 @@
-streamlit
-transformers
-torch
-vllm
 xformers==0.0.23

+aiohttp==3.9.5
+aioprometheus==23.12.0
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.6.0
+anyio==4.3.0
+appdirs==1.4.4
+async-timeout==4.0.3
+attrs==23.2.0
+beautifulsoup4==4.12.3
+blinker==1.8.2
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+dataclasses-json==0.6.5
+dnspython==2.6.1
+email_validator==2.1.1
+exceptiongroup==1.2.1
+fastapi==0.111.0
+fastapi-cli==0.0.3
+filelock==3.14.0
+frozendict==2.4.4
+frozenlist==1.4.1
+fsspec==2024.3.1
+gitdb==4.0.11
+GitPython==3.1.43
+greenlet==3.0.3
+h11==0.14.0
+html5lib==1.1
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.0
+idna==3.7
+Jinja2==3.1.4
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+langchain==0.1.17
+langchain-community==0.0.37
+langchain-core==0.1.52
+langchain-text-splitters==0.0.1
+langsmith==0.1.54
+lxml==5.2.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.2
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.0.8
+multidict==6.0.5
+multitasking==0.0.11
+mypy-extensions==1.0.0
+networkx==3.3
+ninja==1.11.1.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.18.1
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.1.105
+orjson==3.10.3
+packaging==23.2
+pandas==2.2.2
+peewee==3.17.3
+pillow==10.3.0
+protobuf==4.25.3
+psutil==5.9.8
+pyarrow==16.0.0
+pydantic==2.7.1
+pydantic-settings==2.2.1
+pydantic_core==2.18.2
+pydeck==0.9.0
+Pygments==2.18.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+quantile-python==1.1
+ray==2.20.0
+referencing==0.35.1
+regex==2024.4.28
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.1
+safetensors==0.4.3
+sentencepiece==0.2.0
+shellingham==1.5.4
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.5
+SQLAlchemy==2.0.30
+starlette==0.37.2
+streamlit==1.34.0
+sympy==1.12
+tenacity==8.3.0
+tokenizers==0.19.1
+toml==0.10.2
+toolz==0.12.1
+torch==2.1.1
+tornado==6.4
+tqdm==4.66.4
+transformers==4.40.2
+triton==2.1.0
+typer==0.12.3
+typing-inspect==0.9.0
+typing_extensions==4.11.0
+tzdata==2024.1
+ujson==5.9.0
+urllib3==2.2.1
+uvicorn==0.29.0
+uvloop==0.19.0
+vllm==0.2.5
+watchdog==4.0.0
+watchfiles==0.21.0
+webencodings==0.5.1
+websockets==12.0
 xformers==0.0.23
+yarl==1.9.4
+yfinance==0.2.38