"""Streamlit app that turns an engagement description into a deception plan.

Given a free-text scenario, the script asks a retrieval-backed GPT-4 agent for
a high-level deception and denial strategy, then for an itemized action plan,
and finally asks a second model (primed with the ontology in
``docs/ae_ontology.ttl``) for a JSON-LD description of that plan.
"""

import os
import sys

#from dotenv import load_dotenv
import streamlit as st
from langchain import OpenAI
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.callbacks import get_openai_callback
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import TextLoader
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


def count_tokens(chain, query):
    """Run ``query`` through ``chain`` and report the tokens it consumed.

    Internal helper: the total token count reported by the OpenAI callback is
    printed to stdout (not shown in the Streamlit page).

    Args:
        chain: Object exposing ``run(query)``; here a ``ConversationChain``.
        query: Prompt text passed to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returns.
    """
    with get_openai_callback() as cb:
        result = chain.run(query)
        print(f'Spent a total of {cb.total_tokens} tokens')
    return result


# Read the entire ontology into a string once, up front. Turtle documents are
# UTF-8, so the encoding is stated explicitly instead of relying on the
# platform default.
with open('docs/ae_ontology.ttl', 'r', encoding='utf-8') as file:
    ttl_file_contents = file.read()

# Default scenario pre-filled into the text area.
example = (
    " You are part of a small business called MineralRUs Inc., which is a "
    "small, yet critical mining company in New Hampshire specializing in "
    "rare earth elements. Your main clients are from particular sectors of "
    "technology and defense industries, and are considered to be critical "
    "infrastructure. Therefore, your business and its operation is "
    "essential to the integrity of supply chains enabling those critical "
    "infrastructure. MineralRUs Inc. maintains paper-based record of their "
    "key intellectual property and operational procedures, which are "
    "stored in a physical safe. This fact is known by only a select few in "
    "the company. Your business has a small, unorganized computer network "
    "with only generic security infrastructure, and the company does not "
    "have the resources to employ an in-house information security team. "
    "Instead, a very small team is tasked with maintaining Operational "
    "Technology (OT) systems on the network. Your company has recently "
    "received an advisory from federal authorities, warning businesses in "
    "your industry about the rapidly growing threat of cyber espionage by "
    "state-sponsored Advanced Persistent Threats (APTs). In response, your "
    "company decides to engage an external cybersecurity provider to "
    "assist with timely incident response and prevention. The external "
    "provider recommends the employment of active adversary engagement "
    "plans, and asks you to develop a high-level deception and denial "
    "strategy. "
)

st.write(
    "In this space you need to provide the description of the engagement "
    "and it will automatically generate the deception strategy and itemized "
    "action along with the JSON-LD description."
)

# The key is a secret: mask it in the UI rather than echoing it in clear text.
OpenAI_Key = st.text_input(
    " Please enter your OpenAI key here to continue",
    type="password",
)

# only continue if the key is given
if OpenAI_Key:
    # OpenAIEmbeddings() and ChatOpenAI() below are constructed without an
    # explicit key, so it is exported through the environment first.
    os.environ['OPENAI_API_KEY'] = OpenAI_Key

    # Vector store persisted under ./data, exposed to the agent as a single
    # retrieval tool.
    vectordb = Chroma(
        persist_directory="./data",
        embedding_function=OpenAIEmbeddings(),
    )
    retriever = vectordb.as_retriever()
    tool = create_retriever_tool(
        retriever,
        "search_AEO",
        "Searches and returns documents regarding adversary engagement.",
    )
    tools = [tool]

    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    agent_executor = create_conversational_retrieval_agent(
        llm, tools, verbose=True
    )
    # result = agent_executor({"input": "What is the point of the engagement matrix?"})

    # Creating High Level Deception and denial strategy
    user_input = st.text_area(
        "Enter your engagement strategy: ", example, height=400
    )
    prompt_1 = user_input + (
        "### Instruction: You are an expert in Mitre's Attack Framework. "
        "Please create a high-level deception and denial strategy for the "
        "given scenario."
    )

    st.subheader('High Level Deception and Denial Strategy ')
    result = agent_executor({"input": prompt_1})
    st.write(result['output'])

    if result:
        st.subheader('Itemized Action Plan')
        # The strategy produced above is appended as the scenario to itemize.
        prompt_2 = (
            "### Instruction: I am giving you the high level deception and "
            "denial strategy. Please create a specific itemized action plan "
            "for the below strategy. ### Scenario"
        ) + result['output']
        result_2 = agent_executor({"input": prompt_2})
        st.write(result_2['output'])

        if result_2:
            st.write('Please wait, json-ld description will take a time to load...')

            # Let the model digest the ontology; the 16k-context model is
            # used to avoid the token limit.
            llm = OpenAI(
                temperature=0,
                openai_api_key=OpenAI_Key,
                model_name="gpt-3.5-turbo-16k",
            )
            # Buffer memory keeps the ontology turn available to the
            # follow-up JSON-LD request made on the same chain.
            conversation_buf = ConversationChain(
                llm=llm,
                memory=ConversationBufferMemory(),
            )

            # First turn: feed the ontology. The reply is not displayed.
            count_tokens(
                conversation_buf,
                " Please digest this ttl file content and simply acknowledge "
                "that you have understand it. No need to say anything else."
                f"\n {ttl_file_contents} ",
            )

            # Second turn: request JSON-LD for the itemized plan.
            result_3 = count_tokens(
                conversation_buf,
                " I will provide you with the deception strategy plan. "
                "Please create a json-ld description of the strategy based "
                "on the ontology you have learned. The json-ld must comply "
                "with the ontology structure. Please do not use classes "
                "outside of the given ontology. "
                f"Strategy: {result_2['output']} ",
            )

            st.subheader('JSON-LD Description')
            st.write(result_3)