import gradio as gr
from huggingface_hub import InferenceClient
import os
from mitreattack.stix20 import MitreAttackData
from descriptions import descriptions

# Paths to the ATT&CK JSON bundles
ics_attack_path = 'ics-attack.json'
enterprise_attack_path = 'enterprise-attack.json'

# Load the ATT&CK data set (only the Enterprise bundle is loaded here)
mitre_attack_data = MitreAttackData(enterprise_attack_path)

# Fetch the ATT&CK techniques, leaving out revoked/deprecated entries
techniques = mitre_attack_data.get_techniques(remove_revoked_deprecated=True)

# Render the techniques as text: one "<name> (<ATT&CK id>)" entry per line
techniques_str = "\n".join(
    f"{entry['name']} ({mitre_attack_data.get_attack_id(entry['id'])})"
    for entry in techniques
)

def clean_string(input_string):
    """Return ``input_string`` with a fixed set of punctuation characters removed.

    The characters dropped are: comma, slash, colon, double quote,
    semicolon, pipe, hyphen and underscore. Every other character is kept,
    in its original order.
    """
    unwanted = (',', '/', ':', '"', ';', '|', '-', '_')

    kept = []
    for ch in input_string:
        if ch in unwanted:
            continue
        kept.append(ch)

    return ''.join(kept)

# Module-level inference client bound to the Mixtral-8x7B-Instruct model;
# respond() calls client.chat_completion() on it to stream replies.
client = InferenceClient(model='mistralai/Mixtral-8x7B-Instruct-v0.1')


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to ``message``.

    Builds a chat transcript (system message, prior turns, new user
    message), sends it to the module-level ``client`` with streaming
    enabled, and yields the reply text accumulated so far after each
    streamed chunk.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user, assistant)`` pairs; an empty or
            falsy side of a pair is skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion`` as ``temperature``.
        top_p: Forwarded to ``client.chat_completion`` as ``top_p``.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately NOT named `message`, so the
    # parameter is not shadowed while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed chunk may carry no choices, or a delta whose content
        # is None; treat both as "no new text" instead of crashing on
        # `str += None` or an IndexError.
        choices = chunk.choices
        token = choices[0].delta.content if choices else None
        if token:
            response += token
        yield response


# Chat UI backed by respond(). The additional inputs below are listed in the
# same order as respond()'s extra parameters:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Earlier variants of the default system prompt, kept for reference:
        # gr.Textbox(value=f"""<s>[INST] Given these TTPs: {techniques_str}\n\n and here are {descriptions}\n\nfigure out which major techniques are used in these logs and respond in bullets points and nothing else[/INST]""", label="System message"),
        # gr.Textbox(value=f"""<s>[INST] Given these TTPs: {techniques_str}\n\nfigure out which major techniques are used in these logs and respond in bullets points and nothing else[/INST]""", label="System message"),
        # Default system prompt. This is an f-string evaluated once when the
        # module is loaded, so the full technique list (techniques_str) is
        # embedded in the textbox's initial value. The literal's indentation
        # and line breaks are part of the prompt text.
gr.Textbox(
    value=f"""<s>[INST] 
    Step 1: Given these TTPs: {techniques_str}, identify the major techniques present in these logs and list them in bullet points only.\n\n
    
    Step 2: As a cybersecurity analyst, interpret the logs provided, which include login failures, event logs, firewall logs, and brute force logs. Analyze the data and provide an interpretation based on the following indicators:
    - Multiple IP addresses signing in to the same account within a short period
    - Excessive login failures (failed MFA requests, failed username/password attempts, failures due to geo-blocking)
    - Multiple sign-in attempts from different countries within a short period
    - Detection of malware on the device
    - Unusual activity by admin accounts (excessive actions, resetting passwords, changing MFA methods)
    - Sharing emails with attachments to personal accounts
    - Logins occurring after working hours
    - General unusual user account activity

    Important: Do not use any information outside of the input provided. Focus solely on the data and indicators given in this prompt.

    Response: Provide a detailed analysis and interpretation of the observed logs, focusing on identifying and explaining potential security threats or breaches based solely on the information and indicators provided.
    [/INST]""",
    label="System message"
),
        # Generation controls, in respond() parameter order:
        # max_tokens, temperature, top_p.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.1, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()