psychohistory

Runtime error

File size: 8,474 Bytes

import torch
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import json
import jsonschema  # Import the jsonschema library
from jsonschema import validate  # Import the validate function

# Hub checkpoint used for both the tokenizer and the model weights.
_CHECKPOINT = "google/gemma-2-2b-it"

# Quantization settings: 4-bit loading with the "nf4" quant type and
# bfloat16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    load_in_4bit=True,
)

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# Model weights are loaded with the quantization settings above;
# device placement is delegated to the library via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    _CHECKPOINT,
    quantization_config=quantization_config,
    device_map="auto",
)



# Define the prompt used to generate a JSON document with nested events.
# The prompt is a Spanish instruction, followed by a worked example of the
# expected structure, followed by a final instruction asking the model to
# answer with only the JSON, starting with a "<json>" tag.
#
# NOTE(review): the example repeats the "event" key inside the same
# "subevents" object (events 3 and 10, and events 2 and 13). Python's
# json.loads keeps only the last value for a repeated key, so the example is
# not a lossless template — confirm whether a list was intended there.
# NOTE(review): the final instruction only asks for an opening "<json>" tag;
# generate() also looks for a closing "</json>" tag in the output.
prompt = (
    "Genera un JSON que describa una serie de eventos consecutivos en un formato similar al siguiente:\n\n"
    "{\n"
    "  \"events\": {\n"
    "    \"event\": {\n"
    "      \"event_number\": 1,\n"
    "      \"name\": \"conflict_start\",\n"
    "      \"description\": \"Tensions escalate between Iran and Israel\",\n"
    "      \"probability\": 70,\n"
    "      \"duration_days\": 30,\n"
    "      \"subevents\": {\n"
    "        \"event\": {\n"
    "          \"event_number\": 2,\n"
    "          \"name\": \"diplomatic_failure\",\n"
    "          \"description\": \"Diplomatic negotiations fail\",\n"
    "          \"probability\": 60,\n"
    "          \"duration_days\": 15,\n"
    "          \"subevents\": {\n"
    "            \"event\": {\n"
    "              \"event_number\": 3,\n"
    "              \"name\": \"military_clash\",\n"
    "              \"description\": \"Initial military clash at the border\",\n"
    "              \"probability\": 50,\n"
    "              \"duration_days\": 10,\n"
    "              \"subevents\": {\n"
    "                \"event\": [\n"
    "                  {\n"
    "                    \"event_number\": 4,\n"
    "                    \"name\": \"escalation\",\n"
    "                    \"description\": \"Conflict escalates into full-scale war\",\n"
    "                    \"probability\": 40,\n"
    "                    \"duration_days\": 180,\n"
    "                    \"subevents\": {\n"
    "                      \"event\": [\n"
    "                        {\n"
    "                          \"event_number\": 5,\n"
    "                          \"name\": \"regional_involvement\",\n"
    "                          \"description\": \"Other Middle Eastern countries get involved\",\n"
    "                          \"probability\": 30,\n"
    "                          \"duration_days\": 365,\n"
    "                          \"subevents\": {\n"
    "                            \"event\": [\n"
    "                              {\n"
    "                                \"event_number\": 6,\n"
    "                                \"name\": \"ceasefire\",\n"
    "                                \"description\": \"International powers broker a ceasefire\",\n"
    "                                \"probability\": 20,\n"
    "                                \"duration_days\": 30\n"
    "                              },\n"
    "                              {\n"
    "                                \"event_number\": 7,\n"
    "                                \"name\": \"prolonged_conflict\",\n"
    "                                \"description\": \"Conflict continues for over a year\",\n"
    "                                \"probability\": 50,\n"
    "                                \"duration_days\": 365\n"
    "                              }\n"
    "                            ]\n"
    "                          }\n"
    "                        },\n"
    "                        {\n"
    "                          \"event_number\": 8,\n"
    "                          \"name\": \"international_intervention\",\n"
    "                          \"description\": \"UN or other international organizations intervene\",\n"
    "                          \"probability\": 25,\n"
    "                          \"duration_days\": 60\n"
    "                        }\n"
    "                      ]\n"
    "                    }\n"
    "                  },\n"
    "                  {\n"
    "                    \"event_number\": 9,\n"
    "                    \"name\": \"containment\",\n"
    "                    \"description\": \"Conflict is contained and doesn't escalate\",\n"
    "                    \"probability\": 30,\n"
    "                    \"duration_days\": 90\n"
    "                  }\n"
    "                ]\n"
    "              }\n"
    "            },\n"
    "            \"event\": {\n"
    "              \"event_number\": 10,\n"
    "              \"name\": \"sanctions\",\n"
    "              \"description\": \"Increased sanctions on Iran\",\n"
    "              \"probability\": 70,\n"
    "              \"duration_days\": 180,\n"
    "              \"subevents\": {\n"
    "                \"event\": [\n"
    "                  {\n"
    "                    \"event_number\": 11,\n"
    "                    \"name\": \"iran_retaliates\",\n"
    "                    \"description\": \"Iran retaliates with cyberattacks\",\n"
    "                    \"probability\": 40,\n"
    "                    \"duration_days\": 60\n"
    "                  },\n"
    "                  {\n"
    "                    \"event_number\": 12,\n"
    "                    \"name\": \"israel_response\",\n"
    "                    \"description\": \"Israel responds with targeted airstrikes\",\n"
    "                    \"probability\": 50,\n"
    "                    \"duration_days\": 60\n"
    "                  }\n"
    "                ]\n"
    "              }\n"
    "            }\n"
    "          }\n"
    "        },\n"
    "        \"event\": {\n"
    "          \"event_number\": 13,\n"
    "          \"name\": \"diplomatic_success\",\n"
    "          \"description\": \"Successful diplomatic negotiations\",\n"
    "          \"probability\": 40,\n"
    "          \"duration_days\": 30,\n"
    "          \"subevents\": {\n"
    "            \"event\": [\n"
    "              {\n"
    "                \"event_number\": 14,\n"
    "                \"name\": \"peace_agreement\",\n"
    "                \"description\": \"Iran and Israel sign a peace agreement\",\n"
    "                \"probability\": 20,\n"
    "                \"duration_days\": 60\n"
    "              },\n"
    "              {\n"
    "                \"event_number\": 15,\n"
    "                \"name\": \"temporary_truce\",\n"
    "                \"description\": \"A temporary truce is established\",\n"
    "                \"probability\": 30,\n"
    "                \"duration_days\": 30\n"
    "              }\n"
    "            ]\n"
    "          }\n"
    "        }\n"
    "      }\n"
    "    }\n"
    "  }\n"
    "}\n\n"
    "Ahora, genera un JSON similar con eventos anidados, pero cambia los detalles y números para hacer que sea con el input que viene a continuacion, respondiendo solo el JSON empezando con <json>:"
)


# Tags that delimit the JSON payload inside the model's reply.
_JSON_OPEN_TAG = "<json>"
_JSON_CLOSE_TAG = "</json>"

# Minimal structural schema for the generated document: a JSON object with a
# top-level "events" member, mirroring the example embedded in the prompt.
# Tighten this if stricter guarantees on the nested events are needed.
_EVENT_JSON_SCHEMA = {
    "type": "object",
    "required": ["events"],
}


def _extract_json_payload(reply):
    """Return the text between the JSON tags in *reply*, or None if absent.

    The closing tag is only searched for after the opening tag, so a stray
    closing tag earlier in the text cannot produce an empty or reversed slice.
    """
    open_at = reply.find(_JSON_OPEN_TAG)
    if open_at == -1:
        return None
    payload_start = open_at + len(_JSON_OPEN_TAG)
    close_at = reply.find(_JSON_CLOSE_TAG, payload_start)
    if close_at == -1:
        return None
    return reply[payload_start:close_at].strip()


def generate(event):
    """Ask the model for a nested-event JSON document describing *event*.

    The module-level prompt and *event* are sent as a single user turn. Only
    the newly generated tokens are decoded, so the tag search never matches
    the "<json>" marker that the prompt itself contains.

    Args:
        event: Text describing the scenario; appended to the prompt.

    Returns:
        The JSON text found between "<json>" and "</json>" when it parses and
        satisfies the schema. Otherwise a message starting with "Error:" —
        either the tags were missing, or the payload was not valid JSON or
        did not match the schema.
    """
    combined_input = f"{prompt} {event}"  # Combine prompt and event
    prompt_msg = [{'role': 'user', 'content': combined_input}]

    # Assumes apply_chat_template returns a tensor of input ids here (the
    # result is moved to a device and passed positionally to generate()).
    inputs = tokenizer.apply_chat_template(
        prompt_msg,
        add_generation_prompt=True,
        return_tensors='pt'
    )

    tokens = model.generate(
        inputs.to(model.device),
        max_new_tokens=1024,
        temperature=0.5,
        do_sample=True
    )

    # The generated sequence starts with the prompt tokens; slice them off by
    # token count instead of guessing the rendered chat template's length.
    prompt_token_count = inputs.shape[-1]
    reply = tokenizer.decode(
        tokens[0][prompt_token_count:],
        skip_special_tokens=True
    )

    json_string = _extract_json_payload(reply)
    if json_string is None:
        return "Error: <json> or </json> not found in generated output"

    try:
        validate(instance=json.loads(json_string), schema=_EVENT_JSON_SCHEMA)
    except (json.JSONDecodeError, jsonschema.ValidationError) as e:
        # Malformed JSON and schema violations are both reported to the
        # caller as an error string rather than raised.
        return f"Error: Invalid JSON - {e}"
    return json_string