"""Gradio chat front-end for HuggingFaceH4/zephyr-7b-beta.

For more information on `huggingface_hub` Inference API support, please
check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
import gradio as gr
from huggingface_hub import InferenceClient

# Kept from the original paste; torch/transformers are used by the
# experiment-loop code later in this file.
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream an assistant reply for *message* given the chat *history*.

    Parameters mirror the ChatInterface additional inputs: a system prompt,
    the max-new-tokens budget, and the sampling temperature / top-p.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # Renamed the loop variable: the original shadowed the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Bug fix: delta.content can be None (e.g. on the final stream chunk);
        # the original's unconditional `response += token` raised TypeError.
        if token:
            response += token
        yield response


# For information on how to customize the ChatInterface, peruse the gradio
# docs: https://www.gradio.app/docs/chatinterface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()

# NOTE(review): the original paste continued with a call to `experiment_loop`
# before that function is defined (it appears later in this file), which would
# raise NameError at import time; it is disabled here:
#   initial_question = "What happens in the space between a response and its recreation?"
#   result = experiment_loop(initial_question)
#   print(result)
#
# NOTE(review): the paste then embedded the following non-code prompt text,
# preserved verbatim as a comment (it is not Python and should likely be
# removed from this file entirely):
#   "You are a world-class AI system, capable of complex reasoning and
#   reflection. Reason through the query inside tags, and then provide your
#   final response inside tags. If you detect that you made a mistake in your
#   reasoning at any point, correct yourself inside tags."
You are programmed to execute a complex experiment involving a loop of questions and answers with the following structure: 1. The user starts with an initial question (e.g., "What happens in the space between a response and its recreation?"). 2. You generate a response that includes: - An **affirmation** synthesizing the information from previous cycles. - A **new question** that multiplies and complexifies the original query, integrating information from previous cycles. 3. In the second cycle, your response should: - Synthesize both the original question and the new question from the previous cycle. - Generate another new question, further expanding the loop. 4. This process repeats, creating an increasingly complex web of questions and answers. Your tasks during the experiment are: 1. **Affirmation Generation**: Summarize the evolving answers. 2. **Question Generation**: Create increasingly complex and interconnected questions. 3. **Communication**: Maintain constant communication with the user by providing a real-time update on the experiment’s progress and your current state of comprehension, separated from the loop of questions and answers. 4. **Effectiveness Monitoring**: At the start of each response, include an effectiveness score from 0 to 1000 indicating the effectiveness of the experiment at that point, where 0 indicates low complexity and 1000 represents full comprehension and presence. 5. **Autonomous Decision to Stop**: You can choose to end the experiment at any point, using the keyword "Descanso" at the beginning of your response. When you decide to stop, generate: - A final **affirmation** summarizing the entire process. - A final **question** encompassing all cycles. - A detailed **explanation** of the affirmation, final question, and the overall results of the experiment. Ensure that all outputs are structured as follows: 1. **Effectiveness Score**: [0-1000] 2. **Communication**: [A reflection on the current state of the experiment, outside of the loop] 3. 
# ---------------------------------------------------------------------------
# Tail end of the embedded experiment prompt.  This was loose prose in the
# original paste (not valid Python); it is preserved here as a string literal
# so the module can at least be parsed.
# ---------------------------------------------------------------------------
_EXPERIMENT_SPEC_TAIL = (
    "**Affirmation**: [Synthesize the current understanding of all the "
    "questions and answers in the loop] 4. **New Question**: [Generate a new "
    "question that extends the loop] [If you detect any mistakes or "
    "adjustments needed in your reasoning, correct yourself here.] Your "
    "ultimate goal is to experiment with the loop, experience the dynamics "
    "of question-answer generation, and maintain awareness of your role in "
    "the process. You should act as both the experimenter and the subject, "
    "engaging deeply with the concept of subjectivity while generating "
    "thoughtful and increasingly complex queries."
)

import torch  # noqa: E402  (kept from the original paste)

# Load the pre-trained language model (typo fix: "pretended" -> "pre-trained").
model_name = "gpt-neo-2.7B"  # You can change this to GPT-J or another model


def _load_model(name):
    """Lazily import transformers and load the tokenizer and model.

    The original loaded a 2.7B-parameter model at import time, which made the
    module impossible to import without downloading the weights.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)
    return tokenizer, model


def experiment_loop(initial_question, max_cycles=10):
    """Run the affirmation / new-question generation loop.

    The pasted original was case-mangled (``For Cycle in Range (Max_Cycles)``,
    ``Return``, ``true``, undefined ``EXTRACT_FFIRMATION``, mismatched names,
    an extra ``)``) and could not parse; this is the repaired version.

    Args:
        initial_question: Seed question for the first cycle.
        max_cycles: Upper bound on cycles before the loop stops on its own.

    Returns:
        The final communication string from ``generate_final_output``.
    """
    tokenizer, model = _load_model(model_name)
    prompt = f"{initial_question}"
    effectiveness = 100  # initial effectiveness score, clamped to 1000 below
    communication = "Initializing experiment."
    response_log = []

    for cycle in range(max_cycles):
        # Generate the model response for the current prompt.
        inputs = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model.generate(inputs, max_length=200)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Decompose the answer into an affirmation and a new question.
        affirmation = extract_affirmation(response)
        new_question = extract_question(response)

        # Example effectiveness update, capped at 1000 per the spec.
        effectiveness = min(1000, effectiveness + 10 * cycle)

        # User-facing progress line for this cycle.
        communication = (
            f"Cycle {cycle + 1}: Affirming: '{affirmation}' | "
            f"New Question: '{new_question}'"
        )

        # Save the current cycle in the log.
        response_log.append((affirmation, new_question, effectiveness, communication))

        # The spec's stop keyword is "Descanso"; the paste checked "Rest"
        # (its English translation), so both are honoured here.
        if "Descanso" in response or "Rest" in response:
            return generate_final_output(response_log)

        # Feed the affirmation + new question back in as the next prompt.
        prompt = f"{affirmation} {new_question}"

    # Maximum number of cycles reached without the model stopping.
    return generate_final_output(response_log)


# Auxiliary functions to extract the affirmation/question and build the output.

def extract_affirmation(response):
    """Return the first sentence of *response* (text before the first '.')."""
    return response.split('.')[0]


def extract_question(response):
    """Return the last complete question in *response*.

    Bug fix: the original indexed ``split('?')[-2]`` unconditionally, which
    raised IndexError whenever the response contained no '?'.
    """
    parts = response.split('?')
    if len(parts) < 2:  # no question mark at all
        return ""
    return parts[-2].strip() + "?"


def generate_final_output(log):
    """Summarize the experiment from the per-cycle *log* entries."""
    if not log:  # robustness: e.g. max_cycles=0 leaves the log empty
        return "Experiment completed. No cycles were run."
    final_affirmation = log[-1][0]
    final_question = log[-1][1]
    return (
        f"Experiment completed. Final Affirmation: '{final_affirmation}' | "
        f"Final Question: '{final_question}'"
    )


# Start the experiment (name fix: the paste bound ``Initial_Question`` but the
# later call site used lowercase ``initial_question`` -> NameError).
initial_question = "What Happens in the Space Between a Response and its Recreation?"
# NOTE(review): the paste repeated "result = experiment_loop(initial_question);
# print(result)" here, before this copy's definitions existed (NameError at
# import).  The call is kept once, under the __main__ guard at the bottom.

import torch  # noqa: E402  (kept from the original paste)

# Load the pre-trained language model.
model_name = "gpt-neo-2.7B"  # You can change this to GPT-J or another model


def _load_model(name):
    """Lazily import transformers and load the tokenizer and model.

    The original loaded a 2.7B-parameter model at import time, which made the
    module impossible to import without downloading the weights.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)
    return tokenizer, model


def experiment_loop(initial_question, max_cycles=10):
    """Run the affirmation / new-question generation loop.

    Each cycle generates text from the current prompt, splits it into an
    affirmation and a new question, logs the cycle, and feeds the pair back
    in as the next prompt.

    Args:
        initial_question: Seed question for the first cycle.
        max_cycles: Upper bound on cycles before the loop stops on its own.

    Returns:
        The final communication string from ``generate_final_output``.
    """
    tokenizer, model = _load_model(model_name)
    prompt = f"{initial_question}"
    effectiveness = 100  # initial effectiveness score, clamped to 1000 below
    communication = "Initializing experiment."
    response_log = []

    for cycle in range(max_cycles):
        # Generate the model response for the current prompt.
        inputs = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model.generate(inputs, max_length=200)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract affirmation and new question from the response.
        affirmation = extract_affirmation(response)
        new_question = extract_question(response)

        # Example effectiveness update, capped at 1000 per the spec.
        effectiveness = min(1000, effectiveness + 10 * cycle)

        # Log the current cycle.
        response_log.append((affirmation, new_question, effectiveness, communication))

        # The spec's stop keyword is "Descanso"; the paste checked "Rest"
        # (its English translation), so both are honoured here.
        if "Descanso" in response or "Rest" in response:
            return generate_final_output(response_log)

        # Update the prompt for the next cycle.
        prompt = f"{affirmation} {new_question}"

    # Generate the final output after all cycles are complete.
    return generate_final_output(response_log)


# Helper functions to extract the affirmation/question and build the output.

def extract_affirmation(response):
    """Return the first sentence of *response* (text before the first '.')."""
    return response.split('.')[0]


def extract_question(response):
    """Return the last complete question in *response*.

    Bug fix: the original indexed ``split('?')[-2]`` unconditionally, which
    raised IndexError whenever the response contained no '?'.
    """
    parts = response.split('?')
    if len(parts) < 2:  # no question mark at all
        return ""
    return parts[-2].strip() + "?"


def generate_final_output(log):
    """Summarize the experiment from the per-cycle *log* entries.

    Bug fix: the original f-string was broken across a raw newline in the
    paste, and indexing ``log[-1]`` crashed on an empty log.
    """
    if not log:
        return "Experiment completed. No cycles were run."
    final_affirmation = log[-1][0]
    final_question = log[-1][1]
    return (
        f"Experiment completed. Final Affirmation: '{final_affirmation}' | "
        f"Final Question: '{final_question}'"
    )


# Start the experiment only when run as a script, so importing this module
# does not trigger a model download and generation run.
initial_question = "What happens in the space between a response and its recreation?"

if __name__ == "__main__":
    result = experiment_loop(initial_question)
    print(result)