Spaces:

ProCreations
/

what-comes-next

Running

App Files Files Community

ProCreations committed on Apr 24

Commit

421d392

verified ·

1 Parent(s): 6760dbb

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -148

app.py CHANGED Viewed

@@ -1,23 +1,21 @@
 #!/usr/bin/env python3
 """
-what_comes_next.py – Hugging Face Space implementation of **What Comes Next**
-A slow, contemplative global guessing game.
-🔮  HOW IT WORKS  🔮
-• A single Llama‑3.1‑8B‑Instruct model (FP32 on CPU) is generating one very long completion
-  for a chosen mystical prompt. It runs continuously in the background for everyone.
-• Any visitor sees the same prompt and the Oracle’s current partial response.
-• Players may submit *one* of two kinds of guesses:
-    1. 🧠  **Exact Completion** – the full sentence/paragraph they think the Oracle will
-       eventually write.
-    2. 💡  **General Idea**   – a short summary of the direction or theme they expect.
-• Each guess is recorded immediately (with timestamp, Oracle progress, etc.) to
-  `data.json` (JSON‑Lines).  When the Oracle finally finishes, offline evaluation can
-  score the guesses against the final text.
-The game then moves on to the next prompt and the cycle repeats.
 """
 import os
 import json
 import time
@@ -25,47 +23,45 @@ import random
 import threading
 import logging
 from datetime import datetime, timezone
-from pathlib import Path
 from typing import Dict, Any
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 import gradio as gr
 ###############################################################################
-# Settings                                                                     #
 ###############################################################################
-MODEL_NAME           = "meta-llama/Llama-3.1-8B-Instruct"       # FP32, CPU‑only
-PROMPTS_PATH         = "oracle_prompts.json"                    # 100 unfinished lines
-STATE_PATH           = "current_state.json"                     # persistent Oracle state
-DATA_PATH            = "data.json"                              # JSONL of user guesses
-TOKENS_PER_PROMPT    = 2048                                      # stop after N tokens
-SECS_BETWEEN_TOKENS  = 15                                        # pacing (≈10h / prompt)
-TEMPERATURE          = 0.8
-TOP_P                = 0.95
-MAX_CONTEXT_TOKENS   = 8192
 ###############################################################################
 logging.basicConfig(format="[%(asctime)s] %(levelname)s: %(message)s", level=logging.INFO)
-log = logging.getLogger("what‑comes‑next")
-lock = threading.Lock()  # global file/variable lock
-# --------------------------------------------------------------------------- #
-# Helper functions                                                             #
-# --------------------------------------------------------------------------- #
 def _read_json(path: str, default: Any):
     try:
-        with open(path, "r", encoding="utf‑8") as f:
             return json.load(f)
     except FileNotFoundError:
         return default
-def _write_json(path: str, obj: Any):
     tmp = f"{path}.tmp"
-    with open(tmp, "w", encoding="utf‑8") as f:
         json.dump(obj, f, ensure_ascii=False, indent=2)
     os.replace(tmp, path)
@@ -73,170 +69,156 @@ def _write_json(path: str, obj: Any):
 def load_prompts() -> list[str]:
     if not os.path.exists(PROMPTS_PATH):
         raise FileNotFoundError(f"Missing {PROMPTS_PATH}. Please add 100 prompts.")
-    with open(PROMPTS_PATH, "r", encoding="utf‑8") as f:
-        return json.load(f)
-prompts = load_prompts()
-# --------------------------------------------------------------------------- #
-# Model loading (FP32 ‑ CPU)                                                   #
-# --------------------------------------------------------------------------- #
-log.info("Loading Llama‑3.1‑8B‑Instruct in FP32 on CPU (this is *slow*) …")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     torch_dtype=torch.float32,
-    device_map={"": "cpu"},  # force CPU
 )
 model.eval()
-log.info("Model loaded.")
-# --------------------------------------------------------------------------- #
 # Oracle generation thread                                                     #
-# --------------------------------------------------------------------------- #
-def init_state() -> Dict[str, Any]:
-    """Return existing state or create a new one."""
     state = _read_json(STATE_PATH, {})
-    if state.get("finished", False):
-        state = {}  # finished, start new prompt
-    if not state:
         prompt_idx = random.randrange(len(prompts))
-        prompt = prompts[prompt_idx]
         state = {
             "prompt_idx": prompt_idx,
-            "prompt": prompt,
-            "generated": "",         # Oracle’s text so far (string)
             "start_time": time.time(),
-            "finished": False,
-            "tokens_done": 0
         }
-        _write_json(STATE_PATH, state)
-        log.info(f"Starting new Oracle prompt #{prompt_idx}: {prompt[:60]}…")
     return state
 def oracle_loop():
-    """Continuously extend the Oracle’s text by one token every SECS_BETWEEN_TOKENS."""
     while True:
         with lock:
-            state = init_state()
-            if state["finished"]:
-                # Should not happen, but guard anyway
-                time.sleep(SECS_BETWEEN_TOKENS)
-                continue
-            prompt_text = state["prompt"]
-            generated_text = state["generated"]
-            tokens_done = state["tokens_done"]
-        # Build input_ids (prompt + generated so far)
-        full_input = prompt_text + generated_text
-        input_ids = tokenizer(full_input, return_tensors="pt", truncation=True, max_length=MAX_CONTEXT_TOKENS).input_ids
-        # Generate ONE token
         with torch.no_grad():
-            outputs = model.generate(
                 input_ids,
                 max_new_tokens=1,
                 do_sample=True,
                 temperature=TEMPERATURE,
                 top_p=TOP_P,
             )
-            next_token_id = outputs[0, -1].unsqueeze(0)
-            next_token_text = tokenizer.decode(next_token_id, skip_special_tokens=True, clean_up_tokenization_spaces=False)
         with lock:
-            # Update state
-            state["generated"] += next_token_text
             state["tokens_done"] += 1
             if state["tokens_done"] >= TOKENS_PER_PROMPT:
                 state["finished"] = True
-                log.info("Prompt complete. Oracle will pick a new one next cycle.")
-            _write_json(STATE_PATH, state)
-        time.sleep(SECS_BETWEEN_TOKENS)  # pacing
 threading.Thread(target=oracle_loop, daemon=True).start()
-# --------------------------------------------------------------------------- #
-# Gradio Interface                                                             #
-# --------------------------------------------------------------------------- #
-def human_readable_elapsed(start: float) -> str:
-    delta = int(time.time() - start)
-    h, rem = divmod(delta, 3600)
-    m, s = divmod(rem, 60)
-    return f"{h}h {m}m {s}s"
-def get_current_state() -> Dict[str, Any]:
-    with lock:
-        state = _read_json(STATE_PATH, {})
-    if not state:
-        return {"prompt": "…loading…", "generated": "", "elapsed": "0h 0m 0s"}
-    return {
-        "prompt": state["prompt"],
-        "generated": state["generated"],
-        "elapsed": human_readable_elapsed(state["start_time"])
-    }
-def record_guess(full_guess: str, idea_guess: str):
-    state = get_current_state()
-    guess_text = full_guess.strip() or idea_guess.strip()
-    if not guess_text:
-        return gr.update(value="⚠️ Please enter a guess in one of the boxes …"), gr.update()
-    guess_type = "full" if full_guess.strip() else "idea"
     record = {
         "timestamp": datetime.now(timezone.utc).isoformat(),
-        "prompt": state["prompt"],
-        "point‑in‑time": state["elapsed"],
-        "response‑point": state["generated"],
-        "user‑guess": guess_text,
-        "guess‑type": guess_type
     }
-    # Append to JSONL (data.json)
     with lock:
-        with open(DATA_PATH, "a", encoding="utf‑8") as f:
             f.write(json.dumps(record, ensure_ascii=False) + "\n")
-    log.info(f"Recorded {guess_type} guess ({len(guess_text)} chars).")
-    return gr.update(value="✅ Guess recorded – check back when the Oracle finishes!"), gr.update(value="")
-with gr.Blocks(title="What Comes Next", theme="gradio/soft") as demo:
-    gr.Markdown("""# ✨ What Comes Next
-A global, slow‑burn guessing game. The Oracle is continuously writing its story.
-Read the prompt, see the Oracle’s progress, and predict **what comes next**!
-*(FP32 CPU inference – deliberately unhurried.)*""")
-    ### Live Oracle view
-    prompt_box     = gr.Markdown(label="🔮 Current Oracle Prompt")
-    oracle_box     = gr.Textbox(label="📜 Oracle’s current text", lines=10, interactive=False)
-    elapsed_box    = gr.Textbox(label="⏱️ Elapsed", interactive=False)
-    ### Guess inputs
-    gr.Markdown("**Make your prediction:** Fill **either** the exact continuation *or* a general idea.")
-    with gr.Row():
-        full_guess  = gr.Textbox(label="🧠 Exact continuation (full)")
-        idea_guess  = gr.Textbox(label="💡 General idea")
-    submit_btn     = gr.Button("Submit Guess")
-    status_msg     = gr.Textbox(label="Status", interactive=False)
-    ### Refresh button
-    refresh_btn    = gr.Button("🔄 Refresh Oracle progress")
-    def refresh():
-        st = get_current_state()
-        return st["prompt"], st["generated"], st["elapsed"]
-    refresh_btn.click(refresh, outputs=[prompt_box, oracle_box, elapsed_box])
-    demo.load(refresh, outputs=[prompt_box, oracle_box, elapsed_box])  # auto‑load on launch
-    submit_btn.click(record_guess,
-                     inputs=[full_guess, idea_guess],
-                     outputs=[status_msg, full_guess])  # clear full_guess box on success
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)

 #!/usr/bin/env python3
 """
+what_comes_next.py – Hugging Face Space implementation of **What Comes Next**
+A global, slow-burn guessing game powered by Llama-3.1-8B-Instruct (FP32, CPU-only).
+HOW IT WORKS
+============
+• One shared model generates a single, very long completion (≈2 k tokens) for a chosen
+  prompt in *full precision* on CPU.  One token is sampled every ~15 s, so a prompt
+  unfolds for roughly 10 hours.  All visitors see the same progress in real-time.
+• Players read the partial output and may submit **either**
+    🧠 Exact continuation (full guess) **or** 💡 General idea (summary guess).
+• Each guess is appended to `data.json` with prompt, Oracle progress, timestamp & type.
+• Offline scoring (not included here) can later measure similarity vs the final text.
 """
+from __future__ import annotations
 import os
 import json
 import time
 import threading
 import logging
 from datetime import datetime, timezone
 from typing import Dict, Any
 import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
 ###############################################################################
+# Configuration                                                                #
 ###############################################################################
+MODEL_NAME           = "meta-llama/Llama-3.1-8B-Instruct"  # full-precision model
+PROMPTS_PATH         = "full_prompts.json"                 # 100 full prompts
+STATE_PATH           = "current_state.json"                # persistent Oracle state
+DATA_PATH            = "data.json"                         # JSONL log of guesses
+TOKENS_PER_PROMPT    = 2048        # stop after N generated tokens
+SECS_BETWEEN_TOKENS  = 15          # ~10 h per prompt
 # Pacing alone is 2048 tokens x 15 s = 30,720 s (about 8.5 h); the time spent
 # generating each token is added on top of the sleep, hence the "~10 h" estimate.
+TEMPERATURE          = 0.9         # higher creativity, as requested
+TOP_P                = 0.95        # nucleus sampling
+MAX_CONTEXT_TOKENS   = 8192        # safety cap
 # MAX_CONTEXT_TOKENS is the max_length the tokenizer truncates prompt + generated text to.
 ###############################################################################
 # Root logging is configured once here; `log` is the logger used throughout this module.
 logging.basicConfig(format="[%(asctime)s] %(levelname)s: %(message)s", level=logging.INFO)
+log = logging.getLogger("what-comes-next")
+###############################################################################
+# Utility helpers                                                              #
+###############################################################################
 def _read_json(path: str, default: Any):
     """Load and return the JSON document stored at *path*.

     Args:
         path: Location of a UTF-8 encoded JSON file.
         default: Value returned when the file cannot be used.

     Returns:
         The parsed JSON value, or *default* when the file is missing or does
         not contain valid UTF-8 JSON (for example an empty or truncated file).
     """
     try:
         with open(path, "r", encoding="utf-8") as f:
             return json.load(f)
     except FileNotFoundError:
         return default
     except (json.JSONDecodeError, UnicodeDecodeError) as exc:
         # A corrupt file must not take down the caller (the Oracle thread reads
         # its state through this helper); report it and fall back instead.
         logging.getLogger("what-comes-next").warning(
             "Ignoring unreadable JSON in %s: %s", path, exc
         )
         return default
def _atomic_write(path: str, obj: Any):
    """Serialize *obj* as JSON and atomically replace the file at *path*.

    The document is written to ``<path>.tmp`` first and then moved over *path*
    with ``os.replace``, so readers never observe a half-written file.

    Args:
        path: Destination file.
        obj: JSON-serializable value.

    Raises:
        TypeError: If *obj* is not JSON-serializable.
        OSError: If the temporary file cannot be written or moved.
    """
    tmp = f"{path}.tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(obj, f, ensure_ascii=False, indent=2)
            # Push the bytes to disk before the rename so a crash cannot leave
            # an empty file behind under the final name.
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a stale temp file around when serialization or the
        # rename fails; the previous content of *path* stays untouched.
        try:
            os.remove(tmp)
        except OSError:
            pass  # temp file was never created or is already gone
        raise
 def load_prompts() -> list[str]:
     """Load and validate the Oracle prompts stored at ``PROMPTS_PATH``.

     Returns:
         The prompts as a non-empty list of strings.

     Raises:
         FileNotFoundError: If ``PROMPTS_PATH`` does not exist.
         ValueError: If the file is not valid JSON or is not a non-empty JSON
             array whose elements are all strings.
     """
     if not os.path.exists(PROMPTS_PATH):
         raise FileNotFoundError(f"Missing {PROMPTS_PATH}. Please add 100 prompts.")
     with open(PROMPTS_PATH, "r", encoding="utf-8") as f:
         loaded = json.load(f)
     # Each prompt is later concatenated with the generated text, so reject
     # non-string entries here instead of failing inside the generation thread.
     is_valid = (
         isinstance(loaded, list)
         and bool(loaded)
         and all(isinstance(item, str) for item in loaded)
     )
     if not is_valid:
         # Name the configured file rather than a hard-coded one.
         raise ValueError(f"{PROMPTS_PATH} must be a non-empty JSON array of strings")
     return loaded
+###############################################################################
+# Model loading                                                                #
+###############################################################################
+log.info("Loading Llama-3.1-8B-Instruct (FP32 CPU-only)… this can take a while.")
 # The tokenizer and model are created once at import time; oracle_loop() uses
 # these module-level objects for every generated token.
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     torch_dtype=torch.float32,
+    device_map={"": "cpu"},  # force CPU placement
 )
 # Switch the module to evaluation mode; the model is only used for sampling here.
 model.eval()
+log.info("Model ready – Oracle awakened.")
+###############################################################################
+# Global state                                                                 #
+###############################################################################
+lock = threading.Lock()          # guard state + files
+prompts = load_prompts()         # list of 100 strings
 # NOTE(review): load_prompts() only checks for a non-empty list, so the count of 100
 # is a convention, not something enforced. It also runs after the model has been
 # loaded, so a missing or invalid prompts file is reported late — consider loading
 # the prompts before the model.
+###############################################################################
 # Oracle generation thread                                                     #
+###############################################################################
def _init_state() -> Dict[str, Any]:
    """Return the persisted Oracle state, starting a new prompt when needed.

    The state stored at ``STATE_PATH`` is reused while it exists and is not
    flagged as finished. Otherwise a prompt is picked at random, a fresh state
    is persisted and logged, and that new state is returned.
    """
    current = _read_json(STATE_PATH, {})
    if current and not current.get("finished"):
        # An unfinished prompt is still in progress: keep going with it.
        return current

    idx = random.randrange(len(prompts))
    text = prompts[idx]
    fresh = {
        "prompt_idx": idx,
        "prompt": text,
        "generated": "",          # nothing generated yet
        "tokens_done": 0,
        "start_time": time.time(),
        "finished": False,
    }
    _atomic_write(STATE_PATH, fresh)
    log.info(f"New Oracle prompt #{idx}: {text[:80]}…")
    return fresh
def _elapsed_str(start: float) -> str:
    """Format the wall-clock time elapsed since *start* as ``"<h>h <m>m <s>s"``.

    Args:
        start: A ``time.time()`` timestamp (seconds since the epoch).

    Returns:
        The elapsed time in whole hours, minutes and seconds. A *start* that
        lies in the future (for example after a system clock adjustment) is
        reported as ``"0h 0m 0s"`` instead of a negative, misleading duration.
    """
    # Clamp at zero: divmod on a negative total would yield e.g. "-1h 58m 21s".
    total = max(0, int(time.time() - start))
    hours, rest = divmod(total, 3600)
    minutes, seconds = divmod(rest, 60)
    return f"{hours}h {minutes}m {seconds}s"
 def _oracle_step() -> None:
     """Sample one token for the active prompt and persist the updated state."""
     with lock:
         state = _init_state()
     if state["finished"]:
         # Defensive guard: nothing to generate for an already finished prompt.
         return
     # Build context: prompt + generated so far
     context = state["prompt"] + state["generated"]
     input_ids = tokenizer(context, return_tensors="pt", truncation=True, max_length=MAX_CONTEXT_TOKENS).input_ids
     # Sample one token; generation runs outside the lock so UI handlers that
     # take the lock are not blocked while the model is working.
     with torch.no_grad():
         out = model.generate(
             input_ids,
             max_new_tokens=1,
             do_sample=True,
             temperature=TEMPERATURE,
             top_p=TOP_P,
         )
     next_token = tokenizer.decode(out[0, -1], skip_special_tokens=True, clean_up_tokenization_spaces=False)
     with lock:
         state["generated"] += next_token
         state["tokens_done"] += 1
         if state["tokens_done"] >= TOKENS_PER_PROMPT:
             state["finished"] = True
             log.info("Prompt completed – Oracle will select a new one shortly.")
         _atomic_write(STATE_PATH, state)


 def oracle_loop():
     """Run forever, extending the Oracle text by one token per pacing interval.

     Each iteration loads (or creates) the shared state, samples a single token,
     appends it to the stored text and sleeps ``SECS_BETWEEN_TOKENS`` seconds.
     An exception raised by one iteration is logged and the loop carries on, so
     a transient failure cannot silently stop the background thread for good.
     """
     while True:
         try:
             _oracle_step()
         except Exception:
             # Thread boundary: log the traceback and retry after the delay.
             log.exception("Oracle step failed; retrying after the pacing delay.")
         time.sleep(SECS_BETWEEN_TOKENS)
 # Start the Oracle generation loop at import time. It runs as a daemon thread,
 # so it does not keep the process alive once the main thread exits.
 threading.Thread(target=oracle_loop, daemon=True).start()
+###############################################################################
+# Gradio interface                                                             #
+###############################################################################
def fetch_state() -> tuple[str, str, str]:
    """Return ``(prompt, generated text, elapsed time)`` for the UI.

    The values come from the state file at ``STATE_PATH``; while no state has
    been written yet, placeholder values are returned instead.
    """
    snapshot = _read_json(STATE_PATH, {})
    if snapshot:
        prompt_text = snapshot["prompt"]
        oracle_text = snapshot["generated"]
        return prompt_text, oracle_text, _elapsed_str(snapshot["start_time"])
    return "Loading…", "", "0h 0m 0s"
def submit_guess(full: str, idea: str):
    """Append a player's guess to the JSONL log at ``DATA_PATH``.

    Args:
        full: Text of the "exact continuation" field.
        idea: Text of the "general idea" field.

    Returns:
        Three ``gr.update`` objects for the status box and the two input boxes.
        When both inputs are blank only the status changes (a warning);
        otherwise the guess is logged and both inputs are cleared. The exact
        guess wins when both fields are filled in.
    """
    exact_text, idea_text = full.strip(), idea.strip()
    if not (exact_text or idea_text):
        warning = gr.update(value="⚠️ Enter a guess in one of the fields.")
        return warning, gr.update(), gr.update()

    # Snapshot what the Oracle had written at the moment of the guess.
    prompt_text, oracle_text, elapsed = fetch_state()
    if exact_text:
        chosen, kind = exact_text, "full"
    else:
        chosen, kind = idea_text, "idea"

    entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "prompt": prompt_text,
        "point-in-time": elapsed,
        "response-point": oracle_text,
        "user-guess": chosen,
        "guess-type": kind,
    }
    # One JSON object per line; the lock serializes concurrent appends.
    with lock, open(DATA_PATH, "a", encoding="utf-8") as sink:
        sink.write(json.dumps(entry, ensure_ascii=False) + "\n")
    log.info(f"Logged {kind} guess ({len(chosen)} chars).")

    confirmation = gr.update(value="✅ Guess recorded – thanks!")
    return confirmation, gr.update(value=""), gr.update(value="")
+with gr.Blocks(title="What Comes Next", theme="gradio/soft") as demo:
     # Page layout: components are declared in the order they are listed here.
+    gr.Markdown("""# 🌌 What Comes Next
+Watch the Oracle craft an extended response – **one token at a time**. Predict its
+next words or general direction and see how close you were when the tale concludes.
+(All inputs are stored in `data.json` for research.)""")
     # Read-only view of the Oracle: prompt, text generated so far, elapsed time.
+    prompt_md   = gr.Markdown()
+    oracle_box  = gr.Textbox(lines=10, interactive=False, label="📜 Oracle text so far")
+    elapsed_tb  = gr.Textbox(interactive=False, label="⏱ Elapsed time")
+    refresh_btn = gr.Button("🔄 Refresh")
     # Guess inputs: submit_guess() uses the exact guess when both are filled in.
+    with gr.Row():
+        exact_tb = gr.Textbox(label="🧠 Exact continuation (full)")
+        idea_tb  = gr.Textbox(label="💡 General idea")
+    submit_btn  = gr.Button("Submit Guess")
+    status_tb   = gr.Textbox(interactive=False, label="Status")
+    # Actions
     # fetch_state() returns (prompt, generated, elapsed), matching the three outputs;
     # it runs on the Refresh button and via demo.load.
+    refresh_btn.click(fetch_state, outputs=[prompt_md, oracle_box, elapsed_tb])
+    demo.load(fetch_state, outputs=[prompt_md, oracle_box, elapsed_tb])
     # submit_guess() returns updates for the status box and both guess boxes.
+    submit_btn.click(submit_guess,
+                     inputs=[exact_tb, idea_tb],
+                     outputs=[status_tb, exact_tb, idea_tb])
 # Script entry point: serve the Gradio app on all interfaces, port 7860.
 # NOTE(review): show_error=True presumably surfaces handler errors in the UI — confirm.
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)