diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index a540e11fc8285e9d3c70f3590aaa15ea0f143b1e..0000000000000000000000000000000000000000 --- a/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -lens -.git diff --git a/.gitattributes b/.gitattributes index d38175476330ebc679bc3ab6c4059a2b5f00ad7d..c7d9f3332a950355d5a77d85000f05e6f45435ea 100644 --- a/.gitattributes +++ b/.gitattributes @@ -32,4 +32,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text -*.pyc filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index b37462737be5068087d02d7f3e81b162491b5acd..0000000000000000000000000000000000000000 --- a/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM python:3.9 - -WORKDIR /code - -COPY ./requirements.txt /code/requirements.txt - -RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt - -# Set up a new user named "user" with user ID 1000 -RUN useradd -m -u 1000 user - -# Switch to the "user" user -USER user - -# Set home to the user's home directory -ENV HOME=/home/user \ - PATH=/home/user/.local/bin:$PATH - -# Set the working directory to the user's home directory -WORKDIR $HOME/app - -# Copy the current directory contents into the container at $HOME/app setting the owner to the user -COPY --chown=user . $HOME/app - -CMD ["python", "app.py"] diff --git a/README.md b/README.md index 9e77df2f9974291988a66ba8982098cef3473962..e60581a6aceb021d5b790f1bd06a12cd73c4921d 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,10 @@ title: Tuned Lens emoji: 🔎 colorFrom: pink colorTo: blue -sdk: docker +sdk: gradio +python_version: 3.10.2 +sdk_version: 3.20.0 +app_file: app.py pinned: false license: mit --- diff --git a/__pycache__/app.cpython-310.pyc b/__pycache__/app.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38be6ee86d10dd12e248744a482be6a6020aee6e Binary files /dev/null and b/__pycache__/app.cpython-310.pyc differ diff --git a/app.py b/app.py index 58ebd59e09e6b64b9720e81a568f5a111a532580..43005f713d4e76508b3232c1bc6a52e911782e2e 100644 --- a/app.py +++ b/app.py @@ -1,20 +1,17 @@ import torch from tuned_lens.nn.lenses import TunedLens, LogitLens from transformers import AutoModelForCausalLM, AutoTokenizer -from tuned_lens.plotting import PredictionTrajectory +from tuned_lens.plotting import plot_lens import gradio as gr from plotly import graph_objects as go device = torch.device("cpu") print(f"Using device {device} for inference") -model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-410m-deduped") +model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-410m-deduped-v0") model = model.to(device) -tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-410m-deduped") -tuned_lens = TunedLens.from_model_and_pretrained( - model=model, - map_location=device, -) -logit_lens = LogitLens.from_model(model) +tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-410m-deduped-v0") +tuned_lens = TunedLens.load("pythia-410m-deduped-v0", map_location=device) +logit_lens = LogitLens(model) lens_options_dict = { "Tuned Lens": tuned_lens, @@ -23,35 +20,32 @@ lens_options_dict = { statistic_options_dict = { "Entropy": "entropy", - "Cross Entropy": "cross_entropy", + "Cross Entropy": "ce", "Forward KL": "forward_kl", } def make_plot(lens, text, statistic, token_cutoff): - input_ids = tokenizer.encode(text) - input_ids = [tokenizer.bos_token_id] + input_ids - targets = input_ids[1:] + [tokenizer.eos_token_id] + input_ids = tokenizer.encode(text, return_tensors="pt") - if len(input_ids) == 1: + if len(input_ids[0]) == 0: return go.Figure(layout=dict(title="Please enter some text.")) if token_cutoff < 1: return go.Figure(layout=dict(title="Please provide valid token cut off.")) - start_pos=max(len(input_ids) - token_cutoff, 0) - pred_traj = PredictionTrajectory.from_lens_and_model( - lens=lens_options_dict[lens], - model=model, + fig = plot_lens( + model, + tokenizer, + lens_options_dict[lens], + layer_stride=2, input_ids=input_ids, - tokenizer=tokenizer, - targets=targets, - start_pos=start_pos, + start_pos=max(len(input_ids[0]) - token_cutoff, 0), + statistic=statistic_options_dict[statistic], ) - return getattr(pred_traj, statistic_options_dict[statistic])().figure( - title=f"{lens} ({model.name_or_path}) {statistic}", - ) + return fig + preamble = """ # The Tuned Lens 🔎 @@ -114,4 +108,5 @@ with gr.Blocks() as demo: demo.load(make_plot, [lens_options, text, statistic, token_cutoff], plot) if __name__ == "__main__": - demo.launch(server_name="0.0.0.0", server_port=7860) + demo.launch() + diff --git a/lens/CarperAI/stable-vicuna-13b/config.json b/lens/CarperAI/stable-vicuna-13b/config.json deleted file mode 100644 index d4281e1ab276a7318e54f8433900a37ac52d4d21..0000000000000000000000000000000000000000 --- a/lens/CarperAI/stable-vicuna-13b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "vicuna-13b", "d_model": 5120, "num_hidden_layers": 40, "bias": true, "base_model_revision": null, "unembed_hash": "270c781d8280754a30abaf6fc186b1d754cc5d6bb17173d572d5517d5c7c702c", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/CarperAI/stable-vicuna-13b/params.pt b/lens/CarperAI/stable-vicuna-13b/params.pt deleted file mode 100644 index 9a012ea51f7eeacd26a74dd2590a6d6b4aa20d32..0000000000000000000000000000000000000000 --- a/lens/CarperAI/stable-vicuna-13b/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:47afb57632ac509a309e75da139e666ba5c7e93f2a305b2e286d7e748b7cc476 -size 2097581027 diff --git a/lens/EleutherAI/gpt-neox-20b/config.json b/lens/EleutherAI/gpt-neox-20b/config.json deleted file mode 100644 index 1155cb089d682f661ea3d7891b23ad210495246e..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/gpt-neox-20b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/gpt-neox-20b", "d_model": 6144, "num_hidden_layers": 44, "bias": true, "base_model_revision": "4e49eadb5d14bd22f314ec3f45b69a87b88c7691", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-1.4b-deduped-v0/config.json b/lens/EleutherAI/pythia-1.4b-deduped-v0/config.json deleted file mode 100644 index b70300d227b9071d232c7777c85e0cb673a512c2..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-1.4b-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-1.4b-deduped-v0", "d_model": 2048, "num_hidden_layers": 24, "bias": true, "base_model_revision": "b541e01fddacd3038799915cf8ff5b52e835a6c4", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-1.4b-deduped/config.json b/lens/EleutherAI/pythia-1.4b-deduped/config.json deleted file mode 100644 index 1c7122dc18b77ca08d43368e42a4a79d0bc35fcb..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-1.4b-deduped/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-1.4b-deduped", "d_model": 2048, "num_hidden_layers": 24, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-1.4b-deduped/params.pt b/lens/EleutherAI/pythia-1.4b-deduped/params.pt deleted file mode 100644 index 7df56ff54fa89ae6231be400ae6ae2583cd92364..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-1.4b-deduped/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b56db530d2c0df1bc5916bae58b241cfd4389dd4b1aa29e7210395df97164824 -size 402861347 diff --git a/lens/EleutherAI/pythia-12b-deduped-v0/config.json b/lens/EleutherAI/pythia-12b-deduped-v0/config.json deleted file mode 100644 index 15730b71a08276923eeb5f86fe92e8921ec6e997..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-12b-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-12b-deduped-v0", "d_model": 5120, "num_hidden_layers": 36, "bias": true, "base_model_revision": "b497662035bf3c80b4f6a1ddfe09bc27763e843a", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-12b-deduped/config.json b/lens/EleutherAI/pythia-12b-deduped/config.json deleted file mode 100644 index fad7d3daca2615ee1d7e2947d6d1727fbc4cb7df..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-12b-deduped/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-12b-deduped", "d_model": 5120, "num_hidden_layers": 36, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-12b-deduped/params.pt b/lens/EleutherAI/pythia-12b-deduped/params.pt deleted file mode 100644 index 77991b01e3f6fc78dcb5e2570fdf57a32ed7aa42..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-12b-deduped/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8452947780e3caf7009a752d62f1a2d71c66f67b3855c7e409869b7e633db40e -size 3775628355 diff --git a/lens/EleutherAI/pythia-160m-deduped-v0/config.json b/lens/EleutherAI/pythia-160m-deduped-v0/config.json deleted file mode 100644 index dac7a4bbfe63e2cd1af368c20d87c6ccbafc6d93..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-160m-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-160m-deduped-v0", "d_model": 768, "num_hidden_layers": 12, "bias": true, "base_model_revision": "7e57cc978f5da949f028f36b5baf8f5d6c3281b1", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-160m-deduped/config.json b/lens/EleutherAI/pythia-160m-deduped/config.json deleted file mode 100644 index 8292296cdfb48f0de06f36ed9036f9981b9464c0..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-160m-deduped/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-160m-deduped", "d_model": 768, "num_hidden_layers": 12, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-160m-deduped/params.pt b/lens/EleutherAI/pythia-160m-deduped/params.pt deleted file mode 100644 index 54317bf887ad3d0f621d30272acf59ce85514233..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-160m-deduped/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1d16b68baa1eb903bbf74eaf859cee5c34361f3e8dff331b17f55b8ce52a2dbe -size 28354051 diff --git a/lens/EleutherAI/pythia-1b-deduped-v0/config.json b/lens/EleutherAI/pythia-1b-deduped-v0/config.json deleted file mode 100644 index f677b115f68397c0ded54a0adb6e71332e8fcd47..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-1b-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-1b-deduped-v0", "d_model": 2048, "num_hidden_layers": 16, "bias": true, "base_model_revision": "021f79f50ff000ae1c159e22402ffec62284664d", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-2.8b-deduped-v0/config.json b/lens/EleutherAI/pythia-2.8b-deduped-v0/config.json deleted file mode 100644 index 6dde57245db5a5f4391bd8d889f64003fa124a11..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-2.8b-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-2.8b-deduped-v0", "d_model": 2560, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-2.8b-deduped-v0/params.pt b/lens/EleutherAI/pythia-2.8b-deduped-v0/params.pt deleted file mode 100644 index cdf87ecbb6fb2881c989b2080b03544dbbbe73ce..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-2.8b-deduped-v0/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f7892bd4af71a436aaba3a650ea907c94a077b8f16eb343c9a8762872c73026f -size 839204003 diff --git a/lens/EleutherAI/pythia-2.8b-deduped/config.json b/lens/EleutherAI/pythia-2.8b-deduped/config.json deleted file mode 100644 index 5a60f82c1eb9cddba8b046fbbc63002755a8d7f7..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-2.8b-deduped/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-2.8b-deduped", "d_model": 2560, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-2.8b-deduped/params.pt b/lens/EleutherAI/pythia-2.8b-deduped/params.pt deleted file mode 100644 index f08825ff4500d8fe43cfa7fc53c70ed70948c2e7..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-2.8b-deduped/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:314d403f2e1b1bf575ab2e851419d3a687c54138bfae5feace3ff00f1a96fd60 -size 839204003 diff --git a/lens/EleutherAI/pythia-410m-deduped-v0/config.json b/lens/EleutherAI/pythia-410m-deduped-v0/config.json deleted file mode 100644 index 7e4025c075430cb5e177f8cc75c9b0caff2439a0..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-410m-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-410m-deduped-v0", "d_model": 1024, "num_hidden_layers": 24, "bias": true, "base_model_revision": "3538d3569a7e313e445ad6401c92c6e16777a2da", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-410m-deduped/config.json b/lens/EleutherAI/pythia-410m-deduped/config.json deleted file mode 100644 index ee5bf8aff434ca741ef753de5fc2e3dc3b031a03..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-410m-deduped/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-410m-deduped", "d_model": 1024, "num_hidden_layers": 24, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-410m-deduped/params.pt b/lens/EleutherAI/pythia-410m-deduped/params.pt deleted file mode 100644 index 1cc539359351cb8accfaa5b86cccdcbe2c3802ea..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-410m-deduped/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:166ea259b35481e1eb2feba50b5ac4d9a8faed47b0937ede0d7bd6d9830dbc95 -size 100773155 diff --git a/lens/EleutherAI/pythia-6.9b-deduped-v0/config.json b/lens/EleutherAI/pythia-6.9b-deduped-v0/config.json deleted file mode 100644 index 27e094938885d74b953f99510863cf57a30c1310..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-6.9b-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-6.9b-deduped-v0", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": "cbd53efc2e56056e3bd0235277b5d0b668a6dfbb", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-6.9b-deduped/config.json b/lens/EleutherAI/pythia-6.9b-deduped/config.json deleted file mode 100644 index 3e1bcd580e9a92cd37f70c7671e334b6bcd740f8..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-6.9b-deduped/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-6.9b-deduped", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-6.9b-deduped/params.pt b/lens/EleutherAI/pythia-6.9b-deduped/params.pt deleted file mode 100644 index 454fbdcecc7f13d90c2dbb19fccb5580acab72c5..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-6.9b-deduped/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e2d00af6f64631b932b089fac5ca031061e207d51132a0f7433bd7a34fb06da0 -size 2148023459 diff --git a/lens/EleutherAI/pythia-70m-deduped-v0/config.json b/lens/EleutherAI/pythia-70m-deduped-v0/config.json deleted file mode 100644 index a030b8a54af4a856d61d819da05a89f1a5b6ea1c..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-70m-deduped-v0/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-70m-deduped-v0", "d_model": 512, "num_hidden_layers": 6, "bias": true, "base_model_revision": "ec30f7539a604fcb0b7fbba04fb1eb0110735d29", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-70m-deduped/config.json b/lens/EleutherAI/pythia-70m-deduped/config.json deleted file mode 100644 index af95bb2b068b60d7d6fcb7b4e067296c74eedcb6..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-70m-deduped/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "EleutherAI/pythia-70m-deduped", "d_model": 512, "num_hidden_layers": 6, "bias": true, "base_model_revision": null, "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-70m-deduped/params.pt b/lens/EleutherAI/pythia-70m-deduped/params.pt deleted file mode 100644 index b374fb1fe4c9f0f8eec2cf4a9f5c60742c624664..0000000000000000000000000000000000000000 --- a/lens/EleutherAI/pythia-70m-deduped/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c363b641564af68e9f9b73af56015b6b5c5b08caea5880787744eaabfab01343 -size 6306803 diff --git a/lens/facebook/llama-13b/config.json b/lens/facebook/llama-13b/config.json deleted file mode 100644 index 164088bcbd4e14d8e9a1f9d3dd87a1ec66f0bc32..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-13b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "huggyllama/llama-13b", "d_model": 5120, "num_hidden_layers": 40, "bias": true, "base_model_revision": null, "unembed_hash": "86cefdfd94bb3da225b405dd1328136786f6177b03d82f90e5c734d23c47e8ca", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/llama-13b/params.pt b/lens/facebook/llama-13b/params.pt deleted file mode 100644 index 77963d1952531ca707f410ee53c72ff402106891..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-13b/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2700859b4335f4e4bdd4040232f594170bb717f8af04f65e5560c49cfb6da122 -size 2097581027 diff --git a/lens/facebook/llama-30b/config.json b/lens/facebook/llama-30b/config.json deleted file mode 100644 index 9acceec1235efe0c7170736687e0f69ed708d99a..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-30b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "huggyllama/llama-30b", "d_model": 6656, "num_hidden_layers": 60, "bias": true, "base_model_revision": null, "unembed_hash": "512b41bb8fc0e2b3647a877b5e114ca9503d7c800185b14a8e7ad9e921424367", "lens_type": "linear_tuned_lens"} diff --git a/lens/facebook/llama-30b/params.pt b/lens/facebook/llama-30b/params.pt deleted file mode 100644 index e705b2e99e3ece945c0c2e5b6bb9bc9c2ede59ed..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-30b/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:10e781269b476a6de8f4ac12ea3f8b20fe22d3a8ca8cfdf770366a5f4e36c78f -size 5317111487 diff --git a/lens/facebook/llama-65b/config.json b/lens/facebook/llama-65b/config.json deleted file mode 100644 index 3fba9f1386a5a6a77601d7dad201eb03b820940b..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-65b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "huggyllama/llama-65b", "d_model": 8192, "num_hidden_layers": 80, "bias": true, "base_model_revision": null, "unembed_hash": "8ffc2dbf80dd5c131c4be9e347d090856bef2dbc973433dbd42ca9257b00d5e1", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/llama-65b/params.pt b/lens/facebook/llama-65b/params.pt deleted file mode 100644 index 312d6cf0a3b655581cc4ae8cb18908523565a9c7..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-65b/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c9199874796630e1be615b0b34cd48e7f37ed09c6140153cdf6b7c509c45361b -size 10738779199 diff --git a/lens/facebook/llama-7b/config.json b/lens/facebook/llama-7b/config.json deleted file mode 100644 index 975a70715c914c2668eb088234b11e2dd41ba810..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-7b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "huggyllama/llama-7b", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "unembed_hash": "7d0c2d720d286bdd706e662ea04f327204090f7d54054b0d5faabbc1b06a72fe", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/llama-7b/params.pt b/lens/facebook/llama-7b/params.pt deleted file mode 100644 index a27dd4210f1d327467fa102025014bc2b2d25701..0000000000000000000000000000000000000000 --- a/lens/facebook/llama-7b/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:02186a08b29e7173ae229218823b65891a7b2f2eadf0e69baf44a9c62b55a7cf -size 1074019491 diff --git a/lens/facebook/opt-1.3b/config.json b/lens/facebook/opt-1.3b/config.json deleted file mode 100644 index 7f1985c200e88f908717cb2b21fbcdb4a8d9d975..0000000000000000000000000000000000000000 --- a/lens/facebook/opt-1.3b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "facebook/opt-1.3b", "d_model": 2048, "num_hidden_layers": 24, "bias": true, "base_model_revision": "8c7b10754972749675d22364c25c428b29face51", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/opt-125m/config.json b/lens/facebook/opt-125m/config.json deleted file mode 100644 index 5b7e5a5d612c8bfe9bdd02327920d72d95764406..0000000000000000000000000000000000000000 --- a/lens/facebook/opt-125m/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "facebook/opt-125m", "d_model": 768, "num_hidden_layers": 12, "bias": true, "base_model_revision": "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/opt-6.7b/config.json b/lens/facebook/opt-6.7b/config.json deleted file mode 100644 index 749d4eb6b8873415c1007325a6a5a826d88df853..0000000000000000000000000000000000000000 --- a/lens/facebook/opt-6.7b/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "facebook/opt-6.7b", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": "a45aa65bbeb77c1558bc99bedc6779195462dab0", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/gpt-neox-20b/config.json b/lens/gpt-neox-20b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9b4f5ea40e45fad0137783f22642d134d28bd30 --- /dev/null +++ b/lens/gpt-neox-20b/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/gpt-neox-20b", "d_model": 6144, "num_hidden_layers": 44, "bias": true, "base_model_revision": "4e49eadb5d14bd22f314ec3f45b69a87b88c7691", "unemebd_hash": "323d4c731c33556e143503e3be913c109ead330080b4065552be97000c19ed67", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/gpt-neox-20b/params.pt b/lens/gpt-neox-20b/params.pt similarity index 100% rename from lens/EleutherAI/gpt-neox-20b/params.pt rename to lens/gpt-neox-20b/params.pt diff --git a/lens/gpt2-large/config.json b/lens/gpt2-large/config.json index 7c2825e5fc02e2c8e9a28b319d9530edb8e5028c..4bbcc9802a33f5de3f4ae60717597ba0be44ec79 100644 --- a/lens/gpt2-large/config.json +++ b/lens/gpt2-large/config.json @@ -1 +1 @@ -{"base_model_name_or_path": "gpt2-large", "d_model": 1280, "num_hidden_layers": 36, "bias": true, "base_model_revision": "212095d5832abbf9926672e1c1e8d14312a3be20", "lens_type": "linear_tuned_lens"} \ No newline at end of file +{"base_model_name_or_path": "gpt2-large", "d_model": 1280, "num_hidden_layers": 36, "bias": true, "base_model_revision": "212095d5832abbf9926672e1c1e8d14312a3be20", "unemebd_hash": "9b7da774c0a326716dca888539370ddff25804795949e5ace65ef9f761f47397", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/gpt2-xl/config.json b/lens/gpt2-xl/config.json index a7c0cd65adc019e4c81421676462caf8d77c0e84..96d2e2c81c270fea69ad901f4e2f3cbff3adedb4 100644 --- a/lens/gpt2-xl/config.json +++ b/lens/gpt2-xl/config.json @@ -1 +1 @@ -{"base_model_name_or_path": "gpt2-xl", "d_model": 1600, "num_hidden_layers": 48, "bias": true, "base_model_revision": "33cdb5c0db5423c1879b1b9f16c352988e8754a8", "lens_type": "linear_tuned_lens"} \ No newline at end of file +{"base_model_name_or_path": "gpt2-xl", "d_model": 1600, "num_hidden_layers": 48, "bias": true, "base_model_revision": "33cdb5c0db5423c1879b1b9f16c352988e8754a8", "unemebd_hash": "70bf58a8cf7964b39530e30fdaebb89de39489546244437b1ed56fb81bd4c746", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/gpt2/config.json b/lens/gpt2/config.json index 4f43d2940b170aa19b31d295d5f10bd0d2908c85..9d10cd36a405ab09b5e329d006863fcd38cf05db 100644 --- a/lens/gpt2/config.json +++ b/lens/gpt2/config.json @@ -1 +1 @@ -{"base_model_name_or_path": "gpt2", "d_model": 768, "num_hidden_layers": 12, "bias": true, "base_model_revision": "e7da7f221d5bf496a48136c0cd264e630fe9fcc8", "lens_type": "linear_tuned_lens"} \ No newline at end of file +{"base_model_name_or_path": "gpt2", "d_model": 768, "num_hidden_layers": 12, "bias": true, "base_model_revision": "e7da7f221d5bf496a48136c0cd264e630fe9fcc8", "unemebd_hash": "608e50247f57691c90453601e854f2287141e4db9cba436af0b0186003e2daae", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/lmsys/vicuna-13b-v1.1/config.json b/lens/lmsys/vicuna-13b-v1.1/config.json deleted file mode 100644 index bdf4940362d37b864123cdd5dff5d3c4e16960de..0000000000000000000000000000000000000000 --- a/lens/lmsys/vicuna-13b-v1.1/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "vicuna-original-13b", "d_model": 5120, "num_hidden_layers": 40, "bias": true, "base_model_revision": null, "unembed_hash": "86cefdfd94bb3da225b405dd1328136786f6177b03d82f90e5c734d23c47e8ca", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/lmsys/vicuna-13b-v1.1/params.pt b/lens/lmsys/vicuna-13b-v1.1/params.pt deleted file mode 100644 index 9dad719390e626dba1190c154fa447f8787a7728..0000000000000000000000000000000000000000 --- a/lens/lmsys/vicuna-13b-v1.1/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0281f46b65b728cb390e41bd5848d8ab635b2847b48c080b2de2b78627baa40d -size 2097581027 diff --git a/lens/meta-llama/Llama-2-13b-chat-hf/config.json b/lens/meta-llama/Llama-2-13b-chat-hf/config.json deleted file mode 100644 index f17140a4f80243723d54207d8ac7e03fdb330122..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-13b-chat-hf/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Llama-2-13b-chat-hf", "d_model": 5120, "num_hidden_layers": 40, "bias": true, "base_model_revision": null, "unembed_hash": "05e6ab11c049cc1356b38bf9ff84ffbedaf802a3c7fdda9e763bca0c4e9de2ab", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Llama-2-13b-chat-hf/hh-rlhf/config.json b/lens/meta-llama/Llama-2-13b-chat-hf/hh-rlhf/config.json deleted file mode 100644 index f17140a4f80243723d54207d8ac7e03fdb330122..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-13b-chat-hf/hh-rlhf/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Llama-2-13b-chat-hf", "d_model": 5120, "num_hidden_layers": 40, "bias": true, "base_model_revision": null, "unembed_hash": "05e6ab11c049cc1356b38bf9ff84ffbedaf802a3c7fdda9e763bca0c4e9de2ab", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Llama-2-13b-chat-hf/hh-rlhf/params.pt b/lens/meta-llama/Llama-2-13b-chat-hf/hh-rlhf/params.pt deleted file mode 100644 index 00d178b9504b367bde551a75a87967c13b1b490e..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-13b-chat-hf/hh-rlhf/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6ceabb160e1b3ff887232514159bdf22c7f109a585759284eb7b18e72fb27667 -size 2097581027 diff --git a/lens/meta-llama/Llama-2-13b-chat-hf/params.pt b/lens/meta-llama/Llama-2-13b-chat-hf/params.pt deleted file mode 100644 index 8878ffe397eef446c401301a230513e804a7ed3c..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-13b-chat-hf/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a8af029394d9be03e6a9a75a03ac477fdb9e42352ad3df035439bbb8ccb1b5d -size 2097581027 diff --git a/lens/meta-llama/Llama-2-13b-hf/config.json b/lens/meta-llama/Llama-2-13b-hf/config.json deleted file mode 100644 index 1b1b533d83452fd33a6097eae099b83e77d07381..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-13b-hf/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Llama-2-13b-hf", "d_model": 5120, "num_hidden_layers": 40, "bias": true, "base_model_revision": null, "unembed_hash": "1f421a88f21fcda6d1c8c17a481c46918d355f8b06cfc77b05cf738e11b55f7a", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Llama-2-13b-hf/params.pt b/lens/meta-llama/Llama-2-13b-hf/params.pt deleted file mode 100644 index 4c904cb27b20f39f57d91de533fa5ce150b0f9dc..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-13b-hf/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f87b2a5f2b3125d9e49343cbac244b901795e3af4117ec0f6ee428f79027778d -size 2097581027 diff --git a/lens/meta-llama/Llama-2-7b-chat-hf/config.json b/lens/meta-llama/Llama-2-7b-chat-hf/config.json deleted file mode 100644 index bb3da5d9cee530daa8f738438cd6ba26d83ab206..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-7b-chat-hf/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "unembed_hash": "3313b62209f366e7b19233ae7068501553350611c20f96caf2499c0486f8836b", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Llama-2-7b-chat-hf/hh-rlhf/config.json b/lens/meta-llama/Llama-2-7b-chat-hf/hh-rlhf/config.json deleted file mode 100644 index bb3da5d9cee530daa8f738438cd6ba26d83ab206..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-7b-chat-hf/hh-rlhf/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "unembed_hash": "3313b62209f366e7b19233ae7068501553350611c20f96caf2499c0486f8836b", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Llama-2-7b-chat-hf/hh-rlhf/params.pt b/lens/meta-llama/Llama-2-7b-chat-hf/hh-rlhf/params.pt deleted file mode 100644 index 6a79df8fb5174a0247bde4a21d2e97fa7164dac2..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-7b-chat-hf/hh-rlhf/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86abc8c2a2044c3a70e88291b636222a6db6cdb25af51e86108e479843a2326e -size 2148023459 diff --git a/lens/meta-llama/Llama-2-7b-chat-hf/params.pt b/lens/meta-llama/Llama-2-7b-chat-hf/params.pt deleted file mode 100644 index d1823473cf01a7693e47ccbb1ab7699947174315..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-7b-chat-hf/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c66e7d56afd044bc6472e91bf6103ee5ab837c5953b44c791b0f1f8ac8ce8578 -size 2148023459 diff --git a/lens/meta-llama/Llama-2-7b-hf/config.json b/lens/meta-llama/Llama-2-7b-hf/config.json deleted file mode 100644 index 168bd6faa0a846012d95ae8bd515aeba928f7d61..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-7b-hf/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Llama-2-7b-hf", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "unembed_hash": "3313b62209f366e7b19233ae7068501553350611c20f96caf2499c0486f8836b", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Llama-2-7b-hf/params.pt b/lens/meta-llama/Llama-2-7b-hf/params.pt deleted file mode 100644 index 0239435da2308cf03cd2727b4b248fd8d2f26a11..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Llama-2-7b-hf/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc13d3a19107877e868019505dbc6428d594c573edcac2c28d1017dd521f8fb3 -size 2148023459 diff --git a/lens/meta-llama/Meta-Llama-3-8B-Instruct/config.json b/lens/meta-llama/Meta-Llama-3-8B-Instruct/config.json deleted file mode 100644 index dab3bf51e71356b28f3d989feb7fa7cddd87ecfb..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Meta-Llama-3-8B-Instruct/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "unembed_hash": "fdd74d04451fa6853022839edeccc6a288ebdbae0f0371d8ca2316325be7e925", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Meta-Llama-3-8B-Instruct/params.pt b/lens/meta-llama/Meta-Llama-3-8B-Instruct/params.pt deleted file mode 100644 index 210447ac1e23844c4bb4521444e17484d51c715b..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Meta-Llama-3-8B-Instruct/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c1869e84c1d4f27d7ae6a46199daeee8c828db47c07438e0bc23d9b25b1166d -size 2148023890 diff --git a/lens/meta-llama/Meta-Llama-3-8B/config.json b/lens/meta-llama/Meta-Llama-3-8B/config.json deleted file mode 100644 index 03e39b6f6d7d09d46e5388203c4dde8f22953fe3..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Meta-Llama-3-8B/config.json +++ /dev/null @@ -1 +0,0 @@ -{"base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": null, "unembed_hash": "de7042374eeecd4a3a6243327929653305d241c950c5dd79b6609d2d76390aa6", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/meta-llama/Meta-Llama-3-8B/params.pt b/lens/meta-llama/Meta-Llama-3-8B/params.pt deleted file mode 100644 index 386c6cf954858835adb10acd0215959fb8f70ffe..0000000000000000000000000000000000000000 --- a/lens/meta-llama/Meta-Llama-3-8B/params.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2a0cf218333d8c33fbcc24b051ddf5afcdb4d240999626ca937f81e23b90967 -size 2148023890 diff --git a/lens/opt-1.3b/config.json b/lens/opt-1.3b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eaa543dd325aff4da1f94e5cca621c31139bad15 --- /dev/null +++ b/lens/opt-1.3b/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "facebook/opt-1.3b", "d_model": 2048, "num_hidden_layers": 24, "bias": true, "base_model_revision": "8c7b10754972749675d22364c25c428b29face51", "unemebd_hash": "2db68eed8b11e46e8a969c14b1ce9269edec3154b19cdd18970dcfc405533070", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/opt-1.3b/params.pt b/lens/opt-1.3b/params.pt similarity index 100% rename from lens/facebook/opt-1.3b/params.pt rename to lens/opt-1.3b/params.pt diff --git a/lens/opt-125m/config.json b/lens/opt-125m/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c180904bbd4942a61f39c6d6d43d52384cd6b5b3 --- /dev/null +++ b/lens/opt-125m/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "facebook/opt-125m", "d_model": 768, "num_hidden_layers": 12, "bias": true, "base_model_revision": "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32", "unemebd_hash": "d54b1bdd7e16d4dab3bb9f1856c1146310a3ce228e667640843455a3956dc9b4", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/opt-125m/params.pt b/lens/opt-125m/params.pt similarity index 100% rename from lens/facebook/opt-125m/params.pt rename to lens/opt-125m/params.pt diff --git a/lens/opt-6.7b/config.json b/lens/opt-6.7b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e3741b3fa955e3bc7d76492c6307cf4e86a3733c --- /dev/null +++ b/lens/opt-6.7b/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "facebook/opt-6.7b", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": "a45aa65bbeb77c1558bc99bedc6779195462dab0", "unemebd_hash": "35676bc5e38da5b53231218f1c829b91bc89de7f65fec1b2fe885b9c42f93dcb", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/facebook/opt-6.7b/params.pt b/lens/opt-6.7b/params.pt similarity index 100% rename from lens/facebook/opt-6.7b/params.pt rename to lens/opt-6.7b/params.pt diff --git a/lens/pythia-1.4b-deduped-v0/config.json b/lens/pythia-1.4b-deduped-v0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2e821bd5e8a700a1f314e96fdb92a4b50f4ca070 --- /dev/null +++ b/lens/pythia-1.4b-deduped-v0/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/pythia-1.4b-deduped-v0", "d_model": 2048, "num_hidden_layers": 24, "bias": true, "base_model_revision": "b541e01fddacd3038799915cf8ff5b52e835a6c4", "unemebd_hash": "da1780eccec1a4ff12e43464da6cbef33b9ffde398a3056ac9648dd53229943e", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-1.4b-deduped-v0/params.pt b/lens/pythia-1.4b-deduped-v0/params.pt similarity index 100% rename from lens/EleutherAI/pythia-1.4b-deduped-v0/params.pt rename to lens/pythia-1.4b-deduped-v0/params.pt diff --git a/lens/pythia-12b-deduped-v0/config.json b/lens/pythia-12b-deduped-v0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..260760ad72ec518dabc233d3a699868e395827ac --- /dev/null +++ b/lens/pythia-12b-deduped-v0/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/pythia-12b-deduped-v0", "d_model": 5120, "num_hidden_layers": 36, "bias": true, "base_model_revision": "b497662035bf3c80b4f6a1ddfe09bc27763e843a", "unemebd_hash": "a161c0d1dd8793ca1683b0422f3b6573178ea7ebf26cf207e40cc56507aa0526", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-12b-deduped-v0/params.pt b/lens/pythia-12b-deduped-v0/params.pt similarity index 100% rename from lens/EleutherAI/pythia-12b-deduped-v0/params.pt rename to lens/pythia-12b-deduped-v0/params.pt diff --git a/lens/pythia-160m-deduped-v0/config.json b/lens/pythia-160m-deduped-v0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c93799b734b4b54c8280df1be1841e02e7e6d729 --- /dev/null +++ b/lens/pythia-160m-deduped-v0/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/pythia-160m-deduped-v0", "d_model": 768, "num_hidden_layers": 12, "bias": true, "base_model_revision": "7e57cc978f5da949f028f36b5baf8f5d6c3281b1", "unemebd_hash": "922e5aee39d4874fb5c1163087858333808367bf9b02c4a1ae4a06828af2f58a", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-160m-deduped-v0/params.pt b/lens/pythia-160m-deduped-v0/params.pt similarity index 100% rename from lens/EleutherAI/pythia-160m-deduped-v0/params.pt rename to lens/pythia-160m-deduped-v0/params.pt diff --git a/lens/pythia-1b-deduped-v0/config.json b/lens/pythia-1b-deduped-v0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b9d36446fe8742ad439fb39bdb5073c7f5a49855 --- /dev/null +++ b/lens/pythia-1b-deduped-v0/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/pythia-1b-deduped-v0", "d_model": 2048, "num_hidden_layers": 16, "bias": true, "base_model_revision": "021f79f50ff000ae1c159e22402ffec62284664d", "unemebd_hash": "b97dd35a220ea2694e263be05d2f19129fc5725c1d201d83eae5a78eeebcf527", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-1b-deduped-v0/params.pt b/lens/pythia-1b-deduped-v0/params.pt similarity index 100% rename from lens/EleutherAI/pythia-1b-deduped-v0/params.pt rename to lens/pythia-1b-deduped-v0/params.pt diff --git a/lens/pythia-410m-deduped-v0/config.json b/lens/pythia-410m-deduped-v0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ae1d752fce20c2b384ea10cb8a95bbeba43ea5a4 --- /dev/null +++ b/lens/pythia-410m-deduped-v0/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/pythia-410m-deduped-v0", "d_model": 1024, "num_hidden_layers": 24, "bias": true, "base_model_revision": "3538d3569a7e313e445ad6401c92c6e16777a2da", "unemebd_hash": "281af3dac813ef2f2eb5a1a359c402627bc9cf104710d00f891f767b17687758", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-410m-deduped-v0/params.pt b/lens/pythia-410m-deduped-v0/params.pt similarity index 100% rename from lens/EleutherAI/pythia-410m-deduped-v0/params.pt rename to lens/pythia-410m-deduped-v0/params.pt diff --git a/lens/pythia-6.9b-deduped-v0/config.json b/lens/pythia-6.9b-deduped-v0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..85a4f47de440dd672ed31a0ae40a5d9ea1e8b965 --- /dev/null +++ b/lens/pythia-6.9b-deduped-v0/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/pythia-6.9b-deduped-v0", "d_model": 4096, "num_hidden_layers": 32, "bias": true, "base_model_revision": "cbd53efc2e56056e3bd0235277b5d0b668a6dfbb", "unemebd_hash": "5a037e6f7542abd5e0817e46c7e9127c18164ac34d06051b5faac190103f6951", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-6.9b-deduped-v0/params.pt b/lens/pythia-6.9b-deduped-v0/params.pt similarity index 100% rename from lens/EleutherAI/pythia-6.9b-deduped-v0/params.pt rename to lens/pythia-6.9b-deduped-v0/params.pt diff --git a/lens/pythia-70m-deduped-v0/config.json b/lens/pythia-70m-deduped-v0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ee16350592ae6aa3bbc31b0f93955112b5e9bf6 --- /dev/null +++ b/lens/pythia-70m-deduped-v0/config.json @@ -0,0 +1 @@ +{"base_model_name_or_path": "EleutherAI/pythia-70m-deduped-v0", "d_model": 512, "num_hidden_layers": 6, "bias": true, "base_model_revision": "ec30f7539a604fcb0b7fbba04fb1eb0110735d29", "unemebd_hash": "6c42572c654f76afb6ad30aafac2644308d5e3e708ee54051fa9d4e043918f3a", "lens_type": "linear_tuned_lens"} \ No newline at end of file diff --git a/lens/EleutherAI/pythia-70m-deduped-v0/params.pt b/lens/pythia-70m-deduped-v0/params.pt similarity index 100% rename from lens/EleutherAI/pythia-70m-deduped-v0/params.pt rename to lens/pythia-70m-deduped-v0/params.pt diff --git a/lens_migration.py b/lens_migration.py new file mode 100644 index 0000000000000000000000000000000000000000..316255f73bfe176641053b87726ed52de18e9c69 --- /dev/null +++ b/lens_migration.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python3 +from huggingface_hub import model_info +import argparse +from copy import deepcopy +import inspect +from logging import warn +from pathlib import Path +from tqdm import tqdm +import json + +from tuned_lens.model_surgery import get_final_norm, get_transformer_layers +from tuned_lens.load_artifacts import load_lens_artifacts +from tuned_lens.nn import TunedLens +from transformers.models.bloom.modeling_bloom import BloomBlock +from transformers import PreTrainedModel, AutoModelForCausalLM +from typing import Optional, Generator, Union +import torch as th + +from tuned_lens.stats.distance import js_divergence + + +def instantiate_layer(model_config, layer_idx: int, model_type: str) -> th.nn.Module: + if model_type == "bloom": + from transformers.models.bloom.modeling_bloom import BloomBlock + + return _BloomBlockWrapper(BloomBlock(model_config)) # type: ignore[arg-type] + if model_type == "gpt_neo": + from transformers.models.gpt_neo.modeling_gpt_neo import GPTNeoBlock + + return GPTNeoBlock(model_config, layer_idx) + if model_type == "gpt_neox": + from transformers.models.gpt_neox.modeling_gpt_neox import ( + GPTNeoXLayer, + ) + + return GPTNeoXLayer(model_config) # type: ignore[arg-type] + if model_type == "gpt2": + from transformers.models.gpt2.modeling_gpt2 import GPT2Block + + return GPT2Block(model_config, layer_idx) # type: ignore[arg-type] + if model_type == "opt": + from transformers.models.opt.modeling_opt import OPTDecoderLayer + + return OPTDecoderLayer(model_config) # type: ignore[arg-type] + else: + raise ValueError(f"Unknown model type '{model_type}'") + + +def maybe_wrap(layer: th.nn.Module) -> th.nn.Module: + return _BloomBlockWrapper(layer) if isinstance(layer, BloomBlock) else layer + + +# Very annoying that we have to do this. See https://bit.ly/3XSQ7W6 for context on +# what we're doing here. +class _BloomBlockWrapper(th.nn.Module): + def __init__(self, block: BloomBlock): + super().__init__() + self.block = block + + def forward(self, x: th.Tensor) -> th.Tensor: + from transformers.models.bloom.modeling_bloom import ( + BloomModel, + build_alibi_tensor, + ) + + batch_size, seq_len, _ = x.shape + dummy_mask = x.new_ones([batch_size, seq_len]) + + # Causal mask isn't created inside the block itself, so we have to do it here. + # Weirdly _prepare_attn_mask doesn't depend on `self` at all but is still an + # instance method for some reason, so we pass `None` as the first argument. + causal_mask = BloomModel._prepare_attn_mask( + None, dummy_mask, (batch_size, seq_len), 0 # type: ignore[arg-type] + ) + alibi = build_alibi_tensor(dummy_mask, self.block.num_heads, x.dtype) + h, *_ = self.block(x, alibi, causal_mask) + return h + + +class TunedLensOld(th.nn.Module): + """A tuned lens for decoding hidden states into logits.""" + + layer_norm: th.nn.LayerNorm + unembedding: th.nn.Linear + extra_layers: th.nn.Sequential + layer_translators: th.nn.ModuleList + + def __init__( + self, + model: Optional[PreTrainedModel] = None, + *, + bias: bool = True, + extra_layers: int = 0, + include_input: bool = True, + reuse_unembedding: bool = True, + # Used when saving and loading the lens + model_config: Optional[dict] = None, + d_model: Optional[int] = None, + num_layers: Optional[int] = None, + vocab_size: Optional[int] = None, + ): + """Create a TunedLensOld. + + Args: + model : A pertained model from the transformers library you wish to inspect. + bias : Whether to include a bias term in the translator layers. + extra_layers : The number of extra layers to apply to the hidden states + before decoding into logits. + + include_input : Whether to include a lens that decodes the word embeddings. + reuse_unembedding : Weather to reuse the unembedding matrix from the model. + model_config : The config of the model. Used for saving and loading. + d_model : The models hidden size. Used for saving and loading. + num_layers : The number of layers in the model. Used for saving and loading. + vocab_size : The size of the vocabulary. Used for saving and loading. + + Raises: + ValueError: if neither a model or d_model, num_layers, and vocab_size, + are provided. + """ + super().__init__() + + self.extra_layers = th.nn.Sequential() + + if ( + model + is None + == (d_model is None or num_layers is None or vocab_size is None) + ): + raise ValueError( + "Must provide either a model or d_model, num_layers, and vocab_size" + ) + + # Initializing from scratch without a model + if not model: + assert d_model and num_layers and vocab_size + self.layer_norm = th.nn.LayerNorm(d_model) + self.unembedding = th.nn.Linear(d_model, vocab_size, bias=False) + + # Use HuggingFace methods to get decoder layers + else: + assert not (d_model or num_layers or vocab_size) + d_model = model.config.hidden_size + num_layers = model.config.num_hidden_layers + vocab_size = model.config.vocab_size + assert isinstance(d_model, int) and isinstance(vocab_size, int) + + model_config = model.config.to_dict() # type: ignore[F841] + + # Currently we convert the decoder to full precision + self.unembedding = deepcopy(model.get_output_embeddings()).float() + if ln := get_final_norm(model): + self.layer_norm = deepcopy(ln).float() + else: + self.layer_norm = th.nn.Identity() + + if extra_layers: + _, layers = get_transformer_layers(model) + self.extra_layers.extend( + [maybe_wrap(layer) for layer in layers[-extra_layers:]] + ) + + # Save config for later + config_keys = set(inspect.getfullargspec(TunedLensOld).kwonlyargs) + self.config = {k: v for k, v in locals().items() if k in config_keys} + del model_config + + # Try to prevent finetuning the decoder + assert d_model and num_layers + self.layer_norm.requires_grad_(False) + self.unembedding.requires_grad_(False) + + out_features = d_model if reuse_unembedding else vocab_size + translator = th.nn.Linear(d_model, out_features, bias=bias) + if not reuse_unembedding: + translator.weight.data = self.unembedding.weight.data.clone() + translator.bias.data.zero_() + else: + translator.weight.data.zero_() + translator.bias.data.zero_() + + self.add_module("input_translator", translator if include_input else None) + # Don't include the final layer + num_layers -= 1 + + self.layer_translators = th.nn.ModuleList( + [deepcopy(translator) for _ in range(num_layers)] + ) + + def __getitem__(self, item: int) -> th.nn.Module: + """Get the probe module at the given index.""" + if isinstance(self.input_translator, th.nn.Module): + if item == 0: + return self.input_translator + else: + item -= 1 + + return self.layer_translators[item] + + def __iter__(self) -> Generator[th.nn.Module, None, None]: + """Get iterator over the translators within the lens.""" + if isinstance(self.input_translator, th.nn.Module): + yield self.input_translator + + yield from self.layer_translators + + @classmethod + def load(cls, resource_id: str, **kwargs) -> "TunedLensOld": + """Load a tuned lens from a or hugging face hub. + + Args: + resource_id : The path to the directory containing the config and checkpoint + or the name of the model on the hugging face hub. + **kwargs : Additional arguments to pass to torch.load. + + Returns: + A TunedLensOld instance. + """ + config_path, ckpt_path = load_lens_artifacts(resource_id) + # Load config + with open(config_path, "r") as f: + config = json.load(f) + + # Load parameters + state = th.load(ckpt_path, **kwargs) + + # Backwards compatibility we really need to stop renaming things + keys = list(state.keys()) + for key in keys: + for old_key in ["probe", "adapter"]: + if old_key in key: + warn( + f"Loading a checkpoint with a '{old_key}' key. " + "This is deprecated and may be removed in a future version. " + ) + new_key = key.replace(old_key, "translator") + state[new_key] = state.pop(key) + + # Drop unrecognized config keys + unrecognized = set(config) - set(inspect.getfullargspec(cls).kwonlyargs) + for key in unrecognized: + warn(f"Ignoring config key '{key}'") + del config[key] + + lens = cls(**config) + + if num_extras := config.get("extra_layers"): + # This is sort of a hack but AutoConfig doesn't appear to have a from_dict + # for some reason. + from transformers.models.auto import CONFIG_MAPPING + + model_conf_dict = config.get("model_config") + del model_conf_dict["torch_dtype"] + assert model_conf_dict, "Need a 'model_config' entry to load extra layers" + + model_type = model_conf_dict["model_type"] + config_cls = CONFIG_MAPPING[model_type] + model_config = config_cls.from_dict(model_conf_dict) + + lens.extra_layers = th.nn.Sequential( + *[ + instantiate_layer( + model_config, model_config.num_hidden_layers - i - 1, model_type + ) + for i in range(num_extras) + ] + ) + + lens.load_state_dict(state) + return lens + + def save( + self, + path: Union[Path, str], + ckpt: str = "params.pt", + config: str = "config.json", + ) -> None: + """Save the lens to a directory. + + Args: + path : The path to the directory to save the lens to. + ckpt : The name of the checkpoint file to save the parameters to. + config : The name of the config file to save the config to. + """ + path = Path(path) + path.mkdir(exist_ok=True, parents=True) + th.save(self.state_dict(), path / ckpt) + + with open(path / config, "w") as f: + json.dump(self.config, f) + + def normalize_(self): + """Canonicalize the transforms by centering their weights and biases.""" + for linear in self: + assert isinstance(linear, th.nn.Linear) + + A, b = linear.weight.data, linear.bias.data + A -= A.mean(dim=0, keepdim=True) + b -= b.mean() + + def transform_hidden(self, h: th.Tensor, idx: int) -> th.Tensor: + """Transform hidden state from layer `idx`.""" + if not self.config["reuse_unembedding"]: + raise RuntimeError("TunedLensOld.transform_hidden requires reuse_unembedding") + + # Note that we add the translator output residually, in contrast to the formula + # in the paper. By parametrizing it this way we ensure that weight decay + # regularizes the transform toward the identity, not the zero transformation. + return h + self[idx](h) + + def to_logits(self, h: th.Tensor) -> th.Tensor: + """Decode a hidden state into logits.""" + h = self.extra_layers(h) + while isinstance(h, tuple): + h, *_ = h + + return self.unembedding(self.layer_norm(h)) + + def forward(self, h: th.Tensor, idx: int) -> th.Tensor: + """Transform and then decode the hidden states into logits.""" + # Sanity check to make sure we don't finetune the decoder + # if any(p.requires_grad for p in self.parameters(recurse=False)): + # raise RuntimeError("Make sure to freeze the decoder") + + # We're learning a separate unembedding for each layer + if not self.config["reuse_unembedding"]: + h_ = self.layer_norm(h) + return self[idx](h_) + + h = self.transform_hidden(h, idx) + return self.to_logits(h) + + def __len__(self) -> int: + """Return the number of layer translators in the lens.""" + N = len(self.layer_translators) + if self.input_translator: + N += 1 + + return N + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, default="gpt2") + parser.add_argument("--resource-id", type=str, default="gpt2") + parser.add_argument("--output-dir", type=str, default="lens/gpt2") + args = parser.parse_args() + + model = AutoModelForCausalLM.from_pretrained(args.model) + revision = model_info(args.model).sha + model.eval() + model.requires_grad_(False) + + device = th.device("cuda:0" if th.cuda.is_available() else "cpu") + + print("Loading old lens") + tuned_lens_old = TunedLensOld.load(args.resource_id, map_location=device) + + print("Initializing new lens") + tuned_lens = TunedLens.from_model( + model, bias=tuned_lens_old.config['bias'], revision=revision + ) + + for i in tqdm(range(len(tuned_lens_old)), desc="Copying parameters"): + tuned_lens[i].load_state_dict(tuned_lens_old[i].state_dict()) + + + tuned_lens = tuned_lens.to(device) + tuned_lens_old = tuned_lens_old.to(device) + model = model.to(device) + + # Fuzz the new lens against the old one's + with th.no_grad(): + for i in tqdm(range(len(tuned_lens)), desc="Fuzzing layers"): + for _ in range(10): + a = th.randn(1, 1, tuned_lens.config.d_model, device=device) + logits_new = tuned_lens(a, i) + logits_old = tuned_lens_old(a, i) + log_ps_new = logits_new.log_softmax(-1) + log_ps_old = logits_old.log_softmax(-1) + print("js div", js_divergence(log_ps_new, log_ps_old)) + assert (th.allclose(log_ps_new, log_ps_old, atol=1e-4)), (log_ps_new - log_ps_old).abs().max() + print("Saving new lens to", args.output_dir) + tuned_lens.to(th.device("cpu")).save(args.output_dir) diff --git a/migrate.sh b/migrate.sh new file mode 100644 index 0000000000000000000000000000000000000000..298b90611ba1871d974f2c1bdc584663e35f8c05 --- /dev/null +++ b/migrate.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +for i in pythia-70m-deduped-v0,EleutherAI/pythia-70m-deduped-v0 +do + IFS="," + set -- $i + echo "migrating $2" + CUDA_VISIBLE_DEVICES=-1 python3 lens_migration.py --model $2 --resource-id $1 --output lens/$1 + git commit -am "$1 migrated" +done diff --git a/requirements.txt b/requirements.txt index f8a583fdc0eb84bc4ddda4b09de05472d0ec79ef..94b94e6fccaba4602dd23495336c74ab5c8ba61e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -tuned_lens==0.1.0 -gradio \ No newline at end of file +tuned_lens==0.0.3