Spaces:

HugoVoxx
/

GeoGenSolve

Sleeping

App Files Files

HugoVoxx committed on Nov 26, 2024

Commit

9334a63

verified ·

1 Parent(s): c6eebc1

Update ag4masses/alphageometry/models.py

Browse files

Files changed (1) hide show

ag4masses/alphageometry/models.py +178 -178

ag4masses/alphageometry/models.py CHANGED Viewed

@@ -1,178 +1,178 @@
-# Copyright 2023 DeepMind Technologies Limited
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Transformer language model generate mode."""
-from typing import Any, Tuple
-import beam_search
-import decoder_stack
-import gin
-import jax
-import jax.numpy as jnp
-from meliad_lib.meliad.transformer import models
-@gin.configurable
-class DecoderOnlyLanguageModelGenerate(models.DecoderOnlyLanguageModel):
-  """Decoder only language modeling in inference mode.
-
-  `__call__` runs `beam_search.beam_search_flat` on top of the decoder, and
-  `generate` feeds an existing token sequence through the decoder to build up
-  its state. `__call__` only accepts the modes 'init' and 'beam_search'.
-  """
-  # Decoder stack class for generation. Presumably consumed by the parent class
-  # to build `self.decoder` -- confirm in models.DecoderOnlyLanguageModel.
-  decoder_factory = decoder_stack.DecoderStackGenerate
-  # Required gin settings; get_fake_input uses them to size the fake
-  # 'keys' / 'values' / 'relative_position_bias' arrays.
-  num_heads: int = gin.REQUIRED
-  head_size: int = gin.REQUIRED
-  def get_fake_input(self) -> dict[str, Any]:
-    """Returns the parent's fake input extended with generation-only keys.
-
-    Adds 'dstate', 'eos', 'mask', 'length' and 'temperature', i.e. the extra
-    keys that `__call__` and `generate` read from their `inputs` argument.
-
-    Returns:
-      The dict obtained from `super().get_fake_input()`, updated in place.
-    """
-    fake_input_dict = super().get_fake_input()
-    b = self.task_config.batch_size
-    n = self.num_heads
-    h = self.head_size
-    # NOTE(review): 2048, 1024 and 12 below are hard-coded. Presumably they are
-    # the key/value cache length, the vocabulary (or bias window) size and the
-    # number of decoder layers -- confirm against the gin configuration.
-    fake_input_dict.update({
-        'dstate': tuple(
-            [{
-                'current_index': jnp.array([0] * b, dtype=jnp.int32),
-                'keys': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
-                'values': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
-                'recurrent_kvq': None,
-                'relative_position_bias': jnp.zeros(
-                    (b, n, 1, 1024), dtype=jnp.bfloat16
-                ),
-            }]
-            # List multiplication repeats the same dict object, it does not
-            # copy it; all 12 tuple entries alias one dict.
-            * 12
-        ),
-        'eos': jnp.zeros([1024], dtype=jnp.bfloat16),
-        'mask': jnp.ones([1024], dtype=jnp.bfloat16),
-        'length': 1,
-        'temperature': 1.0,
-    })
-    return fake_input_dict
-  def __call__(self, inputs: ...) -> tuple[Any, dict[str, Any]]:
-    """Runs beam search seeded with the token at position `length - 1`.
-
-    Args:
-      inputs: dict read here for the keys 'targets', 'start_of_sequence',
-        'length', 'eos', 'mask' and 'dstate'. NOTE: `inputs['targets']` is
-        overwritten in this dict with its last column dropped.
-
-    Returns:
-      A pair `(0.0, outputs)` where `outputs` holds 'finished_seqs',
-      'finished_scores' and 'dstate' as returned by
-      `beam_search.beam_search_flat`.
-
-    Raises:
-      ValueError: if `self.mode` is not 'init' or 'beam_search', or if
-        `self.decoder.supports_generate()` is truthy.
-    """
-    # Make sure this code is not used on untested cases.
-    if self.mode not in ['init', 'beam_search']:
-      raise ValueError(f'{type(self)} cannot do mode {self.mode}')
-    # NOTE(review): this rejects decoders whose supports_generate() is truthy,
-    # which reads inverted relative to the method name -- confirm the intended
-    # polarity against decoder_stack.DecoderStackGenerate.
-    if self.decoder.supports_generate():
-      raise ValueError(f'{type(self)}.decoder cannot supports_generate()')
-    # The result of this call is discarded. Presumably it exists so that the
-    # decoder's parameters get created in 'init' mode -- TODO confirm.
-    self.decoder(
-        input_tokens=inputs['targets'][:, 0:1],
-        target_tokens=None,
-        start_of_sequence=inputs['start_of_sequence'],
-    )
-    b = inputs['targets'].shape[0]
-    no_start_of_seq = jnp.array([False] * b, dtype=jnp.bool_)
-    # This fn is used in both beam_search or topk_sampling.
-    def tokens_to_logits_fn(
-        input_token: jnp.ndarray, dstate: tuple[dict[str, jnp.ndarray], ...]
-    ) -> tuple[jnp.ndarray, tuple[dict[str, jnp.ndarray], ...]]:
-      """Runs one decoder step; returns last-position logits and new state."""
-      (logits, dstate, _) = self.decoder(
-          input_tokens=input_token,
-          target_tokens=None,
-          start_of_sequence=no_start_of_seq,
-          decoder_state=dstate,
-      )
-      return logits[:, -1, :], dstate
-    # Slice of width 1 at index `length - 1` along axis 1 of the targets.
-    last_token = jax.lax.dynamic_slice_in_dim(
-        inputs['targets'], inputs['length'] - 1, 1, axis=1
-    )
-    # last token is used to seed beam_search
-    # This drops the final column of the array (which is only the same
-    # position as `length - 1` when the targets are exactly `length` long) and
-    # mutates the caller's dict.
-    inputs['targets'] = inputs['targets'][:, 0:-1]
-    # If the first batch element starts a new sequence, rebuild the decoder
-    # state by running `generate` over the truncated targets; otherwise reuse
-    # the state supplied by the caller.
-    dstate = jax.lax.cond(
-        inputs['start_of_sequence'][0],
-        lambda: self.generate(inputs)[0],
-        lambda: inputs['dstate'],
-    )
-    # Then we run beam search, init with last_token & dstate.
-    # 'eos' and 'mask' are reshaped to (1, 1, -1); max_decode_len is fixed at
-    # 512 here.
-    finished_seqs, finished_scores, dstate = beam_search.beam_search_flat(
-        last_token,
-        dstate,
-        tokens_to_logits_fn,
-        max_decode_len=512,
-        eos=inputs['eos'].reshape((1, 1, -1)),
-        mask=inputs['mask'].reshape((1, 1, -1)),
-    )
-    # The first element is a constant 0.0 placeholder; presumably it fills the
-    # loss slot expected by the caller -- TODO confirm.
-    return 0.0, {
-        'finished_seqs': finished_seqs,
-        'finished_scores': finished_scores,
-        'dstate': dstate,
-    }
-  def generate(
-      self, inputs: ...
-  ) -> tuple[tuple[dict[str, jnp.ndarray], ...], jnp.ndarray]:
-    """Feeds the given tokens through the decoder, one position per step.
-
-    No token is sampled here: at every step the next input token is read from
-    `inputs['targets']` rather than chosen from the logits.
-
-    Args:
-      inputs: the same as argument to __call__. Keys read here: 'targets',
-        'start_of_sequence', 'dstate', 'length' and, optionally,
-        'temperature'.
-
-    Returns:
-      A pair `(dstate, logits)`: the decoder state after the last step and the
-      temperature-scaled logits of that step (the all-zero `dummy_logits` if
-      the loop runs zero steps).
-    """
-    input_tokens = inputs['targets']  # [b,seq_len]
-    start_of_sequence = inputs['start_of_sequence']  # [b]
-    # Targets shifted left by one position and padded at the end so that the
-    # sequence length is unchanged.
-    target_tokens = jnp.pad(input_tokens[:, 1:], [(0, 0), (0, 1)])
-    batch_size = target_tokens.shape[0]
-    # Assuming all sequences start at the same time.
-    start0 = inputs['start_of_sequence'][0]
-    # Fresh decoder state when starting a sequence, else the caller's state.
-    # The meaning of 1024 is defined by init_decoder_state_vanilla, which is
-    # not visible from this file.
-    dstate = jax.lax.cond(
-        start0,
-        lambda: self.decoder.init_decoder_state_vanilla(  # pylint: disable=g-long-lambda
-            1024, start_of_sequence
-        ),
-        lambda: inputs['dstate'],
-    )
-    first_token = input_tokens[:, 0:1]
-    no_start_of_seq = jnp.array([False] * batch_size, dtype=jnp.bool_)
-    # Temperature defaults to 1 (logits unchanged) when the key is absent.
-    temperature = 1
-    if 'temperature' in inputs:
-      temperature = inputs['temperature']
-    num_steps = inputs['length']
-    # One step fewer in beam_search mode -- presumably because __call__ holds
-    # the last token back to seed beam search; TODO confirm.
-    if self.mode == 'beam_search':
-      num_steps -= 1
-    def cond_fn(scan_state) -> jnp.bool_:
-      """Continues while the step counter is below `num_steps`."""
-      _, _, i, _ = scan_state
-      return i < num_steps
-    def loop_fn(scan_state: Any) -> Tuple[Any, Any, Any, Any]:
-      """Runs one decoder step and selects the next input from the targets."""
-      (dstate, input_token, i, _) = scan_state
-      (logits, dstate, _) = self.decoder(
-          input_tokens=input_token,
-          target_tokens=None,
-          start_of_sequence=no_start_of_seq,
-          decoder_state=dstate,
-      )
-      logits = logits / temperature
-      # The next input is the i-th shifted target, not a sampled token.
-      output_token = jax.lax.dynamic_slice_in_dim(target_tokens, i, 1, axis=1)
-      return (dstate, output_token, i + 1, logits)
-    # Scan over the sequence length.
-    # dummy_logits only fills the logits slot of the initial loop state;
-    # assumes the decoder's logits have shape (batch_size, 1, 1024) -- TODO
-    # confirm.
-    dummy_logits = jnp.zeros((batch_size, 1, 1024))
-    initial_scan_state = (dstate, first_token, 0, dummy_logits)
-    dstate, _, _, logits = jax.lax.while_loop(
-        cond_fn, loop_fn, initial_scan_state
-    )
-    return dstate, logits

+# Copyright 2023 DeepMind Technologies Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Transformer language model generate mode."""
+from typing import Any, Tuple
+import beam_search
+import decoder_stack
+import gin
+import jax
+import jax.numpy as jnp
+from aglib.meliad.transformer import models
+@gin.configurable
+class DecoderOnlyLanguageModelGenerate(models.DecoderOnlyLanguageModel):
+  """Decoder only language modeling in inference mode.
+
+  `__call__` runs `beam_search.beam_search_flat` on top of the decoder, and
+  `generate` feeds an existing token sequence through the decoder to build up
+  its state. `__call__` only accepts the modes 'init' and 'beam_search'.
+  """
+  # Decoder stack class for generation. Presumably consumed by the parent class
+  # to build `self.decoder` -- confirm in models.DecoderOnlyLanguageModel.
+  decoder_factory = decoder_stack.DecoderStackGenerate
+  # Required gin settings; get_fake_input uses them to size the fake
+  # 'keys' / 'values' / 'relative_position_bias' arrays.
+  num_heads: int = gin.REQUIRED
+  head_size: int = gin.REQUIRED
+  def get_fake_input(self) -> dict[str, Any]:
+    """Returns the parent's fake input extended with generation-only keys.
+
+    Adds 'dstate', 'eos', 'mask', 'length' and 'temperature', i.e. the extra
+    keys that `__call__` and `generate` read from their `inputs` argument.
+
+    Returns:
+      The dict obtained from `super().get_fake_input()`, updated in place.
+    """
+    fake_input_dict = super().get_fake_input()
+    b = self.task_config.batch_size
+    n = self.num_heads
+    h = self.head_size
+    # NOTE(review): 2048, 1024 and 12 below are hard-coded. Presumably they are
+    # the key/value cache length, the vocabulary (or bias window) size and the
+    # number of decoder layers -- confirm against the gin configuration.
+    fake_input_dict.update({
+        'dstate': tuple(
+            [{
+                'current_index': jnp.array([0] * b, dtype=jnp.int32),
+                'keys': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
+                'values': jnp.zeros((b, 2048, n, h), dtype=jnp.bfloat16),
+                'recurrent_kvq': None,
+                'relative_position_bias': jnp.zeros(
+                    (b, n, 1, 1024), dtype=jnp.bfloat16
+                ),
+            }]
+            # List multiplication repeats the same dict object, it does not
+            # copy it; all 12 tuple entries alias one dict.
+            * 12
+        ),
+        'eos': jnp.zeros([1024], dtype=jnp.bfloat16),
+        'mask': jnp.ones([1024], dtype=jnp.bfloat16),
+        'length': 1,
+        'temperature': 1.0,
+    })
+    return fake_input_dict
+  def __call__(self, inputs: ...) -> tuple[Any, dict[str, Any]]:
+    """Runs beam search seeded with the token at position `length - 1`.
+
+    Args:
+      inputs: dict read here for the keys 'targets', 'start_of_sequence',
+        'length', 'eos', 'mask' and 'dstate'. NOTE: `inputs['targets']` is
+        overwritten in this dict with its last column dropped.
+
+    Returns:
+      A pair `(0.0, outputs)` where `outputs` holds 'finished_seqs',
+      'finished_scores' and 'dstate' as returned by
+      `beam_search.beam_search_flat`.
+
+    Raises:
+      ValueError: if `self.mode` is not 'init' or 'beam_search', or if
+        `self.decoder.supports_generate()` is truthy.
+    """
+    # Make sure this code is not used on untested cases.
+    if self.mode not in ['init', 'beam_search']:
+      raise ValueError(f'{type(self)} cannot do mode {self.mode}')
+    # NOTE(review): this rejects decoders whose supports_generate() is truthy,
+    # which reads inverted relative to the method name -- confirm the intended
+    # polarity against decoder_stack.DecoderStackGenerate.
+    if self.decoder.supports_generate():
+      raise ValueError(f'{type(self)}.decoder cannot supports_generate()')
+    # The result of this call is discarded. Presumably it exists so that the
+    # decoder's parameters get created in 'init' mode -- TODO confirm.
+    self.decoder(
+        input_tokens=inputs['targets'][:, 0:1],
+        target_tokens=None,
+        start_of_sequence=inputs['start_of_sequence'],
+    )
+    b = inputs['targets'].shape[0]
+    no_start_of_seq = jnp.array([False] * b, dtype=jnp.bool_)
+    # This fn is used in both beam_search or topk_sampling.
+    def tokens_to_logits_fn(
+        input_token: jnp.ndarray, dstate: tuple[dict[str, jnp.ndarray], ...]
+    ) -> tuple[jnp.ndarray, tuple[dict[str, jnp.ndarray], ...]]:
+      """Runs one decoder step; returns last-position logits and new state."""
+      (logits, dstate, _) = self.decoder(
+          input_tokens=input_token,
+          target_tokens=None,
+          start_of_sequence=no_start_of_seq,
+          decoder_state=dstate,
+      )
+      return logits[:, -1, :], dstate
+    # Slice of width 1 at index `length - 1` along axis 1 of the targets.
+    last_token = jax.lax.dynamic_slice_in_dim(
+        inputs['targets'], inputs['length'] - 1, 1, axis=1
+    )
+    # last token is used to seed beam_search
+    # This drops the final column of the array (which is only the same
+    # position as `length - 1` when the targets are exactly `length` long) and
+    # mutates the caller's dict.
+    inputs['targets'] = inputs['targets'][:, 0:-1]
+    # If the first batch element starts a new sequence, rebuild the decoder
+    # state by running `generate` over the truncated targets; otherwise reuse
+    # the state supplied by the caller.
+    dstate = jax.lax.cond(
+        inputs['start_of_sequence'][0],
+        lambda: self.generate(inputs)[0],
+        lambda: inputs['dstate'],
+    )
+    # Then we run beam search, init with last_token & dstate.
+    # 'eos' and 'mask' are reshaped to (1, 1, -1); max_decode_len is fixed at
+    # 512 here.
+    finished_seqs, finished_scores, dstate = beam_search.beam_search_flat(
+        last_token,
+        dstate,
+        tokens_to_logits_fn,
+        max_decode_len=512,
+        eos=inputs['eos'].reshape((1, 1, -1)),
+        mask=inputs['mask'].reshape((1, 1, -1)),
+    )
+    # The first element is a constant 0.0 placeholder; presumably it fills the
+    # loss slot expected by the caller -- TODO confirm.
+    return 0.0, {
+        'finished_seqs': finished_seqs,
+        'finished_scores': finished_scores,
+        'dstate': dstate,
+    }
+  def generate(
+      self, inputs: ...
+  ) -> tuple[tuple[dict[str, jnp.ndarray], ...], jnp.ndarray]:
+    """Feeds the given tokens through the decoder, one position per step.
+
+    No token is sampled here: at every step the next input token is read from
+    `inputs['targets']` rather than chosen from the logits.
+
+    Args:
+      inputs: the same as argument to __call__. Keys read here: 'targets',
+        'start_of_sequence', 'dstate', 'length' and, optionally,
+        'temperature'.
+
+    Returns:
+      A pair `(dstate, logits)`: the decoder state after the last step and the
+      temperature-scaled logits of that step (the all-zero `dummy_logits` if
+      the loop runs zero steps).
+    """
+    input_tokens = inputs['targets']  # [b,seq_len]
+    start_of_sequence = inputs['start_of_sequence']  # [b]
+    # Targets shifted left by one position and padded at the end so that the
+    # sequence length is unchanged.
+    target_tokens = jnp.pad(input_tokens[:, 1:], [(0, 0), (0, 1)])
+    batch_size = target_tokens.shape[0]
+    # Assuming all sequences start at the same time.
+    start0 = inputs['start_of_sequence'][0]
+    # Fresh decoder state when starting a sequence, else the caller's state.
+    # The meaning of 1024 is defined by init_decoder_state_vanilla, which is
+    # not visible from this file.
+    dstate = jax.lax.cond(
+        start0,
+        lambda: self.decoder.init_decoder_state_vanilla(  # pylint: disable=g-long-lambda
+            1024, start_of_sequence
+        ),
+        lambda: inputs['dstate'],
+    )
+    first_token = input_tokens[:, 0:1]
+    no_start_of_seq = jnp.array([False] * batch_size, dtype=jnp.bool_)
+    # Temperature defaults to 1 (logits unchanged) when the key is absent.
+    temperature = 1
+    if 'temperature' in inputs:
+      temperature = inputs['temperature']
+    num_steps = inputs['length']
+    # One step fewer in beam_search mode -- presumably because __call__ holds
+    # the last token back to seed beam search; TODO confirm.
+    if self.mode == 'beam_search':
+      num_steps -= 1
+    def cond_fn(scan_state) -> jnp.bool_:
+      """Continues while the step counter is below `num_steps`."""
+      _, _, i, _ = scan_state
+      return i < num_steps
+    def loop_fn(scan_state: Any) -> Tuple[Any, Any, Any, Any]:
+      """Runs one decoder step and selects the next input from the targets."""
+      (dstate, input_token, i, _) = scan_state
+      (logits, dstate, _) = self.decoder(
+          input_tokens=input_token,
+          target_tokens=None,
+          start_of_sequence=no_start_of_seq,
+          decoder_state=dstate,
+      )
+      logits = logits / temperature
+      # The next input is the i-th shifted target, not a sampled token.
+      output_token = jax.lax.dynamic_slice_in_dim(target_tokens, i, 1, axis=1)
+      return (dstate, output_token, i + 1, logits)
+    # Scan over the sequence length.
+    # dummy_logits only fills the logits slot of the initial loop state;
+    # assumes the decoder's logits have shape (batch_size, 1, 1024) -- TODO
+    # confirm.
+    dummy_logits = jnp.zeros((batch_size, 1, 1024))
+    initial_scan_state = (dstate, first_token, 0, dummy_logits)
+    dstate, _, _, logits = jax.lax.while_loop(
+        cond_fn, loop_fn, initial_scan_state
+    )
+    return dstate, logits