[nnx] refactor GraphDef #4630

Merged: 1 commit, merged on Apr 15, 2025
4 changes: 1 addition & 3 deletions benchmarks/nnx_graph_overhead.py
@@ -97,11 +97,9 @@ def main(argv):
   def step_nnx(model: MLP, optimizer: nnx.Optimizer):
     pass
 
-  cached_step_nnx = nnx.cached_partial(step_nnx, model, optimizer)
-
   t0 = time()
   for _ in range(total_steps):
-    cached_step_nnx()
+    step_nnx(model, optimizer)
 
   total_time = time() - t0
   time_per_step = total_time / total_steps
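For context, `nnx.cached_partial` pre-binds and caches the graph traversal of its node arguments, while a plain call re-traverses `model` and `optimizer` on every step; the benchmark now times the uncached path. Below is a minimal sketch of the two call patterns, assuming `flax.nnx` and `optax` are installed; the `Linear` layer and SGD optimizer are stand-ins for the benchmark's MLP and optimizer.

```python
import optax
from flax import nnx

model = nnx.Linear(4, 4, rngs=nnx.Rngs(0))
optimizer = nnx.Optimizer(model, optax.sgd(1e-3))

@nnx.jit
def step_nnx(model: nnx.Linear, optimizer: nnx.Optimizer):
  pass  # no-op body: only the per-call graph overhead is exercised

# Pattern removed from the benchmark: the traversal of model/optimizer is cached once.
cached_step_nnx = nnx.cached_partial(step_nnx, model, optimizer)
cached_step_nnx()

# Pattern the benchmark now measures: model and optimizer are traversed on every call.
step_nnx(model, optimizer)
```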
4 changes: 4 additions & 0 deletions examples/ppo/ppo_lib_test.py
@@ -25,6 +25,10 @@
 import env_utils
 import models
 import ppo_lib
+import gymnasium as gym
+import ale_py
+
+gym.register_envs(ale_py)
 
 
 # test GAE
4 changes: 4 additions & 0 deletions examples/ppo/ppo_main.py
@@ -23,6 +23,10 @@
 import env_utils
 import models
 import ppo_lib
+import gymnasium as gym
+import ale_py
+
+gym.register_envs(ale_py)
 
 
 FLAGS = flags.FLAGS
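Both PPO entry points now register the Arcade Learning Environment suite explicitly: recent gymnasium releases no longer expose the ALE environments merely by importing `ale_py`, so `gym.register_envs(ale_py)` must run before `gym.make` can resolve `ALE/...` ids. A small sketch of the pattern; the `ALE/Pong-v5` id is only an illustrative choice and assumes the ROMs are installed.

```python
import gymnasium as gym
import ale_py

gym.register_envs(ale_py)  # make the "ALE/..." ids resolvable via gym.make

env = gym.make('ALE/Pong-v5')
obs, info = env.reset(seed=0)
env.close()
```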
4 changes: 0 additions & 4 deletions flax/linen/attention.py
@@ -444,7 +444,6 @@ def __call__(
     deterministic: bool | None = None,
     dropout_rng: PRNGKey | None = None,
     sow_weights: bool = False,
-    attention_bias: Array | None = None,
   ):
     """Applies multi-head dot product attention on the input data.
 
@@ -475,8 +474,6 @@
         'intermediates' collection. Remember to mark 'intermediates' as
         mutable via ``mutable=['intermediates']`` in order to have that
         collection returned.
-      attention_bias: bias for the attention logits of shape
-        ``[batch_sizes..., num_heads, q_length, kv_length]``.
 
     Returns:
       output of shape ``[batch_sizes..., length, features]``.
@@ -665,7 +662,6 @@ def __call__(
       force_fp32_for_softmax=self.force_fp32_for_softmax,
       qk_attn_weights_einsum=qk_attn_weights_einsum,
      attn_weights_value_einsum=attn_weights_value_einsum,
-      bias=attention_bias,
     )
     attn_kwargs = {
       k: v
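After this removal, `MultiHeadDotProductAttention.__call__` no longer accepts an `attention_bias` keyword. A hedged sketch of a call against the remaining signature; shapes and hyperparameters are illustrative only.

```python
import jax
import jax.numpy as jnp
import flax.linen as nn

attn = nn.MultiHeadDotProductAttention(num_heads=2, qkv_features=16)
x = jnp.ones((1, 4, 16))  # [batch, length, features]

variables = attn.init(jax.random.PRNGKey(0), x)
y = attn.apply(variables, x)  # no attention_bias kwarg

assert y.shape == (1, 4, 16)
```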
2 changes: 1 addition & 1 deletion flax/nnx/bridge/module.py
@@ -255,7 +255,7 @@ def _graph_node_flatten(self):
       PriorityStr(self.attr_priorities.get(k, AttrPriority.DEFAULT), k)
       for k in nodes.keys()
     )
-    sorted_nodes = ((k, nodes[k]) for k in sorted(keys))
+    sorted_nodes = list((k, nodes[k]) for k in sorted(keys))
     return sorted_nodes, type(self)
 
   def set_attr_priority(self, name: str, value: AttrPriority):
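The flatten result is now materialized as a list instead of being returned as a generator, presumably so the flattened attributes can be iterated more than once by the graph machinery. A plain-Python illustration of the difference:

```python
nodes = {'b': 2, 'a': 1}

gen = ((k, nodes[k]) for k in sorted(nodes))
assert list(gen) == [('a', 1), ('b', 2)]
assert list(gen) == []  # a generator is exhausted after one pass

lst = list((k, nodes[k]) for k in sorted(nodes))
assert list(lst) == [('a', 1), ('b', 2)]
assert list(lst) == [('a', 1), ('b', 2)]  # a list can be traversed repeatedly
```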
2 changes: 1 addition & 1 deletion flax/nnx/bridge/variables.py
@@ -154,7 +154,7 @@ def nnx_attrs_to_linen_vars(nnx_attrs: dict) -> dict:
     elif isinstance(v, variablelib.VariableState):
       col_name = variablelib.variable_name_from_type(v.type)
       v = to_linen_var(v)
-    elif isinstance(v, graph.NodeDef) or isinstance(v, graph.NodeRef):
+    elif isinstance(v, graph.GraphDef):
       col_name = 'nnx'  # an nnx.GraphDef for some ToLinen submodule
     else:
       raise ValueError(f'Cannot infer collection name from value: {v}')
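With the refactor, the graph definition returned by `nnx.split` can be checked against the single `GraphDef` type rather than the old `NodeDef`/`NodeRef` pair. A minimal sketch from user code, assuming `flax.nnx` is available; the `Linear` layer is illustrative only.

```python
from flax import nnx

model = nnx.Linear(2, 3, rngs=nnx.Rngs(0))
graphdef, state = nnx.split(model)

assert isinstance(graphdef, nnx.GraphDef)  # replaces the NodeDef / NodeRef checks
restored = nnx.merge(graphdef, state)      # round-trips back to a live module
```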
5 changes: 2 additions & 3 deletions flax/nnx/bridge/wrappers.py
@@ -38,7 +38,7 @@
 @dataclasses.dataclass
 class Functional(tp.Generic[M]):
   module_type: tp.Type[M]
-  graphdef: tp.Optional[graph.NodeDef[M]]
+  graphdef: tp.Optional[graph.GraphDef[M]]
   args: tuple[tp.Any, ...]
   kwargs: dict[str, tp.Any]
 
@@ -48,7 +48,6 @@ def init(self, *, rngs: tp.Optional[Rngs] = None) -> State:
       kwargs['rngs'] = rngs
     module = self.module_type(*self.args, **self.kwargs, **kwargs)
     graphdef, state = nnx.split(module)
-    assert type(graphdef) is graph.NodeDef
     self.graphdef = graphdef
     return state  # type: ignore
 
@@ -217,7 +216,7 @@ class ToLinen(linen.Module):
   >>> variables.keys()
   dict_keys(['nnx', 'params'])
   >>> type(variables['nnx']['graphdef'])
-  <class 'flax.nnx.graph.NodeDef'>
+  <class 'flax.nnx.graph.GraphDef'>
 
   Args:
     nnx_class: The NNX Module class (not instance!).