From 460c92872195904e775ee5c5149471c1defe9d76 Mon Sep 17 00:00:00 2001 From: yhteoh Date: Tue, 28 May 2024 14:26:11 -0400 Subject: [PATCH] Deployed e7e28c5 with MkDocs version: 1.6.0 --- 404.html | 4 +- data/index.html | 6 +- .../{1_overview => 1_Overview}/index.html | 113 ++++++++++++++++-- examples/2_Dataset/index.html | 6 +- examples/3_Observables/index.html | 4 +- get_started/index.html | 4 +- index.html | 4 +- reference/data/index.html | 4 +- reference/models/graph/index.html | 4 +- reference/models/index.html | 4 +- reference/models/transformer/index.html | 4 +- reference/observables/index.html | 4 +- reference/training/index.html | 4 +- reference/utils/index.html | 4 +- search/search_index.json | 2 +- 15 files changed, 130 insertions(+), 41 deletions(-) rename examples/{1_overview => 1_Overview}/index.html (96%) diff --git a/404.html b/404.html index 7133cd1b..7f19b83e 100755 --- a/404.html +++ b/404.html @@ -232,7 +232,7 @@
  • - + Tutorials @@ -381,7 +381,7 @@ - + diff --git a/data/index.html b/data/index.html index e799de16..33229ec0 100755 --- a/data/index.html +++ b/data/index.html @@ -14,7 +14,7 @@ - + @@ -243,7 +243,7 @@
  • - + Tutorials @@ -523,7 +523,7 @@ - + diff --git a/examples/1_overview/index.html b/examples/1_Overview/index.html similarity index 96% rename from examples/1_overview/index.html rename to examples/1_Overview/index.html index 3c9fdce0..20cd3faa 100755 --- a/examples/1_overview/index.html +++ b/examples/1_Overview/index.html @@ -11,6 +11,10 @@ + + + + @@ -18,7 +22,7 @@ - 1 overview - RydbergGPT + Overview - RydbergGPT @@ -114,7 +118,7 @@
    - 1 overview + Overview
    @@ -233,11 +237,13 @@ + + -
  • - +
  • + Tutorials @@ -365,41 +371,124 @@ + + + + + + + +
  • - + + + + + +
  • diff --git a/examples/2_Dataset/index.html b/examples/2_Dataset/index.html index 34cda392..2a70d39d 100755 --- a/examples/2_Dataset/index.html +++ b/examples/2_Dataset/index.html @@ -11,7 +11,7 @@ - + @@ -243,7 +243,7 @@
  • - + Tutorials @@ -421,7 +421,7 @@
  • - + diff --git a/examples/3_Observables/index.html b/examples/3_Observables/index.html index 33dfada8..4286ee48 100755 --- a/examples/3_Observables/index.html +++ b/examples/3_Observables/index.html @@ -243,7 +243,7 @@
  • - + Tutorials @@ -421,7 +421,7 @@
  • - + diff --git a/get_started/index.html b/get_started/index.html index 08896dca..7ed7a75c 100755 --- a/get_started/index.html +++ b/get_started/index.html @@ -243,7 +243,7 @@
  • - + Tutorials @@ -547,7 +547,7 @@ - + diff --git a/index.html b/index.html index 0d202cea..74176906 100755 --- a/index.html +++ b/index.html @@ -241,7 +241,7 @@
  • - + Tutorials @@ -554,7 +554,7 @@ - + diff --git a/reference/data/index.html b/reference/data/index.html index 4bbdbe94..02a3aae7 100755 --- a/reference/data/index.html +++ b/reference/data/index.html @@ -241,7 +241,7 @@
  • - + Tutorials @@ -392,7 +392,7 @@ - + diff --git a/reference/models/graph/index.html b/reference/models/graph/index.html index 1ff24321..55d449a0 100755 --- a/reference/models/graph/index.html +++ b/reference/models/graph/index.html @@ -241,7 +241,7 @@
  • - + Tutorials @@ -392,7 +392,7 @@ - + diff --git a/reference/models/index.html b/reference/models/index.html index 5239bed1..cb6a3ed9 100755 --- a/reference/models/index.html +++ b/reference/models/index.html @@ -241,7 +241,7 @@
  • - + Tutorials @@ -392,7 +392,7 @@ - + diff --git a/reference/models/transformer/index.html b/reference/models/transformer/index.html index 936cfc7c..94aadb00 100755 --- a/reference/models/transformer/index.html +++ b/reference/models/transformer/index.html @@ -241,7 +241,7 @@
  • - + Tutorials @@ -392,7 +392,7 @@ - + diff --git a/reference/observables/index.html b/reference/observables/index.html index d06f2b18..df25c6fc 100755 --- a/reference/observables/index.html +++ b/reference/observables/index.html @@ -241,7 +241,7 @@
  • - + Tutorials @@ -392,7 +392,7 @@ - + diff --git a/reference/training/index.html b/reference/training/index.html index e058c5c1..2b6f0213 100755 --- a/reference/training/index.html +++ b/reference/training/index.html @@ -241,7 +241,7 @@
  • - + Tutorials @@ -392,7 +392,7 @@ - + diff --git a/reference/utils/index.html b/reference/utils/index.html index a53b7bc3..ff857855 100755 --- a/reference/utils/index.html +++ b/reference/utils/index.html @@ -239,7 +239,7 @@
  • - + Tutorials @@ -390,7 +390,7 @@ - + diff --git a/search/search_index.json b/search/search_index.json index bda178b7..0e38f6b2 100755 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"RydbergGPT","text":"

    A large language model (LLM) for Rydberg atom array physics.

    "},{"location":"#architecture","title":"Architecture","text":""},{"location":"#rydberg-system","title":"Rydberg System","text":"\\[ \\hat{H}_{\\mathrm{Rydberg}} = \\sum_{i < j}^{N} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert} \\hat{n}_i \\hat{n}_j - \\delta \\sum_{i}^{N} \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i}^{N} \\hat{\\sigma}_i^{(x)}, \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\]
    • \\(N = L \\times L =\\) number of atoms/qubits
    • \\(i, j =\\) qubit index
    • \\(V_{ij} =\\) blockade interaction between qubits \\(i\\) and \\(j\\)
    • \\(a =\\) Lattice spacing
    • \\(R_b =\\) Rydberg blockade radius
    • \\(\\mathbf{r}_i =\\) the position of qubit \\(i\\)
    • \\(\\hat{n}_i =\\) number operator at qubit \\(i\\)
    • \\(\\delta =\\) detuning at qubit \\(i\\)
    • \\(\\Omega =\\) Rabi frequency at qubit \\(i\\)
    "},{"location":"#transformer","title":"Transformer","text":"

    Vanilla transformer architecture taken from Attention is All You Need.

    • \\(H_i = \\mathrm{GraphNN}(\\mathrm{edges} = V_{ij} \\ ; \\mathrm{nodes}= \\{ \\Omega, \\Delta, R_b, \\beta \\}_i)\\)
    • \\(\\sigma_i =\\) one-hot encoding of measured qubit \\(i\\)
    • \\(P_i = P(\\sigma_i | \\sigma_{< i}) =\\) conditional probability distribution of qubit \\(i\\)

    The transformer encoder represents the Rydberg Hamiltonian with a sequence. The transformer decoder represents the corresponding ground state wavefunction.

    "},{"location":"#acknowledgements","title":"Acknowledgements","text":"

    We sincerely thank the authors of the following very helpful codebases we used when building this repository :

    • Transformer tutorials:
      • Annotated Transformer
      • Illustrated Transformer
    • Transformer quantum state:
      • Predicting Properties of Quantum Systems with Conditional Generative Models
      • Transformer Quantum State
    "},{"location":"#references","title":"References","text":"
    @inproceedings{46201,\ntitle   = {Attention is All You Need},\nauthor  = {Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},\nyear    = {2017},\nURL = {https://arxiv.org/pdf/1706.03762.pdf}\n}\n
    "},{"location":"data/","title":"Data","text":""},{"location":"data/#rydberg-system","title":"Rydberg System","text":"\\[ \\hat{H}_{\\mathrm{Rydberg}} = \\sum_{i < j}^{N} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert} \\hat{n}_i \\hat{n}_j - \\delta \\sum_{i}^{N} \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i}^{N} \\hat{\\sigma}_i^{(x)}, \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\]
    • \\(N = L \\times L =\\) number of atoms/qubits
    • \\(i, j =\\) qubit index
    • \\(V_{ij} =\\) blockade interaction between qubits \\(i\\) and \\(j\\)
    • \\(a =\\) Lattice spacing
    • \\(R_b =\\) Rydberg blockade radius
    • \\(\\mathbf{r}_i =\\) the position of qubit \\(i\\)
    • \\(\\hat{n}_i =\\) number operator at qubit \\(i\\)
    • \\(\\delta =\\) detuning at qubit \\(i\\)
    • \\(\\Omega =\\) Rabi frequency at qubit \\(i\\)
    "},{"location":"data/#dataset","title":"Dataset","text":"

    Consider setting \\(\\Omega = 1\\) and varying the other Hamiltonian parameters independently :

    $$ L = [5, 6, 11, 12, 15, 16] $$ $$ \\delta / \\Omega = [-0.36, -0.13, 0.93, 1.05, 1.17, 1.29, 1.52, 1.76, 2.94, 3.17] $$ $$ R_b / a = [1.05, 1.15, 1.3] $$ $$ \\beta \\Omega = [0.5, 1, 2, 4, 8, 16, 32, 48, 64] $$ There are a total of 6 x 10 x 3 x 9 = 1620 configurations.

    "},{"location":"get_started/","title":"Get Started","text":""},{"location":"get_started/#installation","title":"Installation","text":"

    Clone the repository using the following command:

    git clone https://github.com/PIQuIL/RydbergGPT\n
    Install with pip :
    cd RydbergGPT\npip install .\n

    "},{"location":"get_started/#usage","title":"Usage","text":""},{"location":"get_started/#configuration","title":"Configuration","text":"

    The config.yaml is used to define the hyperparameters for: - Model architecture - Training settings - Data loading - Others

    "},{"location":"get_started/#training","title":"Training","text":"

    To train RydbergGPT locally, execute the train.py with:

    python train.py --config_name=config_small.yaml\n

    "},{"location":"examples/1_overview/","title":"1 overview","text":"
    %load_ext autoreload\n%autoreload 2\n\nimport itertools as it\nimport numpy as np\nimport networkx as nx\nimport matplotlib.pyplot as plt\nfrom torch_geometric.utils import to_networkx\nimport numpy as np\nimport networkx as nx\nimport torch\nfrom torch_geometric.data import Data\n
    import copy\nfrom typing import Tuple\n\nimport torch\nfrom pytorch_lightning import LightningModule\nfrom torch import Tensor, nn\nfrom torch_geometric.nn import GATConv, GCNConv\n\nfrom rydberggpt.models.graph_embedding.models import GraphEmbedding\nfrom rydberggpt.models.rydberg_encoder_decoder import RydbergEncoderDecoder\n\nfrom rydberggpt.models.transformer.layers import DecoderLayer, EncoderLayer\nfrom rydberggpt.models.transformer.models import (\n    Decoder,\n    Encoder,\n    EncoderDecoder,\n    Generator,\n)\nfrom rydberggpt.models.transformer.modules import (\n    PositionalEncoding,\n    PositionwiseFeedForward,\n)\nfrom rydberggpt.utils import to_one_hot\n
    def get_rydberg_graph_encoder_decoder(config):\n    c = copy.deepcopy\n    attn = nn.MultiheadAttention(config.d_model, config.num_heads, batch_first=True)\n    position = PositionalEncoding(config.d_model, config.dropout)\n    ff = PositionwiseFeedForward(config.d_model, config.d_ff, config.dropout)\n\n    model = RydbergEncoderDecoder(\n        encoder=Encoder(\n            EncoderLayer(config.d_model, c(attn), c(ff), config.dropout),\n            config.num_blocks_encoder,\n        ),\n        decoder=Decoder(\n            DecoderLayer(config.d_model, c(attn), c(attn), c(ff), config.dropout),\n            config.num_blocks_decoder,\n        ),\n        src_embed=GraphEmbedding(\n            graph_layer=GCNConv,  # GATConv\n            in_node_dim=config.in_node_dim,\n            d_hidden=config.graph_hidden_dim,\n            d_model=config.d_model,\n            num_layers=config.graph_num_layers,\n            dropout=config.dropout,\n        ),\n        tgt_embed=nn.Sequential(\n            nn.Linear(config.num_states, config.d_model), c(position)\n        ),\n        generator=Generator(config.d_model, 2),\n        config=config,\n    )\n\n    for p in model.parameters():\n        if p.dim() > 1:\n            nn.init.xavier_uniform_(p)\n\n    return model\n

    In our approach, we leverage graph neural networks (GNNs) to process the underlying graph structure of Rydberg atom systems. In these systems, the graph nodes represent the Rydberg atoms, and each node is assigned a node_feature vector containing information about the Rabi frequency (\u03a9), detuning (\u0394), and temperature (\u03b2). The Rydberg blockade radius, which determines the interaction strength between atoms, is encoded as edge attributes in the graph.

    GNNs are powerful tools for learning representations of graph-structured data, capturing both local and global information within the graph. In our model, we employ graph convolutional layers, such as GCNConv, to learn meaningful embeddings of the input graph. These embeddings take into account both node features and edge attributes, enabling the model to learn complex relationships between atoms in the Rydberg system.

    To understand the basics of graph neural networks and their applications, we recommend the following resources:

    1. A Gentle Introduction to Graph Neural Networks: This article provides an accessible and visually appealing introduction to GNNs, covering their motivation, core concepts, and various architectures.

    2. Understanding Convolutions on Graphs: This article dives deeper into the inner workings of GNNs, specifically focusing on convolution operations on graphs. It provides insights into how graph convolutions can be understood as message-passing mechanisms and how they can be generalized.

    3. Pytorch_geometric: PyTorch Geometric is a library for deep learning on irregular input data such as graphs, point clouds, and manifolds. It provides efficient implementations of various GNN layers and models, making it easier to implement and experiment with graph-based neural networks. This resource serves as a guide to getting started with the library and provides documentation for its various features.

    In our Rydberg atom system model, the graph embedding component serves as a crucial bridge between the graph-structured input data and the encoder-decoder architecture. By leveraging the capabilities of GNNs, we can effectively learn complex patterns in the graph structure and enhance the performance of our model for predicting properties of quantum many-body systems.

    "},{"location":"examples/1_overview/#tutorial-overview","title":"Tutorial: Overview","text":""},{"location":"examples/1_overview/#introduction","title":"Introduction","text":"

    Machine learning has recently emerged as a powerful tool for predicting properties of quantum many-body systems. Generative models can learn from measurements of a single quantum state to accurately reconstruct the state and predict local observables for many ground states of Hamiltonians. In this tutorial, we focus on Rydberg atom systems and propose the use of conditional generative models to simultaneously represent a family of states by learning shared structures of different quantum states from measurements.

    Refs:

    Predicting Properties of Quantum Systems with Conditional Generative Models

    Transformer Quantum State: A Multi-Purpose Model for Quantum Many-Body Problems

    Bloqade

    "},{"location":"examples/1_overview/#rydberg-hamiltonian","title":"Rydberg Hamiltonian","text":"

    We consider a system of \\(N=L \\times L\\) atoms arranged on a square lattice. The governing Hamiltonian defining the Rydberg atom array interactions has the following form:

    \\[ \\hat{H} = \\sum_{i<j} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\hat{n}_i \\hat{n}_j -\\delta \\sum_{i=1}^N \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i=1}^N \\hat{\\sigma}^x_i. \\quad (1) \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6}, \\quad (2) \\]

    where \\(\\hat{\\sigma}^{x}_{i} = \\vert g \\rangle_i \\langle r\\vert_i + \\vert r \\rangle_i \\langle g\\vert_i\\), the occupation number operator \\(\\hat{n}_i = \\frac{1}{2} \\left( \\hat{\\sigma}_{i} + \\mathbb{1} \\right) = \\vert r\\rangle_i \\langle r \\vert_i\\) and \\(\\hat{\\sigma}_{i} = \\vert r \\rangle_i \\langle r \\vert_i - \\vert g \\rangle_i \\langle g \\vert_i\\). The experimental settings of a Rydberg atom array are controlled by the detuning from resonance \\(\\delta\\), Rabi frequency \\(\\Omega\\), lattice length scale \\(a\\) and the positions of the atoms \\(\\{\\mathbf{r}_i\\}_i^N\\). From equation (2) above, we obtain a symmetric matrix \\(\\mathbf{V}\\), that encapsulates the relevant information about the lattice geometry, and derive the Rydberg blockade radius \\(R_b\\), within which simultaneous excitations are penalized. Finally, for the purposes of our study, the atom array is considered to be affected by thermal noise, in equilibrium at a temperature \\(T\\). The experimental settings are thus captured by the set of parameters \\(\\mathbf{x} = (\\Omega, \\delta/\\Omega, R_b/a, \\mathbf{V}, \\beta / \\Omega)\\), where \\(\\beta\\) is the inverse temperature.

    "},{"location":"examples/1_overview/#representation-of-the-quantum-state","title":"Representation of the quantum state","text":"

    Decomposing the joint distribution into a product of conditional distributions in an autoregressive manner,

    \\[ p_{\\theta}(\\boldsymbol{\\sigma}) = \\prod_{i=1}^n p_{\\theta}\\left(\\sigma_i \\mid \\sigma_{i-1}, \\ldots, \\sigma_1\\right). \\]

    where \\(\\theta\\) denotes the set of parameters of the generative model.

    "},{"location":"examples/1_overview/#the-graph-encoder-decoder-transformer-architecture","title":"The Graph Encoder Decoder Transformer architecture","text":"

    In this tutorial, we will explain the network architecture used in the get_rydberg_graph_encoder_decoder function, which creates a RydbergEncoderDecoder model. This model is designed to process graph-structured data using a combination of Graph Convolutional Networks (GCNs) and the classic Encoder-Decoder architecture as introduced in Vaswani et al..

    "},{"location":"examples/1_overview/#main-components","title":"Main components","text":"

    The RydbergEncoderDecoder model consists of the following main components:

    Encoder: The encoder processes the input graph data and generates a continuous representation. It consists of multiple EncoderLayer blocks, each containing a multi-head self-attention mechanism and a position-wise feed-forward network, followed by layer normalization and dropout.

    Decoder: The decoder takes the continuous representation generated by the encoder and produces the output predictions. It is composed of multiple DecoderLayer blocks, each containing two multi-head attention mechanisms (self-attention and encoder-decoder attention) and a position-wise feed-forward network, followed by layer normalization and dropout.

    src_embed: This component is responsible for transforming the input graph data into a continuous representation. It uses the GraphEmbedding class, which employs GCNConv layers (or other graph convolution layers, such as GATConv) to process the graph structure. The number of graph layers can be controlled with the num_layers parameter.

    tgt_embed: This is a sequential model that first applies a linear transformation to the target input states and then adds positional encoding to provide information about the sequence order. The positional encoding is applied using the PositionalEncoding class.

    Generator: The generator is a simple linear layer that maps the output of the decoder to the desired output dimension (in this case, 2). It is used for producing the final output predictions.

    In the get_rydberg_graph_encoder_decoder function, the model is created using the provided configuration (config). This configuration contains information about the model's dimensions, number of layers, and other hyperparameters. After initializing the model, the weights of the parameters with more than one dimension are initialized using Xavier uniform initialization.

    Overall, this network architecture combines the power of graph convolutional networks for processing graph-structured data with the sequence-to-sequence learning capabilities of the Encoder-Decoder architecture. This allows the model to effectively learn complex patterns in both the graph structure and the sequence data.

    "},{"location":"examples/1_overview/#loss-function","title":"Loss function","text":"

    The dataset is composed of \\(N_H\\) Hamiltonians and obtain \\(N_s\\) measurement outcomes for each ground state leading to a training set \\(\\mathcal{D}\\) of size \\(N_HN_s\\). The training objective is the average negative log-likelihood loss,

    \\[ \\mathcal{L}(\\theta) \\approx -\\frac{1}{|\\mathcal{D}|} \\sum_{\\boldsymbol{\\sigma} \\in \\mathcal{D}} \\ln p_{\\theta}(\\boldsymbol{\\sigma}). \\]

    corresponding to maximizing the conditional likelihoods over the observed measurement outcomes.

    "},{"location":"examples/1_overview/#graph-embedding-in-rydberg-atom-systems","title":"Graph Embedding in Rydberg Atom Systems","text":""},{"location":"examples/2_Dataset/","title":"Dataset","text":"
    %load_ext autoreload\n%autoreload 2\n\nimport os\n\nimport matplotlib.colors as mcolors\nimport matplotlib.pyplot as plt\nimport networkx as nx\nfrom tqdm import tqdm\n\nfrom rydberggpt.data.dataclasses import GridGraph\nfrom rydberggpt.data.graph_structures import get_graph\nfrom rydberggpt.data.rydberg_dataset import get_rydberg_dataloader\nfrom rydberggpt.data.utils_graph import graph_to_dict\nfrom rydberggpt.utils import shift_inputs\n\n\nbase_path = os.path.abspath(\"../\")\n
    n_rows = 4\nn_cols = 4\nnum_atoms = n_rows * n_cols\n\ngraph_config = GridGraph(\n    num_atoms=num_atoms,\n    graph_name=\"grid_graph\",\n    Rb=1.0,\n    delta=1.0,\n    omega=1.0,\n    beta=1.0,\n    n_rows=n_rows,\n    n_cols=n_cols,\n)\n\ngraph = get_graph(graph_config)\ngraph_dict = graph_to_dict(graph)\ngraph_nx = nx.node_link_graph(graph_dict)\n
    adj_matrix = nx.to_numpy_array(graph_nx)\nplt.imshow(adj_matrix, cmap=\"Blues\")\nplt.title(\"Adjacency Matrix\")\nplt.show()\n

    or plot the graph.

    def plot_graph(graph):\n    # Get node positions from the graph\n    pos = nx.get_node_attributes(graph, \"pos\")\n\n    # Extract edge weights for edge coloring\n    edges, weights = zip(*nx.get_edge_attributes(graph, \"weight\").items())\n\n    # Normalize edge weights for better visualization\n    normalized_weights = [w / max(weights) for w in weights]\n\n    # Calculate edge widths proportional to normalized weights\n    edge_widths = [w * 2 for w in normalized_weights]\n\n    # Create a color map for the edges\n    cmap = plt.cm.Blues\n    norm = mcolors.Normalize(vmin=min(normalized_weights), vmax=max(normalized_weights))\n\n    # Plot the graph\n    fig, ax = plt.subplots(figsize=(8, 8))\n    nx.draw(\n        graph,\n        pos,\n        node_color=\"white\",\n        with_labels=True,\n        font_color=\"black\",\n        edge_cmap=cmap,\n        node_size=400,\n        width=edge_widths,\n        alpha=0.5,\n        edgecolors=\"black\",\n        edgelist=edges,\n        edge_color=normalized_weights,\n        verticalalignment=\"center_baseline\",\n        font_size=12,\n    )\n    plt.title(\"Grid Graph\", fontsize=18)\n\n    # Add a color bar\n    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)\n    sm.set_array([])\n    cbar = plt.colorbar(sm, ax=ax)\n    cbar.set_label(\"1/Distance\")\n\n    plt.show()\n
    plot_graph(graph_nx)\n

    Each node contains a node_feature vector encoding omega, delta and beta.

    import warnings\n\nwith warnings.catch_warnings():\n    warnings.simplefilter(\"ignore\")\n\n    batch_size = 128\n    buffer_size = 2\n    num_workers = 0\n\n    data_path = os.path.join(base_path, \"src/rydberggpt/tests/dataset_test/\")\n\n\n    dataloader = get_rydberg_dataloader(\n        batch_size=batch_size,\n        data_path=data_path,\n        buffer_size=buffer_size,\n        num_workers=num_workers,\n    )\n\n\n    counter = 0\n    for batch in dataloader:\n        print(batch.m_onehot.shape)\n        m_shifted_onehot = shift_inputs(batch.m_onehot)\n        print(m_shifted_onehot.shape)\n\n\n        counter += 1\n\n        if counter > 1:\n            break\n
    \ntorch.Size([128, 36, 2])\ntorch.Size([128, 36, 2])\ntorch.Size([128, 36, 2])\ntorch.Size([128, 36, 2])\n\n
    "},{"location":"examples/2_Dataset/#tutorial-dataset","title":"Tutorial: Dataset","text":"

    In this tutorial we discuss how the dataset is structured, and how to load it to train a model. The dataset is hosted through xanadu.ai and can be accessed via ADD LINK.

    "},{"location":"examples/2_Dataset/#structure","title":"Structure","text":"

    The dataset is built up of smaller subdatasets, each for a specific Hamiltonian parameter regime. Each sub folder contains four files, namely: - config.json: contains the configuration of the dataset - dataset.h5: contains the measurements of shape [num_samples, num_atoms] - graph.json: contains the graph of the dataset - properties.json: contains the observables of the dataset such as energy, magnetization, etc.

    "},{"location":"examples/2_Dataset/#the-system-prompt","title":"The system prompt","text":"

    The transformer encoder takes as input a graph structure. Each graph has num_atoms nodes and each node has a node feature vector containing delta, omega and beta.

    Lets generate an example graph and visualize it.

    "},{"location":"examples/2_Dataset/#loading-the-test-dataset","title":"Loading the test dataset","text":"

    We use the datapipes provided via torchdata to load the dataset. During training we sample from the dataset a list of buffer_size subset datasets, and then sample from this new smaller dataset the training batch.

    Each batch contains a data structure with 2 elements (see rydberggpt.data.dataclasses). The first element is a pytorch_geometric graph object (batch.graph, based on the batch). Each graph has num_atoms nodes and each node has a node feature vector containing delta, omega and beta. Finally we need the measurement data. These are one-hot encoded and stored in a tensor of shape [num_samples, num_atoms, 2].

    @dataclass\nclass Batch:\n    graph: Data\n    m_onehot: torch.Tensor\n
    "},{"location":"examples/3_Observables/","title":"Observables","text":"
    import os\n\nimport torch\nfrom rydberggpt.models.rydberg_encoder_decoder import get_rydberg_graph_encoder_decoder\nfrom rydberggpt.models.utils import generate_prompt\nfrom rydberggpt.observables.rydberg_energy import (\n    get_rydberg_energy,\n    get_staggered_magnetization,\n    get_x_magnetization,\n)\nfrom rydberggpt.utils import create_config_from_yaml, load_yaml_file\nfrom rydberggpt.utils_ckpt import get_model_from_ckpt\nfrom torch_geometric.data import Batch\n
    device = \"cpu\"\n\nbase_path = os.path.abspath(\"../\")\nlog_path = os.path.join(base_path, \"models/M_1/\")\n\nyaml_dict = load_yaml_file(log_path, \"hparams.yaml\")\nconfig = create_config_from_yaml(yaml_dict)\n\nmodel = get_model_from_ckpt(\n    log_path, model=get_rydberg_graph_encoder_decoder(config), ckpt=\"best\"\n)\nmodel.to(device=device)\nmodel.eval()  # don't forget to set to eval mode\n
    \nRydbergEncoderDecoder(\n  (encoder): Encoder(\n    (layers): ModuleList(\n      (0): EncoderLayer(\n        (self_attn): MultiheadAttention(\n          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)\n        )\n        (feed_forward): PositionwiseFeedForward(\n          (w_1): Linear(in_features=32, out_features=128, bias=True)\n          (w_2): Linear(in_features=128, out_features=32, bias=True)\n          (dropout): Dropout(p=0.1, inplace=False)\n        )\n        (sublayer): ModuleList(\n          (0-1): 2 x SublayerConnection(\n            (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n  )\n  (decoder): Decoder(\n    (layers): ModuleList(\n      (0-2): 3 x DecoderLayer(\n        (self_attn): MultiheadAttention(\n          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)\n        )\n        (src_attn): MultiheadAttention(\n          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)\n        )\n        (feed_forward): PositionwiseFeedForward(\n          (w_1): Linear(in_features=32, out_features=128, bias=True)\n          (w_2): Linear(in_features=128, out_features=32, bias=True)\n          (dropout): Dropout(p=0.1, inplace=False)\n        )\n        (sublayer): ModuleList(\n          (0-2): 3 x SublayerConnection(\n            (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n  )\n  (src_embed): GraphEmbedding(\n    (layers): ModuleList(\n      (0): GraphLayer(\n        (graph_layer): GCNConv(4, 64)\n        (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n        (dropout): 
Dropout(p=0.1, inplace=False)\n      )\n      (1): GCNConv(64, 32)\n    )\n    (final_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n  )\n  (tgt_embed): Sequential(\n    (0): Linear(in_features=2, out_features=32, bias=True)\n    (1): PositionalEncoding(\n      (dropout): Dropout(p=0.1, inplace=False)\n    )\n  )\n  (generator): Generator(\n    (proj): Linear(in_features=32, out_features=2, bias=True)\n  )\n)\n
    L = 5\ndelta = 1.0\nomega = 1.0\nbeta = 64.0\nRb = 1.15\nnum_samples = 5\n\npyg_graph = generate_prompt(\n    model_config=config,\n    n_rows=L,\n    n_cols=L,\n    delta=delta,\n    omega=omega,\n    beta=beta,\n    Rb=Rb,\n)\n
    # duplicate the prompt for num_samples\ncond = [pyg_graph for _ in range(num_samples)]\ncond = Batch.from_data_list(cond)\n\nsamples = model.get_samples(\n    batch_size=len(cond), cond=cond, num_atoms=L**2, fmt_onehot=False\n)\n
    \nGenerating atom 25/25                                                          \n\n
    energy = get_rydberg_energy(model, samples, cond=pyg_graph, device=device)\nprint(energy.mean() / L**2)\n
    \ntensor(0.0248)\n\n
    staggered_magnetization = get_staggered_magnetization(samples, L, L, device=device)\nprint(staggered_magnetization.mean() / L**2)\n
    \ntensor(0.0208)\n\n
    x_magnetization = get_x_magnetization(model, samples, cond=pyg_graph, device=device)\nprint(x_magnetization.mean() / L**2)\n
    \ntensor(0.7317)\n\n
    "},{"location":"examples/3_Observables/#tutorial-observables","title":"Tutorial: Observables","text":""},{"location":"examples/3_Observables/#background","title":"Background","text":"

    In this tutorial, we are going to load a pretrained model, use it to generate new samples, and calculate relevant observables based on these samples.

    We consider a system of \\(N=L \\times L\\) atoms arranged on a square lattice. The governing Hamiltonian defining the Rydberg atom array interactions has the following form:

    \\[ \\hat{H} = \\sum_{i<j} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\hat{n}_i \\hat{n}_j -\\delta \\sum_{i=1}^N \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i=1}^N \\hat{\\sigma}^x_i. \\quad (1) \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6}, \\quad (2) \\]

    where \\(\\hat{\\sigma}^{x}_{i} = \\vert g \\rangle_i \\langle r\\vert_i + \\vert r \\rangle_i \\langle g\\vert_i\\), the occupation number operator \\(\\hat{n}_i = \\frac{1}{2} \\left( \\hat{\\sigma}_{i} + \\mathbb{1} \\right) = \\vert r\\rangle_i \\langle r \\vert_i\\) and \\(\\hat{\\sigma}_{i} = \\vert r \\rangle_i \\langle r \\vert_i - \\vert g \\rangle_i \\langle g \\vert_i\\). The experimental settings of a Rydberg atom array are controlled by the detuning from resonance \\(\\delta\\), Rabi frequency \\(\\Omega\\), lattice length scale \\(a\\) and the positions of the atoms \\(\\{\\mathbf{r}_i\\}_i^N\\). From equation (2) above, we obtain a symmetric matrix \\(\\mathbf{V}\\), that encapsulates the relevant information about the lattice geometry, and derive the Rydberg blockade radius \\(R_b\\), within which simultaneous excitations are penalized. Finally, for the purposes of our study, the atom array is considered to be affected by thermal noise, in equilibrium at a temperature \\(T\\). The experimental settings are thus captured by the set of parameters \\(\\mathbf{x} = (\\Omega, \\delta/\\Omega, R_b/a, \\mathbf{V}, \\beta / \\Omega)\\), where \\(\\beta\\) is the inverse temperature.

    "},{"location":"examples/3_Observables/#loading-the-model","title":"Loading the model","text":"

    We have three pretrained models trained on three different datasets. Model \\(M_1\\) is trained with data for systems of size \\(L=5,6\\) (models/ds_1); Model \\(M_2\\) utilizes datasets for systems with \\(L=5,6,11,12\\) (models/ds_2); and Model \\(M_3\\) is trained on data covering \\(L=5,6,11,12,15,16\\) (models/ds_3).

    Let's start by loading the pretrained model and setting it into eval mode to ensure that the dropout layers are disabled.

    "},{"location":"examples/3_Observables/#generating-system-prompt-and-samples","title":"Generating system prompt and samples","text":"

    Next, let us define our system prompt \\(\\mathbf{x} = (\\Omega, \\delta/\\Omega, R_b/a, \\mathbf{V}, \\beta / \\Omega)\\). Below is a function generate_prompt that generates the required prompt structure to query the trained model. The prompt is a graph structure capturing the relevant information about the system, such as the lattice geometry, the Rydberg blockade radius, the temperature, and the Rabi frequency. The function generate_samples generates samples from the model given the prompt.

    "},{"location":"examples/3_Observables/#system-prompt","title":"System prompt","text":""},{"location":"examples/3_Observables/#generating-samples","title":"Generating samples","text":"

    The sampling function requires a batch of prompts; therefore, we duplicate our pyg_graph prompts as many times as we want to generate samples. The reasoning behind this is to allow the model to generate samples in parallel for different Hamiltonian parameters. This is especially helpful when training variationally.

    "},{"location":"examples/3_Observables/#observables","title":"Observables","text":"

    Now we are ready to calculate observables based on the samples generated. We consider three observables: the staggered magnetization, the x-magnetization, and the Rydberg energy.

    "},{"location":"examples/3_Observables/#rydberg-energy","title":"Rydberg energy","text":"

    We consider an estimate of the ground state energy \\(\\langle E \\rangle\\), which is defined as

    \\[ \\langle E \\rangle \\approx \\frac{1}{N_s} \\sum_{\\boldsymbol{\\sigma} \\sim p_{\\theta}(\\boldsymbol{\\sigma};\\mathbf{x})} \\frac{\\langle \\boldsymbol{\\sigma}|\\widehat{H}|\\Psi_{\\theta}\\rangle}{\\langle \\boldsymbol{\\sigma}|\\Psi_{\\theta}\\rangle}. \\]

    We provide a function get_rydberg_energy that calculates the Rydberg energy of the samples generated. Note that this function requires a single prompt.

    "},{"location":"examples/3_Observables/#stagger-magnetization","title":"Stagger magnetization","text":"

    The staggered magnetization for the square-lattice Rydberg array is defined in its occupation basis. This quantity is the order parameter for the disorder-to-checkerboard quantum phase transition, and can be calculated simply with

    \\[ \\langle\\hat{\\sigma}^{\\text{stag}}\\rangle \\approx \\frac{1}{N_s} \\sum_{\\boldsymbol{\\sigma} \\sim p_\\theta(\\boldsymbol{\\sigma};\\mathbf{x})} \\left| \\sum_{i=1}^{N} (-1)^i \\frac{n_i(\\boldsymbol{\\sigma}) - 1/2}{N} \\right| , \\]

    where \\(i\\) runs over all \\(N = L \\times L\\) atoms and \\(n_i(\\boldsymbol{\\sigma}) = \\langle \\boldsymbol{\\sigma}| r_i \\rangle\\langle r_i|\\boldsymbol{\\sigma} \\rangle\\) is the occupation number operator acting on atom \\(i\\) in a given configuration \\(\\boldsymbol{\\sigma}\\). Because this observable is diagonal, it can be computed directly from samples inferred from the decoder. The outer sum shows how importance sampling is used to estimate the expectation value over this operator, approximating the probability of a given configuration with the frequency with which it is sampled.

    We provide a function get_staggered_magnetization that calculates the staggered magnetization of the samples generated.

    "},{"location":"examples/3_Observables/#x-magnetization","title":"X-magnetization","text":"

    We consider an off-diagonal observable, where we must make use of the ground state wave function amplitudes of the inferred samples \\(\\Psi(\\boldsymbol{\\sigma}) = \\sqrt{p_{\\theta}(\\boldsymbol{\\sigma})}\\). As an example, we examine the spatially averaged expectation value of \\(\\hat{\\sigma}_x\\), which is defined as

    \\[ \\langle \\hat{\\sigma}^x \\rangle \\approx \\frac{1}{N_s} \\sum_{\\boldsymbol{\\sigma} \\sim p_\\theta(\\boldsymbol{\\sigma};\\mathbf{x})} \\frac{1}{N} \\sum_{\\boldsymbol{\\sigma}' \\in \\mathrm{SSF}(\\boldsymbol{\\sigma})} \\frac{\\Psi_\\theta(\\boldsymbol{\\sigma}')}{\\Psi_\\theta(\\boldsymbol{\\sigma})}, \\]

    where the variable \\(\\left\\{\\boldsymbol{\\sigma'}\\right\\}\\) is the set of configurations that are connected to \\(\\boldsymbol{\\sigma}\\) by a single spin flip (SSF).

    We provide a function get_x_magnetization that calculates the x-magnetization of the samples generated. Note that we do not have to batch our prompt. The x-magnetization is calculated for a single system prompt.

    "},{"location":"reference/data/","title":"Data","text":""},{"location":"reference/data/#rydberggpt.data","title":"rydberggpt.data","text":""},{"location":"reference/data/#rydberggpt.data.dataclasses","title":"dataclasses","text":""},{"location":"reference/data/#rydberggpt.data.dataclasses.BaseGraph","title":"BaseGraph dataclass","text":"

    Bases: ABC

    A base dataclass representing a graph configuration.

    Source code in src\\rydberggpt\\data\\dataclasses.py
    @dataclass\nclass BaseGraph(ABC):\n    \"\"\"A base dataclass representing a graph configuration.\"\"\"\n\n    num_atoms: int\n    graph_name: str\n    Rb: float\n    delta: float\n    omega: float\n    beta: float\n
    "},{"location":"reference/data/#rydberggpt.data.dataclasses.Batch","title":"Batch dataclass","text":"

    A dataclass representing a batch of graphs

    Source code in src\\rydberggpt\\data\\dataclasses.py
    @dataclass\nclass Batch:\n    \"\"\"A dataclass representing a batch of graphs\"\"\"\n\n    graph: Data\n    m_onehot: torch.Tensor\n
    "},{"location":"reference/data/#rydberggpt.data.dataclasses.GridGraph","title":"GridGraph dataclass","text":"

    Bases: BaseGraph

    A dataclass representing the configuration of a grid graph

    Source code in src\\rydberggpt\\data\\dataclasses.py
    @dataclass\nclass GridGraph(BaseGraph):\n    \"\"\"A dataclass representing the configuration of a grid graph\"\"\"\n\n    n_rows: int\n    n_cols: int\n
    "},{"location":"reference/data/#rydberggpt.data.dataclasses.custom_collate","title":"custom_collate(batch: List[Batch]) -> Batch","text":"

    Custom collate function to handle Batch objects when creating a DataLoader.

    Parameters:

    Name Type Description Default batch List[Batch]

    A list of Batch objects to be collated.

    required

    Returns:

    Type Description Batch

    A single Batch object containing the collated data.

    Source code in src\\rydberggpt\\data\\dataclasses.py
    def custom_collate(batch: List[Batch]) -> Batch:\n    \"\"\"\n    Custom collate function to handle Batch objects when creating a DataLoader.\n\n    Args:\n        batch (List[Batch]): A list of Batch objects to be collated.\n\n    Returns:\n        (Batch): A single Batch object containing the collated data.\n    \"\"\"\n\n    graph_batch = PyGBatch.from_data_list([b.graph for b in batch])\n\n    # NOTE: The graphs, and measurement data are not of the same size. To ensure\n    # a padded tensor suitable for the neural network, we use the to_dense_batch function. This ensures that our\n    # data is padded with zeros.\n    # see: https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/utils/to_dense_batch.html\n\n    m_onehot = to_dense_batch(\n        torch.cat([b.m_onehot for b in batch], axis=-2),\n        batch=graph_batch.batch,\n    )[0].to(torch.float32)\n\n    return Batch(graph=graph_batch, m_onehot=m_onehot)\n
    "},{"location":"reference/data/#rydberggpt.data.graph_structures","title":"graph_structures","text":""},{"location":"reference/data/#rydberggpt.data.graph_structures.generate_grid_graph","title":"generate_grid_graph(n_rows: int, n_cols: int) -> nx.Graph","text":"

    Generates a fully connected grid graph with weights based on the reciprocal of Euclidean distance. Coordinates is in units of lattice constant a.

    Parameters:

    Name Type Description Default n_rows int

    The number of rows in the grid.

    required n_cols int

    The number of columns in the grid.

    required

    Returns:

    Type Description Graph

    The generated grid graph with node positions and edge weights.

    Source code in src\\rydberggpt\\data\\graph_structures.py
    def generate_grid_graph(n_rows: int, n_cols: int) -> nx.Graph:\n    \"\"\"\n    Generates a fully connected grid graph with weights based on the reciprocal of Euclidean distance. Coordinates is in units of lattice constant a.\n\n    Args:\n        n_rows (int): The number of rows in the grid.\n        n_cols (int): The number of columns in the grid.\n\n    Returns:\n        (nx.Graph): The generated grid graph with node positions and edge weights.\n    \"\"\"\n\n    # Create an empty graph\n    graph = nx.Graph()\n\n    # Add nodes with positions as attributes\n    for i in range(n_rows):\n        for j in range(n_cols):\n            node_id = i * n_cols + j\n            graph.add_node(node_id, pos=(i, j))\n\n    # Add fully connected edges with weights as the reciprocal of Euclidean distance\n    for node1 in graph.nodes:\n        pos1 = np.array(graph.nodes[node1][\"pos\"])\n        for node2 in graph.nodes:\n            if node1 != node2:\n                pos2 = np.array(graph.nodes[node2][\"pos\"])\n                interaction_strength = np.linalg.norm(pos1 - pos2) ** (-6)\n                graph.add_edge(node1, node2, weight=interaction_strength)\n\n    return graph\n
    "},{"location":"reference/data/#rydberggpt.data.graph_structures.get_graph","title":"get_graph(config: BaseGraph) -> nx.Graph","text":"

    Generates a graph based on the given configuration.

    Parameters:

    Name Type Description Default config BaseGraph

    The graph configuration, an instance of a subclass of the BaseGraph dataclass.

    required

    Returns:

    Type Description Graph

    The generated graph based on the configuration.

    Raises:

    Type Description NotImplementedError

    If the graph name provided in the configuration is not implemented.

    Source code in src\\rydberggpt\\data\\graph_structures.py
    def get_graph(config: BaseGraph) -> nx.Graph:\n    \"\"\"\n    Generates a graph based on the given configuration.\n\n    Args:\n        config (BaseGraph): The graph configuration, an instance of a subclass of the BaseGraph dataclass.\n\n    Returns:\n        (nx.Graph): The generated graph based on the configuration.\n\n    Raises:\n        NotImplementedError: If the graph name provided in the configuration is not implemented.\n    \"\"\"\n    if config.graph_name == \"grid_graph\":\n        graph = generate_grid_graph(config.n_rows, config.n_cols)\n\n    else:\n        raise NotImplementedError(f\"Graph name {config.graph_name} not implemented.\")\n\n    return graph\n
    "},{"location":"reference/data/#rydberggpt.data.rydberg_dataset","title":"rydberg_dataset","text":""},{"location":"reference/data/#rydberggpt.data.rydberg_dataset.build_datapipes","title":"build_datapipes(root_dir: str, batch_size: int, buffer_size: int)","text":"

    Builds a data pipeline for processing files from a specified directory.

    This function initializes a FileLister to list files from the specified directory and its subdirectories. It then demultiplexes the files into three separate data pipes for processing configuration, dataset, and graph files respectively. The configuration and graph files are opened, parsed as JSON, and processed using a custom selection function. The data pipes are then zipped together, shuffled, filtered, and buffered into batches using a custom collate function.

    Parameters:

    Name Type Description Default root_dir str

    The root directory from which to list files.

    required batch_size int

    The number of samples per batch.

    required buffer_size int

    The buffer size to use when buffering data into batches.

    required

    Returns:

    Type Description IterDataPipe

    The final data pipe containing batches of processed data.

    Source code in src\\rydberggpt\\data\\rydberg_dataset.py
    def build_datapipes(root_dir: str, batch_size: int, buffer_size: int):\n    \"\"\"\n    Builds a data pipeline for processing files from a specified directory.\n\n    This function initializes a FileLister to list files from the specified\n    directory and its subdirectories. It then demultiplexes the files into\n    three separate data pipes for processing configuration, dataset, and\n    graph files respectively. The configuration and graph files are opened,\n    parsed as JSON, and processed using a custom selection function.\n    The data pipes are then zipped together, shuffled, filtered, and buffered\n    into batches using a custom collate function.\n\n    Args:\n        root_dir (str): The root directory from which to list files.\n        batch_size (int): The number of samples per batch.\n        buffer_size (int): The buffer size to use when buffering data into batches.\n\n    Returns:\n        (IterDataPipe): The final data pipe containing batches of processed data.\n    \"\"\"\n    file_lister = FileLister([root_dir], recursive=True)\n    config_dp, dataset_dp, graph_dp = file_lister.demux(\n        3,\n        classify_file_fn,\n        drop_none=True,\n        buffer_size=-1,\n    )\n    config_dp = config_dp.open_files().parse_json_files()\n    graph_dp = graph_dp.open_files().parse_json_files()\n    datapipe = config_dp.zip(dataset_dp).zip(graph_dp).map(map_fn)\n    datapipe = datapipe.shuffle()\n    datapipe = Buffer(source_datapipe=datapipe, buffer_size=buffer_size)\n    datapipe = datapipe.batch(batch_size).collate(custom_collate).sharding_filter()\n\n    return datapipe\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph","title":"utils_graph","text":""},{"location":"reference/data/#rydberggpt.data.utils_graph.batch_pyg_data","title":"batch_pyg_data(data_list: List[Data]) -> Data","text":"

    Batch a list of PyTorch Geometric Data objects into a single Data object.

    Parameters:

    Name Type Description Default data_list List[Data]

    List of PyTorch Geometric Data objects.

    required

    Returns:

    Type Description Data

    A single batched Data object containing all input Data objects.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def batch_pyg_data(data_list: List[Data]) -> Data:\n    \"\"\"\n    Batch a list of PyTorch Geometric Data objects into a single Data object.\n\n    Args:\n        data_list: List of PyTorch Geometric Data objects.\n\n    Returns:\n        (Data): A single batched Data object containing all input Data objects.\n    \"\"\"\n    batched_data = PyGBatch.from_data_list(data_list)\n    return batched_data\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.dict_to_graph","title":"dict_to_graph(graph_dict: Dict) -> nx.Graph","text":"

    Create a NetworkX graph from a dictionary.

    Parameters:

    Name Type Description Default graph_dict Dict

    Dictionary representing a NetworkX graph.

    required

    Returns:

    Type Description Graph

    NetworkX graph object.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def dict_to_graph(graph_dict: Dict) -> nx.Graph:\n    \"\"\"\n    Create a NetworkX graph from a dictionary.\n\n    Args:\n        graph_dict: Dictionary representing a NetworkX graph.\n\n    Returns:\n        (nx.Graph): NetworkX graph object.\n    \"\"\"\n    graph = nx.node_link_graph(graph_dict)\n    return graph\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.graph_to_dict","title":"graph_to_dict(graph: nx.Graph) -> Dict","text":"

    Convert a NetworkX graph to a dictionary.

    Parameters:

    Name Type Description Default graph Graph

    NetworkX graph object.

    required

    Returns:

    Type Description Dict

    A dictionary representing the NetworkX graph.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def graph_to_dict(graph: nx.Graph) -> Dict:\n    \"\"\"\n    Convert a NetworkX graph to a dictionary.\n\n    Args:\n        graph: NetworkX graph object.\n\n    Returns:\n        (Dict): A dictionary representing the NetworkX graph.\n    \"\"\"\n    graph_dict = nx.node_link_data(graph)\n    return graph_dict\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.networkx_to_pyg_data","title":"networkx_to_pyg_data(graph: nx.Graph, node_features: torch.Tensor) -> Data","text":"

    Convert a NetworkX graph to a PyTorch Geometric Data object.

    Parameters:

    Name Type Description Default graph Graph

    NetworkX graph object.

    required

    Returns:

    Type Description Data

    A PyTorch Geometric Data object representing the input graph.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def networkx_to_pyg_data(graph: nx.Graph, node_features: torch.Tensor) -> Data:\n    \"\"\"\n    Convert a NetworkX graph to a PyTorch Geometric Data object.\n\n    Args:\n        graph: NetworkX graph object.\n\n    Returns:\n        (Data): A PyTorch Geometric Data object representing the input graph.\n    \"\"\"\n\n    x = node_features.repeat(len(graph.nodes()), 1)\n\n    # Convert the edge list to a PyTorch Geometric edge_index tensor\n    edge_index = torch.tensor(list(graph.edges), dtype=torch.long).t().contiguous()\n\n    # Get edge weights from the graph\n    edge_weight = torch.tensor(\n        list(nx.get_edge_attributes(graph, \"weight\").values()), dtype=torch.float\n    )\n\n    # Create a Data object\n    data = Data(x=x, edge_index=edge_index, edge_attr=edge_weight)\n\n    return data\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.pyg_graph_data","title":"pyg_graph_data(config, graph_data)","text":"

    Convert a graph in node-link format to a PyG Data object.

    Parameters:

    Name Type Description Default graph_data Dict

    The graph in node-link format.

    required config_data Dict

    The configuration data for the graph.

    required

    Returns:

    Type Description Data

    The graph as a PyG Data object.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def pyg_graph_data(config, graph_data):\n    \"\"\"\n    Convert a graph in node-link format to a PyG Data object.\n\n    Args:\n        graph_data (Dict): The graph in node-link format.\n        config_data (Dict): The configuration data for the graph.\n\n    Returns:\n        (Data): The graph as a PyG Data object.\n\n    \"\"\"\n    node_features = torch.tensor(\n        [\n            config[\"delta\"],\n            config[\"omega\"],\n            config[\"beta\"],\n            config[\"Rb\"],\n        ],\n        dtype=torch.float32,\n    )\n    graph_nx = nx.node_link_graph(graph_data)\n    pyg_graph = networkx_to_pyg_data(graph_nx, node_features)\n    return pyg_graph\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.read_graph_from_json","title":"read_graph_from_json(file_path: str) -> Dict","text":"

    Read a JSON file and convert it to a dictionary representing a NetworkX graph.

    Parameters:

    Name Type Description Default file_path str

    Path to the JSON file to read.

    required

    Returns:

    Type Description Dict

    A dictionary representing a NetworkX graph.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def read_graph_from_json(file_path: str) -> Dict:\n    \"\"\"\n    Read a JSON file and convert it to a dictionary representing a NetworkX graph.\n\n    Args:\n        file_path: Path to the JSON file to read.\n\n    Returns:\n        (Dict): A dictionary representing a NetworkX graph.\n    \"\"\"\n    with open(file_path, \"r\") as f:\n        graph_dict = json.load(f)\n    return graph_dict\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.save_graph_to_json","title":"save_graph_to_json(graph_dict: Dict, file_path: str) -> None","text":"

    Save a dictionary representing a NetworkX graph to a JSON file.

    Parameters:

    Name Type Description Default graph_dict Dict

    Dictionary representing a NetworkX graph.

    required file_path str

    Path to the JSON file to save.

    required Source code in src\\rydberggpt\\data\\utils_graph.py
    def save_graph_to_json(graph_dict: Dict, file_path: str) -> None:\n    \"\"\"\n    Save a dictionary representing a NetworkX graph to a JSON file.\n\n    Args:\n        graph_dict: Dictionary representing a NetworkX graph.\n        file_path: Path to the JSON file to save.\n    \"\"\"\n    with open(file_path, \"w\") as f:\n        json.dump(graph_dict, f)\n
    "},{"location":"reference/observables/","title":"Observables","text":""},{"location":"reference/observables/#rydberggpt.observables","title":"rydberggpt.observables","text":""},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy","title":"rydberg_energy","text":""},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy.get_rydberg_energy","title":"get_rydberg_energy(model: RydbergEncoderDecoder, samples: torch.Tensor, cond: torch.Tensor, device: torch.device, undo_sample_path=None, undo_sample_path_args=None) -> torch.Tensor","text":"

    Calculates energy of the model based on the Hamiltonian defined by cond (graph).

    Parameters:

    Name Type Description Default model RydbergEncoderDecoder

    Model to estimate energy on.

    required samples Tensor

    Samples drawn from model based on cond.

    required cond Tensor

    A tensor containing the input condition.

    required device str

    The device on which to allocate the tensors. Defaults to \"cpu\".

    required undo_sample_path Tensor

    Map that undoes the sample path of the model to match the labelling in the graph.

    None undo_sample_path_args tuple

    Additional arguments for undo_sample_path.

    None

    Returns:

    Type Description Tensor

    A tensor containing the estimated energy of each sample alongside its decomposition into terms.

    Source code in src\\rydberggpt\\observables\\rydberg_energy.py
    @torch.no_grad()\ndef get_rydberg_energy(\n    model: RydbergEncoderDecoder,\n    samples: torch.Tensor,  # dtype=torch.int64\n    cond: torch.Tensor,  # dtype=torch.float32\n    device: torch.device,\n    undo_sample_path=None,\n    undo_sample_path_args=None,\n) -> torch.Tensor:\n    \"\"\"\n    Calculates energy of the model based on the Hamiltonian defined by cond (graph).\n\n    Args:\n        model (RydbergEncoderDecoder): Model to estimate energy on.\n        samples (torch.Tensor): Samples drawn from model based on cond.\n        cond (torch.Tensor): A tensor containing the input condition.\n        device (str, optional): The device on which to allocate the tensors. Defaults to \"cpu\".\n        undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n        undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated energy of each sample alongside its decomposition into terms.\n    \"\"\"\n\n    model = model.to(device)\n    samples = samples.to(device)\n    cond = cond.to(device)\n\n    delta = cond.x[:, 0]  # Detuning coeffs\n    omega = cond.x[0, 1]  # Rabi frequency\n    # beta = cond.x[0, 2]  # Inverse temperature\n    Rb = cond.x[0, 3]  # Rydberg Blockade radius\n\n    # Estimate interaction/Rydberg blockade term\n    if undo_sample_path is not None:\n        unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n    else:\n        unpathed_samples = samples\n\n    interaction = (\n        (\n            unpathed_samples[..., cond.edge_index].prod(dim=-2)\n            * cond.edge_attr[None, ...]\n        ).sum(dim=-1)\n        * Rb**6\n        * omega\n    )\n\n    detuning = (delta * unpathed_samples).sum(1)  # sum over sequence length\n\n    x_magnetization = get_x_magnetization(model, samples, cond, device)\n\n    offdiag_energy = 0.5 * omega * x_magnetization\n    diag_energy = 
interaction - detuning\n    energy = diag_energy - offdiag_energy\n\n    return torch.stack(\n        [\n            energy,\n            interaction,\n            detuning,\n            diag_energy,\n            offdiag_energy,\n        ]\n    ).T\n
    "},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy.get_staggered_magnetization","title":"get_staggered_magnetization(samples: torch.Tensor, Lx: int, Ly: int, device: torch.device, undo_sample_path=None, undo_sample_path_args=None)","text":"

    Calculates staggered magnetization of the model.

    Parameters:

    Name Type Description Default samples Tensor

    Samples drawn from model.

    required Lx int

    Linear size in the x dimension

    required Ly int

    Linear size in the y dimension

    required device str

    The device on which to allocate the tensors. Defaults to \"cpu\".

    required undo_sample_path Tensor

    Map that undoes the sample path of the model to match the labelling in the graph.

    None undo_sample_path_args tuple

    Additional arguments for undo_sample_path.

    None

    Returns:

    Type Description Tensor

    A tensor containing the estimated staggered magnetization of each sample.

    Source code in src\\rydberggpt\\observables\\rydberg_energy.py
    @torch.no_grad()\ndef get_staggered_magnetization(\n    samples: torch.Tensor,\n    Lx: int,\n    Ly: int,\n    device: torch.device,\n    undo_sample_path=None,\n    undo_sample_path_args=None,\n):\n    \"\"\"\n    Calculates staggered magnetization of the model.\n\n    Args:\n        samples (torch.Tensor): Samples drawn from model.\n        Lx (int): Linear size in the x dimension\n        Ly (int): Linear size in the y dimension\n        device (str, optional): The device on which to allocate the tensors. Defaults to \"cpu\".\n        undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n        undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated staggered magnetization of each sample.\n    \"\"\"\n\n    if undo_sample_path is not None:\n        unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n    else:\n        unpathed_samples = samples\n\n    unpathed_samples = unpathed_samples.reshape(-1, Ly, Lx)\n\n    unpathed_sigmas = 2 * unpathed_samples - 1\n\n    idcs = np.indices((Ly, Lx))\n    checkerboard = 2 * (idcs.sum(0) % 2) - 1\n    checkerboard = torch.from_numpy(checkerboard).to(device=device)\n\n    staggered_magnetization = torch.abs((checkerboard * unpathed_sigmas).mean((-1, -2)))\n\n    return staggered_magnetization\n
    "},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy.get_x_magnetization","title":"get_x_magnetization(model: RydbergEncoderDecoder, samples: torch.Tensor, cond: torch.Tensor, device: torch.device)","text":"

    Calculates x magnetization of the model.

    Parameters:

    Name Type Description Default model RydbergEncoderDecoder

    Model to estimate energy on.

    required samples Tensor

    Samples drawn from model based on cond.

    required cond Tensor

    A tensor containing the input condition.

    required device str

    The device on which to allocate the tensors. Defaults to \"cpu\".

    required

    Returns:

    Type Description Tensor

    A tensor containing the estimated x magnetization of each sample.

    Source code in src\\rydberggpt\\observables\\rydberg_energy.py
    @torch.no_grad()\ndef get_x_magnetization(\n    model: RydbergEncoderDecoder,\n    samples: torch.Tensor,  # dtype=torch.int64\n    cond: torch.Tensor,  # dtype=torch.float32\n    device: torch.device,\n):\n    \"\"\"\n    Calculates x magnetization of the model.\n\n    Args:\n        model (RydbergEncoderDecoder): Model to estimate energy on.\n        samples (torch.Tensor): Samples drawn from model based on cond.\n        cond (torch.Tensor): A tensor containing the input condition.\n        device (str, optional): The device on which to allocate the tensors. Defaults to \"cpu\".\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated x magnetization of each sample.\n    \"\"\"\n\n    model = model.to(device)\n    samples = samples.to(device)\n    cond = cond.to(device)\n\n    # Create all possible states achievable by a single spin flip\n    flipped = (samples[:, None, :] + torch.eye(samples.shape[-1])[None, ...]) % 2\n    flipped = flipped.reshape(-1, samples.shape[-1])\n\n    # Get propabilities of sampled states and the single spin flipped states\n    sample_log_probs = model.get_log_probs(to_one_hot(samples, 2), cond)\n    flipped_log_probs = model.get_log_probs(to_one_hot(flipped, 2), cond)\n    flipped_log_probs = flipped_log_probs.reshape(-1, samples.shape[-1])\n\n    # Calculate ratio of the wavefunction for the sampled and flipped states\n    log_psi_ratio = 0.5 * (flipped_log_probs - sample_log_probs[:, None])\n    psi_ratio = torch.exp(log_psi_ratio)\n\n    x_magnetization = psi_ratio.sum(-1)\n    return x_magnetization\n
    "},{"location":"reference/training/","title":"Training","text":""},{"location":"reference/training/#rydberggpt.training","title":"rydberggpt.training","text":""},{"location":"reference/training/#rydberggpt.training.callbacks","title":"callbacks","text":""},{"location":"reference/training/#rydberggpt.training.callbacks.module_info_callback","title":"module_info_callback","text":""},{"location":"reference/training/#rydberggpt.training.callbacks.module_info_callback.ModelInfoCallback","title":"ModelInfoCallback","text":"

    Bases: Callback

    A custom PyTorch Lightning callback that logs model information at the start of training.

    This callback extracts and logs information about the model's structure, total parameters, and total trainable parameters at the beginning of the training process. The information is saved as a YAML file in the logger's log directory.

    Source code in src\\rydberggpt\\training\\callbacks\\module_info_callback.py
    class ModelInfoCallback(Callback):\n    \"\"\"\n    A custom PyTorch Lightning callback that logs model information at the start of training.\n\n    This callback extracts and logs information about the model's structure, total parameters, and\n    total trainable parameters at the beginning of the training process. The information is saved\n    as a YAML file in the logger's log directory.\n    \"\"\"\n\n    def on_train_start(self, trainer, pl_module) -> None:\n        \"\"\"\n        Run the callback at the beginning of training.\n\n        Args:\n            trainer (pytorch_lightning.Trainer): The PyTorch Lightning trainer instance.\n            pl_module (pytorch_lightning.LightningModule): The PyTorch Lightning module instance.\n        \"\"\"\n        # This will run at the beginning of training\n        log_path = trainer.logger.log_dir\n\n        summary = ModelSummary(pl_module, max_depth=1)\n        total_parameters = summary.total_parameters\n        total_trainable_parameters = summary.trainable_parameters\n\n        summary_dict = extract_model_info(pl_module.model)\n        summary_dict[\"total_parameters\"] = total_parameters\n        summary_dict[\"total_trainable_parameters\"] = total_trainable_parameters\n\n        # Save the summary dictionary to a YAML file\n        with open(f\"{log_path}/model_info.yaml\", \"w\") as file:\n            yaml.dump(summary_dict, file)\n
    "},{"location":"reference/training/#rydberggpt.training.callbacks.module_info_callback.ModelInfoCallback.on_train_start","title":"on_train_start(trainer, pl_module) -> None","text":"

    Run the callback at the beginning of training.

    Parameters:

    Name Type Description Default trainer Trainer

    The PyTorch Lightning trainer instance.

    required pl_module LightningModule

    The PyTorch Lightning module instance.

    required Source code in src\\rydberggpt\\training\\callbacks\\module_info_callback.py
    def on_train_start(self, trainer, pl_module) -> None:\n    \"\"\"\n    Run the callback at the beginning of training.\n\n    Args:\n        trainer (pytorch_lightning.Trainer): The PyTorch Lightning trainer instance.\n        pl_module (pytorch_lightning.LightningModule): The PyTorch Lightning module instance.\n    \"\"\"\n    # This will run at the beginning of training\n    log_path = trainer.logger.log_dir\n\n    summary = ModelSummary(pl_module, max_depth=1)\n    total_parameters = summary.total_parameters\n    total_trainable_parameters = summary.trainable_parameters\n\n    summary_dict = extract_model_info(pl_module.model)\n    summary_dict[\"total_parameters\"] = total_parameters\n    summary_dict[\"total_trainable_parameters\"] = total_trainable_parameters\n\n    # Save the summary dictionary to a YAML file\n    with open(f\"{log_path}/model_info.yaml\", \"w\") as file:\n        yaml.dump(summary_dict, file)\n
    "},{"location":"reference/training/#rydberggpt.training.logger","title":"logger","text":""},{"location":"reference/training/#rydberggpt.training.logger.setup_logger","title":"setup_logger(log_path)","text":"

    Set up the logger to write logs to a file and the console.

    Source code in src\\rydberggpt\\training\\logger.py
    def setup_logger(log_path):\n    \"\"\"\n    Set up the logger to write logs to a file and the console.\n    \"\"\"\n    # Ensure the log_path exists\n    if not os.path.exists(log_path):\n        os.makedirs(log_path)\n\n    logger = logging.getLogger()\n    logger.setLevel(logging.INFO)\n\n    # Console Handler\n    ch = logging.StreamHandler()\n    ch.setLevel(logging.INFO)\n    formatter = logging.Formatter(\"%(asctime)s - %(levelname)s - %(message)s\")\n    ch.setFormatter(formatter)\n    logger.addHandler(ch)\n\n    # File Handler\n    fh = logging.FileHandler(os.path.join(log_path, \"training.log\"))\n    fh.setLevel(logging.INFO)\n    fh.setFormatter(formatter)\n    logger.addHandler(fh)\n\n    return logger\n
    "},{"location":"reference/training/#rydberggpt.training.loss","title":"loss","text":""},{"location":"reference/training/#rydberggpt.training.loss.NLLLoss","title":"NLLLoss","text":"

    Bases: LightningModule

    This class implements the Negative Log Likelihood (NLL) loss function as a PyTorch Lightning module.

    The NLL loss measures the performance of a classification model where the prediction input is a probability distribution over classes. It is useful in training models for multi-class classification problems.

    The loss is calculated by taking the negative log of the probabilities predicted by the model for the true class labels.

    Methods:

    Name Description forward

    Computes the NLL loss based on the conditional log probabilities and the target values.

    Examples:

    >>> nll_loss = NLLLoss()\n>>> loss = nll_loss(cond_log_probs, tgt)\n
    Source code in src\\rydberggpt\\training\\loss.py
    class NLLLoss(pl.LightningModule):\n    \"\"\"\n    This class implements the Negative Log Likelihood (NLL) loss function as a PyTorch Lightning module.\n\n    The NLL loss measures the performance of a classification model where the prediction input is a probability\n    distribution over classes. It is useful in training models for multi-class classification problems.\n\n    The loss is calculated by taking the negative log of the probabilities predicted by the model for the true class labels.\n\n    Methods:\n        forward:\n            Computes the NLL loss based on the conditional log probabilities and the target values.\n\n    Examples:\n        >>> nll_loss = NLLLoss()\n        >>> loss = nll_loss(cond_log_probs, tgt)\n    \"\"\"\n\n    def __init__(self):\n        super(NLLLoss, self).__init__()\n\n    def forward(self, cond_log_probs: Tensor, tgt: Tensor) -> Tensor:\n        \"\"\"\n        Computes the NLL loss based on the conditional log probabilities and the target values.\n\n        Args:\n            cond_log_probs (Tensor): The conditional log probabilities predicted by the model.\n            tgt (Tensor): The target values.\n\n        Returns:\n            (Tensor): The computed NLL loss.\n        \"\"\"\n        num_atoms = tgt.shape[-2] - (tgt == 0.0).all(-1).sum(-1)\n        log_probs = (cond_log_probs * tgt).sum(dim=(-2, -1))\n        loss = -torch.mean(log_probs / num_atoms)\n        return loss\n
    "},{"location":"reference/training/#rydberggpt.training.loss.NLLLoss.forward","title":"forward(cond_log_probs: Tensor, tgt: Tensor) -> Tensor","text":"

    Computes the NLL loss based on the conditional log probabilities and the target values.

    Parameters:

    Name Type Description Default cond_log_probs Tensor

    The conditional log probabilities predicted by the model.

    required tgt Tensor

    The target values.

    required

    Returns:

    Type Description Tensor

    The computed NLL loss.

    Source code in src\\rydberggpt\\training\\loss.py
    def forward(self, cond_log_probs: Tensor, tgt: Tensor) -> Tensor:\n    \"\"\"\n    Computes the NLL loss based on the conditional log probabilities and the target values.\n\n    Args:\n        cond_log_probs (Tensor): The conditional log probabilities predicted by the model.\n        tgt (Tensor): The target values.\n\n    Returns:\n        (Tensor): The computed NLL loss.\n    \"\"\"\n    num_atoms = tgt.shape[-2] - (tgt == 0.0).all(-1).sum(-1)\n    log_probs = (cond_log_probs * tgt).sum(dim=(-2, -1))\n    loss = -torch.mean(log_probs / num_atoms)\n    return loss\n
    "},{"location":"reference/training/#rydberggpt.training.train","title":"train","text":""},{"location":"reference/training/#rydberggpt.training.trainer","title":"trainer","text":""},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer","title":"RydbergGPTTrainer","text":"

    Bases: LightningModule

    A custom PyTorch Lightning module for training a Rydberg GPT model.

    Parameters:

    Name Type Description Default model Module

    The model to be trained.

    required config dataclass

    A dataclass containing the model's configuration parameters.

    required logger TensorBoardLogger

    A TensorBoard logger instance for logging training progress.

    None example_input_array tensor

    An example input tensor used for generating the model summary.

    None Source code in src\\rydberggpt\\training\\trainer.py
    class RydbergGPTTrainer(pl.LightningModule):\n    \"\"\"\n    A custom PyTorch Lightning module for training a Rydberg GPT model.\n\n    Args:\n        model (nn.Module): The model to be trained.\n        config (dataclass): A dataclass containing the model's configuration parameters.\n        logger (TensorBoardLogger): A TensorBoard logger instance for logging training progress.\n        example_input_array (torch.tensor, optional): An example input tensor used for\n            generating the model summary.\n    \"\"\"\n\n    def __init__(\n        self,\n        model: nn.Module,\n        config: dataclass,\n        logger: TensorBoardLogger = None,\n        example_input_array: torch.tensor = None,\n    ) -> None:\n        super().__init__()\n        self.config = config\n        self.save_hyperparameters(asdict(config))\n        self.model = model\n        self.criterion = getattr(loss, self.config.criterion)()\n        self.example_input_array = example_input_array\n\n    def forward(self, m_onehot: torch.Tensor, cond: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Perform a forward pass through the model.\n\n        Args:\n            m_onehot (torch.Tensor): One-hot encoded measurements tensor.\n            cond (torch.Tensor): Conditioning tensor. 
# TODO prompt\n\n        Returns:\n            (torch.Tensor): Conditional log probabilities tensor.\n        \"\"\"\n        out = self.model.forward(m_onehot, cond)\n        cond_log_probs = self.model.generator(out)\n        return cond_log_probs\n\n    def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:\n        \"\"\"\n        Perform a single training step.\n\n        Args:\n            batch (pl.Batch): A batch of data during training.\n            batch_idx (int): The index of the current batch.\n\n        Returns:\n            (torch.Tensor): The training loss for the current batch.\n        \"\"\"\n        m_shifted_onehot = shift_inputs(batch.m_onehot)\n\n        cond_log_probs = self.forward(m_shifted_onehot, batch.graph)\n        loss = self.criterion(cond_log_probs, batch.m_onehot)\n        self.log(\"train_loss\", loss, sync_dist=True)\n        return loss\n\n    def configure_optimizers(self) -> Dict[str, Union[optim.Optimizer, Dict]]:\n        \"\"\"\n        Configures the optimizer and learning rate scheduler for the RydbergGPTTrainer.\n\n        Returns:\n            (Dict[str, Union[optim.Optimizer, Dict]]): A dictionary containing the optimizer and lr_scheduler configurations.\n        \"\"\"\n        optimizer_class = getattr(optim, self.config.optimizer)\n        optimizer = optimizer_class(\n            self.model.parameters(), lr=self.config.learning_rate\n        )\n\n        # Add learning rate scheduler\n        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(\n            optimizer,\n            T_0=self.config.t_initial,  # initial number of epochs in a period\n            T_mult=self.config.t_mult,  # factor to increase the period length after each restart\n            eta_min=self.config.eta_min,  # minimum learning rate\n        )\n\n        # Return both the optimizer and the scheduler\n        return {\n            \"optimizer\": optimizer,\n            \"lr_scheduler\": {\n                
\"scheduler\": scheduler,\n                \"interval\": \"epoch\",\n                \"monitor\": \"train_loss\",\n            },\n        }\n
    "},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer.configure_optimizers","title":"configure_optimizers() -> Dict[str, Union[optim.Optimizer, Dict]]","text":"

    Configures the optimizer and learning rate scheduler for the RydbergGPTTrainer.

    Returns:

    Type Description Dict[str, Union[Optimizer, Dict]]

    A dictionary containing the optimizer and lr_scheduler configurations.

    Source code in src\\rydberggpt\\training\\trainer.py
    def configure_optimizers(self) -> Dict[str, Union[optim.Optimizer, Dict]]:\n    \"\"\"\n    Configures the optimizer and learning rate scheduler for the RydbergGPTTrainer.\n\n    Returns:\n        (Dict[str, Union[optim.Optimizer, Dict]]): A dictionary containing the optimizer and lr_scheduler configurations.\n    \"\"\"\n    optimizer_class = getattr(optim, self.config.optimizer)\n    optimizer = optimizer_class(\n        self.model.parameters(), lr=self.config.learning_rate\n    )\n\n    # Add learning rate scheduler\n    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(\n        optimizer,\n        T_0=self.config.t_initial,  # initial number of epochs in a period\n        T_mult=self.config.t_mult,  # factor to increase the period length after each restart\n        eta_min=self.config.eta_min,  # minimum learning rate\n    )\n\n    # Return both the optimizer and the scheduler\n    return {\n        \"optimizer\": optimizer,\n        \"lr_scheduler\": {\n            \"scheduler\": scheduler,\n            \"interval\": \"epoch\",\n            \"monitor\": \"train_loss\",\n        },\n    }\n
    "},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer.forward","title":"forward(m_onehot: torch.Tensor, cond: torch.Tensor) -> torch.Tensor","text":"

    Perform a forward pass through the model.

    Parameters:

    Name Type Description Default m_onehot Tensor

    One-hot encoded measurements tensor.

    required cond Tensor

    Conditioning tensor. # TODO prompt

    required

    Returns:

    Type Description Tensor

    Conditional log probabilities tensor.

    Source code in src\\rydberggpt\\training\\trainer.py
    def forward(self, m_onehot: torch.Tensor, cond: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Perform a forward pass through the model.\n\n    Args:\n        m_onehot (torch.Tensor): One-hot encoded measurements tensor.\n        cond (torch.Tensor): Conditioning tensor. # TODO prompt\n\n    Returns:\n        (torch.Tensor): Conditional log probabilities tensor.\n    \"\"\"\n    out = self.model.forward(m_onehot, cond)\n    cond_log_probs = self.model.generator(out)\n    return cond_log_probs\n
    "},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer.training_step","title":"training_step(batch: torch.Tensor, batch_idx: int) -> torch.Tensor","text":"

    Perform a single training step.

    Parameters:

    Name Type Description Default batch Batch

    A batch of data during training.

    required batch_idx int

    The index of the current batch.

    required

    Returns:

    Type Description Tensor

    The training loss for the current batch.

    Source code in src\\rydberggpt\\training\\trainer.py
    def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:\n    \"\"\"\n    Perform a single training step.\n\n    Args:\n        batch (pl.Batch): A batch of data during training.\n        batch_idx (int): The index of the current batch.\n\n    Returns:\n        (torch.Tensor): The training loss for the current batch.\n    \"\"\"\n    m_shifted_onehot = shift_inputs(batch.m_onehot)\n\n    cond_log_probs = self.forward(m_shifted_onehot, batch.graph)\n    loss = self.criterion(cond_log_probs, batch.m_onehot)\n    self.log(\"train_loss\", loss, sync_dist=True)\n    return loss\n
    "},{"location":"reference/training/#rydberggpt.training.utils","title":"utils","text":""},{"location":"reference/training/#rydberggpt.training.utils.set_example_input_array","title":"set_example_input_array(train_loader: DataLoader) -> Tuple[Any, Any]","text":"

    Get an example input array from the train loader.

    Parameters:

    Name Type Description Default train_loader DataLoader

    The DataLoader instance for the training data.

    required

    Returns:

    Type Description Tuple[Any, Any]

    A tuple containing m_onehot and graph from the example batch.

    Source code in src\\rydberggpt\\training\\utils.py
    def set_example_input_array(train_loader: DataLoader) -> Tuple[Any, Any]:\n    \"\"\"\n    Get an example input array from the train loader.\n\n    Args:\n        train_loader (DataLoader): The DataLoader instance for the training data.\n\n    Returns:\n        (Tuple[Any, Any]): A tuple containing m_onehot and graph from the example batch.\n    \"\"\"\n    logging.info(\"Setting example input array...\")\n    example_batch = next(iter(train_loader))\n    return example_batch.m_onehot, example_batch.graph\n
    "},{"location":"reference/utils/","title":"Utilities","text":""},{"location":"reference/utils/#rydberggpt.utils","title":"rydberggpt.utils","text":""},{"location":"reference/utils/#rydberggpt.utils.create_config_from_yaml","title":"create_config_from_yaml(yaml_content: Dict) -> dataclass","text":"

    Create a dataclass config object from the given YAML content.

    Parameters:

    Name Type Description Default yaml_content Dict

    A dictionary containing the YAML content.

    required

    Returns:

    Type Description dataclass

    A dataclass object representing the config.

    Source code in src\\rydberggpt\\utils.py
    def create_config_from_yaml(yaml_content: Dict) -> dataclass:\n    \"\"\"\n    Create a dataclass config object from the given YAML content.\n\n    Args:\n        yaml_content (Dict): A dictionary containing the YAML content.\n\n    Returns:\n        (dataclass): A dataclass object representing the config.\n    \"\"\"\n    flattened_config = flatten_yaml(yaml_content)\n    Config = create_dataclass_from_dict(\"Config\", flattened_config)\n    return Config(**flattened_config)\n
    "},{"location":"reference/utils/#rydberggpt.utils.create_dataclass_from_dict","title":"create_dataclass_from_dict(name: str, data: Dict[str, Any]) -> Type","text":"

    Create a dataclass from a dictionary.

    Parameters:

    Name Type Description Default name str

    The name of the dataclass.

    required data Dict[str, Any]

    A dictionary containing the dataclass fields and their values.

    required

    Returns:

    Type Description Type

    A new dataclass with the specified name and fields.

    Source code in src\\rydberggpt\\utils.py
    def create_dataclass_from_dict(name: str, data: Dict[str, Any]) -> Type:\n    \"\"\"\n    Create a dataclass from a dictionary.\n\n    Args:\n        name (str): The name of the dataclass.\n        data (Dict[str, Any]): A dictionary containing the dataclass fields and their values.\n\n    Returns:\n        (Type): A new dataclass with the specified name and fields.\n    \"\"\"\n    fields = [(key, type(value)) for key, value in data.items()]\n    return make_dataclass(name, fields)\n
    "},{"location":"reference/utils/#rydberggpt.utils.flatten_yaml","title":"flatten_yaml(yaml_config: Dict[str, Dict[str, Any]]) -> Dict[str, Any]","text":"

    Flatten a nested YAML configuration dictionary.

    Parameters:

    Name Type Description Default yaml_config Dict[str, Dict[str, Any]]

    A nested dictionary representing the YAML configuration.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A flattened dictionary with the nested structure removed.

    Source code in src\\rydberggpt\\utils.py
    def flatten_yaml(yaml_config: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:\n    \"\"\"\n    Flatten a nested YAML configuration dictionary.\n\n    Args:\n        yaml_config (Dict[str, Dict[str, Any]]): A nested dictionary representing the YAML configuration.\n\n    Returns:\n        Dict[str, Any]: A flattened dictionary with the nested structure removed.\n    \"\"\"\n    flattened_config = {}\n    for section, section_values in yaml_config.items():\n        if isinstance(section_values, dict):\n            for key, value in section_values.items():\n                flattened_config[f\"{key}\"] = value\n        else:\n            flattened_config[section] = section_values\n    return flattened_config\n
    "},{"location":"reference/utils/#rydberggpt.utils.load_config_file","title":"load_config_file(checkpoint_path: str, config_file: str = 'hparams.yaml') -> str","text":"

    Load the configuration file associated with a given checkpoint.

    Parameters:

    Name Type Description Default checkpoint_path str

    The path to the checkpoint file.

    required config_file str

    The name of the configuration file, defaults to \"hparams.yaml\".

    'hparams.yaml'

    Returns:

    Type Description str

    The path to the configuration file.

    Raises:

    Type Description FileNotFoundError

    If the configuration file is not found in the specified directory.

    Source code in src\\rydberggpt\\utils.py
    def load_config_file(checkpoint_path: str, config_file: str = \"hparams.yaml\") -> str:\n    \"\"\"\n    Load the configuration file associated with a given checkpoint.\n\n    Args:\n        checkpoint_path (str): The path to the checkpoint file.\n        config_file (str, optional): The name of the configuration file, defaults to \"hparams.yaml\".\n\n    Returns:\n        (str): The path to the configuration file.\n\n    Raises:\n        FileNotFoundError: If the configuration file is not found in the specified directory.\n    \"\"\"\n    config_dir = os.path.dirname(os.path.dirname(checkpoint_path))\n\n    if not os.path.exists(os.path.join(config_dir, config_file)):\n        raise FileNotFoundError(f\"No config file found in {config_dir}\")\n\n    return os.path.join(config_dir, config_file)\n
    "},{"location":"reference/utils/#rydberggpt.utils.load_yaml_file","title":"load_yaml_file(path: str, yaml_file_name: str) -> Dict[str, Any]","text":"

    Load the content of a YAML file given its path and file name.

    Parameters:

    Name Type Description Default path str

    The path to the directory containing the YAML file.

    required yaml_file_name str

    The name of the YAML file.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the YAML content.

    Source code in src\\rydberggpt\\utils.py
    def load_yaml_file(path: str, yaml_file_name: str) -> Dict[str, Any]:\n    \"\"\"\n    Load the content of a YAML file given its path and file name.\n\n    Args:\n        path (str): The path to the directory containing the YAML file.\n        yaml_file_name (str): The name of the YAML file.\n\n    Returns:\n        Dict[str, Any]: A dictionary containing the YAML content.\n    \"\"\"\n    if not yaml_file_name.endswith(\".yaml\"):\n        yaml_file_name += \".yaml\"\n\n    yaml_path = os.path.join(path, yaml_file_name)\n    with open(yaml_path, \"r\") as file:\n        yaml_content = yaml.safe_load(file)\n    return yaml_content\n
    "},{"location":"reference/utils/#rydberggpt.utils.save_to_yaml","title":"save_to_yaml(data: Dict[str, Any], filename: str) -> None","text":"

    Save a dictionary to a file in YAML format.

    Parameters:

    Name Type Description Default data Dict[str, Any]

    The dictionary to be saved.

    required filename str

    The path to the file where the dictionary will be saved.

    required Source code in src\\rydberggpt\\utils.py
    def save_to_yaml(data: Dict[str, Any], filename: str) -> None:\n    \"\"\"\n    Save a dictionary to a file in YAML format.\n\n    Args:\n        data (Dict[str, Any]): The dictionary to be saved.\n        filename (str): The path to the file where the dictionary will be saved.\n    \"\"\"\n    with open(filename, \"w\") as file:\n        yaml.dump(data, file)\n
    "},{"location":"reference/utils/#rydberggpt.utils.shift_inputs","title":"shift_inputs(tensor: torch.Tensor) -> torch.Tensor","text":"

    Shifts the second dimension (S) of the input tensor by one position to the right and pads the beginning with zeros.

    Parameters:

    Name Type Description Default tensor Tensor

    The input tensor of shape [B, S, D].

    required

    Returns:

    Type Description Tensor

    The resulting tensor after the shift and pad operation.

    Source code in src\\rydberggpt\\utils.py
    def shift_inputs(tensor: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Shifts the second dimension (S) of the input tensor by one position to the right\n    and pads the beginning with zeros.\n\n    Args:\n        tensor (torch.Tensor): The input tensor of shape [B, S, D].\n\n    Returns:\n        (torch.Tensor): The resulting tensor after the shift and pad operation.\n    \"\"\"\n    B, _, D = tensor.size()\n    zero_padding = torch.zeros((B, 1, D), device=tensor.device, dtype=tensor.dtype)\n    shifted_tensor = torch.cat((zero_padding, tensor[:, :-1, :]), dim=1)\n    return shifted_tensor\n
    "},{"location":"reference/utils/#rydberggpt.utils.time_and_log","title":"time_and_log(fn: Callable[..., Any]) -> Callable[..., Any]","text":"

    Decorator function to measure the time taken by a function to execute and log it.

    Parameters:

    Name Type Description Default fn Callable[..., Any]

    The function to be wrapped.

    required

    Returns:

    Type Description Callable[..., Any]

    Callable[..., Any]: The wrapped function.

    Usage
    @time_and_log\ndef my_function(arg1, arg2):\n    # function logic here\n
    Source code in src\\rydberggpt\\utils.py
    def time_and_log(fn: Callable[..., Any]) -> Callable[..., Any]:\n    \"\"\"\n    Decorator function to measure the time taken by a function to execute and log it.\n\n    Args:\n        fn (Callable[..., Any]): The function to be wrapped.\n\n    Returns:\n        Callable[..., Any]: The wrapped function.\n\n    Usage:\n        ```py\n        @time_and_log\n        def my_function(arg1, arg2):\n            # function logic here\n        ```\n    \"\"\"\n\n    def wrapped(*args: Any, **kwargs: Any) -> Any:\n        start_time = time.time()\n        result = fn(*args, **kwargs)\n        elapsed_time = time.time() - start_time\n\n        # Convert elapsed time to HH:MM:SS format\n        formatted_time = str(timedelta(seconds=elapsed_time))\n\n        logging.info(f\"{fn.__name__} took {formatted_time} to run.\")\n        return result\n\n    return wrapped\n
    "},{"location":"reference/utils/#rydberggpt.utils.to_one_hot","title":"to_one_hot(data: Union[torch.Tensor, List[int], Tuple[int]], num_classes: int) -> torch.Tensor","text":"

    Converts the input data into one-hot representation.

    Parameters:

    Name Type Description Default data Union[Tensor, List[int], Tuple[int]]

    Input data to be converted into one-hot. It can be a 1D tensor, list or tuple of integers.

    required num_classes int

    Number of classes in the one-hot representation.

    required

    Returns:

    Name Type Description data Tensor

    The one-hot representation of the input data.

    Source code in src\\rydberggpt\\utils.py
    def to_one_hot(\n    data: Union[torch.Tensor, List[int], Tuple[int]], num_classes: int\n) -> torch.Tensor:\n    \"\"\"\n    Converts the input data into one-hot representation.\n\n    Args:\n        data: Input data to be converted into one-hot. It can be a 1D tensor, list or tuple of integers.\n        num_classes: Number of classes in the one-hot representation.\n\n    Returns:\n        data (torch.Tensor): The one-hot representation of the input data.\n    \"\"\"\n\n    if isinstance(data, (list, tuple)):\n        data = torch.tensor(data, dtype=torch.int64)\n    elif not isinstance(data, torch.Tensor):\n        raise TypeError(\"Input data must be a tensor, list or tuple of integers.\")\n\n    data = nn.functional.one_hot(data.long(), num_classes)\n\n    return data.to(torch.float)\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt","title":"rydberggpt.utils_ckpt","text":""},{"location":"reference/utils/#rydberggpt.utils_ckpt.find_best_ckpt","title":"find_best_ckpt(log_dir: str) -> Optional[str]","text":"

    Find the best checkpoint file (with the lowest training loss) in the specified log directory.

    Parameters:

    Name Type Description Default log_dir str

    The path to the log directory containing the checkpoint files.

    required

    Returns:

    Type Description str

    The path to the checkpoint file with the lowest training loss.

    Source code in src\\rydberggpt\\utils_ckpt.py
    def find_best_ckpt(log_dir: str) -> Optional[str]:\n    \"\"\"\n    Find the best checkpoint file (with the lowest training loss) in the specified log directory.\n\n    Args:\n        log_dir (str): The path to the log directory containing the checkpoint files.\n\n    Returns:\n        (str): The path to the checkpoint file with the lowest training loss.\n    \"\"\"\n    log_dir = os.path.join(log_dir, \"checkpoints\")\n    ckpt_files = [file for file in os.listdir(log_dir) if file.endswith(\".ckpt\")]\n\n    if not ckpt_files:\n        raise FileNotFoundError(f\"No checkpoint files found in {log_dir}\")\n\n    # Extract the training loss from the ckpt filenames\n    ckpt_losses = []\n    for file in ckpt_files:\n        match = re.search(r\"train_loss=(\\d+\\.\\d+)\", file)\n        if match:\n            ckpt_losses.append(float(match.group(1)))\n        else:\n            ckpt_losses.append(float(\"inf\"))\n\n    # Find the index of the ckpt with the lowest training loss\n    best_ckpt_index = ckpt_losses.index(min(ckpt_losses))\n    best_ckpt = ckpt_files[best_ckpt_index]\n\n    return os.path.join(log_dir, best_ckpt)\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt.find_latest_ckpt","title":"find_latest_ckpt(log_dir: str)","text":"

    Find the latest checkpoint file (based on modification time) in the specified log directory.

    Parameters:

    Name Type Description Default log_dir str

    The path to the log directory containing the checkpoint files.

    required

    Returns:

    Type Description str

    The path to the latest checkpoint file.

    Source code in src\\rydberggpt\\utils_ckpt.py
    def find_latest_ckpt(log_dir: str):\n    \"\"\"\n    Find the latest checkpoint file (based on modification time) in the specified log directory.\n\n    Args:\n        log_dir (str): The path to the log directory containing the checkpoint files.\n\n    Returns:\n        (str): The path to the latest checkpoint file.\n    \"\"\"\n    log_dir = os.path.join(log_dir, \"checkpoints\")\n    ckpt_files = [file for file in os.listdir(log_dir) if file.endswith(\".ckpt\")]\n\n    if not ckpt_files:\n        raise FileNotFoundError(f\"No checkpoint files found in {log_dir}\")\n\n    ckpt_files.sort(key=lambda x: os.path.getmtime(os.path.join(log_dir, x)))\n    latest_ckpt = ckpt_files[-1]\n    return os.path.join(log_dir, latest_ckpt)\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt.get_ckpt_path","title":"get_ckpt_path(from_ckpt: int, log_dir: str = 'logs/lightning_logs') -> str","text":"

    Get the checkpoint path from a specified checkpoint version number.

    Parameters:

    Name Type Description Default from_ckpt int

    The version number of the checkpoint.

    required log_dir str

    The root directory where checkpoints are stored. Defaults to \"logs/lightning_logs\".

    'logs/lightning_logs'

    Returns:

    Type Description str

    The path to the specified checkpoint version directory.

    Raises:

    Type Description FileNotFoundError

    If no checkpoint is found in the specified directory.

    Source code in src\\rydberggpt\\utils_ckpt.py
    def get_ckpt_path(from_ckpt: int, log_dir: str = \"logs/lightning_logs\") -> str:\n    \"\"\"\n    Get the checkpoint path from a specified checkpoint version number.\n\n    Args:\n        from_ckpt (int): The version number of the checkpoint.\n        log_dir (str, optional): The root directory where checkpoints are stored.\n                                Defaults to \"logs/lightning_logs\".\n\n    Returns:\n        (str): The path to the specified checkpoint version directory.\n\n    Raises:\n        FileNotFoundError: If no checkpoint is found in the specified directory.\n    \"\"\"\n    log_dir = os.path.join(log_dir, f\"version_{from_ckpt}\")\n\n    if log_dir is None:\n        raise FileNotFoundError(f\"No checkpoint found in {log_dir}\")\n\n    return log_dir\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt.get_model_from_ckpt","title":"get_model_from_ckpt(log_path: str, model: nn.Module, ckpt: str = 'best', trainer: pl.LightningModule = RydbergGPTTrainer) -> nn.Module","text":"

    Load a model from a specified checkpoint file in the log directory.

    Parameters:

    Name Type Description Default log_path str

    The path to the log directory containing the checkpoint files.

    required model Module

    The model class to load.

    required ckpt str

    The checkpoint to load. Must be either \"best\" or \"latest\". Defaults to \"best\".

    'best' trainer LightningModule

    The trainer class to use for loading the model. Defaults to RydbergGPTTrainer.

    RydbergGPTTrainer

    Returns:

    Type Description Module

    The loaded model.

    Raises:

    Type Description ValueError

    If the value of ckpt is not \"best\" or \"latest\".

    Source code in src\\rydberggpt\\utils_ckpt.py
    def get_model_from_ckpt(\n    log_path: str,\n    model: nn.Module,\n    ckpt: str = \"best\",\n    trainer: pl.LightningModule = RydbergGPTTrainer,\n) -> nn.Module:\n    \"\"\"\n    Load a model from a specified checkpoint file in the log directory.\n\n    Args:\n        log_path (str): The path to the log directory containing the checkpoint files.\n        model (nn.Module): The model class to load.\n        ckpt (str, optional): The checkpoint to load. Must be either \"best\" or \"latest\". Defaults to \"best\".\n        trainer (pl.LightningModule, optional): The trainer class to use for loading the model. Defaults to RydbergGPTTrainer.\n\n    Returns:\n        (nn.Module): The loaded model.\n\n    Raises:\n        ValueError: If the value of ckpt is not \"best\" or \"latest\".\n    \"\"\"\n    if ckpt == \"best\":\n        ckpt_path = find_best_ckpt(log_path)\n    elif ckpt == \"last\":\n        ckpt_path = find_latest_ckpt(log_path)\n    else:\n        raise ValueError(f\"ckpt must be 'best' or 'latest', not {ckpt}\")\n\n    yaml_dict = load_yaml_file(log_path, \"hparams.yaml\")\n    config = create_config_from_yaml(yaml_dict)\n\n    rydberg_gpt_trainer = trainer.load_from_checkpoint(\n        ckpt_path,\n        model=model,\n        config=config,\n        logger=None,\n        example_input_array=None,\n    )\n    return rydberg_gpt_trainer.model\n
    "},{"location":"reference/models/","title":"Models","text":""},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction","title":"rydberggpt.models.rydberg_decoder_wavefunction","text":""},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction","title":"RydbergDecoderWavefunction","text":"

    Bases: RydbergEncoderDecoder

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    class RydbergDecoderWavefunction(RydbergEncoderDecoder):\n    def __init__(\n        self,\n        cond: Batch,\n        encoder: Encoder,\n        decoder: Decoder,\n        src_embed: nn.Module,\n        tgt_embed: nn.Module,\n        generator: Generator,\n        config=None,\n    ):\n        super().__init__(\n            encoder.eval(),\n            decoder,\n            src_embed.eval(),\n            tgt_embed,\n            generator,\n            config,\n        )\n\n        if hasattr(cond, \"num_graphs\") and cond.num_graphs > 1:\n            raise ValueError(\"cond should represent a single Hamiltonian/graph\")\n\n        self.N = cond.num_nodes\n        self.cond = cond\n\n        for p in self.encoder.parameters():\n            p.requires_grad_(False)\n        for p in self.src_embed.parameters():\n            p.requires_grad_(False)\n\n        memory, batch_mask = self.encode(cond)\n        self.register_buffer(\"memory\", memory)\n        self.register_buffer(\"batch_mask\", batch_mask)\n        pass\n\n    def forward(self, tgt: torch.Tensor) -> torch.Tensor:\n        memory = self.memory.repeat([*tgt.shape[:-2], 1, 1])\n        batch_mask = self.batch_mask.repeat([*tgt.shape[:-2], 1])\n\n        return self.decode(tgt, memory, batch_mask)\n\n    @classmethod\n    def from_rydberg_encoder_decoder(cls, cond: Batch, model: RydbergEncoderDecoder):\n        \"\"\"\n        Create RydbergDecoderWavefunction from a RydbergEncodeDecoder model and a Hamiltonian/graph.\n\n        Args:\n            cond (Batch): The Hamiltonian/graph.\n            model (RydbergEncoderDecoder): The model used to generate a RydbergDecoderWavefunction.\n\n        Returns:\n            (RydbergDecoderWavefunction): The wavefunction taken from a trained RydergEncoderDecoder model for the groundstate of the Hamiltonian/graph specified by cond.\n\n        \"\"\"\n        return cls(\n            cond,\n            model.encoder,\n            model.decoder,\n            
model.src_embed,\n            model.tgt_embed,\n            model.generator,\n            model.config,\n        )\n\n    pass\n\n    def get_log_probs(self, x: torch.Tensor):\n        \"\"\"\n        Compute the log probabilities of a given input tensor.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n\n        Returns:\n            (torch.Tensor): The log probabilities.\n        \"\"\"\n\n        assert (\n            len(x.shape) == 3 and x.shape[-1] == 2\n        ), \"The input must be one hot encoded\"\n\n        y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n        y = y.to(x)  # Match dtype and device\n        y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n        y = self.forward(y)  # EncoderDecoder forward pass\n        y = self.generator(y)  # Conditional log probs\n\n        y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n        return y\n\n    def get_samples(\n        self,\n        batch_size: int,\n        fmt_onehot: bool = True,\n        requires_grad: bool = False,\n        verbose: bool = True,\n    ):\n        \"\"\"\n        Generate samples using the forward pass and sampling from the conditional probabilities.\n        The samples can be returned either in one-hot encoding format or in label format,\n        according to the `fmt_onehot` argument.\n\n        Args:\n            batch_size (int): The number of samples to generate.\n            fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n              in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n            requires_grad (bool, optional): A flag to determine if grad is needed when sampling. Defaults to False,\n            verbose (bool, optional): A flag indicating whether to print sampling progress. Defaults to True,\n\n        Returns:\n            (torch.Tensor): A tensor containing the generated samples. 
The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. The samples are padded according to the number of nodes in each graph within `cond`.\n        \"\"\"\n        if verbose:\n            print(\"\")\n\n        num_atoms = self.N\n\n        m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n        for i in range(num_atoms):\n            if verbose:\n                print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n                sys.stdout.flush()\n\n            y = self.forward(m)  # EncoderDecoder forward pass\n            y = self.generator(y)  # Conditional log probs\n            y = y[:, -1, :]  # Next conditional log probs\n\n            if requires_grad:\n                y = F.gumbel_softmax(logits=y, tau=1, hard=True)[..., None, :]\n\n            else:\n                y = torch.distributions.Categorical(logits=y).sample(\n                    [\n                        1,\n                    ]\n                )  # Sample from next conditional log probs\n                y = y.reshape(y.shape[1], 1)  # Reshape\n                y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n            m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n        if fmt_onehot:\n            m = m[:, 1:, :]  # Remove initial token\n        else:\n            m = m[:, 1:, -1]  # Remove initial token and put into label format\n\n        if verbose:\n            print(\"\")\n        return m\n\n    def get_x_magnetization(\n        self,\n        samples: torch.Tensor,  # dtype=torch.int64\n    ):\n        \"\"\"\n        Calculates x magnetization of the model.\n\n        Args:\n            samples (torch.Tensor): Samples drawn from model based on cond.\n\n        Returns:\n            (torch.Tensor): A tensor containing the estimated x magnetization of each sample.\n        \"\"\"\n\n        # Create all possible states achievable by a 
single spin flip\n        flipped = (samples[:, None, :] + torch.eye(samples.shape[-1])[None, ...]) % 2\n        flipped = flipped.reshape(-1, samples.shape[-1])\n\n        # Get propabilities of sampled states and the single spin flipped states\n        sample_log_probs = self.get_log_probs(to_one_hot(samples, 2))\n        flipped_log_probs = self.get_log_probs(to_one_hot(flipped, 2))\n        flipped_log_probs = flipped_log_probs.reshape(-1, samples.shape[-1])\n\n        # Calculate ratio of the wavefunction for the sampled and flipped states\n        log_psi_ratio = 0.5 * (flipped_log_probs - sample_log_probs[:, None])\n        psi_ratio = torch.exp(log_psi_ratio)\n\n        x_magnetization = psi_ratio.sum(-1)\n        return x_magnetization\n\n    def get_rydberg_energy(\n        self,\n        samples: torch.Tensor,  # dtype=torch.int64\n        undo_sample_path=None,\n        undo_sample_path_args=None,\n    ) -> torch.Tensor:\n        \"\"\"\n        Calculates energy of the model based on the Hamiltonian defined by cond (graph).\n\n        Args:\n            samples (torch.Tensor): Samples drawn from model based on cond.\n           undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n           undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n        Returns:\n            (torch.Tensor): A tensor containing the estimated energy of each sample alongside its decomposition into terms.\n        \"\"\"\n\n        samples = samples\n        cond = self.cond\n\n        delta = cond.x[:, 0]  # Detuning coeffs\n        omega = cond.x[0, 1]  # Rabi frequency\n        # beta = cond.x[0, 2]  # Inverse Temperature\n        Rb = cond.x[0, 3]  # Rydberg Blockade radius\n\n        # Estimate interaction/Rydberg blockade term\n        if undo_sample_path is not None:\n            unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n        else:\n            
unpathed_samples = samples\n\n        interaction = (\n            (\n                unpathed_samples[..., cond.edge_index].prod(dim=-2)\n                * cond.edge_attr[None, ...]\n            ).sum(dim=-1)\n            * Rb**6\n            * omega\n        )\n\n        # Estimate detuning term\n        detuning = (delta * unpathed_samples).sum(-1)  # sum over sequence length\n\n        # Estimate sigma_x\n        x_magnetization = self.get_x_magnetization(samples)\n        offdiag_energy = 0.5 * omega * x_magnetization\n\n        # Diagonal part of energy\n        diag_energy = interaction - detuning\n\n        energy = diag_energy - offdiag_energy  # Energy estimate\n\n        return torch.stack(\n            [\n                energy,\n                interaction,\n                detuning,\n                diag_energy,\n                offdiag_energy,\n            ]\n        ).T\n\n    def variational_loss(\n        self, batch_size: int, undo_sample_path, undo_sample_path_args\n    ):\n        samples = self.get_samples(\n            batch_size=batch_size, fmt_onehot=False, requires_grad=True, verbose=False\n        )\n\n        N = self.N\n        omega = self.cond.x[0, 1]\n\n        energy = self.get_rydberg_energy(\n            samples=samples,\n            undo_sample_path=undo_sample_path,\n            undo_sample_path_args=undo_sample_path_args,\n        )[..., 0].mean() / (N * omega)\n\n        return energy\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.from_rydberg_encoder_decoder","title":"from_rydberg_encoder_decoder(cond: Batch, model: RydbergEncoderDecoder) classmethod","text":"

    Create RydbergDecoderWavefunction from a RydbergEncodeDecoder model and a Hamiltonian/graph.

    Parameters:

    Name Type Description Default cond Batch

    The Hamiltonian/graph.

    required model RydbergEncoderDecoder

    The model used to generate a RydbergDecoderWavefunction.

    required

    Returns:

    Type Description RydbergDecoderWavefunction

    The wavefunction taken from a trained RydergEncoderDecoder model for the groundstate of the Hamiltonian/graph specified by cond.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    @classmethod\ndef from_rydberg_encoder_decoder(cls, cond: Batch, model: RydbergEncoderDecoder):\n    \"\"\"\n    Create RydbergDecoderWavefunction from a RydbergEncodeDecoder model and a Hamiltonian/graph.\n\n    Args:\n        cond (Batch): The Hamiltonian/graph.\n        model (RydbergEncoderDecoder): The model used to generate a RydbergDecoderWavefunction.\n\n    Returns:\n        (RydbergDecoderWavefunction): The wavefunction taken from a trained RydergEncoderDecoder model for the groundstate of the Hamiltonian/graph specified by cond.\n\n    \"\"\"\n    return cls(\n        cond,\n        model.encoder,\n        model.decoder,\n        model.src_embed,\n        model.tgt_embed,\n        model.generator,\n        model.config,\n    )\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_log_probs","title":"get_log_probs(x: torch.Tensor)","text":"

    Compute the log probabilities of a given input tensor.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required

    Returns:

    Type Description Tensor

    The log probabilities.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_log_probs(self, x: torch.Tensor):\n    \"\"\"\n    Compute the log probabilities of a given input tensor.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n\n    Returns:\n        (torch.Tensor): The log probabilities.\n    \"\"\"\n\n    assert (\n        len(x.shape) == 3 and x.shape[-1] == 2\n    ), \"The input must be one hot encoded\"\n\n    y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n    y = y.to(x)  # Match dtype and device\n    y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n    y = self.forward(y)  # EncoderDecoder forward pass\n    y = self.generator(y)  # Conditional log probs\n\n    y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n    return y\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_rydberg_energy","title":"get_rydberg_energy(samples: torch.Tensor, undo_sample_path=None, undo_sample_path_args=None) -> torch.Tensor","text":"

    Calculates energy of the model based on the Hamiltonian defined by cond (graph).

    Parameters:

    Name Type Description Default samples Tensor

    Samples drawn from model based on cond.

    required

    undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph. undo_sample_path_args (tuple): Additional arguments for undo_sample_path.

    Returns:

    Type Description Tensor

    A tensor containing the estimated energy of each sample alongside its decomposition into terms.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_rydberg_energy(\n    self,\n    samples: torch.Tensor,  # dtype=torch.int64\n    undo_sample_path=None,\n    undo_sample_path_args=None,\n) -> torch.Tensor:\n    \"\"\"\n    Calculates energy of the model based on the Hamiltonian defined by cond (graph).\n\n    Args:\n        samples (torch.Tensor): Samples drawn from model based on cond.\n       undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n       undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated energy of each sample alongside its decomposition into terms.\n    \"\"\"\n\n    samples = samples\n    cond = self.cond\n\n    delta = cond.x[:, 0]  # Detuning coeffs\n    omega = cond.x[0, 1]  # Rabi frequency\n    # beta = cond.x[0, 2]  # Inverse Temperature\n    Rb = cond.x[0, 3]  # Rydberg Blockade radius\n\n    # Estimate interaction/Rydberg blockade term\n    if undo_sample_path is not None:\n        unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n    else:\n        unpathed_samples = samples\n\n    interaction = (\n        (\n            unpathed_samples[..., cond.edge_index].prod(dim=-2)\n            * cond.edge_attr[None, ...]\n        ).sum(dim=-1)\n        * Rb**6\n        * omega\n    )\n\n    # Estimate detuning term\n    detuning = (delta * unpathed_samples).sum(-1)  # sum over sequence length\n\n    # Estimate sigma_x\n    x_magnetization = self.get_x_magnetization(samples)\n    offdiag_energy = 0.5 * omega * x_magnetization\n\n    # Diagonal part of energy\n    diag_energy = interaction - detuning\n\n    energy = diag_energy - offdiag_energy  # Energy estimate\n\n    return torch.stack(\n        [\n            energy,\n            interaction,\n            detuning,\n            diag_energy,\n            offdiag_energy,\n        ]\n    ).T\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_samples","title":"get_samples(batch_size: int, fmt_onehot: bool = True, requires_grad: bool = False, verbose: bool = True)","text":"

    Generate samples using the forward pass and sampling from the conditional probabilities. The samples can be returned either in one-hot encoding format or in label format, according to the fmt_onehot argument.

    Parameters:

    Name Type Description Default batch_size int

    The number of samples to generate.

    required fmt_onehot bool

    A flag to indicate whether to return the samples in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.

    True requires_grad bool

    A flag to determine if grad is needed when sampling. Defaults to False,

    False verbose bool

    A flag indicating whether to print sampling progress. Defaults to True,

    True

    Returns:

    Type Description Tensor

    A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. The samples are padded according to the number of nodes in each graph within cond.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_samples(\n    self,\n    batch_size: int,\n    fmt_onehot: bool = True,\n    requires_grad: bool = False,\n    verbose: bool = True,\n):\n    \"\"\"\n    Generate samples using the forward pass and sampling from the conditional probabilities.\n    The samples can be returned either in one-hot encoding format or in label format,\n    according to the `fmt_onehot` argument.\n\n    Args:\n        batch_size (int): The number of samples to generate.\n        fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n          in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n        requires_grad (bool, optional): A flag to determine if grad is needed when sampling. Defaults to False,\n        verbose (bool, optional): A flag indicating whether to print sampling progress. Defaults to True,\n\n    Returns:\n        (torch.Tensor): A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. 
The samples are padded according to the number of nodes in each graph within `cond`.\n    \"\"\"\n    if verbose:\n        print(\"\")\n\n    num_atoms = self.N\n\n    m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n    for i in range(num_atoms):\n        if verbose:\n            print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n            sys.stdout.flush()\n\n        y = self.forward(m)  # EncoderDecoder forward pass\n        y = self.generator(y)  # Conditional log probs\n        y = y[:, -1, :]  # Next conditional log probs\n\n        if requires_grad:\n            y = F.gumbel_softmax(logits=y, tau=1, hard=True)[..., None, :]\n\n        else:\n            y = torch.distributions.Categorical(logits=y).sample(\n                [\n                    1,\n                ]\n            )  # Sample from next conditional log probs\n            y = y.reshape(y.shape[1], 1)  # Reshape\n            y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n        m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n    if fmt_onehot:\n        m = m[:, 1:, :]  # Remove initial token\n    else:\n        m = m[:, 1:, -1]  # Remove initial token and put into label format\n\n    if verbose:\n        print(\"\")\n    return m\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_x_magnetization","title":"get_x_magnetization(samples: torch.Tensor)","text":"

    Calculates x magnetization of the model.

    Parameters:

    Name Type Description Default samples Tensor

    Samples drawn from model based on cond.

    required

    Returns:

    Type Description Tensor

    A tensor containing the estimated x magnetization of each sample.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_x_magnetization(\n    self,\n    samples: torch.Tensor,  # dtype=torch.int64\n):\n    \"\"\"\n    Calculates x magnetization of the model.\n\n    Args:\n        samples (torch.Tensor): Samples drawn from model based on cond.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated x magnetization of each sample.\n    \"\"\"\n\n    # Create all possible states achievable by a single spin flip\n    flipped = (samples[:, None, :] + torch.eye(samples.shape[-1])[None, ...]) % 2\n    flipped = flipped.reshape(-1, samples.shape[-1])\n\n    # Get propabilities of sampled states and the single spin flipped states\n    sample_log_probs = self.get_log_probs(to_one_hot(samples, 2))\n    flipped_log_probs = self.get_log_probs(to_one_hot(flipped, 2))\n    flipped_log_probs = flipped_log_probs.reshape(-1, samples.shape[-1])\n\n    # Calculate ratio of the wavefunction for the sampled and flipped states\n    log_psi_ratio = 0.5 * (flipped_log_probs - sample_log_probs[:, None])\n    psi_ratio = torch.exp(log_psi_ratio)\n\n    x_magnetization = psi_ratio.sum(-1)\n    return x_magnetization\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder","title":"rydberggpt.models.rydberg_encoder_decoder","text":""},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder.RydbergEncoderDecoder","title":"RydbergEncoderDecoder","text":"

    Bases: EncoderDecoder

    RydbergTransformer is a specific implementation of the Encoder-Decoder architecture that uses an encoder and decoder composed of multiple layers of EncoderLayer and DecoderLayer modules, respectively. The encoder and decoder are followed by an embedding layer and a generator layer.

    Parameters:

    Name Type Description Default encoder Encoder[EncoderLayer]

    The encoder module.

    required decoder Decoder[DecoderLayer]

    The decoder module.

    required tgt_embed Module

    The target embeddings module.

    required generator Generator

    The generator module.

    required config dict

    A dictionary of configuration options. Defaults to None.

    None **kwargs

    Additional keyword arguments.

    required Source code in src\\rydberggpt\\models\\rydberg_encoder_decoder.py
    class RydbergEncoderDecoder(EncoderDecoder):\n    \"\"\"\n    RydbergTransformer is a specific implementation of the Encoder-Decoder architecture\n    that uses an encoder and decoder composed of multiple layers of EncoderLayer and DecoderLayer\n    modules, respectively. The encoder and decoder are followed by an embedding layer and a generator\n    layer.\n\n    Args:\n        encoder (Encoder[EncoderLayer]): The encoder module.\n        decoder (Decoder[DecoderLayer]): The decoder module.\n        tgt_embed (nn.Module): The target embeddings module.\n        generator (Generator): The generator module.\n        config (dict, optional): A dictionary of configuration options. Defaults to None.\n        **kwargs: Additional keyword arguments.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        encoder: Encoder,\n        decoder: Decoder,\n        src_embed: nn.Module,\n        tgt_embed: nn.Module,\n        generator: Generator,\n        config=None,\n    ):\n        super().__init__(encoder, decoder, src_embed, tgt_embed, generator)\n        self.config = config\n\n    @torch.no_grad()\n    def get_log_probs(self, x: torch.Tensor, cond: Batch):\n        \"\"\"\n        Compute the log probabilities of a given input tensor.\n\n        Parameters:\n            x (torch.Tensor): The input tensor.\n            cond (Batch): The conditional graph structure.\n\n        Returns:\n            (torch.Tensor): The log probabilities.\n        \"\"\"\n\n        if not hasattr(cond, \"num_graphs\"):\n            cond = Batch.from_data_list([cond.clone() for _ in range(len(x))])\n\n        assert (\n            len(x.shape) == 3 and x.shape[-1] == 2\n        ), \"The input must be one hot encoded\"\n\n        y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n        y = y.to(x)  # Match dtype and device\n        y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n        y = self.forward(y, cond)  # EncoderDecoder forward pass\n    
    y = self.generator(y)  # Conditional log probs\n\n        y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n        return y\n\n    @torch.no_grad()\n    def get_samples(\n        self,\n        batch_size: int,\n        cond: Batch,\n        num_atoms: int,\n        fmt_onehot: bool = True,\n    ):\n        \"\"\"\n        Generate samples using the forward pass and sampling from the conditional probabilities.\n        The samples can be returned either in one-hot encoding format or in label format,\n        according to the `fmt_onehot` argument.\n\n        Args:\n            batch_size (int): The number of samples to generate.\n            cond (torch_geometric.data.Batch): The batch of conditional graph structures.\n            num_atoms (int): The number of atoms to sample. For num_atoms > num_nodes\n              in each graph within `cond`, the extra atoms are padded with zeros (onehot) or nan (label).\n            fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n              in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n\n        Returns:\n            (torch.Tensor): A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. 
The samples are padded according to the number of nodes in each graph within `cond`.\n        \"\"\"\n\n        if not hasattr(cond, \"num_graphs\"):\n            cond = Batch.from_data_list([cond.clone() for _ in range(batch_size)])\n\n        assert (\n            cond.num_graphs == batch_size\n        ), \"Incompatible arguments, batch_size ({}) does not match cond.num_graphs ({})\".format(\n            batch_size, cond.num_graphs\n        )\n\n        m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n        for i in range(num_atoms):\n            print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n            sys.stdout.flush()\n\n            y = self.forward(m, cond)  # EncoderDecoder forward pass\n            y = self.generator(y)  # Conditional log probs\n            y = y[:, -1, :]  # Next conditional log probs\n            y = torch.distributions.Categorical(logits=y).sample(\n                [\n                    1,\n                ]\n            )  # Sample from next conditional log probs\n            y = y.reshape(y.shape[1], 1)  # Reshape\n            y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n            m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n        if fmt_onehot:\n            for i in range(m.shape[0]):\n                # Depending on num_nodes/num_atoms in graph pad samples with [0,0]\n                m[i, cond[i].num_nodes + 1 :, :] = 0\n\n            m = m[:, 1:, :]  # Remove initial token\n        else:\n            m = m[:, :, -1]\n\n            for i in range(m.shape[0]):\n                # Depending on num_nodes/num_atoms in graph pad samples with nan\n                m[i, cond[i].num_nodes + 1 :] = torch.nan\n\n            m = m[:, 1:]\n\n        print(\"\")\n        return m\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder.RydbergEncoderDecoder.get_log_probs","title":"get_log_probs(x: torch.Tensor, cond: Batch)","text":"

    Compute the log probabilities of a given input tensor.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required cond Batch

    The conditional graph structure.

    required

    Returns:

    Type Description Tensor

    The log probabilities.

    Source code in src\\rydberggpt\\models\\rydberg_encoder_decoder.py
    @torch.no_grad()\ndef get_log_probs(self, x: torch.Tensor, cond: Batch):\n    \"\"\"\n    Compute the log probabilities of a given input tensor.\n\n    Parameters:\n        x (torch.Tensor): The input tensor.\n        cond (Batch): The conditional graph structure.\n\n    Returns:\n        (torch.Tensor): The log probabilities.\n    \"\"\"\n\n    if not hasattr(cond, \"num_graphs\"):\n        cond = Batch.from_data_list([cond.clone() for _ in range(len(x))])\n\n    assert (\n        len(x.shape) == 3 and x.shape[-1] == 2\n    ), \"The input must be one hot encoded\"\n\n    y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n    y = y.to(x)  # Match dtype and device\n    y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n    y = self.forward(y, cond)  # EncoderDecoder forward pass\n    y = self.generator(y)  # Conditional log probs\n\n    y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n    return y\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder.RydbergEncoderDecoder.get_samples","title":"get_samples(batch_size: int, cond: Batch, num_atoms: int, fmt_onehot: bool = True)","text":"

    Generate samples using the forward pass and sampling from the conditional probabilities. The samples can be returned either in one-hot encoding format or in label format, according to the fmt_onehot argument.

    Parameters:

    Name Type Description Default batch_size int

    The number of samples to generate.

    required cond Batch

    The batch of conditional graph structures.

    required num_atoms int

    The number of atoms to sample. For num_atoms > num_nodes in each graph within cond, the extra atoms are padded with zeros (onehot) or nan (label).

    required fmt_onehot bool

    A flag to indicate whether to return the samples in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.

    True

    Returns:

    Type Description Tensor

    A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. The samples are padded according to the number of nodes in each graph within cond.

    Source code in src\\rydberggpt\\models\\rydberg_encoder_decoder.py
    @torch.no_grad()\ndef get_samples(\n    self,\n    batch_size: int,\n    cond: Batch,\n    num_atoms: int,\n    fmt_onehot: bool = True,\n):\n    \"\"\"\n    Generate samples using the forward pass and sampling from the conditional probabilities.\n    The samples can be returned either in one-hot encoding format or in label format,\n    according to the `fmt_onehot` argument.\n\n    Args:\n        batch_size (int): The number of samples to generate.\n        cond (torch_geometric.data.Batch): The batch of conditional graph structures.\n        num_atoms (int): The number of atoms to sample. For num_atoms > num_nodes\n          in each graph within `cond`, the extra atoms are padded with zeros (onehot) or nan (label).\n        fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n          in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n\n    Returns:\n        (torch.Tensor): A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. 
The samples are padded according to the number of nodes in each graph within `cond`.\n    \"\"\"\n\n    if not hasattr(cond, \"num_graphs\"):\n        cond = Batch.from_data_list([cond.clone() for _ in range(batch_size)])\n\n    assert (\n        cond.num_graphs == batch_size\n    ), \"Incompatible arguments, batch_size ({}) does not match cond.num_graphs ({})\".format(\n        batch_size, cond.num_graphs\n    )\n\n    m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n    for i in range(num_atoms):\n        print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n        sys.stdout.flush()\n\n        y = self.forward(m, cond)  # EncoderDecoder forward pass\n        y = self.generator(y)  # Conditional log probs\n        y = y[:, -1, :]  # Next conditional log probs\n        y = torch.distributions.Categorical(logits=y).sample(\n            [\n                1,\n            ]\n        )  # Sample from next conditional log probs\n        y = y.reshape(y.shape[1], 1)  # Reshape\n        y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n        m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n    if fmt_onehot:\n        for i in range(m.shape[0]):\n            # Depending on num_nodes/num_atoms in graph pad samples with [0,0]\n            m[i, cond[i].num_nodes + 1 :, :] = 0\n\n        m = m[:, 1:, :]  # Remove initial token\n    else:\n        m = m[:, :, -1]\n\n        for i in range(m.shape[0]):\n            # Depending on num_nodes/num_atoms in graph pad samples with nan\n            m[i, cond[i].num_nodes + 1 :] = torch.nan\n\n        m = m[:, 1:]\n\n    print(\"\")\n    return m\n
    "},{"location":"reference/models/graph/","title":"Graph","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding","title":"rydberggpt.models.graph_embedding","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers","title":"layers","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers.GraphLayer","title":"GraphLayer","text":"

    Bases: Module

    Source code in src\\rydberggpt\\models\\graph_embedding\\layers.py
    class GraphLayer(nn.Module):\n    def __init__(self, graph_layer: nn.Module, norm_layer: nn.Module, dropout: float):\n        \"\"\"\n        A GraphLayer is a single layer in a graph neural network, consisting of\n        a graph layer, normalization layer, and dropout.\n\n        Args:\n            graph_layer (nn.Module): A graph layer, e.g., GCNConv, GATConv, etc.\n            norm_layer (nn.Module): A normalization layer, e.g., LayerNorm or BatchNorm.\n            dropout (float): Dropout probability.\n        \"\"\"\n        super(GraphLayer, self).__init__()\n        self.graph_layer = graph_layer\n        self.norm = norm_layer\n        self.dropout = nn.Dropout(dropout)\n\n    def forward(\n        self, x: torch.Tensor, edge_index: Adj, edge_attr: OptTensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Forward pass through the GraphLayer.\n\n        Args:\n            x (torch.Tensor): Node feature matrix.\n            edge_index (Adj): Edge indices.\n            edge_attr (OptTensor): Edge feature matrix.\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through the GraphLayer.\n        \"\"\"\n        x = self.graph_layer(x, edge_index, edge_attr)\n        x = F.relu(self.norm(x))\n        x = self.dropout(x)\n        return x\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers.GraphLayer.__init__","title":"__init__(graph_layer: nn.Module, norm_layer: nn.Module, dropout: float)","text":"

    A GraphLayer is a single layer in a graph neural network, consisting of a graph layer, normalization layer, and dropout.

    Parameters:

    Name Type Description Default graph_layer Module

    A graph layer, e.g., GCNConv, GATConv, etc.

    required norm_layer Module

    A normalization layer, e.g., LayerNorm or BatchNorm.

    required dropout float

    Dropout probability.

    required Source code in src\\rydberggpt\\models\\graph_embedding\\layers.py
    def __init__(self, graph_layer: nn.Module, norm_layer: nn.Module, dropout: float):\n    \"\"\"\n    A GraphLayer is a single layer in a graph neural network, consisting of\n    a graph layer, normalization layer, and dropout.\n\n    Args:\n        graph_layer (nn.Module): A graph layer, e.g., GCNConv, GATConv, etc.\n        norm_layer (nn.Module): A normalization layer, e.g., LayerNorm or BatchNorm.\n        dropout (float): Dropout probability.\n    \"\"\"\n    super(GraphLayer, self).__init__()\n    self.graph_layer = graph_layer\n    self.norm = norm_layer\n    self.dropout = nn.Dropout(dropout)\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers.GraphLayer.forward","title":"forward(x: torch.Tensor, edge_index: Adj, edge_attr: OptTensor) -> torch.Tensor","text":"

    Forward pass through the GraphLayer.

    Parameters:

    Name Type Description Default x Tensor

    Node feature matrix.

    required edge_index Adj

    Edge indices.

    required edge_attr OptTensor

    Edge feature matrix.

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through the GraphLayer.

    Source code in src\\rydberggpt\\models\\graph_embedding\\layers.py
    def forward(\n    self, x: torch.Tensor, edge_index: Adj, edge_attr: OptTensor\n) -> torch.Tensor:\n    \"\"\"\n    Forward pass through the GraphLayer.\n\n    Args:\n        x (torch.Tensor): Node feature matrix.\n        edge_index (Adj): Edge indices.\n        edge_attr (OptTensor): Edge feature matrix.\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through the GraphLayer.\n    \"\"\"\n    x = self.graph_layer(x, edge_index, edge_attr)\n    x = F.relu(self.norm(x))\n    x = self.dropout(x)\n    return x\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models","title":"models","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models.GraphEmbedding","title":"GraphEmbedding","text":"

    Bases: Module

    Source code in src\\rydberggpt\\models\\graph_embedding\\models.py
    class GraphEmbedding(torch.nn.Module):\n    def __init__(\n        self,\n        graph_layer: Type[Callable],\n        in_node_dim: int,\n        d_hidden: int,\n        d_model: int,\n        num_layers: int,\n        dropout: float = 0.1,\n    ) -> None:\n        \"\"\"\n        GraphEmbedding class for creating a graph embedding with multiple layers.\n\n        Args:\n            graph_layer (Type[Callable]): The graph layer to be used in the embedding.\n            in_node_dim (int): The input node dimension. (omega, delta, beta)\n            d_hidden (int): The hidden dimension size.\n            d_model (int): The output node dimension.\n            num_layers (int): The number of layers in the graph embedding.\n            dropout (float, optional): The dropout rate. Defaults to 0.1.\n        \"\"\"\n        super(GraphEmbedding, self).__init__()\n\n        self.graph_layer = graph_layer\n        self.layers = ModuleList()\n        self.layers.append(\n            GraphLayer(\n                self.graph_layer(in_node_dim, d_hidden), LayerNorm(d_hidden), dropout\n            )\n        )\n\n        for _ in range(num_layers - 2):\n            self.layers.append(\n                GraphLayer(\n                    self.graph_layer(d_hidden, d_hidden), LayerNorm(d_hidden), dropout\n                )\n            )\n\n        self.layers.append(self.graph_layer(d_hidden, d_model))\n        self.final_norm = LayerNorm(d_model)\n\n    def forward(self, data: Data) -> Tensor:\n        \"\"\"\n        Forward pass through the graph embedding layers.\n\n        Args:\n            data (Data): The input graph data.\n\n        Returns:\n            (Tensor): The output tensor with reshaped dimensions.\n        \"\"\"\n        # [..., num_features], [2, ...] 
[...]\n        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr\n\n        for layer in self.layers[:-1]:\n            # [..., num_features]\n            x = layer(x, edge_index, edge_attr)\n\n        # [..., d_model]\n        x = self.final_norm(self.layers[-1](x, edge_index, edge_attr))\n\n        x, batch_mask = to_dense_batch(x, data.batch)\n\n        # [B, N, d_model], where N is the number of nodes or the number of atoms\n        return x, batch_mask\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models.GraphEmbedding.__init__","title":"__init__(graph_layer: Type[Callable], in_node_dim: int, d_hidden: int, d_model: int, num_layers: int, dropout: float = 0.1) -> None","text":"

    GraphEmbedding class for creating a graph embedding with multiple layers.

    Parameters:

    Name Type Description Default graph_layer Type[Callable]

    The graph layer to be used in the embedding.

    required in_node_dim int

    The input node dimension. (omega, delta, beta)

    required d_hidden int

    The hidden dimension size.

    required d_model int

    The output node dimension.

    required num_layers int

    The number of layers in the graph embedding.

    required dropout float

    The dropout rate. Defaults to 0.1.

    0.1 Source code in src\\rydberggpt\\models\\graph_embedding\\models.py
    def __init__(\n    self,\n    graph_layer: Type[Callable],\n    in_node_dim: int,\n    d_hidden: int,\n    d_model: int,\n    num_layers: int,\n    dropout: float = 0.1,\n) -> None:\n    \"\"\"\n    GraphEmbedding class for creating a graph embedding with multiple layers.\n\n    Args:\n        graph_layer (Type[Callable]): The graph layer to be used in the embedding.\n        in_node_dim (int): The input node dimension. (omega, delta, beta)\n        d_hidden (int): The hidden dimension size.\n        d_model (int): The output node dimension.\n        num_layers (int): The number of layers in the graph embedding.\n        dropout (float, optional): The dropout rate. Defaults to 0.1.\n    \"\"\"\n    super(GraphEmbedding, self).__init__()\n\n    self.graph_layer = graph_layer\n    self.layers = ModuleList()\n    self.layers.append(\n        GraphLayer(\n            self.graph_layer(in_node_dim, d_hidden), LayerNorm(d_hidden), dropout\n        )\n    )\n\n    for _ in range(num_layers - 2):\n        self.layers.append(\n            GraphLayer(\n                self.graph_layer(d_hidden, d_hidden), LayerNorm(d_hidden), dropout\n            )\n        )\n\n    self.layers.append(self.graph_layer(d_hidden, d_model))\n    self.final_norm = LayerNorm(d_model)\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models.GraphEmbedding.forward","title":"forward(data: Data) -> Tensor","text":"

    Forward pass through the graph embedding layers.

    Parameters:

    Name Type Description Default data Data

    The input graph data.

    required

    Returns:

    Type Description Tensor

    The output tensor with reshaped dimensions.

    Source code in src\\rydberggpt\\models\\graph_embedding\\models.py
    def forward(self, data: Data) -> Tensor:\n    \"\"\"\n    Forward pass through the graph embedding layers.\n\n    Args:\n        data (Data): The input graph data.\n\n    Returns:\n        (Tensor): The output tensor with reshaped dimensions.\n    \"\"\"\n    # [..., num_features], [2, ...] [...]\n    x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr\n\n    for layer in self.layers[:-1]:\n        # [..., num_features]\n        x = layer(x, edge_index, edge_attr)\n\n    # [..., d_model]\n    x = self.final_norm(self.layers[-1](x, edge_index, edge_attr))\n\n    x, batch_mask = to_dense_batch(x, data.batch)\n\n    # [B, N, d_model], where N is the number of nodes or the number of atoms\n    return x, batch_mask\n
    "},{"location":"reference/models/transformer/","title":"Transformer","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer","title":"rydberggpt.models.transformer","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers","title":"layers","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.DecoderLayer","title":"DecoderLayer","text":"

    Bases: Module

    Decoder is made of self-attn, src-attn, and feed forward.

    Parameters:

    Name Type Description Default size int

    The input size. (d_model)

    required self_attn MultiheadAttention

    The self-attention module.

    required src_attn MultiheadAttention

    The source-attention module.

    required feed_forward PositionwiseFeedForward

    The feed forward module.

    required dropout float

    The dropout rate.

    required Source code in src\\rydberggpt\\models\\transformer\\layers.py
    class DecoderLayer(nn.Module):\n    \"\"\"\n    Decoder is made of self-attn, src-attn, and feed forward.\n\n    Args:\n        size (int): The input size. (d_model)\n        self_attn (nn.MultiheadAttention): The self-attention module.\n        src_attn (nn.MultiheadAttention): The source-attention module.\n        feed_forward (PositionwiseFeedForward): The feed forward module.\n        dropout (float): The dropout rate.\n    \"\"\"\n\n    def __init__(\n        self,\n        size: int,\n        self_attn: nn.MultiheadAttention,\n        src_attn: nn.MultiheadAttention,\n        feed_forward: PositionwiseFeedForward,\n        dropout: float,\n    ):\n        super(DecoderLayer, self).__init__()\n        self.size = size\n        self.self_attn = self_attn\n        self.src_attn = src_attn\n        self.feed_forward = feed_forward\n        self.sublayer = clones(SublayerConnection(size, dropout), 3)\n\n    def forward(\n        self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the decoder.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n            memory (torch.Tensor): The memory tensor.\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n\n        causal_attn_mask = torch.meshgrid(\n            torch.arange(x.shape[-2], device=x.device),\n            torch.arange(x.shape[-2], device=x.device),\n            indexing=\"ij\",\n        )\n        causal_attn_mask = causal_attn_mask[0] >= causal_attn_mask[1]\n        causal_attn_mask = torch.logical_not(causal_attn_mask)\n\n        batch_key_mask = batch_mask\n        batch_key_mask = torch.logical_not(batch_key_mask)\n\n        m = memory\n        x = self.sublayer[0](\n            x, lambda x: self.self_attn(x, x, x, attn_mask=causal_attn_mask)[0]\n        )\n        x = self.sublayer[1](\n 
           x, lambda x: self.src_attn(x, m, m, key_padding_mask=batch_key_mask)[0]\n        )\n        return self.sublayer[2](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.DecoderLayer.forward","title":"forward(x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the decoder.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required memory Tensor

    The memory tensor.

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\layers.py
    def forward(\n    self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the decoder.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n        memory (torch.Tensor): The memory tensor.\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n\n    causal_attn_mask = torch.meshgrid(\n        torch.arange(x.shape[-2], device=x.device),\n        torch.arange(x.shape[-2], device=x.device),\n        indexing=\"ij\",\n    )\n    causal_attn_mask = causal_attn_mask[0] >= causal_attn_mask[1]\n    causal_attn_mask = torch.logical_not(causal_attn_mask)\n\n    batch_key_mask = batch_mask\n    batch_key_mask = torch.logical_not(batch_key_mask)\n\n    m = memory\n    x = self.sublayer[0](\n        x, lambda x: self.self_attn(x, x, x, attn_mask=causal_attn_mask)[0]\n    )\n    x = self.sublayer[1](\n        x, lambda x: self.src_attn(x, m, m, key_padding_mask=batch_key_mask)[0]\n    )\n    return self.sublayer[2](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.EncoderLayer","title":"EncoderLayer","text":"

    Bases: Module

    Encoder is made up of self-attn and feed forward.

    Parameters:

    Name Type Description Default size int

    The input size. (d_model)

    required self_attn MultiheadAttention

    The self-attention module.

    required feed_forward PositionwiseFeedForward

    The feed forward module.

    required dropout float

    The dropout rate.

    required Source code in src\\rydberggpt\\models\\transformer\\layers.py
    class EncoderLayer(nn.Module):\n    \"\"\"\n    Encoder is made up of self-attn and feed forward.\n\n    Args:\n        size (int): The input size. (d_model)\n        self_attn (nn.MultiheadAttention): The self-attention module.\n        feed_forward (PositionwiseFeedForward): The feed forward module.\n        dropout (float): The dropout rate.\n    \"\"\"\n\n    def __init__(\n        self,\n        size: int,\n        self_attn: nn.MultiheadAttention,\n        feed_forward: PositionwiseFeedForward,\n        dropout: float,\n    ):\n        super(EncoderLayer, self).__init__()\n        self.self_attn = self_attn\n        self.feed_forward = feed_forward\n        self.sublayer = clones(SublayerConnection(size, dropout), 2)\n        self.size = size\n\n    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the encoder.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n\n        batch_key_mask = batch_mask\n        batch_key_mask = torch.logical_not(batch_key_mask)\n\n        x = self.sublayer[0](\n            x,\n            lambda x: torch.nan_to_num(\n                self.self_attn(x, x, x, key_padding_mask=batch_key_mask)[0]\n            ),\n        )\n        return self.sublayer[1](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.EncoderLayer.forward","title":"forward(x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the encoder.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\layers.py
    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the encoder.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n\n    batch_key_mask = batch_mask\n    batch_key_mask = torch.logical_not(batch_key_mask)\n\n    x = self.sublayer[0](\n        x,\n        lambda x: torch.nan_to_num(\n            self.self_attn(x, x, x, key_padding_mask=batch_key_mask)[0]\n        ),\n    )\n    return self.sublayer[1](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models","title":"models","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Decoder","title":"Decoder","text":"

    Bases: Module

    The core of the transformer, which consists of a stack of decoder layers.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class Decoder(nn.Module):\n    \"\"\"\n    The core of the transformer, which consists of a stack of decoder layers.\n    \"\"\"\n\n    def __init__(self, layer: nn.Module, n_layers: int):\n        \"\"\"\n        Initialize the Decoder class.\n\n        Args:\n            layer (nn.Module): A single instance of the decoder layer to be cloned.\n            n_layers (int): The number of decoder layers in the stack.\n        \"\"\"\n        super(Decoder, self).__init__()\n        self.layers = clones(layer, n_layers)\n        self.norm = nn.LayerNorm(layer.size)\n\n    def forward(\n        self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Pass the (masked) input through all layers of the decoder.\n\n        Args:\n            x (torch.Tensor): The input tensor to the decoder of shape (batch_size, seq_length, d_model).\n            memory (torch.Tensor): The memory tensor, typically the output of the encoder.\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through all layers of the decoder of shape (batch_size, seq_length, d_model).\n        \"\"\"\n        for layer in self.layers:\n            x = layer(x, memory, batch_mask=batch_mask)\n        return self.norm(x)  # [batch_size, seq_len, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Decoder.__init__","title":"__init__(layer: nn.Module, n_layers: int)","text":"

    Initialize the Decoder class.

    Parameters:

    Name Type Description Default layer Module

    A single instance of the decoder layer to be cloned.

    required n_layers int

    The number of decoder layers in the stack.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(self, layer: nn.Module, n_layers: int):\n    \"\"\"\n    Initialize the Decoder class.\n\n    Args:\n        layer (nn.Module): A single instance of the decoder layer to be cloned.\n        n_layers (int): The number of decoder layers in the stack.\n    \"\"\"\n    super(Decoder, self).__init__()\n    self.layers = clones(layer, n_layers)\n    self.norm = nn.LayerNorm(layer.size)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Decoder.forward","title":"forward(x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Pass the (masked) input through all layers of the decoder.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor to the decoder of shape (batch_size, seq_length, d_model).

    required memory Tensor

    The memory tensor, typically the output of the encoder.

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through all layers of the decoder of shape (batch_size, seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(\n    self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n) -> torch.Tensor:\n    \"\"\"\n    Pass the (masked) input through all layers of the decoder.\n\n    Args:\n        x (torch.Tensor): The input tensor to the decoder of shape (batch_size, seq_length, d_model).\n        memory (torch.Tensor): The memory tensor, typically the output of the encoder.\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through all layers of the decoder of shape (batch_size, seq_length, d_model).\n    \"\"\"\n    for layer in self.layers:\n        x = layer(x, memory, batch_mask=batch_mask)\n    return self.norm(x)  # [batch_size, seq_len, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Encoder","title":"Encoder","text":"

    Bases: Module

    The core encoder, which consists of a stack of N layers.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class Encoder(nn.Module):\n    \"\"\"\n    The core encoder, which consists of a stack of N layers.\n    \"\"\"\n\n    def __init__(self, layer: nn.Module, N: int):\n        \"\"\"\n        Initialize the Encoder class.\n\n        Args:\n            layer (nn.Module): A single instance of the encoder layer to be cloned.\n            N (int): The number of encoder layers in the stack.\n        \"\"\"\n        super(Encoder, self).__init__()\n        self.layers = clones(layer, N)\n        self.norm = nn.LayerNorm(layer.size)\n\n    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Pass the input through each layer in turn.\n\n        Args:\n            x (torch.Tensor): The input tensor to the encoder of shape (batch_size, seq_length, d_model).\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through all layers of the encoder,\n                          with the same shape as the input tensor (batch_size, seq_length, d_model).\n        \"\"\"\n        for layer in self.layers:\n            x = layer(x, batch_mask=batch_mask)\n        return self.norm(x)  # [batch_size, seq_length, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Encoder.__init__","title":"__init__(layer: nn.Module, N: int)","text":"

    Initialize the Encoder class.

    Parameters:

    Name Type Description Default layer Module

    A single instance of the encoder layer to be cloned.

    required N int

    The number of encoder layers in the stack.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(self, layer: nn.Module, N: int):\n    \"\"\"\n    Initialize the Encoder class.\n\n    Args:\n        layer (nn.Module): A single instance of the encoder layer to be cloned.\n        N (int): The number of encoder layers in the stack.\n    \"\"\"\n    super(Encoder, self).__init__()\n    self.layers = clones(layer, N)\n    self.norm = nn.LayerNorm(layer.size)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Encoder.forward","title":"forward(x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Pass the input through each layer in turn.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor to the encoder of shape (batch_size, seq_length, d_model).

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through all layers of the encoder, with the same shape as the input tensor (batch_size, seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Pass the input through each layer in turn.\n\n    Args:\n        x (torch.Tensor): The input tensor to the encoder of shape (batch_size, seq_length, d_model).\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through all layers of the encoder,\n                      with the same shape as the input tensor (batch_size, seq_length, d_model).\n    \"\"\"\n    for layer in self.layers:\n        x = layer(x, batch_mask=batch_mask)\n    return self.norm(x)  # [batch_size, seq_length, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder","title":"EncoderDecoder","text":"

    Bases: LightningModule

    A standard Encoder-Decoder architecture. Base for this and many other models.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class EncoderDecoder(pl.LightningModule):\n    \"\"\"\n    A standard Encoder-Decoder architecture. Base for this and many other models.\n    \"\"\"\n\n    def __init__(\n        self,\n        encoder: nn.Module,\n        decoder: nn.Module,\n        src_embed: nn.Module,\n        tgt_embed: nn.Module,\n        generator: nn.Module,\n    ):\n        \"\"\"\n        Initialize the EncoderDecoder class.\n\n        Args:\n            encoder (nn.Module): The encoder module.\n            decoder (nn.Module): The decoder module.\n            tgt_embed (nn.Module): The target embedding module.\n            generator (nn.Module): The generator module.\n        \"\"\"\n        super(EncoderDecoder, self).__init__()\n        self.encoder = encoder\n        self.decoder = decoder\n        self.src_embed = src_embed\n        self.tgt_embed = tgt_embed\n        self.generator = generator\n\n    def forward(self, tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Take in and process masked src and target sequences.\n\n        Args:\n            tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n            src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through the encoder-decoder architecture,\n                          with shape (batch_size, tgt_seq_length, d_model).\n        \"\"\"\n\n        memory, batch_mask = self.encode(src)\n\n        return self.decode(tgt, memory, batch_mask)\n\n    def encode(self, src: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Encode the source tensor.\n\n        Args:\n            src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n        Returns:\n            (torch.Tensor): The encoded tensor of shape (batch_size, src_seq_length, d_model_tgt).\n        \"\"\"\n\n        x, batch_mask = 
self.src_embed(src)\n\n        return self.encoder(x, batch_mask=batch_mask), batch_mask\n\n    def decode(\n        self, tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Decode the target tensor using the memory tensor.\n\n        Args:\n            tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n            memory (torch.Tensor): The memory tensor of shape (batch_size, src_seq_length, d_model).\n\n        Returns:\n            (torch.Tensor): The decoded tensor of shape (batch_size, tgt_seq_length, d_model).\n        \"\"\"\n        return self.decoder(self.tgt_embed(tgt), memory, batch_mask=batch_mask)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.__init__","title":"__init__(encoder: nn.Module, decoder: nn.Module, src_embed: nn.Module, tgt_embed: nn.Module, generator: nn.Module)","text":"

    Initialize the EncoderDecoder class.

    Parameters:

    Name Type Description Default encoder Module

    The encoder module.

    required decoder Module

    The decoder module.

    required tgt_embed Module

    The target embedding module.

    required generator Module

    The generator module.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(\n    self,\n    encoder: nn.Module,\n    decoder: nn.Module,\n    src_embed: nn.Module,\n    tgt_embed: nn.Module,\n    generator: nn.Module,\n):\n    \"\"\"\n    Initialize the EncoderDecoder class.\n\n    Args:\n        encoder (nn.Module): The encoder module.\n        decoder (nn.Module): The decoder module.\n        tgt_embed (nn.Module): The target embedding module.\n        generator (nn.Module): The generator module.\n    \"\"\"\n    super(EncoderDecoder, self).__init__()\n    self.encoder = encoder\n    self.decoder = decoder\n    self.src_embed = src_embed\n    self.tgt_embed = tgt_embed\n    self.generator = generator\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.decode","title":"decode(tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Decode the target tensor using the memory tensor.

    Parameters:

    Name Type Description Default tgt Tensor

    The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).

    required memory Tensor

    The memory tensor of shape (batch_size, src_seq_length, d_model).

    required

    Returns:

    Type Description Tensor

    The decoded tensor of shape (batch_size, tgt_seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def decode(\n    self, tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n) -> torch.Tensor:\n    \"\"\"\n    Decode the target tensor using the memory tensor.\n\n    Args:\n        tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n        memory (torch.Tensor): The memory tensor of shape (batch_size, src_seq_length, d_model).\n\n    Returns:\n        (torch.Tensor): The decoded tensor of shape (batch_size, tgt_seq_length, d_model).\n    \"\"\"\n    return self.decoder(self.tgt_embed(tgt), memory, batch_mask=batch_mask)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.encode","title":"encode(src: torch.Tensor) -> torch.Tensor","text":"

    Encode the source tensor.

    Parameters:

    Name Type Description Default src Tensor

    The source tensor of shape (batch_size, src_seq_length, d_model_src).

    required

    Returns:

    Type Description Tensor

    The encoded tensor of shape (batch_size, src_seq_length, d_model_tgt).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def encode(self, src: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Encode the source tensor.\n\n    Args:\n        src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n    Returns:\n        (torch.Tensor): The encoded tensor of shape (batch_size, src_seq_length, d_model_tgt).\n    \"\"\"\n\n    x, batch_mask = self.src_embed(src)\n\n    return self.encoder(x, batch_mask=batch_mask), batch_mask\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.forward","title":"forward(tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor","text":"

    Take in and process masked src and target sequences.

    Parameters:

    Name Type Description Default tgt Tensor

    The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).

    required src Tensor

    The source tensor of shape (batch_size, src_seq_length, d_model_src).

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through the encoder-decoder architecture, with shape (batch_size, tgt_seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(self, tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Take in and process masked src and target sequences.\n\n    Args:\n        tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n        src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through the encoder-decoder architecture,\n                      with shape (batch_size, tgt_seq_length, d_model).\n    \"\"\"\n\n    memory, batch_mask = self.encode(src)\n\n    return self.decode(tgt, memory, batch_mask)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Generator","title":"Generator","text":"

    Bases: Module

    Linear + softmax layer for generation step. vocab_size for Rydberg is 2.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class Generator(nn.Module):\n    \"\"\"\n    Linear + softmax layer for generation step. vocab_size for Rydberg is 2.\n    \"\"\"\n\n    def __init__(self, d_model: int, vocab_size: int):\n        \"\"\"\n        Initialize the Generator class.\n\n        Args:\n            d_model (int): The dimension of the input features (i.e., the last dimension of the input tensor).\n            vocab_size (int): The size of the vocabulary, which determines the last dimension of the output tensor.\n        \"\"\"\n        super(Generator, self).__init__()\n        self.proj = nn.Linear(d_model, vocab_size)  # [batch_size, seq_len, vocab_size]\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass of the Generator.\n\n        Args:\n            x (torch.Tensor): The input tensor of shape (batch_size, seq_length, d_model).\n\n        Returns:\n            (torch.Tensor): The output tensor of shape (batch_size, seq_length, vocab_size),\n                          with log-softmax applied along the last dimension.\n        \"\"\"\n\n        proj_offset = self.proj(x) + 1e-10\n        return F.log_softmax(proj_offset, dim=-1)  # [batch_size, seq_len, vocab_size]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Generator.__init__","title":"__init__(d_model: int, vocab_size: int)","text":"

    Initialize the Generator class.

    Parameters:

    Name Type Description Default d_model int

    The dimension of the input features (i.e., the last dimension of the input tensor).

    required vocab_size int

    The size of the vocabulary, which determines the last dimension of the output tensor.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(self, d_model: int, vocab_size: int):\n    \"\"\"\n    Initialize the Generator class.\n\n    Args:\n        d_model (int): The dimension of the input features (i.e., the last dimension of the input tensor).\n        vocab_size (int): The size of the vocabulary, which determines the last dimension of the output tensor.\n    \"\"\"\n    super(Generator, self).__init__()\n    self.proj = nn.Linear(d_model, vocab_size)  # [batch_size, seq_len, vocab_size]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Generator.forward","title":"forward(x: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass of the Generator.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor of shape (batch_size, seq_length, d_model).

    required

    Returns:

    Type Description Tensor

    The output tensor of shape (batch_size, seq_length, vocab_size), with log-softmax applied along the last dimension.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(self, x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass of the Generator.\n\n    Args:\n        x (torch.Tensor): The input tensor of shape (batch_size, seq_length, d_model).\n\n    Returns:\n        (torch.Tensor): The output tensor of shape (batch_size, seq_length, vocab_size),\n                      with log-softmax applied along the last dimension.\n    \"\"\"\n\n    proj_offset = self.proj(x) + 1e-10\n    return F.log_softmax(proj_offset, dim=-1)  # [batch_size, seq_len, vocab_size]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules","title":"modules","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.Embeddings","title":"Embeddings","text":"

    Bases: Module

    The embedding layer.

    Parameters:

    Name Type Description Default d_model int

    The embedding size.

    required vocab_size int

    The vocabulary size.

    required Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class Embeddings(nn.Module):\n    \"\"\"\n    The embedding layer.\n\n    Args:\n        d_model (int): The embedding size.\n        vocab_size (int): The vocabulary size.\n    \"\"\"\n\n    def __init__(self, d_model: int, vocab_size: int):\n        super(Embeddings, self).__init__()\n        self.lut = nn.Linear(vocab_size, d_model)\n        self.d_model = d_model\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the module.\n\n        Parameters:\n            x (torch.Tensor): The input tensor.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n        x = self.lut(x) * math.sqrt(self.d_model)\n        return x\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.Embeddings.forward","title":"forward(x: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the module.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    def forward(self, x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the module.\n\n    Parameters:\n        x (torch.Tensor): The input tensor.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n    x = self.lut(x) * math.sqrt(self.d_model)\n    return x\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.PositionalEncoding","title":"PositionalEncoding","text":"

    Bases: Module

    Implement the PE function.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class PositionalEncoding(nn.Module):\n    \"Implement the PE function.\"\n\n    def __init__(self, d_model, dropout, max_len=5000):\n        super(PositionalEncoding, self).__init__()\n        self.dropout = nn.Dropout(p=dropout)\n\n        # Compute the positional encodings once in log space.\n        pe = torch.zeros(max_len, d_model)\n        position = torch.arange(0, max_len).unsqueeze(1)\n        div_term = torch.exp(\n            torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)\n        )\n        pe[:, 0::2] = torch.sin(position * div_term)\n        pe[:, 1::2] = torch.cos(position * div_term)\n        pe = pe.unsqueeze(0)\n        self.register_buffer(\"pe\", pe)\n\n    def forward(self, x):\n        x = x + self.pe[:, : x.size(1)].requires_grad_(False)\n        return self.dropout(x)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.PositionwiseFeedForward","title":"PositionwiseFeedForward","text":"

    Bases: Module

    A two-layer feed-forward network.

    Parameters:

    Name Type Description Default d_model int

    The input size.

    required d_ff int

    The hidden size.

    required dropout float

    The dropout rate. Defaults to 0.1.

    0.1 Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class PositionwiseFeedForward(nn.Module):\n    \"\"\"\n    A two-layer feed-forward network.\n\n    Args:\n        d_model (int): The input size.\n        d_ff (int): The hidden size.\n        dropout (float, optional): The dropout rate. Defaults to 0.1.\n    \"\"\"\n\n    def __init__(self, d_model: int, d_ff: int, dropout: float = 0.1):\n        super(PositionwiseFeedForward, self).__init__()\n        self.w_1 = nn.Linear(d_model, d_ff)\n        self.w_2 = nn.Linear(d_ff, d_model)\n        self.dropout = nn.Dropout(p=dropout)\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the module.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n        return self.w_2(self.dropout(F.relu(self.w_1(x))))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.PositionwiseFeedForward.forward","title":"forward(x: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the module.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    def forward(self, x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the module.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n    return self.w_2(self.dropout(F.relu(self.w_1(x))))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.SublayerConnection","title":"SublayerConnection","text":"

    Bases: Module

    This module implements a residual connection followed by a layer norm.

    Parameters:

    Name Type Description Default size int

    The input size.

    required dropout float

    The dropout rate.

    required Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class SublayerConnection(nn.Module):\n    \"\"\"\n    This module implements a residual connection followed by a layer norm.\n\n    Args:\n        size (int): The input size.\n        dropout (float): The dropout rate.\n    \"\"\"\n\n    def __init__(self, size: int, dropout: float):\n        super(SublayerConnection, self).__init__()\n        self.layer_norm = nn.LayerNorm(size)\n        self.dropout = nn.Dropout(dropout)\n\n    def forward(self, x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the module.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n            sublayer (nn.Module): The sublayer module.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n        # NOTE For GPT2 the authors moved Layer normalization (Ba et al., 2016)\n        # to the input of each sub-block.\n        # see Sec. 2.3 https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf\n        return x + self.dropout(sublayer(self.layer_norm(x)))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.SublayerConnection.forward","title":"forward(x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor","text":"

    Compute the forward pass through the module.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required sublayer Module

    The sublayer module.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    def forward(self, x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the module.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n        sublayer (nn.Module): The sublayer module.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n    # NOTE For GPT2 the authors moved Layer normalization (Ba et al., 2016)\n    # to the input of each sub-block.\n    # see Sec. 2.3 https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf\n    return x + self.dropout(sublayer(self.layer_norm(x)))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils","title":"utils","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils.clones","title":"clones(module: nn.Module, n_clones: int)","text":"

    helper function which produces n_clones copies of a layer

    Source code in src\\rydberggpt\\models\\transformer\\utils.py
    def clones(module: nn.Module, n_clones: int):\n    \"\"\"helper function which produces n_clones copies of a layer\"\"\"\n    return nn.ModuleList([copy.deepcopy(module) for _ in range(n_clones)])\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils.flattened_snake_flip","title":"flattened_snake_flip(x: torch.Tensor, Lx: int, Ly: int) -> torch.Tensor","text":"

    Implements a \"snake\" flip which reorders the flattened 2D tensor into snake order.

    Parameters:

    Name Type Description Default x Tensor

    The tensor to apply the snake flip to, dimensions should be [..., Ly * Lx].

    required

    Returns:

    Type Description Tensor

    The \"snake\" flipped tensor, dimensions will be [..., Ly * Lx].

    Source code in src\\rydberggpt\\models\\transformer\\utils.py
    def flattened_snake_flip(x: torch.Tensor, Lx: int, Ly: int) -> torch.Tensor:\n    \"\"\"\n    Implements a \"snake\" flip which reorders the flattened 2D tensor into snake order.\n\n    Args:\n        x (torch.Tensor): The tensor to apply the snake flip to, dimensions should be [..., Ly * Lx].\n\n    Returns:\n        (torch.Tensor): The \"snake\" flipped tensor, dimensions will be [..., Ly * Lx].\n    \"\"\"\n    return snake_flip(x.reshape(*x.shape[:-1], Ly, Lx)).reshape(*x.shape[:-1], -1)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils.snake_flip","title":"snake_flip(x: torch.Tensor) -> torch.Tensor","text":"

    Implements a \"snake\" flip which reorders the 2D tensor into snake order when flattened.

    Parameters:

    Name Type Description Default x Tensor

    The tensor to apply the snake flip to, dimensions should be [..., Ly, Lx].

    required

    Returns:

    Type Description Tensor

    The \"snake\" flipped tensor, dimensions will be [..., Ly, Lx].

    Source code in src\\rydberggpt\\models\\transformer\\utils.py
    def snake_flip(x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Implements a \"snake\" flip which reorders the 2D tensor into snake order when flattened.\n\n    Args:\n        x (torch.Tensor): The tensor to apply the snake flip to, dimensions should be [..., Ly, Lx].\n\n    Returns:\n        (torch.Tensor): The \"snake\" flipped tensor, dimensions will be [..., Ly, Lx].\n    \"\"\"\n\n    if not isinstance(x, torch.Tensor):\n        raise TypeError(\"Function only supports torch.Tensor\")\n\n    y = x.clone()\n\n    for i in range(y.shape[-2]):\n        if i % 2 == 1:\n            y[..., i, :] = torch.flip(y[..., i, :], dims=(-1,))\n\n    return y\n
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"RydbergGPT","text":"

    A large language model (LLM) for Rydberg atom array physics.

    "},{"location":"#architecture","title":"Architecture","text":""},{"location":"#rydberg-system","title":"Rydberg System","text":"\\[ \\hat{H}_{\\mathrm{Rydberg}} = \\sum_{i < j}^{N} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert} \\hat{n}_i \\hat{n}_j - \\delta \\sum_{i}^{N} \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i}^{N} \\hat{\\sigma}_i^{(x)}, \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\]
    • \\(N = L \\times L =\\) number of atoms/qubits
    • \\(i, j =\\) qubit index
    • \\(V_{ij} =\\) blockade interaction between qubits \\(i\\) and \\(j\\)
    • \\(a =\\) Lattice spacing
    • \\(R_b =\\) Rydberg blockade radius
    • \\(\\mathbf{r}_i =\\) the position of qubit \\(i\\)
    • \\(\\hat{n}_i =\\) number operator at qubit \\(i\\)
    • \\(\\delta =\\) detuning at qubit \\(i\\)
    • \\(\\Omega =\\) Rabi frequency at qubit \\(i\\)
    "},{"location":"#transformer","title":"Transformer","text":"

    Vanilla transformer architecture taken from Attention is All You Need.

    • \\(H_i = \\mathrm{GraphNN}(\\mathrm{edges} = V_{ij} \\ ; \\mathrm{nodes}= \\{ \\Omega, \\Delta, R_b, \\beta \\}_i)\\)
    • \\(\\sigma_i =\\) one-hot encoding of measured qubit \\(i\\)
    • \\(P_i = P(\\sigma_i | \\sigma_{< i}) =\\) conditional probability distribution of qubit \\(i\\)

    The transformer encoder represents the Rydberg Hamiltonian with a sequence. The transformer decoder represents the corresponding ground state wavefunction.

    "},{"location":"#acknowledgements","title":"Acknowledgements","text":"

    We sincerely thank the authors of the following very helpful codebases we used when building this repository :

    • Transformer tutorials:
      • Annotated Transformer
      • Illustrated Transformer
    • Transformer quantum state:
      • Predicting Properties of Quantum Systems with Conditional Generative Models
      • Transformer Quantum State
    "},{"location":"#references","title":"References","text":"
    @inproceedings{46201,\ntitle   = {Attention is All You Need},\nauthor  = {Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},\nyear    = {2017},\nURL = {https://arxiv.org/pdf/1706.03762.pdf}\n}\n
    "},{"location":"data/","title":"Data","text":""},{"location":"data/#rydberg-system","title":"Rydberg System","text":"\\[ \\hat{H}_{\\mathrm{Rydberg}} = \\sum_{i < j}^{N} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert} \\hat{n}_i \\hat{n}_j - \\delta \\sum_{i}^{N} \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i}^{N} \\hat{\\sigma}_i^{(x)}, \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\]
    • \\(N = L \\times L =\\) number of atoms/qubits
    • \\(i, j =\\) qubit index
    • \\(V_{ij} =\\) blockade interaction between qubits \\(i\\) and \\(j\\)
    • \\(a =\\) Lattice spacing
    • \\(R_b =\\) Rydberg blockade radius
    • \\(\\mathbf{r}_i =\\) the position of qubit \\(i\\)
    • \\(\\hat{n}_i =\\) number operator at qubit \\(i\\)
    • \\(\\delta =\\) detuning at qubit \\(i\\)
    • \\(\\Omega =\\) Rabi frequency at qubit \\(i\\)
    "},{"location":"data/#dataset","title":"Dataset","text":"

    Consider setting \\(\\Omega = 1\\) and varying the other Hamiltonian parameters independently :

    $$ L = [5, 6, 11, 12, 15, 16] $$ $$ \\delta / \\Omega = [-0.36, -0.13, 0.93, 1.05, 1.17, 1.29, 1.52, 1.76, 2.94, 3.17] $$ $$ R_b / a = [1.05, 1.15, 1.3] $$ $$ \\beta \\Omega = [0.5, 1, 2, 4, 8, 16, 32, 48, 64] $$ There are a total of 8 x 10 x 3 x 9 = 2160 configurations.

    "},{"location":"get_started/","title":"Get Started","text":""},{"location":"get_started/#installation","title":"Installation","text":"

    Clone the repository using the following command:

    git clone https://github.com/PIQuIL/RydbergGPT\n
    Install with pip :
    cd RydbergGPT\npip install .\n

    "},{"location":"get_started/#usage","title":"Usage","text":""},{"location":"get_started/#configuration","title":"Configuration","text":"

    Theconfig.yaml is used to define the hyperparameters for: - Model architecture - Training settings - Data loading - Others

    "},{"location":"get_started/#training","title":"Training","text":"

    To train RydbergGPT locally, execute the train.py with:

    python train.py --config_name=config_small.yaml\n

    "},{"location":"examples/1_Overview/","title":"Overview","text":"
    %load_ext autoreload\n%autoreload 2\n\nimport itertools as it\nimport numpy as np\nimport networkx as nx\nimport matplotlib.pyplot as plt\nfrom torch_geometric.utils import to_networkx\nimport numpy as np\nimport networkx as nx\nimport torch\nfrom torch_geometric.data import Data\n
    import copy\nfrom typing import Tuple\n\nimport torch\nfrom pytorch_lightning import LightningModule\nfrom torch import Tensor, nn\nfrom torch_geometric.nn import GATConv, GCNConv\n\nfrom rydberggpt.models.graph_embedding.models import GraphEmbedding\nfrom rydberggpt.models.rydberg_encoder_decoder import RydbergEncoderDecoder\n\nfrom rydberggpt.models.transformer.layers import DecoderLayer, EncoderLayer\nfrom rydberggpt.models.transformer.models import (\n    Decoder,\n    Encoder,\n    EncoderDecoder,\n    Generator,\n)\nfrom rydberggpt.models.transformer.modules import (\n    PositionalEncoding,\n    PositionwiseFeedForward,\n)\nfrom rydberggpt.utils import to_one_hot\n
    def get_rydberg_graph_encoder_decoder(config):\n    c = copy.deepcopy\n    attn = nn.MultiheadAttention(config.d_model, config.num_heads, batch_first=True)\n    position = PositionalEncoding(config.d_model, config.dropout)\n    ff = PositionwiseFeedForward(config.d_model, config.d_ff, config.dropout)\n\n    model = RydbergEncoderDecoder(\n        encoder=Encoder(\n            EncoderLayer(config.d_model, c(attn), c(ff), config.dropout),\n            config.num_blocks_encoder,\n        ),\n        decoder=Decoder(\n            DecoderLayer(config.d_model, c(attn), c(attn), c(ff), config.dropout),\n            config.num_blocks_decoder,\n        ),\n        src_embed=GraphEmbedding(\n            graph_layer=GCNConv,  # GATConv\n            in_node_dim=config.in_node_dim,\n            d_hidden=config.graph_hidden_dim,\n            d_model=config.d_model,\n            num_layers=config.graph_num_layers,\n            dropout=config.dropout,\n        ),\n        tgt_embed=nn.Sequential(\n            nn.Linear(config.num_states, config.d_model), c(position)\n        ),\n        generator=Generator(config.d_model, 2),\n        config=config,\n    )\n\n    for p in model.parameters():\n        if p.dim() > 1:\n            nn.init.xavier_uniform_(p)\n\n    return model\n

    In our approach, we leverage graph neural networks (GNNs) to process the underlying graph structure of Rydberg atom systems. In these systems, the graph nodes represent the Rydberg atoms, and each node is assigned a node_feature vector containing information about the Rabi frequency (\u03a9), detuning (\u0394), and temperature (\u03b2). The Rydberg blockade radius, which determines the interaction strength between atoms, is encoded as edge attributes in the graph.

    GNNs are powerful tools for learning representations of graph-structured data, capturing both local and global information within the graph. In our model, we employ graph convolutional layers, such as GCNConv, to learn meaningful embeddings of the input graph. These embeddings take into account both node features and edge attributes, enabling the model to learn complex relationships between atoms in the Rydberg system.

    To understand the basics of graph neural networks and their applications, we recommend the following resources:

    1. A Gentle Introduction to Graph Neural Networks: This article provides an accessible and visually appealing introduction to GNNs, covering their motivation, core concepts, and various architectures.

    2. Understanding Convolutions on Graphs: This article dives deeper into the inner workings of GNNs, specifically focusing on convolution operations on graphs. It provides insights into how graph convolutions can be understood as message-passing mechanisms and how they can be generalized.

    3. Pytorch_geometric: PyTorch Geometric is a library for deep learning on irregular input data such as graphs, point clouds, and manifolds. It provides efficient implementations of various GNN layers and models, making it easier to implement and experiment with graph-based neural networks. This resource serves as a guide to getting started with the library and provides documentation for its various features.

    In our Rydberg atom system model, the graph embedding component serves as a crucial bridge between the graph-structured input data and the encoder-decoder architecture. By leveraging the capabilities of GNNs, we can effectively learn complex patterns in the graph structure and enhance the performance of our model for predicting properties of quantum many-body systems.

    "},{"location":"examples/1_Overview/#tutorial-overview","title":"Tutorial: Overview","text":""},{"location":"examples/1_Overview/#introduction","title":"Introduction","text":"

    Machine learning has recently emerged as a powerful tool for predicting properties of quantum many-body systems. Generative models can learn from measurements of a single quantum state to accurately reconstruct the state and predict local observables for many ground states of Hamiltonians. In this tutorial, we focus on Rydberg atom systems and propose the use of conditional generative models to simultaneously represent a family of states by learning shared structures of different quantum states from measurements.

    Refs:

    Predicting Properties of Quantum Systems with Conditional Generative Models

    Transformer Quantum State: A Multi-Purpose Model for Quantum Many-Body Problems

    Bloqade

    "},{"location":"examples/1_Overview/#rydberg-hamiltonian","title":"Rydberg Hamiltonian","text":"

    We consider a system of \\(N=L \\times L\\) atoms arranged on a square lattice. The governing Hamiltonian defining the Rydberg atom array interactions has the following form:

    \\[ \\hat{H} = \\sum_{i<j} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\hat{n}_i \\hat{n}_j -\\delta \\sum_{i=1}^N \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i=1}^N \\hat{\\sigma}^x_i. \\quad (1) \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6}, \\quad (2) \\]

    where \\(\\hat{\\sigma}^{x}_{i} = \\vert g \\rangle_i \\langle r\\vert_i + \\vert r \\rangle_i \\langle g\\vert_i\\), the occupation number operator \\(\\hat{n}_i = \\frac{1}{2} \\left( \\hat{\\sigma}_{i} + \\mathbb{1} \\right) = \\vert r\\rangle_i \\langle r \\vert_i\\) and \\(\\hat{\\sigma}_{i} = \\vert r \\rangle_i \\langle r \\vert_i - \\vert g \\rangle_i \\langle g \\vert_i\\). The experimental settings of a Rydberg atom array are controlled by the detuning from resonance \\(\\delta\\), Rabi frequency \\(\\Omega\\), lattice length scale \\(a\\) and the positions of the atoms \\(\\{\\mathbf{r}_i\\}_i^N\\). From equation (2) above, we obtain a symmetric matrix \\(\\mathbf{V}\\), that encapsulates the relevant information about the lattice geometry, and derive the Rydberg blockade radius \\(R_b\\), within which simultaneous excitations are penalized. Finally, for the purposes of our study, the atom array is considered to be affected by thermal noise, in equilibrium at a temperature \\(T\\). The experimental settings are thus captured by the set of parameters \\(\\mathbf{x} = (\\Omega, \\delta/\\Omega, R_b/a, \\mathbf{V}, \\beta / \\Omega)\\), where \\(\\beta\\) is the inverse temperature.

    "},{"location":"examples/1_Overview/#representation-of-the-quantum-state","title":"Representation of the quantum state","text":"

    Decomposing the joint distribution into a product of conditional distributions in an autoregressive manner,

    \\[ p_{\\theta}(\\boldsymbol{\\sigma}) = \\prod_{i=1}^n p_{\\theta}\\left(\\sigma_i \\mid \\sigma_{i-1}, \\ldots, \\sigma_1\\right). \\]

    where \\(\\theta\\) denotes the set of parameters of the generative model.

    "},{"location":"examples/1_Overview/#the-graph-encoder-decoder-transformer-architecture","title":"The Graph Encoder Decoder Transformer architecture","text":"

    In this tutorial, we will explain the network architecture used in the get_rydberg_graph_encoder_decoder function, which creates a RydbergEncoderDecoder model. This model is designed to process graph-structured data using a combination of Graph Convolutional Networks (GCNs) and the classic Encoder-Decoder architecture as introduced in Vaswani et al..

    "},{"location":"examples/1_Overview/#main-components","title":"Main components","text":"

    The RydbergEncoderDecoder model consists of the following main components:

    Encoder: The encoder processes the input graph data and generates a continuous representation. It consists of multiple EncoderLayer blocks, each containing a multi-head self-attention mechanism and a position-wise feed-forward network, followed by layer normalization and dropout.

    Decoder: The decoder takes the continuous representation generated by the encoder and produces the output predictions. It is composed of multiple DecoderLayer blocks, each containing two multi-head attention mechanisms (self-attention and encoder-decoder attention) and a position-wise feed-forward network, followed by layer normalization and dropout.

    src_embed: This component is responsible for transforming the input graph data into a continuous representation. It uses the GraphEmbedding class, which employs GCNConv layers (or other graph convolution layers, such as GATConv) to process the graph structure. The number of graph layers can be controlled with the num_layers parameter.

    tgt_embed: This is a sequential model that first applies a linear transformation to the target input states and then adds positional encoding to provide information about the sequence order. The positional encoding is applied using the PositionalEncoding class.

    Generator: The generator is a simple linear layer that maps the output of the decoder to the desired output dimension (in this case, 2). It is used for producing the final output predictions.

    In the get_rydberg_graph_encoder_decoder function, the model is created using the provided configuration (config). This configuration contains information about the model's dimensions, number of layers, and other hyperparameters. After initializing the model, the weights of the parameters with more than one dimension are initialized using Xavier uniform initialization.

    Overall, this network architecture combines the power of graph convolutional networks for processing graph-structured data with the sequence-to-sequence learning capabilities of the Encoder-Decoder architecture. This allows the model to effectively learn complex patterns in both the graph structure and the sequence data.

    "},{"location":"examples/1_Overview/#loss-function","title":"Loss function","text":"

    The dataset is composed of \\(N_H\\) Hamiltonians, and we obtain \\(N_s\\) measurement outcomes for each ground state, leading to a training set \\(\\mathcal{D}\\) of size \\(N_HN_s\\). The training objective is the average negative log-likelihood loss,

    \\[ \\mathcal{L}(\\theta) \\approx -\\frac{1}{|\\mathcal{D}|} \\sum_{\\boldsymbol{\\sigma} \\in \\mathcal{D}} \\ln p_{\\theta}(\\boldsymbol{\\sigma}). \\]

    corresponding to maximizing the conditional likelihoods over the observed measurement outcomes.

    "},{"location":"examples/1_Overview/#graph-embedding-in-rydberg-atom-systems","title":"Graph Embedding in Rydberg Atom Systems","text":""},{"location":"examples/2_Dataset/","title":"Dataset","text":"
    %load_ext autoreload\n%autoreload 2\n\nimport os\n\nimport matplotlib.colors as mcolors\nimport matplotlib.pyplot as plt\nimport networkx as nx\nfrom tqdm import tqdm\n\nfrom rydberggpt.data.dataclasses import GridGraph\nfrom rydberggpt.data.graph_structures import get_graph\nfrom rydberggpt.data.rydberg_dataset import get_rydberg_dataloader\nfrom rydberggpt.data.utils_graph import graph_to_dict\nfrom rydberggpt.utils import shift_inputs\n\n\nbase_path = os.path.abspath(\"../\")\n
    n_rows = 4\nn_cols = 4\nnum_atoms = n_rows * n_cols\n\ngraph_config = GridGraph(\n    num_atoms=num_atoms,\n    graph_name=\"grid_graph\",\n    Rb=1.0,\n    delta=1.0,\n    omega=1.0,\n    beta=1.0,\n    n_rows=n_rows,\n    n_cols=n_cols,\n)\n\ngraph = get_graph(graph_config)\ngraph_dict = graph_to_dict(graph)\ngraph_nx = nx.node_link_graph(graph_dict)\n
    adj_matrix = nx.to_numpy_array(graph_nx)\nplt.imshow(adj_matrix, cmap=\"Blues\")\nplt.title(\"Adjacency Matrix\")\nplt.show()\n

    or plot the graph.

    def plot_graph(graph):\n    # Get node positions from the graph\n    pos = nx.get_node_attributes(graph, \"pos\")\n\n    # Extract edge weights for edge coloring\n    edges, weights = zip(*nx.get_edge_attributes(graph, \"weight\").items())\n\n    # Normalize edge weights for better visualization\n    normalized_weights = [w / max(weights) for w in weights]\n\n    # Calculate edge widths proportional to normalized weights\n    edge_widths = [w * 2 for w in normalized_weights]\n\n    # Create a color map for the edges\n    cmap = plt.cm.Blues\n    norm = mcolors.Normalize(vmin=min(normalized_weights), vmax=max(normalized_weights))\n\n    # Plot the graph\n    fig, ax = plt.subplots(figsize=(8, 8))\n    nx.draw(\n        graph,\n        pos,\n        node_color=\"white\",\n        with_labels=True,\n        font_color=\"black\",\n        edge_cmap=cmap,\n        node_size=400,\n        width=edge_widths,\n        alpha=0.5,\n        edgecolors=\"black\",\n        edgelist=edges,\n        edge_color=normalized_weights,\n        verticalalignment=\"center_baseline\",\n        font_size=12,\n    )\n    plt.title(\"Grid Graph\", fontsize=18)\n\n    # Add a color bar\n    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)\n    sm.set_array([])\n    cbar = plt.colorbar(sm, ax=ax)\n    cbar.set_label(\"1/Distance\")\n\n    plt.show()\n
    plot_graph(graph_nx)\n

    Each node contains a node_feature vector encoding omega, delta and beta.

    import warnings\n\nwith warnings.catch_warnings():\n    warnings.simplefilter(\"ignore\")\n\n    batch_size = 128\n    buffer_size = 2\n    num_workers = 0\n\n    data_path = os.path.join(base_path, \"src/rydberggpt/tests/dataset_test/\")\n\n\n    dataloader = get_rydberg_dataloader(\n        batch_size=batch_size,\n        data_path=data_path,\n        buffer_size=buffer_size,\n        num_workers=num_workers,\n    )\n\n\n    counter = 0\n    for batch in dataloader:\n        print(batch.m_onehot.shape)\n        m_shifted_onehot = shift_inputs(batch.m_onehot)\n        print(m_shifted_onehot.shape)\n\n\n        counter += 1\n\n        if counter > 1:\n            break\n
    \ntorch.Size([128, 36, 2])\ntorch.Size([128, 36, 2])\ntorch.Size([128, 36, 2])\ntorch.Size([128, 36, 2])\n\n
    "},{"location":"examples/2_Dataset/#tutorial-dataset","title":"Tutorial: Dataset","text":"

    In this tutorial we discuss how the dataset is structured, and how to load it to train a model. The dataset is hosted through xanadu.ai and can be accessed via ADD LINK.

    "},{"location":"examples/2_Dataset/#structure","title":"Structure","text":"

    The dataset is built up of smaller subdatasets, each for a specific Hamiltonian parameter regime. Each subfolder contains four files, namely: - config.json: contains the configuration of the dataset - dataset.h5: contains the measurements of shape [num_samples, num_atoms] - graph.json: contains the graph of the dataset - properties.json: contains the observables of the dataset such as energy, magnetization, etc.

    "},{"location":"examples/2_Dataset/#the-system-prompt","title":"The system prompt","text":"

    The transformer encoder takes as input a graph structure. Each graph has num_atoms nodes and each node has a node feature vector containing delta, omega and beta.

    Lets generate an example graph and visualize it.

    "},{"location":"examples/2_Dataset/#loading-the-test-dataset","title":"Loading the test dataset","text":"

    We use the datapipes provided via torchdata to load the dataset. During training we sample a list of buffer_size subset datasets from the full dataset, and then sample the training batch from this smaller pool.

    Each batch contains a data structure with 2 elements (see rydberggpt.data.dataclasses). The first element is a pytorch_geometric graph object (batch.graph, based on the batch). Each graph has num_atoms nodes and each node has a node feature vector containing delta, omega and beta. Finally, we need the measurement data. These are one-hot encoded and stored in a tensor of shape [num_samples, num_atoms, 2].

    @dataclass\nclass Batch:\n    graph: Data\n    m_onehot: torch.Tensor\n
    "},{"location":"examples/3_Observables/","title":"Observables","text":"
    import os\n\nimport torch\nfrom rydberggpt.models.rydberg_encoder_decoder import get_rydberg_graph_encoder_decoder\nfrom rydberggpt.models.utils import generate_prompt\nfrom rydberggpt.observables.rydberg_energy import (\n    get_rydberg_energy,\n    get_staggered_magnetization,\n    get_x_magnetization,\n)\nfrom rydberggpt.utils import create_config_from_yaml, load_yaml_file\nfrom rydberggpt.utils_ckpt import get_model_from_ckpt\nfrom torch_geometric.data import Batch\n
    device = \"cpu\"\n\nbase_path = os.path.abspath(\"../\")\nlog_path = os.path.join(base_path, \"models/M_1/\")\n\nyaml_dict = load_yaml_file(log_path, \"hparams.yaml\")\nconfig = create_config_from_yaml(yaml_dict)\n\nmodel = get_model_from_ckpt(\n    log_path, model=get_rydberg_graph_encoder_decoder(config), ckpt=\"best\"\n)\nmodel.to(device=device)\nmodel.eval()  # don't forget to set to eval mode\n
    \nRydbergEncoderDecoder(\n  (encoder): Encoder(\n    (layers): ModuleList(\n      (0): EncoderLayer(\n        (self_attn): MultiheadAttention(\n          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)\n        )\n        (feed_forward): PositionwiseFeedForward(\n          (w_1): Linear(in_features=32, out_features=128, bias=True)\n          (w_2): Linear(in_features=128, out_features=32, bias=True)\n          (dropout): Dropout(p=0.1, inplace=False)\n        )\n        (sublayer): ModuleList(\n          (0-1): 2 x SublayerConnection(\n            (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n  )\n  (decoder): Decoder(\n    (layers): ModuleList(\n      (0-2): 3 x DecoderLayer(\n        (self_attn): MultiheadAttention(\n          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)\n        )\n        (src_attn): MultiheadAttention(\n          (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)\n        )\n        (feed_forward): PositionwiseFeedForward(\n          (w_1): Linear(in_features=32, out_features=128, bias=True)\n          (w_2): Linear(in_features=128, out_features=32, bias=True)\n          (dropout): Dropout(p=0.1, inplace=False)\n        )\n        (sublayer): ModuleList(\n          (0-2): 3 x SublayerConnection(\n            (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n  )\n  (src_embed): GraphEmbedding(\n    (layers): ModuleList(\n      (0): GraphLayer(\n        (graph_layer): GCNConv(4, 64)\n        (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)\n        (dropout): 
Dropout(p=0.1, inplace=False)\n      )\n      (1): GCNConv(64, 32)\n    )\n    (final_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n  )\n  (tgt_embed): Sequential(\n    (0): Linear(in_features=2, out_features=32, bias=True)\n    (1): PositionalEncoding(\n      (dropout): Dropout(p=0.1, inplace=False)\n    )\n  )\n  (generator): Generator(\n    (proj): Linear(in_features=32, out_features=2, bias=True)\n  )\n)\n
    L = 5\ndelta = 1.0\nomega = 1.0\nbeta = 64.0\nRb = 1.15\nnum_samples = 5\n\npyg_graph = generate_prompt(\n    model_config=config,\n    n_rows=L,\n    n_cols=L,\n    delta=delta,\n    omega=omega,\n    beta=beta,\n    Rb=Rb,\n)\n
    # duplicate the prompt for num_samples\ncond = [pyg_graph for _ in range(num_samples)]\ncond = Batch.from_data_list(cond)\n\nsamples = model.get_samples(\n    batch_size=len(cond), cond=cond, num_atoms=L**2, fmt_onehot=False\n)\n
    \nGenerating atom 25/25                                                          \n\n
    energy = get_rydberg_energy(model, samples, cond=pyg_graph, device=device)\nprint(energy.mean() / L**2)\n
    \ntensor(0.0248)\n\n
    staggered_magnetization = get_staggered_magnetization(samples, L, L, device=device)\nprint(staggered_magnetization.mean() / L**2)\n
    \ntensor(0.0208)\n\n
    x_magnetization = get_x_magnetization(model, samples, cond=pyg_graph, device=device)\nprint(x_magnetization.mean() / L**2)\n
    \ntensor(0.7317)\n\n
    "},{"location":"examples/3_Observables/#tutorial-observables","title":"Tutorial: Observables","text":""},{"location":"examples/3_Observables/#background","title":"Background","text":"

    In this tutorial, we are going to load a pretrained model, use it to generate new samples, and calculate relevant observables based on these samples.

    We consider a system of \\(N=L \\times L\\) atoms arranged on a square lattice. The governing Hamiltonian defining the Rydberg atom array interactions has the following form:

    \\[ \\hat{H} = \\sum_{i<j} \\frac{C_6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6} \\hat{n}_i \\hat{n}_j -\\delta \\sum_{i=1}^N \\hat{n}_i - \\frac{\\Omega}{2} \\sum_{i=1}^N \\hat{\\sigma}^x_i. \\quad (1) \\] \\[ C_6 = \\Omega \\left( \\frac{R_b}{a} \\right)^6, \\quad V_{ij} = \\frac{a^6}{\\lVert \\mathbf{r}_i - \\mathbf{r}_j \\rVert^6}, \\quad (2) \\]

    where \\(\\hat{\\sigma}^{x}_{i} = \\vert g \\rangle_i \\langle r\\vert_i + \\vert r \\rangle_i \\langle g\\vert_i\\), the occupation number operator \\(\\hat{n}_i = \\frac{1}{2} \\left( \\hat{\\sigma}_{i} + \\mathbb{1} \\right) = \\vert r\\rangle_i \\langle r \\vert_i\\) and \\(\\hat{\\sigma}_{i} = \\vert r \\rangle_i \\langle r \\vert_i - \\vert g \\rangle_i \\langle g \\vert_i\\). The experimental settings of a Rydberg atom array are controlled by the detuning from resonance \\(\\delta\\), Rabi frequency \\(\\Omega\\), lattice length scale \\(a\\) and the positions of the atoms \\(\\{\\mathbf{r}_i\\}_i^N\\). From equation (2) above, we obtain a symmetric matrix \\(\\mathbf{V}\\), that encapsulates the relevant information about the lattice geometry, and derive the Rydberg blockade radius \\(R_b\\), within which simultaneous excitations are penalized. Finally, for the purposes of our study, the atom array is considered to be affected by thermal noise, in equilibrium at a temperature \\(T\\). The experimental settings are thus captured by the set of parameters \\(\\mathbf{x} = (\\Omega, \\delta/\\Omega, R_b/a, \\mathbf{V}, \\beta / \\Omega)\\), where \\(\\beta\\) is the inverse temperature.

    "},{"location":"examples/3_Observables/#loading-the-model","title":"Loading the model","text":"

    We have three pretrained models trained on three different datasets. Model \\(M_1\\) is trained with data for systems of size \\(L=5,6\\) (models/ds_1); Model \\(M_2\\) utilizes datasets for systems with \\(L=5,6,11,12\\) (models/ds_2); and Model \\(M_3\\) is trained on data covering \\(L=5,6,11,12,15,16\\) (models/ds_3).

    Let's start by loading the pretrained model and setting it into eval mode to ensure that the dropout layers are disabled.

    "},{"location":"examples/3_Observables/#generating-system-prompt-and-samples","title":"Generating system prompt and samples","text":"

    Next, let us define our system prompt \\(\\mathbf{x} = (\\Omega, \\delta/\\Omega, R_b/a, \\mathbf{V}, \\beta / \\Omega)\\). Below is a function generate_prompt that generates the required prompt structure to query the trained model. The prompt is a graph structure capturing the relevant information about the system, such as the lattice geometry, the Rydberg blockade radius, the temperature, and the Rabi frequency. The function generate_samples generates samples from the model given the prompt.

    "},{"location":"examples/3_Observables/#system-prompt","title":"System prompt","text":""},{"location":"examples/3_Observables/#generating-samples","title":"Generating samples","text":"

    The sampling function requires a batch of prompts; therefore, we duplicate our pyg_graph prompts as many times as we want to generate samples. The reasoning behind this is to allow the model to generate samples in parallel for different Hamiltonian parameters. This is especially helpful when training variationally.

    "},{"location":"examples/3_Observables/#observables","title":"Observables","text":"

    Now we are ready to calculate observables based on the samples generated. We consider three observables: the staggered magnetization, the x-magnetization, and the Rydberg energy.

    "},{"location":"examples/3_Observables/#rydberg-energy","title":"Rydberg energy","text":"

    We consider an estimate of the ground state energy \\(\\langle E \\rangle\\), which is defined as

    \\[ \\langle E \\rangle \\approx \\frac{1}{N_s} \\sum_{\\boldsymbol{\\sigma} \\sim p_{\\theta}(\\boldsymbol{\\sigma};\\mathbf{x})} \\frac{\\langle \\boldsymbol{\\sigma}|\\widehat{H}|\\Psi_{\\theta}\\rangle}{\\langle \\boldsymbol{\\sigma}|\\Psi_{\\theta}\\rangle}. \\]

    We provide a function get_rydberg_energy that calculates the Rydberg energy of the samples generated. Note that this function requires a single prompt.

    "},{"location":"examples/3_Observables/#stagger-magnetization","title":"Stagger magnetization","text":"

    The staggered magnetization for the square-lattice Rydberg array is defined in its occupation basis. This quantity is the order parameter for the disorder-to-checkerboard quantum phase transition, and can be calculated simply with

    \\[ \\langle\\hat{\\sigma}^{\\text{stag}}\\rangle \\approx \\frac{1}{N_s} \\sum_{\\boldsymbol{\\sigma} \\sim p_\\theta(\\boldsymbol{\\sigma};\\mathbf{x})} \\left| \\sum_{i=1}^{N} (-1)^i \\frac{n_i(\\boldsymbol{\\sigma}) - 1/2}{N} \\right| , \\]

    where \\(i\\) runs over all \\(N = L \\times L\\) atoms and \\(n_i(\\boldsymbol{\\sigma}) = \\langle \\boldsymbol{\\sigma}| r_i \\rangle\\langle r_i|\\boldsymbol{\\sigma} \\rangle\\) is the occupation number operator acting on atom \\(i\\) in a given configuration \\(\\boldsymbol{\\sigma}\\). Because this observable is diagonal, it can be computed directly from samples inferred from the decoder. The outer sum shows how importance sampling is used to estimate the expectation value over this operator, approximating the probability of a given configuration with the frequency with which it is sampled.

    We provide a function get_staggered_magnetization that calculates the staggered magnetization of the samples generated.

    "},{"location":"examples/3_Observables/#x-magnetization","title":"X-magnetization","text":"

    We consider an off-diagonal observable, where we must make use of the ground state wave function amplitudes of the inferred samples \\(\\Psi(\\boldsymbol{\\sigma}) = \\sqrt{p_{\\theta}(\\boldsymbol{\\sigma})}\\). As an example, we examine the spatially averaged expectation value of \\(\\hat{\\sigma}_x\\), which is defined as

    \\[ \\langle \\hat{\\sigma}^x \\rangle \\approx \\frac{1}{N_s} \\sum_{\\boldsymbol{\\sigma} \\sim p_\\theta(\\boldsymbol{\\sigma};\\mathbf{x})} \\frac{1}{N} \\sum_{\\boldsymbol{\\sigma}' \\in \\mathrm{SSF}(\\boldsymbol{\\sigma})} \\frac{\\Psi_\\theta(\\boldsymbol{\\sigma}')}{\\Psi_\\theta(\\boldsymbol{\\sigma})}, \\]

    where the variable \\(\\left\\{\\boldsymbol{\\sigma'}\\right\\}\\) is the set of configurations that are connected to \\(\\boldsymbol{\\sigma}\\) by a single spin flip (SSF).

    We provide a function get_x_magnetization that calculates the x-magnetization of the samples generated. Note that we do not have to batch our prompt. The x-magnetization is calculated for a single system prompt.

    "},{"location":"reference/data/","title":"Data","text":""},{"location":"reference/data/#rydberggpt.data","title":"rydberggpt.data","text":""},{"location":"reference/data/#rydberggpt.data.dataclasses","title":"dataclasses","text":""},{"location":"reference/data/#rydberggpt.data.dataclasses.BaseGraph","title":"BaseGraph dataclass","text":"

    Bases: ABC

    A base dataclass representing a graph configuration.

    Source code in src\\rydberggpt\\data\\dataclasses.py
    @dataclass\nclass BaseGraph(ABC):\n    \"\"\"A base dataclass representing a graph configuration.\"\"\"\n\n    num_atoms: int\n    graph_name: str\n    Rb: float\n    delta: float\n    omega: float\n    beta: float\n
    "},{"location":"reference/data/#rydberggpt.data.dataclasses.Batch","title":"Batch dataclass","text":"

    A dataclass representing a batch of graphs

    Source code in src\\rydberggpt\\data\\dataclasses.py
    @dataclass\nclass Batch:\n    \"\"\"A dataclass representing a batch of graphs\"\"\"\n\n    graph: Data\n    m_onehot: torch.Tensor\n
    "},{"location":"reference/data/#rydberggpt.data.dataclasses.GridGraph","title":"GridGraph dataclass","text":"

    Bases: BaseGraph

    A dataclass representing the configuration of a grid graph

    Source code in src\\rydberggpt\\data\\dataclasses.py
    @dataclass\nclass GridGraph(BaseGraph):\n    \"\"\"A dataclass representing the configuration of a grid graph\"\"\"\n\n    n_rows: int\n    n_cols: int\n
    "},{"location":"reference/data/#rydberggpt.data.dataclasses.custom_collate","title":"custom_collate(batch: List[Batch]) -> Batch","text":"

    Custom collate function to handle Batch objects when creating a DataLoader.

    Parameters:

    Name Type Description Default batch List[Batch]

    A list of Batch objects to be collated.

    required

    Returns:

    Type Description Batch

    A single Batch object containing the collated data.

    Source code in src\\rydberggpt\\data\\dataclasses.py
    def custom_collate(batch: List[Batch]) -> Batch:\n    \"\"\"\n    Custom collate function to handle Batch objects when creating a DataLoader.\n\n    Args:\n        batch (List[Batch]): A list of Batch objects to be collated.\n\n    Returns:\n        (Batch): A single Batch object containing the collated data.\n    \"\"\"\n\n    graph_batch = PyGBatch.from_data_list([b.graph for b in batch])\n\n    # NOTE: The graphs, and measurement data are not of the same size. To ensure\n    # a padded tensor suitable for the neural network, we use the to_dense_batch function. This ensures that our\n    # data is padded with zeros.\n    # see: https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/utils/to_dense_batch.html\n\n    m_onehot = to_dense_batch(\n        torch.cat([b.m_onehot for b in batch], axis=-2),\n        batch=graph_batch.batch,\n    )[0].to(torch.float32)\n\n    return Batch(graph=graph_batch, m_onehot=m_onehot)\n
    "},{"location":"reference/data/#rydberggpt.data.graph_structures","title":"graph_structures","text":""},{"location":"reference/data/#rydberggpt.data.graph_structures.generate_grid_graph","title":"generate_grid_graph(n_rows: int, n_cols: int) -> nx.Graph","text":"

    Generates a fully connected grid graph with weights based on the reciprocal of Euclidean distance. Coordinates are in units of lattice constant a.

    Parameters:

    Name Type Description Default n_rows int

    The number of rows in the grid.

    required n_cols int

    The number of columns in the grid.

    required

    Returns:

    Type Description Graph

    The generated grid graph with node positions and edge weights.

    Source code in src\\rydberggpt\\data\\graph_structures.py
    def generate_grid_graph(n_rows: int, n_cols: int) -> nx.Graph:\n    \"\"\"\n    Generates a fully connected grid graph with weights based on the reciprocal of Euclidean distance. Coordinates is in units of lattice constant a.\n\n    Args:\n        n_rows (int): The number of rows in the grid.\n        n_cols (int): The number of columns in the grid.\n\n    Returns:\n        (nx.Graph): The generated grid graph with node positions and edge weights.\n    \"\"\"\n\n    # Create an empty graph\n    graph = nx.Graph()\n\n    # Add nodes with positions as attributes\n    for i in range(n_rows):\n        for j in range(n_cols):\n            node_id = i * n_cols + j\n            graph.add_node(node_id, pos=(i, j))\n\n    # Add fully connected edges with weights as the reciprocal of Euclidean distance\n    for node1 in graph.nodes:\n        pos1 = np.array(graph.nodes[node1][\"pos\"])\n        for node2 in graph.nodes:\n            if node1 != node2:\n                pos2 = np.array(graph.nodes[node2][\"pos\"])\n                interaction_strength = np.linalg.norm(pos1 - pos2) ** (-6)\n                graph.add_edge(node1, node2, weight=interaction_strength)\n\n    return graph\n
    "},{"location":"reference/data/#rydberggpt.data.graph_structures.get_graph","title":"get_graph(config: BaseGraph) -> nx.Graph","text":"

    Generates a graph based on the given configuration.

    Parameters:

    Name Type Description Default config BaseGraph

    The graph configuration, an instance of a subclass of the BaseGraph dataclass.

    required

    Returns:

    Type Description Graph

    The generated graph based on the configuration.

    Raises:

    Type Description NotImplementedError

    If the graph name provided in the configuration is not implemented.

    Source code in src\\rydberggpt\\data\\graph_structures.py
    def get_graph(config: BaseGraph) -> nx.Graph:\n    \"\"\"\n    Generates a graph based on the given configuration.\n\n    Args:\n        config (BaseGraph): The graph configuration, an instance of a subclass of the BaseGraph dataclass.\n\n    Returns:\n        (nx.Graph): The generated graph based on the configuration.\n\n    Raises:\n        NotImplementedError: If the graph name provided in the configuration is not implemented.\n    \"\"\"\n    if config.graph_name == \"grid_graph\":\n        graph = generate_grid_graph(config.n_rows, config.n_cols)\n\n    else:\n        raise NotImplementedError(f\"Graph name {config.graph_name} not implemented.\")\n\n    return graph\n
    "},{"location":"reference/data/#rydberggpt.data.rydberg_dataset","title":"rydberg_dataset","text":""},{"location":"reference/data/#rydberggpt.data.rydberg_dataset.build_datapipes","title":"build_datapipes(root_dir: str, batch_size: int, buffer_size: int)","text":"

    Builds a data pipeline for processing files from a specified directory.

    This function initializes a FileLister to list files from the specified directory and its subdirectories. It then demultiplexes the files into three separate data pipes for processing configuration, dataset, and graph files respectively. The configuration and graph files are opened, parsed as JSON, and processed using a custom selection function. The data pipes are then zipped together, shuffled, filtered, and buffered into batches using a custom collate function.

    Parameters:

    Name Type Description Default root_dir str

    The root directory from which to list files.

    required batch_size int

    The number of samples per batch.

    required buffer_size int

    The buffer size to use when buffering data into batches.

    required

    Returns:

    Type Description IterDataPipe

    The final data pipe containing batches of processed data.

    Source code in src\\rydberggpt\\data\\rydberg_dataset.py
    def build_datapipes(root_dir: str, batch_size: int, buffer_size: int):\n    \"\"\"\n    Builds a data pipeline for processing files from a specified directory.\n\n    This function initializes a FileLister to list files from the specified\n    directory and its subdirectories. It then demultiplexes the files into\n    three separate data pipes for processing configuration, dataset, and\n    graph files respectively. The configuration and graph files are opened,\n    parsed as JSON, and processed using a custom selection function.\n    The data pipes are then zipped together, shuffled, filtered, and buffered\n    into batches using a custom collate function.\n\n    Args:\n        root_dir (str): The root directory from which to list files.\n        batch_size (int): The number of samples per batch.\n        buffer_size (int): The buffer size to use when buffering data into batches.\n\n    Returns:\n        (IterDataPipe): The final data pipe containing batches of processed data.\n    \"\"\"\n    file_lister = FileLister([root_dir], recursive=True)\n    config_dp, dataset_dp, graph_dp = file_lister.demux(\n        3,\n        classify_file_fn,\n        drop_none=True,\n        buffer_size=-1,\n    )\n    config_dp = config_dp.open_files().parse_json_files()\n    graph_dp = graph_dp.open_files().parse_json_files()\n    datapipe = config_dp.zip(dataset_dp).zip(graph_dp).map(map_fn)\n    datapipe = datapipe.shuffle()\n    datapipe = Buffer(source_datapipe=datapipe, buffer_size=buffer_size)\n    datapipe = datapipe.batch(batch_size).collate(custom_collate).sharding_filter()\n\n    return datapipe\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph","title":"utils_graph","text":""},{"location":"reference/data/#rydberggpt.data.utils_graph.batch_pyg_data","title":"batch_pyg_data(data_list: List[Data]) -> Data","text":"

    Batch a list of PyTorch Geometric Data objects into a single Data object.

    Parameters:

    Name Type Description Default data_list List[Data]

    List of PyTorch Geometric Data objects.

    required

    Returns:

    Type Description Data

    A single batched Data object containing all input Data objects.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def batch_pyg_data(data_list: List[Data]) -> Data:\n    \"\"\"\n    Batch a list of PyTorch Geometric Data objects into a single Data object.\n\n    Args:\n        data_list: List of PyTorch Geometric Data objects.\n\n    Returns:\n        (Data): A single batched Data object containing all input Data objects.\n    \"\"\"\n    batched_data = PyGBatch.from_data_list(data_list)\n    return batched_data\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.dict_to_graph","title":"dict_to_graph(graph_dict: Dict) -> nx.Graph","text":"

    Create a NetworkX graph from a dictionary.

    Parameters:

    Name Type Description Default graph_dict Dict

    Dictionary representing a NetworkX graph.

    required

    Returns:

    Type Description Graph

    NetworkX graph object.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def dict_to_graph(graph_dict: Dict) -> nx.Graph:\n    \"\"\"\n    Create a NetworkX graph from a dictionary.\n\n    Args:\n        graph_dict: Dictionary representing a NetworkX graph.\n\n    Returns:\n        (nx.Graph): NetworkX graph object.\n    \"\"\"\n    graph = nx.node_link_graph(graph_dict)\n    return graph\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.graph_to_dict","title":"graph_to_dict(graph: nx.Graph) -> Dict","text":"

    Convert a NetworkX graph to a dictionary.

    Parameters:

    Name Type Description Default graph Graph

    NetworkX graph object.

    required

    Returns:

    Type Description Dict

    A dictionary representing the NetworkX graph.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def graph_to_dict(graph: nx.Graph) -> Dict:\n    \"\"\"\n    Convert a NetworkX graph to a dictionary.\n\n    Args:\n        graph: NetworkX graph object.\n\n    Returns:\n        (Dict): A dictionary representing the NetworkX graph.\n    \"\"\"\n    graph_dict = nx.node_link_data(graph)\n    return graph_dict\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.networkx_to_pyg_data","title":"networkx_to_pyg_data(graph: nx.Graph, node_features: torch.Tensor) -> Data","text":"

    Convert a NetworkX graph to a PyTorch Geometric Data object.

    Parameters:

    Name Type Description Default graph Graph

    NetworkX graph object.

    required

    Returns:

    Type Description Data

    A PyTorch Geometric Data object representing the input graph.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def networkx_to_pyg_data(graph: nx.Graph, node_features: torch.Tensor) -> Data:\n    \"\"\"\n    Convert a NetworkX graph to a PyTorch Geometric Data object.\n\n    Args:\n        graph: NetworkX graph object.\n\n    Returns:\n        (Data): A PyTorch Geometric Data object representing the input graph.\n    \"\"\"\n\n    x = node_features.repeat(len(graph.nodes()), 1)\n\n    # Convert the edge list to a PyTorch Geometric edge_index tensor\n    edge_index = torch.tensor(list(graph.edges), dtype=torch.long).t().contiguous()\n\n    # Get edge weights from the graph\n    edge_weight = torch.tensor(\n        list(nx.get_edge_attributes(graph, \"weight\").values()), dtype=torch.float\n    )\n\n    # Create a Data object\n    data = Data(x=x, edge_index=edge_index, edge_attr=edge_weight)\n\n    return data\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.pyg_graph_data","title":"pyg_graph_data(config, graph_data)","text":"

    Convert a graph in node-link format to a PyG Data object.

    Parameters:

    Name Type Description Default graph_data Dict

    The graph in node-link format.

    required config_data Dict

    The configuration data for the graph.

    required

    Returns:

    Type Description Data

    The graph as a PyG Data object.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def pyg_graph_data(config, graph_data):\n    \"\"\"\n    Convert a graph in node-link format to a PyG Data object.\n\n    Args:\n        graph_data (Dict): The graph in node-link format.\n        config_data (Dict): The configuration data for the graph.\n\n    Returns:\n        (Data): The graph as a PyG Data object.\n\n    \"\"\"\n    node_features = torch.tensor(\n        [\n            config[\"delta\"],\n            config[\"omega\"],\n            config[\"beta\"],\n            config[\"Rb\"],\n        ],\n        dtype=torch.float32,\n    )\n    graph_nx = nx.node_link_graph(graph_data)\n    pyg_graph = networkx_to_pyg_data(graph_nx, node_features)\n    return pyg_graph\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.read_graph_from_json","title":"read_graph_from_json(file_path: str) -> Dict","text":"

    Read a JSON file and convert it to a dictionary representing a NetworkX graph.

    Parameters:

    Name Type Description Default file_path str

    Path to the JSON file to read.

    required

    Returns:

    Type Description Dict

    A dictionary representing a NetworkX graph.

    Source code in src\\rydberggpt\\data\\utils_graph.py
    def read_graph_from_json(file_path: str) -> Dict:\n    \"\"\"\n    Read a JSON file and convert it to a dictionary representing a NetworkX graph.\n\n    Args:\n        file_path: Path to the JSON file to read.\n\n    Returns:\n        (Dict): A dictionary representing a NetworkX graph.\n    \"\"\"\n    with open(file_path, \"r\") as f:\n        graph_dict = json.load(f)\n    return graph_dict\n
    "},{"location":"reference/data/#rydberggpt.data.utils_graph.save_graph_to_json","title":"save_graph_to_json(graph_dict: Dict, file_path: str) -> None","text":"

    Save a dictionary representing a NetworkX graph to a JSON file.

    Parameters:

    Name Type Description Default graph_dict Dict

    Dictionary representing a NetworkX graph.

    required file_path str

    Path to the JSON file to save.

    required Source code in src\\rydberggpt\\data\\utils_graph.py
    def save_graph_to_json(graph_dict: Dict, file_path: str) -> None:\n    \"\"\"\n    Save a dictionary representing a NetworkX graph to a JSON file.\n\n    Args:\n        graph_dict: Dictionary representing a NetworkX graph.\n        file_path: Path to the JSON file to save.\n    \"\"\"\n    with open(file_path, \"w\") as f:\n        json.dump(graph_dict, f)\n
    "},{"location":"reference/observables/","title":"Observables","text":""},{"location":"reference/observables/#rydberggpt.observables","title":"rydberggpt.observables","text":""},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy","title":"rydberg_energy","text":""},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy.get_rydberg_energy","title":"get_rydberg_energy(model: RydbergEncoderDecoder, samples: torch.Tensor, cond: torch.Tensor, device: torch.device, undo_sample_path=None, undo_sample_path_args=None) -> torch.Tensor","text":"

    Calculates energy of the model based on the Hamiltonian defined by cond (graph).

    Parameters:

    Name Type Description Default model RydbergEncoderDecoder

    Model to estimate energy on.

    required samples Tensor

    Samples drawn from model based on cond.

    required cond Tensor

    A tensor containing the input condition.

    required device str

    The device on which to allocate the tensors. Defaults to \"cpu\".

    required undo_sample_path Tensor

    Map that undoes the sample path of the model to match the labelling of in the graph.

    None undo_sample_path_args tuple

    Additional arguments for undo_sample_path.

    None

    Returns:

    Type Description Tensor

    A tensor containing the estimated energy of each sample alongside its decomposition into terms.

    Source code in src\\rydberggpt\\observables\\rydberg_energy.py
    @torch.no_grad()\ndef get_rydberg_energy(\n    model: RydbergEncoderDecoder,\n    samples: torch.Tensor,  # dtype=torch.int64\n    cond: torch.Tensor,  # dtype=torch.float32\n    device: torch.device,\n    undo_sample_path=None,\n    undo_sample_path_args=None,\n) -> torch.Tensor:\n    \"\"\"\n    Calculates energy of the model based on the Hamiltonian defined by cond (graph).\n\n    Args:\n        model (RydbergEncoderDecoder): Model to estimate energy on.\n        samples (torch.Tensor): Samples drawn from model based on cond.\n        cond (torch.Tensor): A tensor containing the input condition.\n        device (str, optional): The device on which to allocate the tensors. Defaults to \"cpu\".\n        undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n        undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated energy of each sample alongside its decomposition into terms.\n    \"\"\"\n\n    model = model.to(device)\n    samples = samples.to(device)\n    cond = cond.to(device)\n\n    delta = cond.x[:, 0]  # Detuning coeffs\n    omega = cond.x[0, 1]  # Rabi frequency\n    # beta = cond.x[0, 2]  # Inverse temperature\n    Rb = cond.x[0, 3]  # Rydberg Blockade radius\n\n    # Estimate interaction/Rydberg blockade term\n    if undo_sample_path is not None:\n        unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n    else:\n        unpathed_samples = samples\n\n    interaction = (\n        (\n            unpathed_samples[..., cond.edge_index].prod(dim=-2)\n            * cond.edge_attr[None, ...]\n        ).sum(dim=-1)\n        * Rb**6\n        * omega\n    )\n\n    detuning = (delta * unpathed_samples).sum(1)  # sum over sequence length\n\n    x_magnetization = get_x_magnetization(model, samples, cond, device)\n\n    offdiag_energy = 0.5 * omega * x_magnetization\n    diag_energy = 
interaction - detuning\n    energy = diag_energy - offdiag_energy\n\n    return torch.stack(\n        [\n            energy,\n            interaction,\n            detuning,\n            diag_energy,\n            offdiag_energy,\n        ]\n    ).T\n
    "},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy.get_staggered_magnetization","title":"get_staggered_magnetization(samples: torch.Tensor, Lx: int, Ly: int, device: torch.device, undo_sample_path=None, undo_sample_path_args=None)","text":"

    Calculates staggered magnetization of the model.

    Parameters:

    Name Type Description Default samples Tensor

    Samples drawn from model.

    required Lx int

    Linear size in the x dimension

    required Ly int

    Linear size in the y dimension

    required device str

    The device on which to allocate the tensors. Defaults to \"cpu\".

    required undo_sample_path Tensor

    Map that undoes the sample path of the model to match the labelling of in the graph.

    None undo_sample_path_args tuple

    Additional arguments for undo_sample_path.

    None

    Returns:

    Type Description Tensor

    A tensor containing the estimated staggered magnetization of each sample.

    Source code in src\\rydberggpt\\observables\\rydberg_energy.py
    @torch.no_grad()\ndef get_staggered_magnetization(\n    samples: torch.Tensor,\n    Lx: int,\n    Ly: int,\n    device: torch.device,\n    undo_sample_path=None,\n    undo_sample_path_args=None,\n):\n    \"\"\"\n    Calculates staggered magnetization of the model.\n\n    Args:\n        samples (torch.Tensor): Samples drawn from model.\n        Lx (int): Linear size in the x dimension\n        Ly (int): Linear size in the y dimension\n        device (str, optional): The device on which to allocate the tensors. Defaults to \"cpu\".\n        undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n        undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated staggered magnetization of each sample.\n    \"\"\"\n\n    if undo_sample_path is not None:\n        unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n    else:\n        unpathed_samples = samples\n\n    unpathed_samples = unpathed_samples.reshape(-1, Ly, Lx)\n\n    unpathed_sigmas = 2 * unpathed_samples - 1\n\n    idcs = np.indices((Ly, Lx))\n    checkerboard = 2 * (idcs.sum(0) % 2) - 1\n    checkerboard = torch.from_numpy(checkerboard).to(device=device)\n\n    staggered_magnetization = torch.abs((checkerboard * unpathed_sigmas).mean((-1, -2)))\n\n    return staggered_magnetization\n
    "},{"location":"reference/observables/#rydberggpt.observables.rydberg_energy.get_x_magnetization","title":"get_x_magnetization(model: RydbergEncoderDecoder, samples: torch.Tensor, cond: torch.Tensor, device: torch.device)","text":"

    Calculates x magnetization of the model.

    Parameters:

    Name Type Description Default model RydbergEncoderDecoder

    Model to estimate energy on.

    required samples Tensor

    Samples drawn from model based on cond.

    required cond Tensor

    A tensor containing the input condition.

    required device str

    The device on which to allocate the tensors. Defaults to \"cpu\".

    required

    Returns:

    Type Description Tensor

    A tensor containing the estimated x magnetization of each sample.

    Source code in src\\rydberggpt\\observables\\rydberg_energy.py
    @torch.no_grad()\ndef get_x_magnetization(\n    model: RydbergEncoderDecoder,\n    samples: torch.Tensor,  # dtype=torch.int64\n    cond: torch.Tensor,  # dtype=torch.float32\n    device: torch.device,\n):\n    \"\"\"\n    Calculates x magnetization of the model.\n\n    Args:\n        model (RydbergEncoderDecoder): Model to estimate energy on.\n        samples (torch.Tensor): Samples drawn from model based on cond.\n        cond (torch.Tensor): A tensor containing the input condition.\n        device (str, optional): The device on which to allocate the tensors. Defaults to \"cpu\".\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated x magnetization of each sample.\n    \"\"\"\n\n    model = model.to(device)\n    samples = samples.to(device)\n    cond = cond.to(device)\n\n    # Create all possible states achievable by a single spin flip\n    flipped = (samples[:, None, :] + torch.eye(samples.shape[-1])[None, ...]) % 2\n    flipped = flipped.reshape(-1, samples.shape[-1])\n\n    # Get propabilities of sampled states and the single spin flipped states\n    sample_log_probs = model.get_log_probs(to_one_hot(samples, 2), cond)\n    flipped_log_probs = model.get_log_probs(to_one_hot(flipped, 2), cond)\n    flipped_log_probs = flipped_log_probs.reshape(-1, samples.shape[-1])\n\n    # Calculate ratio of the wavefunction for the sampled and flipped states\n    log_psi_ratio = 0.5 * (flipped_log_probs - sample_log_probs[:, None])\n    psi_ratio = torch.exp(log_psi_ratio)\n\n    x_magnetization = psi_ratio.sum(-1)\n    return x_magnetization\n
    "},{"location":"reference/training/","title":"Training","text":""},{"location":"reference/training/#rydberggpt.training","title":"rydberggpt.training","text":""},{"location":"reference/training/#rydberggpt.training.callbacks","title":"callbacks","text":""},{"location":"reference/training/#rydberggpt.training.callbacks.module_info_callback","title":"module_info_callback","text":""},{"location":"reference/training/#rydberggpt.training.callbacks.module_info_callback.ModelInfoCallback","title":"ModelInfoCallback","text":"

    Bases: Callback

    A custom PyTorch Lightning callback that logs model information at the start of training.

    This callback extracts and logs information about the model's structure, total parameters, and total trainable parameters at the beginning of the training process. The information is saved as a YAML file in the logger's log directory.

    Source code in src\\rydberggpt\\training\\callbacks\\module_info_callback.py
    class ModelInfoCallback(Callback):\n    \"\"\"\n    A custom PyTorch Lightning callback that logs model information at the start of training.\n\n    This callback extracts and logs information about the model's structure, total parameters, and\n    total trainable parameters at the beginning of the training process. The information is saved\n    as a YAML file in the logger's log directory.\n    \"\"\"\n\n    def on_train_start(self, trainer, pl_module) -> None:\n        \"\"\"\n        Run the callback at the beginning of training.\n\n        Args:\n            trainer (pytorch_lightning.Trainer): The PyTorch Lightning trainer instance.\n            pl_module (pytorch_lightning.LightningModule): The PyTorch Lightning module instance.\n        \"\"\"\n        # This will run at the beginning of training\n        log_path = trainer.logger.log_dir\n\n        summary = ModelSummary(pl_module, max_depth=1)\n        total_parameters = summary.total_parameters\n        total_trainable_parameters = summary.trainable_parameters\n\n        summary_dict = extract_model_info(pl_module.model)\n        summary_dict[\"total_parameters\"] = total_parameters\n        summary_dict[\"total_trainable_parameters\"] = total_trainable_parameters\n\n        # Save the summary dictionary to a YAML file\n        with open(f\"{log_path}/model_info.yaml\", \"w\") as file:\n            yaml.dump(summary_dict, file)\n
    "},{"location":"reference/training/#rydberggpt.training.callbacks.module_info_callback.ModelInfoCallback.on_train_start","title":"on_train_start(trainer, pl_module) -> None","text":"

    Run the callback at the beginning of training.

    Parameters:

    Name Type Description Default trainer Trainer

    The PyTorch Lightning trainer instance.

    required pl_module LightningModule

    The PyTorch Lightning module instance.

    required Source code in src\\rydberggpt\\training\\callbacks\\module_info_callback.py
    def on_train_start(self, trainer, pl_module) -> None:\n    \"\"\"\n    Run the callback at the beginning of training.\n\n    Args:\n        trainer (pytorch_lightning.Trainer): The PyTorch Lightning trainer instance.\n        pl_module (pytorch_lightning.LightningModule): The PyTorch Lightning module instance.\n    \"\"\"\n    # This will run at the beginning of training\n    log_path = trainer.logger.log_dir\n\n    summary = ModelSummary(pl_module, max_depth=1)\n    total_parameters = summary.total_parameters\n    total_trainable_parameters = summary.trainable_parameters\n\n    summary_dict = extract_model_info(pl_module.model)\n    summary_dict[\"total_parameters\"] = total_parameters\n    summary_dict[\"total_trainable_parameters\"] = total_trainable_parameters\n\n    # Save the summary dictionary to a YAML file\n    with open(f\"{log_path}/model_info.yaml\", \"w\") as file:\n        yaml.dump(summary_dict, file)\n
    "},{"location":"reference/training/#rydberggpt.training.logger","title":"logger","text":""},{"location":"reference/training/#rydberggpt.training.logger.setup_logger","title":"setup_logger(log_path)","text":"

    Set up the logger to write logs to a file and the console.

    Source code in src\\rydberggpt\\training\\logger.py
    def setup_logger(log_path):\n    \"\"\"\n    Set up the logger to write logs to a file and the console.\n    \"\"\"\n    # Ensure the log_path exists\n    if not os.path.exists(log_path):\n        os.makedirs(log_path)\n\n    logger = logging.getLogger()\n    logger.setLevel(logging.INFO)\n\n    # Console Handler\n    ch = logging.StreamHandler()\n    ch.setLevel(logging.INFO)\n    formatter = logging.Formatter(\"%(asctime)s - %(levelname)s - %(message)s\")\n    ch.setFormatter(formatter)\n    logger.addHandler(ch)\n\n    # File Handler\n    fh = logging.FileHandler(os.path.join(log_path, \"training.log\"))\n    fh.setLevel(logging.INFO)\n    fh.setFormatter(formatter)\n    logger.addHandler(fh)\n\n    return logger\n
    "},{"location":"reference/training/#rydberggpt.training.loss","title":"loss","text":""},{"location":"reference/training/#rydberggpt.training.loss.NLLLoss","title":"NLLLoss","text":"

    Bases: LightningModule

    This class implements the Negative Log Likelihood (NLL) loss function as a PyTorch Lightning module.

    The NLL loss measures the performance of a classification model where the prediction input is a probability distribution over classes. It is useful in training models for multi-class classification problems.

    The loss is calculated by taking the negative log of the probabilities predicted by the model for the true class labels.

    Methods:

    Name Description forward

    Computes the NLL loss based on the conditional log probabilities and the target values.

    Examples:

    >>> nll_loss = NLLLoss()\n>>> loss = nll_loss(cond_log_probs, tgt)\n
    Source code in src\\rydberggpt\\training\\loss.py
    class NLLLoss(pl.LightningModule):\n    \"\"\"\n    This class implements the Negative Log Likelihood (NLL) loss function as a PyTorch Lightning module.\n\n    The NLL loss measures the performance of a classification model where the prediction input is a probability\n    distribution over classes. It is useful in training models for multi-class classification problems.\n\n    The loss is calculated by taking the negative log of the probabilities predicted by the model for the true class labels.\n\n    Methods:\n        forward:\n            Computes the NLL loss based on the conditional log probabilities and the target values.\n\n    Examples:\n        >>> nll_loss = NLLLoss()\n        >>> loss = nll_loss(cond_log_probs, tgt)\n    \"\"\"\n\n    def __init__(self):\n        super(NLLLoss, self).__init__()\n\n    def forward(self, cond_log_probs: Tensor, tgt: Tensor) -> Tensor:\n        \"\"\"\n        Computes the NLL loss based on the conditional log probabilities and the target values.\n\n        Args:\n            cond_log_probs (Tensor): The conditional log probabilities predicted by the model.\n            tgt (Tensor): The target values.\n\n        Returns:\n            (Tensor): The computed NLL loss.\n        \"\"\"\n        num_atoms = tgt.shape[-2] - (tgt == 0.0).all(-1).sum(-1)\n        log_probs = (cond_log_probs * tgt).sum(dim=(-2, -1))\n        loss = -torch.mean(log_probs / num_atoms)\n        return loss\n
    "},{"location":"reference/training/#rydberggpt.training.loss.NLLLoss.forward","title":"forward(cond_log_probs: Tensor, tgt: Tensor) -> Tensor","text":"

    Computes the NLL loss based on the conditional log probabilities and the target values.

    Parameters:

    Name Type Description Default cond_log_probs Tensor

    The conditional log probabilities predicted by the model.

    required tgt Tensor

    The target values.

    required

    Returns:

    Type Description Tensor

    The computed NLL loss.

    Source code in src\\rydberggpt\\training\\loss.py
    def forward(self, cond_log_probs: Tensor, tgt: Tensor) -> Tensor:\n    \"\"\"\n    Computes the NLL loss based on the conditional log probabilities and the target values.\n\n    Args:\n        cond_log_probs (Tensor): The conditional log probabilities predicted by the model.\n        tgt (Tensor): The target values.\n\n    Returns:\n        (Tensor): The computed NLL loss.\n    \"\"\"\n    num_atoms = tgt.shape[-2] - (tgt == 0.0).all(-1).sum(-1)\n    log_probs = (cond_log_probs * tgt).sum(dim=(-2, -1))\n    loss = -torch.mean(log_probs / num_atoms)\n    return loss\n
    "},{"location":"reference/training/#rydberggpt.training.train","title":"train","text":""},{"location":"reference/training/#rydberggpt.training.trainer","title":"trainer","text":""},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer","title":"RydbergGPTTrainer","text":"

    Bases: LightningModule

    A custom PyTorch Lightning module for training a Rydberg GPT model.

    Parameters:

    Name Type Description Default model Module

    The model to be trained.

    required config dataclass

    A dataclass containing the model's configuration parameters.

    required logger TensorBoardLogger

    A TensorBoard logger instance for logging training progress.

    None example_input_array tensor

    An example input tensor used for generating the model summary.

    None Source code in src\\rydberggpt\\training\\trainer.py
    class RydbergGPTTrainer(pl.LightningModule):\n    \"\"\"\n    A custom PyTorch Lightning module for training a Rydberg GPT model.\n\n    Args:\n        model (nn.Module): The model to be trained.\n        config (dataclass): A dataclass containing the model's configuration parameters.\n        logger (TensorBoardLogger): A TensorBoard logger instance for logging training progress.\n        example_input_array (torch.tensor, optional): An example input tensor used for\n            generating the model summary.\n    \"\"\"\n\n    def __init__(\n        self,\n        model: nn.Module,\n        config: dataclass,\n        logger: TensorBoardLogger = None,\n        example_input_array: torch.tensor = None,\n    ) -> None:\n        super().__init__()\n        self.config = config\n        self.save_hyperparameters(asdict(config))\n        self.model = model\n        self.criterion = getattr(loss, self.config.criterion)()\n        self.example_input_array = example_input_array\n\n    def forward(self, m_onehot: torch.Tensor, cond: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Perform a forward pass through the model.\n\n        Args:\n            m_onehot (torch.Tensor): One-hot encoded measurements tensor.\n            cond (torch.Tensor): Conditioning tensor. 
# TODO prompt\n\n        Returns:\n            (torch.Tensor): Conditional log probabilities tensor.\n        \"\"\"\n        out = self.model.forward(m_onehot, cond)\n        cond_log_probs = self.model.generator(out)\n        return cond_log_probs\n\n    def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:\n        \"\"\"\n        Perform a single training step.\n\n        Args:\n            batch (pl.Batch): A batch of data during training.\n            batch_idx (int): The index of the current batch.\n\n        Returns:\n            (torch.Tensor): The training loss for the current batch.\n        \"\"\"\n        m_shifted_onehot = shift_inputs(batch.m_onehot)\n\n        cond_log_probs = self.forward(m_shifted_onehot, batch.graph)\n        loss = self.criterion(cond_log_probs, batch.m_onehot)\n        self.log(\"train_loss\", loss, sync_dist=True)\n        return loss\n\n    def configure_optimizers(self) -> Dict[str, Union[optim.Optimizer, Dict]]:\n        \"\"\"\n        Configures the optimizer and learning rate scheduler for the RydbergGPTTrainer.\n\n        Returns:\n            (Dict[str, Union[optim.Optimizer, Dict]]): A dictionary containing the optimizer and lr_scheduler configurations.\n        \"\"\"\n        optimizer_class = getattr(optim, self.config.optimizer)\n        optimizer = optimizer_class(\n            self.model.parameters(), lr=self.config.learning_rate\n        )\n\n        # Add learning rate scheduler\n        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(\n            optimizer,\n            T_0=self.config.t_initial,  # initial number of epochs in a period\n            T_mult=self.config.t_mult,  # factor to increase the period length after each restart\n            eta_min=self.config.eta_min,  # minimum learning rate\n        )\n\n        # Return both the optimizer and the scheduler\n        return {\n            \"optimizer\": optimizer,\n            \"lr_scheduler\": {\n                
\"scheduler\": scheduler,\n                \"interval\": \"epoch\",\n                \"monitor\": \"train_loss\",\n            },\n        }\n
    "},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer.configure_optimizers","title":"configure_optimizers() -> Dict[str, Union[optim.Optimizer, Dict]]","text":"

    Configures the optimizer and learning rate scheduler for the RydbergGPTTrainer.

    Returns:

    Type Description Dict[str, Union[Optimizer, Dict]]

    A dictionary containing the optimizer and lr_scheduler configurations.

    Source code in src\\rydberggpt\\training\\trainer.py
    def configure_optimizers(self) -> Dict[str, Union[optim.Optimizer, Dict]]:\n    \"\"\"\n    Configures the optimizer and learning rate scheduler for the RydbergGPTTrainer.\n\n    Returns:\n        (Dict[str, Union[optim.Optimizer, Dict]]): A dictionary containing the optimizer and lr_scheduler configurations.\n    \"\"\"\n    optimizer_class = getattr(optim, self.config.optimizer)\n    optimizer = optimizer_class(\n        self.model.parameters(), lr=self.config.learning_rate\n    )\n\n    # Add learning rate scheduler\n    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(\n        optimizer,\n        T_0=self.config.t_initial,  # initial number of epochs in a period\n        T_mult=self.config.t_mult,  # factor to increase the period length after each restart\n        eta_min=self.config.eta_min,  # minimum learning rate\n    )\n\n    # Return both the optimizer and the scheduler\n    return {\n        \"optimizer\": optimizer,\n        \"lr_scheduler\": {\n            \"scheduler\": scheduler,\n            \"interval\": \"epoch\",\n            \"monitor\": \"train_loss\",\n        },\n    }\n
    "},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer.forward","title":"forward(m_onehot: torch.Tensor, cond: torch.Tensor) -> torch.Tensor","text":"

    Perform a forward pass through the model.

    Parameters:

    Name Type Description Default m_onehot Tensor

    One-hot encoded measurements tensor.

    required cond Tensor

    Conditioning tensor. # TODO prompt

    required

    Returns:

    Type Description Tensor

    Conditional log probabilities tensor.

    Source code in src\\rydberggpt\\training\\trainer.py
    def forward(self, m_onehot: torch.Tensor, cond: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Perform a forward pass through the model.\n\n    Args:\n        m_onehot (torch.Tensor): One-hot encoded measurements tensor.\n        cond (torch.Tensor): Conditioning tensor. # TODO prompt\n\n    Returns:\n        (torch.Tensor): Conditional log probabilities tensor.\n    \"\"\"\n    out = self.model.forward(m_onehot, cond)\n    cond_log_probs = self.model.generator(out)\n    return cond_log_probs\n
    "},{"location":"reference/training/#rydberggpt.training.trainer.RydbergGPTTrainer.training_step","title":"training_step(batch: torch.Tensor, batch_idx: int) -> torch.Tensor","text":"

    Perform a single training step.

    Parameters:

    Name Type Description Default batch Batch

    A batch of data during training.

    required batch_idx int

    The index of the current batch.

    required

    Returns:

    Type Description Tensor

    The training loss for the current batch.

    Source code in src\\rydberggpt\\training\\trainer.py
    def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:\n    \"\"\"\n    Perform a single training step.\n\n    Args:\n        batch (pl.Batch): A batch of data during training.\n        batch_idx (int): The index of the current batch.\n\n    Returns:\n        (torch.Tensor): The training loss for the current batch.\n    \"\"\"\n    m_shifted_onehot = shift_inputs(batch.m_onehot)\n\n    cond_log_probs = self.forward(m_shifted_onehot, batch.graph)\n    loss = self.criterion(cond_log_probs, batch.m_onehot)\n    self.log(\"train_loss\", loss, sync_dist=True)\n    return loss\n
    "},{"location":"reference/training/#rydberggpt.training.utils","title":"utils","text":""},{"location":"reference/training/#rydberggpt.training.utils.set_example_input_array","title":"set_example_input_array(train_loader: DataLoader) -> Tuple[Any, Any]","text":"

    Get an example input array from the train loader.

    Parameters:

    Name Type Description Default train_loader DataLoader

    The DataLoader instance for the training data.

    required

    Returns:

    Type Description Tuple[Any, Any]

    A tuple containing m_onehot and graph from the example batch.

    Source code in src\\rydberggpt\\training\\utils.py
    def set_example_input_array(train_loader: DataLoader) -> Tuple[Any, Any]:\n    \"\"\"\n    Get an example input array from the train loader.\n\n    Args:\n        train_loader (DataLoader): The DataLoader instance for the training data.\n\n    Returns:\n        (Tuple[Any, Any]): A tuple containing m_onehot and graph from the example batch.\n    \"\"\"\n    logging.info(\"Setting example input array...\")\n    example_batch = next(iter(train_loader))\n    return example_batch.m_onehot, example_batch.graph\n
    "},{"location":"reference/utils/","title":"Utilities","text":""},{"location":"reference/utils/#rydberggpt.utils","title":"rydberggpt.utils","text":""},{"location":"reference/utils/#rydberggpt.utils.create_config_from_yaml","title":"create_config_from_yaml(yaml_content: Dict) -> dataclass","text":"

    Create a dataclass config object from the given YAML content.

    Parameters:

    Name Type Description Default yaml_content Dict

    A dictionary containing the YAML content.

    required

    Returns:

    Type Description dataclass

    A dataclass object representing the config.

    Source code in src\\rydberggpt\\utils.py
    def create_config_from_yaml(yaml_content: Dict) -> dataclass:\n    \"\"\"\n    Create a dataclass config object from the given YAML content.\n\n    Args:\n        yaml_content (Dict): A dictionary containing the YAML content.\n\n    Returns:\n        (dataclass): A dataclass object representing the config.\n    \"\"\"\n    flattened_config = flatten_yaml(yaml_content)\n    Config = create_dataclass_from_dict(\"Config\", flattened_config)\n    return Config(**flattened_config)\n
    "},{"location":"reference/utils/#rydberggpt.utils.create_dataclass_from_dict","title":"create_dataclass_from_dict(name: str, data: Dict[str, Any]) -> Type","text":"

    Create a dataclass from a dictionary.

    Parameters:

    Name Type Description Default name str

    The name of the dataclass.

    required data Dict[str, Any]

    A dictionary containing the dataclass fields and their values.

    required

    Returns:

    Type Description Type

    A new dataclass with the specified name and fields.

    Source code in src\\rydberggpt\\utils.py
    def create_dataclass_from_dict(name: str, data: Dict[str, Any]) -> Type:\n    \"\"\"\n    Create a dataclass from a dictionary.\n\n    Args:\n        name (str): The name of the dataclass.\n        data (Dict[str, Any]): A dictionary containing the dataclass fields and their values.\n\n    Returns:\n        (Type): A new dataclass with the specified name and fields.\n    \"\"\"\n    fields = [(key, type(value)) for key, value in data.items()]\n    return make_dataclass(name, fields)\n
    "},{"location":"reference/utils/#rydberggpt.utils.flatten_yaml","title":"flatten_yaml(yaml_config: Dict[str, Dict[str, Any]]) -> Dict[str, Any]","text":"

    Flatten a nested YAML configuration dictionary.

    Parameters:

    Name Type Description Default yaml_config Dict[str, Dict[str, Any]]

    A nested dictionary representing the YAML configuration.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A flattened dictionary with the nested structure removed.

    Source code in src\\rydberggpt\\utils.py
    def flatten_yaml(yaml_config: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:\n    \"\"\"\n    Flatten a nested YAML configuration dictionary.\n\n    Args:\n        yaml_config (Dict[str, Dict[str, Any]]): A nested dictionary representing the YAML configuration.\n\n    Returns:\n        Dict[str, Any]: A flattened dictionary with the nested structure removed.\n    \"\"\"\n    flattened_config = {}\n    for section, section_values in yaml_config.items():\n        if isinstance(section_values, dict):\n            for key, value in section_values.items():\n                flattened_config[f\"{key}\"] = value\n        else:\n            flattened_config[section] = section_values\n    return flattened_config\n
    "},{"location":"reference/utils/#rydberggpt.utils.load_config_file","title":"load_config_file(checkpoint_path: str, config_file: str = 'hparams.yaml') -> str","text":"

    Load the configuration file associated with a given checkpoint.

    Parameters:

    Name Type Description Default checkpoint_path str

    The path to the checkpoint file.

    required config_file str

    The name of the configuration file, defaults to \"hparams.yaml\".

    'hparams.yaml'

    Returns:

    Type Description str

    The path to the configuration file.

    Raises:

    Type Description FileNotFoundError

    If the configuration file is not found in the specified directory.

    Source code in src\\rydberggpt\\utils.py
    def load_config_file(checkpoint_path: str, config_file: str = \"hparams.yaml\") -> str:\n    \"\"\"\n    Load the configuration file associated with a given checkpoint.\n\n    Args:\n        checkpoint_path (str): The path to the checkpoint file.\n        config_file (str, optional): The name of the configuration file, defaults to \"hparams.yaml\".\n\n    Returns:\n        (str): The path to the configuration file.\n\n    Raises:\n        FileNotFoundError: If the configuration file is not found in the specified directory.\n    \"\"\"\n    config_dir = os.path.dirname(os.path.dirname(checkpoint_path))\n\n    if not os.path.exists(os.path.join(config_dir, config_file)):\n        raise FileNotFoundError(f\"No config file found in {config_dir}\")\n\n    return os.path.join(config_dir, config_file)\n
    "},{"location":"reference/utils/#rydberggpt.utils.load_yaml_file","title":"load_yaml_file(path: str, yaml_file_name: str) -> Dict[str, Any]","text":"

    Load the content of a YAML file given its path and file name.

    Parameters:

    Name Type Description Default path str

    The path to the directory containing the YAML file.

    required yaml_file_name str

    The name of the YAML file.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the YAML content.

    Source code in src\\rydberggpt\\utils.py
    def load_yaml_file(path: str, yaml_file_name: str) -> Dict[str, Any]:\n    \"\"\"\n    Load the content of a YAML file given its path and file name.\n\n    Args:\n        path (str): The path to the directory containing the YAML file.\n        yaml_file_name (str): The name of the YAML file.\n\n    Returns:\n        Dict[str, Any]: A dictionary containing the YAML content.\n    \"\"\"\n    if not yaml_file_name.endswith(\".yaml\"):\n        yaml_file_name += \".yaml\"\n\n    yaml_path = os.path.join(path, yaml_file_name)\n    with open(yaml_path, \"r\") as file:\n        yaml_content = yaml.safe_load(file)\n    return yaml_content\n
    "},{"location":"reference/utils/#rydberggpt.utils.save_to_yaml","title":"save_to_yaml(data: Dict[str, Any], filename: str) -> None","text":"

    Save a dictionary to a file in YAML format.

    Parameters:

    Name Type Description Default data Dict[str, Any]

    The dictionary to be saved.

    required filename str

    The path to the file where the dictionary will be saved.

    required Source code in src\\rydberggpt\\utils.py
    def save_to_yaml(data: Dict[str, Any], filename: str) -> None:\n    \"\"\"\n    Save a dictionary to a file in YAML format.\n\n    Args:\n        data (Dict[str, Any]): The dictionary to be saved.\n        filename (str): The path to the file where the dictionary will be saved.\n    \"\"\"\n    with open(filename, \"w\") as file:\n        yaml.dump(data, file)\n
    "},{"location":"reference/utils/#rydberggpt.utils.shift_inputs","title":"shift_inputs(tensor: torch.Tensor) -> torch.Tensor","text":"

    Shifts the second dimension (S) of the input tensor by one position to the right and pads the beginning with zeros.

    Parameters:

    Name Type Description Default tensor Tensor

    The input tensor of shape [B, S, D].

    required

    Returns:

    Type Description Tensor

    The resulting tensor after the shift and pad operation.

    Source code in src\\rydberggpt\\utils.py
    def shift_inputs(tensor: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Shifts the second dimension (S) of the input tensor by one position to the right\n    and pads the beginning with zeros.\n\n    Args:\n        tensor (torch.Tensor): The input tensor of shape [B, S, D].\n\n    Returns:\n        (torch.Tensor): The resulting tensor after the shift and pad operation.\n    \"\"\"\n    B, _, D = tensor.size()\n    zero_padding = torch.zeros((B, 1, D), device=tensor.device, dtype=tensor.dtype)\n    shifted_tensor = torch.cat((zero_padding, tensor[:, :-1, :]), dim=1)\n    return shifted_tensor\n
    "},{"location":"reference/utils/#rydberggpt.utils.time_and_log","title":"time_and_log(fn: Callable[..., Any]) -> Callable[..., Any]","text":"

    Decorator function to measure the time taken by a function to execute and log it.

    Parameters:

    Name Type Description Default fn Callable[..., Any]

    The function to be wrapped.

    required

    Returns:

    Type Description Callable[..., Any]

    Callable[..., Any]: The wrapped function.

    Usage
    @time_and_log\ndef my_function(arg1, arg2):\n    # function logic here\n
    Source code in src\\rydberggpt\\utils.py
    def time_and_log(fn: Callable[..., Any]) -> Callable[..., Any]:\n    \"\"\"\n    Decorator function to measure the time taken by a function to execute and log it.\n\n    Args:\n        fn (Callable[..., Any]): The function to be wrapped.\n\n    Returns:\n        Callable[..., Any]: The wrapped function.\n\n    Usage:\n        ```py\n        @time_and_log\n        def my_function(arg1, arg2):\n            # function logic here\n        ```\n    \"\"\"\n\n    def wrapped(*args: Any, **kwargs: Any) -> Any:\n        start_time = time.time()\n        result = fn(*args, **kwargs)\n        elapsed_time = time.time() - start_time\n\n        # Convert elapsed time to HH:MM:SS format\n        formatted_time = str(timedelta(seconds=elapsed_time))\n\n        logging.info(f\"{fn.__name__} took {formatted_time} to run.\")\n        return result\n\n    return wrapped\n
    "},{"location":"reference/utils/#rydberggpt.utils.to_one_hot","title":"to_one_hot(data: Union[torch.Tensor, List[int], Tuple[int]], num_classes: int) -> torch.Tensor","text":"

    Converts the input data into one-hot representation.

    Parameters:

    Name Type Description Default data Union[Tensor, List[int], Tuple[int]]

    Input data to be converted into one-hot. It can be a 1D tensor, list or tuple of integers.

    required num_classes int

    Number of classes in the one-hot representation.

    required

    Returns:

    Name Type Description data Tensor

    The one-hot representation of the input data.

    Source code in src\\rydberggpt\\utils.py
    def to_one_hot(\n    data: Union[torch.Tensor, List[int], Tuple[int]], num_classes: int\n) -> torch.Tensor:\n    \"\"\"\n    Converts the input data into one-hot representation.\n\n    Args:\n        data: Input data to be converted into one-hot. It can be a 1D tensor, list or tuple of integers.\n        num_classes: Number of classes in the one-hot representation.\n\n    Returns:\n        data (torch.Tensor): The one-hot representation of the input data.\n    \"\"\"\n\n    if isinstance(data, (list, tuple)):\n        data = torch.tensor(data, dtype=torch.int64)\n    elif not isinstance(data, torch.Tensor):\n        raise TypeError(\"Input data must be a tensor, list or tuple of integers.\")\n\n    data = nn.functional.one_hot(data.long(), num_classes)\n\n    return data.to(torch.float)\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt","title":"rydberggpt.utils_ckpt","text":""},{"location":"reference/utils/#rydberggpt.utils_ckpt.find_best_ckpt","title":"find_best_ckpt(log_dir: str) -> Optional[str]","text":"

    Find the best checkpoint file (with the lowest training loss) in the specified log directory.

    Parameters:

    Name Type Description Default log_dir str

    The path to the log directory containing the checkpoint files.

    required

    Returns:

    Type Description str

    The path to the checkpoint file with the lowest training loss.

    Source code in src\\rydberggpt\\utils_ckpt.py
    def find_best_ckpt(log_dir: str) -> Optional[str]:\n    \"\"\"\n    Find the best checkpoint file (with the lowest training loss) in the specified log directory.\n\n    Args:\n        log_dir (str): The path to the log directory containing the checkpoint files.\n\n    Returns:\n        (str): The path to the checkpoint file with the lowest training loss.\n    \"\"\"\n    log_dir = os.path.join(log_dir, \"checkpoints\")\n    ckpt_files = [file for file in os.listdir(log_dir) if file.endswith(\".ckpt\")]\n\n    if not ckpt_files:\n        raise FileNotFoundError(f\"No checkpoint files found in {log_dir}\")\n\n    # Extract the training loss from the ckpt filenames\n    ckpt_losses = []\n    for file in ckpt_files:\n        match = re.search(r\"train_loss=(\\d+\\.\\d+)\", file)\n        if match:\n            ckpt_losses.append(float(match.group(1)))\n        else:\n            ckpt_losses.append(float(\"inf\"))\n\n    # Find the index of the ckpt with the lowest training loss\n    best_ckpt_index = ckpt_losses.index(min(ckpt_losses))\n    best_ckpt = ckpt_files[best_ckpt_index]\n\n    return os.path.join(log_dir, best_ckpt)\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt.find_latest_ckpt","title":"find_latest_ckpt(log_dir: str)","text":"

    Find the latest checkpoint file (based on modification time) in the specified log directory.

    Parameters:

    Name Type Description Default log_dir str

    The path to the log directory containing the checkpoint files.

    required

    Returns:

    Type Description str

    The path to the latest checkpoint file.

    Source code in src\\rydberggpt\\utils_ckpt.py
    def find_latest_ckpt(log_dir: str):\n    \"\"\"\n    Find the latest checkpoint file (based on modification time) in the specified log directory.\n\n    Args:\n        log_dir (str): The path to the log directory containing the checkpoint files.\n\n    Returns:\n        (str): The path to the latest checkpoint file.\n    \"\"\"\n    log_dir = os.path.join(log_dir, \"checkpoints\")\n    ckpt_files = [file for file in os.listdir(log_dir) if file.endswith(\".ckpt\")]\n\n    if not ckpt_files:\n        raise FileNotFoundError(f\"No checkpoint files found in {log_dir}\")\n\n    ckpt_files.sort(key=lambda x: os.path.getmtime(os.path.join(log_dir, x)))\n    latest_ckpt = ckpt_files[-1]\n    return os.path.join(log_dir, latest_ckpt)\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt.get_ckpt_path","title":"get_ckpt_path(from_ckpt: int, log_dir: str = 'logs/lightning_logs') -> str","text":"

    Get the checkpoint path from a specified checkpoint version number.

    Parameters:

    Name Type Description Default from_ckpt int

    The version number of the checkpoint.

    required log_dir str

    The root directory where checkpoints are stored. Defaults to \"logs/lightning_logs\".

    'logs/lightning_logs'

    Returns:

    Type Description str

    The path to the specified checkpoint version directory.

    Raises:

    Type Description FileNotFoundError

    If no checkpoint is found in the specified directory.

    Source code in src\\rydberggpt\\utils_ckpt.py
    def get_ckpt_path(from_ckpt: int, log_dir: str = \"logs/lightning_logs\") -> str:\n    \"\"\"\n    Get the checkpoint path from a specified checkpoint version number.\n\n    Args:\n        from_ckpt (int): The version number of the checkpoint.\n        log_dir (str, optional): The root directory where checkpoints are stored.\n                                Defaults to \"logs/lightning_logs\".\n\n    Returns:\n        (str): The path to the specified checkpoint version directory.\n\n    Raises:\n        FileNotFoundError: If no checkpoint is found in the specified directory.\n    \"\"\"\n    log_dir = os.path.join(log_dir, f\"version_{from_ckpt}\")\n\n    if log_dir is None:\n        raise FileNotFoundError(f\"No checkpoint found in {log_dir}\")\n\n    return log_dir\n
    "},{"location":"reference/utils/#rydberggpt.utils_ckpt.get_model_from_ckpt","title":"get_model_from_ckpt(log_path: str, model: nn.Module, ckpt: str = 'best', trainer: pl.LightningModule = RydbergGPTTrainer) -> nn.Module","text":"

    Load a model from a specified checkpoint file in the log directory.

    Parameters:

    Name Type Description Default log_path str

    The path to the log directory containing the checkpoint files.

    required model Module

    The model class to load.

    required ckpt str

    The checkpoint to load. Must be either \"best\" or \"latest\". Defaults to \"best\".

    'best' trainer LightningModule

    The trainer class to use for loading the model. Defaults to RydbergGPTTrainer.

    RydbergGPTTrainer

    Returns:

    Type Description Module

    The loaded model.

    Raises:

    Type Description ValueError

    If the value of ckpt is not \"best\" or \"latest\".

    Source code in src\\rydberggpt\\utils_ckpt.py
    def get_model_from_ckpt(\n    log_path: str,\n    model: nn.Module,\n    ckpt: str = \"best\",\n    trainer: pl.LightningModule = RydbergGPTTrainer,\n) -> nn.Module:\n    \"\"\"\n    Load a model from a specified checkpoint file in the log directory.\n\n    Args:\n        log_path (str): The path to the log directory containing the checkpoint files.\n        model (nn.Module): The model class to load.\n        ckpt (str, optional): The checkpoint to load. Must be either \"best\" or \"latest\". Defaults to \"best\".\n        trainer (pl.LightningModule, optional): The trainer class to use for loading the model. Defaults to RydbergGPTTrainer.\n\n    Returns:\n        (nn.Module): The loaded model.\n\n    Raises:\n        ValueError: If the value of ckpt is not \"best\" or \"latest\".\n    \"\"\"\n    if ckpt == \"best\":\n        ckpt_path = find_best_ckpt(log_path)\n    elif ckpt == \"last\":\n        ckpt_path = find_latest_ckpt(log_path)\n    else:\n        raise ValueError(f\"ckpt must be 'best' or 'latest', not {ckpt}\")\n\n    yaml_dict = load_yaml_file(log_path, \"hparams.yaml\")\n    config = create_config_from_yaml(yaml_dict)\n\n    rydberg_gpt_trainer = trainer.load_from_checkpoint(\n        ckpt_path,\n        model=model,\n        config=config,\n        logger=None,\n        example_input_array=None,\n    )\n    return rydberg_gpt_trainer.model\n
    "},{"location":"reference/models/","title":"Models","text":""},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction","title":"rydberggpt.models.rydberg_decoder_wavefunction","text":""},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction","title":"RydbergDecoderWavefunction","text":"

    Bases: RydbergEncoderDecoder

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    class RydbergDecoderWavefunction(RydbergEncoderDecoder):\n    def __init__(\n        self,\n        cond: Batch,\n        encoder: Encoder,\n        decoder: Decoder,\n        src_embed: nn.Module,\n        tgt_embed: nn.Module,\n        generator: Generator,\n        config=None,\n    ):\n        super().__init__(\n            encoder.eval(),\n            decoder,\n            src_embed.eval(),\n            tgt_embed,\n            generator,\n            config,\n        )\n\n        if hasattr(cond, \"num_graphs\") and cond.num_graphs > 1:\n            raise ValueError(\"cond should represent a single Hamiltonian/graph\")\n\n        self.N = cond.num_nodes\n        self.cond = cond\n\n        for p in self.encoder.parameters():\n            p.requires_grad_(False)\n        for p in self.src_embed.parameters():\n            p.requires_grad_(False)\n\n        memory, batch_mask = self.encode(cond)\n        self.register_buffer(\"memory\", memory)\n        self.register_buffer(\"batch_mask\", batch_mask)\n        pass\n\n    def forward(self, tgt: torch.Tensor) -> torch.Tensor:\n        memory = self.memory.repeat([*tgt.shape[:-2], 1, 1])\n        batch_mask = self.batch_mask.repeat([*tgt.shape[:-2], 1])\n\n        return self.decode(tgt, memory, batch_mask)\n\n    @classmethod\n    def from_rydberg_encoder_decoder(cls, cond: Batch, model: RydbergEncoderDecoder):\n        \"\"\"\n        Create RydbergDecoderWavefunction from a RydbergEncodeDecoder model and a Hamiltonian/graph.\n\n        Args:\n            cond (Batch): The Hamiltonian/graph.\n            model (RydbergEncoderDecoder): The model used to generate a RydbergDecoderWavefunction.\n\n        Returns:\n            (RydbergDecoderWavefunction): The wavefunction taken from a trained RydergEncoderDecoder model for the groundstate of the Hamiltonian/graph specified by cond.\n\n        \"\"\"\n        return cls(\n            cond,\n            model.encoder,\n            model.decoder,\n            
model.src_embed,\n            model.tgt_embed,\n            model.generator,\n            model.config,\n        )\n\n    pass\n\n    def get_log_probs(self, x: torch.Tensor):\n        \"\"\"\n        Compute the log probabilities of a given input tensor.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n\n        Returns:\n            (torch.Tensor): The log probabilities.\n        \"\"\"\n\n        assert (\n            len(x.shape) == 3 and x.shape[-1] == 2\n        ), \"The input must be one hot encoded\"\n\n        y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n        y = y.to(x)  # Match dtype and device\n        y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n        y = self.forward(y)  # EncoderDecoder forward pass\n        y = self.generator(y)  # Conditional log probs\n\n        y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n        return y\n\n    def get_samples(\n        self,\n        batch_size: int,\n        fmt_onehot: bool = True,\n        requires_grad: bool = False,\n        verbose: bool = True,\n    ):\n        \"\"\"\n        Generate samples using the forward pass and sampling from the conditional probabilities.\n        The samples can be returned either in one-hot encoding format or in label format,\n        according to the `fmt_onehot` argument.\n\n        Args:\n            batch_size (int): The number of samples to generate.\n            fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n              in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n            requires_grad (bool, optional): A flag to determine if grad is needed when sampling. Defaults to False,\n            verbose (bool, optional): A flag indicating whether to print sampling progress. Defaults to True,\n\n        Returns:\n            (torch.Tensor): A tensor containing the generated samples. 
The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. The samples are padded according to the number of nodes in each graph within `cond`.\n        \"\"\"\n        if verbose:\n            print(\"\")\n\n        num_atoms = self.N\n\n        m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n        for i in range(num_atoms):\n            if verbose:\n                print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n                sys.stdout.flush()\n\n            y = self.forward(m)  # EncoderDecoder forward pass\n            y = self.generator(y)  # Conditional log probs\n            y = y[:, -1, :]  # Next conditional log probs\n\n            if requires_grad:\n                y = F.gumbel_softmax(logits=y, tau=1, hard=True)[..., None, :]\n\n            else:\n                y = torch.distributions.Categorical(logits=y).sample(\n                    [\n                        1,\n                    ]\n                )  # Sample from next conditional log probs\n                y = y.reshape(y.shape[1], 1)  # Reshape\n                y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n            m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n        if fmt_onehot:\n            m = m[:, 1:, :]  # Remove initial token\n        else:\n            m = m[:, 1:, -1]  # Remove initial token and put into label format\n\n        if verbose:\n            print(\"\")\n        return m\n\n    def get_x_magnetization(\n        self,\n        samples: torch.Tensor,  # dtype=torch.int64\n    ):\n        \"\"\"\n        Calculates x magnetization of the model.\n\n        Args:\n            samples (torch.Tensor): Samples drawn from model based on cond.\n\n        Returns:\n            (torch.Tensor): A tensor containing the estimated x magnetization of each sample.\n        \"\"\"\n\n        # Create all possible states achievable by a 
single spin flip\n        flipped = (samples[:, None, :] + torch.eye(samples.shape[-1])[None, ...]) % 2\n        flipped = flipped.reshape(-1, samples.shape[-1])\n\n        # Get propabilities of sampled states and the single spin flipped states\n        sample_log_probs = self.get_log_probs(to_one_hot(samples, 2))\n        flipped_log_probs = self.get_log_probs(to_one_hot(flipped, 2))\n        flipped_log_probs = flipped_log_probs.reshape(-1, samples.shape[-1])\n\n        # Calculate ratio of the wavefunction for the sampled and flipped states\n        log_psi_ratio = 0.5 * (flipped_log_probs - sample_log_probs[:, None])\n        psi_ratio = torch.exp(log_psi_ratio)\n\n        x_magnetization = psi_ratio.sum(-1)\n        return x_magnetization\n\n    def get_rydberg_energy(\n        self,\n        samples: torch.Tensor,  # dtype=torch.int64\n        undo_sample_path=None,\n        undo_sample_path_args=None,\n    ) -> torch.Tensor:\n        \"\"\"\n        Calculates energy of the model based on the Hamiltonian defined by cond (graph).\n\n        Args:\n            samples (torch.Tensor): Samples drawn from model based on cond.\n           undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n           undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n        Returns:\n            (torch.Tensor): A tensor containing the estimated energy of each sample alongside its decomposition into terms.\n        \"\"\"\n\n        samples = samples\n        cond = self.cond\n\n        delta = cond.x[:, 0]  # Detuning coeffs\n        omega = cond.x[0, 1]  # Rabi frequency\n        # beta = cond.x[0, 2]  # Inverse Temperature\n        Rb = cond.x[0, 3]  # Rydberg Blockade radius\n\n        # Estimate interaction/Rydberg blockade term\n        if undo_sample_path is not None:\n            unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n        else:\n            
unpathed_samples = samples\n\n        interaction = (\n            (\n                unpathed_samples[..., cond.edge_index].prod(dim=-2)\n                * cond.edge_attr[None, ...]\n            ).sum(dim=-1)\n            * Rb**6\n            * omega\n        )\n\n        # Estimate detuning term\n        detuning = (delta * unpathed_samples).sum(-1)  # sum over sequence length\n\n        # Estimate sigma_x\n        x_magnetization = self.get_x_magnetization(samples)\n        offdiag_energy = 0.5 * omega * x_magnetization\n\n        # Diagonal part of energy\n        diag_energy = interaction - detuning\n\n        energy = diag_energy - offdiag_energy  # Energy estimate\n\n        return torch.stack(\n            [\n                energy,\n                interaction,\n                detuning,\n                diag_energy,\n                offdiag_energy,\n            ]\n        ).T\n\n    def variational_loss(\n        self, batch_size: int, undo_sample_path, undo_sample_path_args\n    ):\n        samples = self.get_samples(\n            batch_size=batch_size, fmt_onehot=False, requires_grad=True, verbose=False\n        )\n\n        N = self.N\n        omega = self.cond.x[0, 1]\n\n        energy = self.get_rydberg_energy(\n            samples=samples,\n            undo_sample_path=undo_sample_path,\n            undo_sample_path_args=undo_sample_path_args,\n        )[..., 0].mean() / (N * omega)\n\n        return energy\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.from_rydberg_encoder_decoder","title":"from_rydberg_encoder_decoder(cond: Batch, model: RydbergEncoderDecoder) classmethod","text":"

    Create RydbergDecoderWavefunction from a RydbergEncoderDecoder model and a Hamiltonian/graph.

    Parameters:

    Name Type Description Default cond Batch

    The Hamiltonian/graph.

    required model RydbergEncoderDecoder

    The model used to generate a RydbergDecoderWavefunction.

    required

    Returns:

    Type Description RydbergDecoderWavefunction

    The wavefunction taken from a trained RydbergEncoderDecoder model for the groundstate of the Hamiltonian/graph specified by cond.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    @classmethod\ndef from_rydberg_encoder_decoder(cls, cond: Batch, model: RydbergEncoderDecoder):\n    \"\"\"\n    Create RydbergDecoderWavefunction from a RydbergEncodeDecoder model and a Hamiltonian/graph.\n\n    Args:\n        cond (Batch): The Hamiltonian/graph.\n        model (RydbergEncoderDecoder): The model used to generate a RydbergDecoderWavefunction.\n\n    Returns:\n        (RydbergDecoderWavefunction): The wavefunction taken from a trained RydergEncoderDecoder model for the groundstate of the Hamiltonian/graph specified by cond.\n\n    \"\"\"\n    return cls(\n        cond,\n        model.encoder,\n        model.decoder,\n        model.src_embed,\n        model.tgt_embed,\n        model.generator,\n        model.config,\n    )\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_log_probs","title":"get_log_probs(x: torch.Tensor)","text":"

    Compute the log probabilities of a given input tensor.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required

    Returns:

    Type Description Tensor

    The log probabilities.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_log_probs(self, x: torch.Tensor):\n    \"\"\"\n    Compute the log probabilities of a given input tensor.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n\n    Returns:\n        (torch.Tensor): The log probabilities.\n    \"\"\"\n\n    assert (\n        len(x.shape) == 3 and x.shape[-1] == 2\n    ), \"The input must be one hot encoded\"\n\n    y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n    y = y.to(x)  # Match dtype and device\n    y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n    y = self.forward(y)  # EncoderDecoder forward pass\n    y = self.generator(y)  # Conditional log probs\n\n    y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n    return y\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_rydberg_energy","title":"get_rydberg_energy(samples: torch.Tensor, undo_sample_path=None, undo_sample_path_args=None) -> torch.Tensor","text":"

    Calculates energy of the model based on the Hamiltonian defined by cond (graph).

    Parameters:

    Name Type Description Default samples Tensor

    Samples drawn from model based on cond.

    required

    undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling in the graph. undo_sample_path_args (tuple): Additional arguments for undo_sample_path.

    Returns:

    Type Description Tensor

    A tensor containing the estimated energy of each sample alongside its decomposition into terms.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_rydberg_energy(\n    self,\n    samples: torch.Tensor,  # dtype=torch.int64\n    undo_sample_path=None,\n    undo_sample_path_args=None,\n) -> torch.Tensor:\n    \"\"\"\n    Calculates energy of the model based on the Hamiltonian defined by cond (graph).\n\n    Args:\n        samples (torch.Tensor): Samples drawn from model based on cond.\n       undo_sample_path (torch.Tensor): Map that undoes the sample path of the model to match the labelling of in the graph.\n       undo_sample_path_args (tuple): Additional arguments for undo_sample_path.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated energy of each sample alongside its decomposition into terms.\n    \"\"\"\n\n    samples = samples\n    cond = self.cond\n\n    delta = cond.x[:, 0]  # Detuning coeffs\n    omega = cond.x[0, 1]  # Rabi frequency\n    # beta = cond.x[0, 2]  # Inverse Temperature\n    Rb = cond.x[0, 3]  # Rydberg Blockade radius\n\n    # Estimate interaction/Rydberg blockade term\n    if undo_sample_path is not None:\n        unpathed_samples = undo_sample_path(samples, *undo_sample_path_args)\n    else:\n        unpathed_samples = samples\n\n    interaction = (\n        (\n            unpathed_samples[..., cond.edge_index].prod(dim=-2)\n            * cond.edge_attr[None, ...]\n        ).sum(dim=-1)\n        * Rb**6\n        * omega\n    )\n\n    # Estimate detuning term\n    detuning = (delta * unpathed_samples).sum(-1)  # sum over sequence length\n\n    # Estimate sigma_x\n    x_magnetization = self.get_x_magnetization(samples)\n    offdiag_energy = 0.5 * omega * x_magnetization\n\n    # Diagonal part of energy\n    diag_energy = interaction - detuning\n\n    energy = diag_energy - offdiag_energy  # Energy estimate\n\n    return torch.stack(\n        [\n            energy,\n            interaction,\n            detuning,\n            diag_energy,\n            offdiag_energy,\n        ]\n    ).T\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_samples","title":"get_samples(batch_size: int, fmt_onehot: bool = True, requires_grad: bool = False, verbose: bool = True)","text":"

    Generate samples using the forward pass and sampling from the conditional probabilities. The samples can be returned either in one-hot encoding format or in label format, according to the fmt_onehot argument.

    Parameters:

    Name Type Description Default batch_size int

    The number of samples to generate.

    required fmt_onehot bool

    A flag to indicate whether to return the samples in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.

    True requires_grad bool

    A flag to determine if grad is needed when sampling. Defaults to False.

    False verbose bool

    A flag indicating whether to print sampling progress. Defaults to True.

    True

    Returns:

    Type Description Tensor

    A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. The samples are padded according to the number of nodes in each graph within cond.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_samples(\n    self,\n    batch_size: int,\n    fmt_onehot: bool = True,\n    requires_grad: bool = False,\n    verbose: bool = True,\n):\n    \"\"\"\n    Generate samples using the forward pass and sampling from the conditional probabilities.\n    The samples can be returned either in one-hot encoding format or in label format,\n    according to the `fmt_onehot` argument.\n\n    Args:\n        batch_size (int): The number of samples to generate.\n        fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n          in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n        requires_grad (bool, optional): A flag to determine if grad is needed when sampling. Defaults to False,\n        verbose (bool, optional): A flag indicating whether to print sampling progress. Defaults to True,\n\n    Returns:\n        (torch.Tensor): A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. 
The samples are padded according to the number of nodes in each graph within `cond`.\n    \"\"\"\n    if verbose:\n        print(\"\")\n\n    num_atoms = self.N\n\n    m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n    for i in range(num_atoms):\n        if verbose:\n            print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n            sys.stdout.flush()\n\n        y = self.forward(m)  # EncoderDecoder forward pass\n        y = self.generator(y)  # Conditional log probs\n        y = y[:, -1, :]  # Next conditional log probs\n\n        if requires_grad:\n            y = F.gumbel_softmax(logits=y, tau=1, hard=True)[..., None, :]\n\n        else:\n            y = torch.distributions.Categorical(logits=y).sample(\n                [\n                    1,\n                ]\n            )  # Sample from next conditional log probs\n            y = y.reshape(y.shape[1], 1)  # Reshape\n            y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n        m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n    if fmt_onehot:\n        m = m[:, 1:, :]  # Remove initial token\n    else:\n        m = m[:, 1:, -1]  # Remove initial token and put into label format\n\n    if verbose:\n        print(\"\")\n    return m\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_decoder_wavefunction.RydbergDecoderWavefunction.get_x_magnetization","title":"get_x_magnetization(samples: torch.Tensor)","text":"

    Calculates x magnetization of the model.

    Parameters:

    Name Type Description Default samples Tensor

    Samples drawn from model based on cond.

    required

    Returns:

    Type Description Tensor

    A tensor containing the estimated x magnetization of each sample.

    Source code in src\\rydberggpt\\models\\rydberg_decoder_wavefunction.py
    def get_x_magnetization(\n    self,\n    samples: torch.Tensor,  # dtype=torch.int64\n):\n    \"\"\"\n    Calculates x magnetization of the model.\n\n    Args:\n        samples (torch.Tensor): Samples drawn from model based on cond.\n\n    Returns:\n        (torch.Tensor): A tensor containing the estimated x magnetization of each sample.\n    \"\"\"\n\n    # Create all possible states achievable by a single spin flip\n    flipped = (samples[:, None, :] + torch.eye(samples.shape[-1])[None, ...]) % 2\n    flipped = flipped.reshape(-1, samples.shape[-1])\n\n    # Get propabilities of sampled states and the single spin flipped states\n    sample_log_probs = self.get_log_probs(to_one_hot(samples, 2))\n    flipped_log_probs = self.get_log_probs(to_one_hot(flipped, 2))\n    flipped_log_probs = flipped_log_probs.reshape(-1, samples.shape[-1])\n\n    # Calculate ratio of the wavefunction for the sampled and flipped states\n    log_psi_ratio = 0.5 * (flipped_log_probs - sample_log_probs[:, None])\n    psi_ratio = torch.exp(log_psi_ratio)\n\n    x_magnetization = psi_ratio.sum(-1)\n    return x_magnetization\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder","title":"rydberggpt.models.rydberg_encoder_decoder","text":""},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder.RydbergEncoderDecoder","title":"RydbergEncoderDecoder","text":"

    Bases: EncoderDecoder

    RydbergTransformer is a specific implementation of the Encoder-Decoder architecture that uses an encoder and decoder composed of multiple layers of EncoderLayer and DecoderLayer modules, respectively. The encoder and decoder are followed by an embedding layer and a generator layer.

    Parameters:

    Name Type Description Default encoder Encoder[EncoderLayer]

    The encoder module.

    required decoder Decoder[DecoderLayer]

    The decoder module.

    required tgt_embed Module

    The target embeddings module.

    required generator Generator

    The generator module.

    required config dict

    A dictionary of configuration options. Defaults to None.

    None **kwargs

    Additional keyword arguments.

    required Source code in src\\rydberggpt\\models\\rydberg_encoder_decoder.py
    class RydbergEncoderDecoder(EncoderDecoder):\n    \"\"\"\n    RydbergTransformer is a specific implementation of the Encoder-Decoder architecture\n    that uses an encoder and decoder composed of multiple layers of EncoderLayer and DecoderLayer\n    modules, respectively. The encoder and decoder are followed by an embedding layer and a generator\n    layer.\n\n    Args:\n        encoder (Encoder[EncoderLayer]): The encoder module.\n        decoder (Decoder[DecoderLayer]): The decoder module.\n        tgt_embed (nn.Module): The target embeddings module.\n        generator (Generator): The generator module.\n        config (dict, optional): A dictionary of configuration options. Defaults to None.\n        **kwargs: Additional keyword arguments.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        encoder: Encoder,\n        decoder: Decoder,\n        src_embed: nn.Module,\n        tgt_embed: nn.Module,\n        generator: Generator,\n        config=None,\n    ):\n        super().__init__(encoder, decoder, src_embed, tgt_embed, generator)\n        self.config = config\n\n    @torch.no_grad()\n    def get_log_probs(self, x: torch.Tensor, cond: Batch):\n        \"\"\"\n        Compute the log probabilities of a given input tensor.\n\n        Parameters:\n            x (torch.Tensor): The input tensor.\n            cond (Batch): The conditional graph structure.\n\n        Returns:\n            (torch.Tensor): The log probabilities.\n        \"\"\"\n\n        if not hasattr(cond, \"num_graphs\"):\n            cond = Batch.from_data_list([cond.clone() for _ in range(len(x))])\n\n        assert (\n            len(x.shape) == 3 and x.shape[-1] == 2\n        ), \"The input must be one hot encoded\"\n\n        y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n        y = y.to(x)  # Match dtype and device\n        y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n        y = self.forward(y, cond)  # EncoderDecoder forward pass\n    
    y = self.generator(y)  # Conditional log probs\n\n        y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n        return y\n\n    @torch.no_grad()\n    def get_samples(\n        self,\n        batch_size: int,\n        cond: Batch,\n        num_atoms: int,\n        fmt_onehot: bool = True,\n    ):\n        \"\"\"\n        Generate samples using the forward pass and sampling from the conditional probabilities.\n        The samples can be returned either in one-hot encoding format or in label format,\n        according to the `fmt_onehot` argument.\n\n        Args:\n            batch_size (int): The number of samples to generate.\n            cond (torch_geometric.data.Batch): The batch of conditional graph structures.\n            num_atoms (int): The number of atoms to sample. For num_atoms > num_nodes\n              in each graph within `cond`, the extra atoms are padded with zeros (onehot) or nan (label).\n            fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n              in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n\n        Returns:\n            (torch.Tensor): A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. 
The samples are padded according to the number of nodes in each graph within `cond`.\n        \"\"\"\n\n        if not hasattr(cond, \"num_graphs\"):\n            cond = Batch.from_data_list([cond.clone() for _ in range(batch_size)])\n\n        assert (\n            cond.num_graphs == batch_size\n        ), \"Incompatible arguments, batch_size ({}) does not match cond.num_graphs ({})\".format(\n            batch_size, cond.num_graphs\n        )\n\n        m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n        for i in range(num_atoms):\n            print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n            sys.stdout.flush()\n\n            y = self.forward(m, cond)  # EncoderDecoder forward pass\n            y = self.generator(y)  # Conditional log probs\n            y = y[:, -1, :]  # Next conditional log probs\n            y = torch.distributions.Categorical(logits=y).sample(\n                [\n                    1,\n                ]\n            )  # Sample from next conditional log probs\n            y = y.reshape(y.shape[1], 1)  # Reshape\n            y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n            m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n        if fmt_onehot:\n            for i in range(m.shape[0]):\n                # Depending on num_nodes/num_atoms in graph pad samples with [0,0]\n                m[i, cond[i].num_nodes + 1 :, :] = 0\n\n            m = m[:, 1:, :]  # Remove initial token\n        else:\n            m = m[:, :, -1]\n\n            for i in range(m.shape[0]):\n                # Depending on num_nodes/num_atoms in graph pad samples with nan\n                m[i, cond[i].num_nodes + 1 :] = torch.nan\n\n            m = m[:, 1:]\n\n        print(\"\")\n        return m\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder.RydbergEncoderDecoder.get_log_probs","title":"get_log_probs(x: torch.Tensor, cond: Batch)","text":"

    Compute the log probabilities of a given input tensor.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required cond Batch

    The conditional graph structure.

    required

    Returns:

    Type Description Tensor

    The log probabilities.

    Source code in src\\rydberggpt\\models\\rydberg_encoder_decoder.py
    @torch.no_grad()\ndef get_log_probs(self, x: torch.Tensor, cond: Batch):\n    \"\"\"\n    Compute the log probabilities of a given input tensor.\n\n    Parameters:\n        x (torch.Tensor): The input tensor.\n        cond (Batch): The conditional graph structure.\n\n    Returns:\n        (torch.Tensor): The log probabilities.\n    \"\"\"\n\n    if not hasattr(cond, \"num_graphs\"):\n        cond = Batch.from_data_list([cond.clone() for _ in range(len(x))])\n\n    assert (\n        len(x.shape) == 3 and x.shape[-1] == 2\n    ), \"The input must be one hot encoded\"\n\n    y = torch.zeros((x.shape[0], 1, x.shape[-1]))  # Initial token\n    y = y.to(x)  # Match dtype and device\n    y = torch.cat([y, x[:, :-1, :]], axis=-2)  # Append initial token to x\n\n    y = self.forward(y, cond)  # EncoderDecoder forward pass\n    y = self.generator(y)  # Conditional log probs\n\n    y = torch.sum(torch.sum(y * x, axis=-1), axis=-1)  # Log prob of full x\n\n    return y\n
    "},{"location":"reference/models/#rydberggpt.models.rydberg_encoder_decoder.RydbergEncoderDecoder.get_samples","title":"get_samples(batch_size: int, cond: Batch, num_atoms: int, fmt_onehot: bool = True)","text":"

    Generate samples using the forward pass and sampling from the conditional probabilities. The samples can be returned either in one-hot encoding format or in label format, according to the fmt_onehot argument.

    Parameters:

    Name Type Description Default batch_size int

    The number of samples to generate.

    required cond Batch

    The batch of conditional graph structures.

    required num_atoms int

    The number of atoms to sample. For num_atoms > num_nodes in each graph within cond, the extra atoms are padded with zeros (onehot) or nan (label).

    required fmt_onehot bool

    A flag to indicate whether to return the samples in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.

    True

    Returns:

    Type Description Tensor

    A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. The samples are padded according to the number of nodes in each graph within cond.

    Source code in src\\rydberggpt\\models\\rydberg_encoder_decoder.py
    @torch.no_grad()\ndef get_samples(\n    self,\n    batch_size: int,\n    cond: Batch,\n    num_atoms: int,\n    fmt_onehot: bool = True,\n):\n    \"\"\"\n    Generate samples using the forward pass and sampling from the conditional probabilities.\n    The samples can be returned either in one-hot encoding format or in label format,\n    according to the `fmt_onehot` argument.\n\n    Args:\n        batch_size (int): The number of samples to generate.\n        cond (torch_geometric.data.Batch): The batch of conditional graph structures.\n        num_atoms (int): The number of atoms to sample. For num_atoms > num_nodes\n          in each graph within `cond`, the extra atoms are padded with zeros (onehot) or nan (label).\n        fmt_onehot (bool, optional): A flag to indicate whether to return the samples\n          in one-hot encoding format. If False, the samples are returned in label format. Defaults to True.\n\n    Returns:\n        (torch.Tensor): A tensor containing the generated samples. The shape of the tensor is (batch_size, num_atoms, 2) for one-hot encoding format, and (batch_size, num_atoms) for label format. 
The samples are padded according to the number of nodes in each graph within `cond`.\n    \"\"\"\n\n    if not hasattr(cond, \"num_graphs\"):\n        cond = Batch.from_data_list([cond.clone() for _ in range(batch_size)])\n\n    assert (\n        cond.num_graphs == batch_size\n    ), \"Incompatible arguments, batch_size ({}) does not match cond.num_graphs ({})\".format(\n        batch_size, cond.num_graphs\n    )\n\n    m = torch.zeros(batch_size, 1, 2, device=self.device)\n\n    for i in range(num_atoms):\n        print(\"{:<80}\".format(f\"\\rGenerating atom {i+1}/{num_atoms}\"), end=\"\")\n        sys.stdout.flush()\n\n        y = self.forward(m, cond)  # EncoderDecoder forward pass\n        y = self.generator(y)  # Conditional log probs\n        y = y[:, -1, :]  # Next conditional log probs\n        y = torch.distributions.Categorical(logits=y).sample(\n            [\n                1,\n            ]\n        )  # Sample from next conditional log probs\n        y = y.reshape(y.shape[1], 1)  # Reshape\n        y = to_one_hot(y, 2)  # Convert from label to one hot encoding\n\n        m = torch.cat((m, y), dim=-2)  # Append next sample to tensor\n\n    if fmt_onehot:\n        for i in range(m.shape[0]):\n            # Depending on num_nodes/num_atoms in graph pad samples with [0,0]\n            m[i, cond[i].num_nodes + 1 :, :] = 0\n\n        m = m[:, 1:, :]  # Remove initial token\n    else:\n        m = m[:, :, -1]\n\n        for i in range(m.shape[0]):\n            # Depending on num_nodes/num_atoms in graph pad samples with nan\n            m[i, cond[i].num_nodes + 1 :] = torch.nan\n\n        m = m[:, 1:]\n\n    print(\"\")\n    return m\n
    "},{"location":"reference/models/graph/","title":"Graph","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding","title":"rydberggpt.models.graph_embedding","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers","title":"layers","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers.GraphLayer","title":"GraphLayer","text":"

    Bases: Module

    Source code in src\\rydberggpt\\models\\graph_embedding\\layers.py
    class GraphLayer(nn.Module):\n    def __init__(self, graph_layer: nn.Module, norm_layer: nn.Module, dropout: float):\n        \"\"\"\n        A GraphLayer is a single layer in a graph neural network, consisting of\n        a graph layer, normalization layer, and dropout.\n\n        Args:\n            graph_layer (nn.Module): A graph layer, e.g., GCNConv, GATConv, etc.\n            norm_layer (nn.Module): A normalization layer, e.g., LayerNorm or BatchNorm.\n            dropout (float): Dropout probability.\n        \"\"\"\n        super(GraphLayer, self).__init__()\n        self.graph_layer = graph_layer\n        self.norm = norm_layer\n        self.dropout = nn.Dropout(dropout)\n\n    def forward(\n        self, x: torch.Tensor, edge_index: Adj, edge_attr: OptTensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Forward pass through the GraphLayer.\n\n        Args:\n            x (torch.Tensor): Node feature matrix.\n            edge_index (Adj): Edge indices.\n            edge_attr (OptTensor): Edge feature matrix.\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through the GraphLayer.\n        \"\"\"\n        x = self.graph_layer(x, edge_index, edge_attr)\n        x = F.relu(self.norm(x))\n        x = self.dropout(x)\n        return x\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers.GraphLayer.__init__","title":"__init__(graph_layer: nn.Module, norm_layer: nn.Module, dropout: float)","text":"

    A GraphLayer is a single layer in a graph neural network, consisting of a graph layer, normalization layer, and dropout.

    Parameters:

    Name Type Description Default graph_layer Module

    A graph layer, e.g., GCNConv, GATConv, etc.

    required norm_layer Module

    A normalization layer, e.g., LayerNorm or BatchNorm.

    required dropout float

    Dropout probability.

    required Source code in src\\rydberggpt\\models\\graph_embedding\\layers.py
    def __init__(self, graph_layer: nn.Module, norm_layer: nn.Module, dropout: float):\n    \"\"\"\n    A GraphLayer is a single layer in a graph neural network, consisting of\n    a graph layer, normalization layer, and dropout.\n\n    Args:\n        graph_layer (nn.Module): A graph layer, e.g., GCNConv, GATConv, etc.\n        norm_layer (nn.Module): A normalization layer, e.g., LayerNorm or BatchNorm.\n        dropout (float): Dropout probability.\n    \"\"\"\n    super(GraphLayer, self).__init__()\n    self.graph_layer = graph_layer\n    self.norm = norm_layer\n    self.dropout = nn.Dropout(dropout)\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.layers.GraphLayer.forward","title":"forward(x: torch.Tensor, edge_index: Adj, edge_attr: OptTensor) -> torch.Tensor","text":"

    Forward pass through the GraphLayer.

    Parameters:

    Name Type Description Default x Tensor

    Node feature matrix.

    required edge_index Adj

    Edge indices.

    required edge_attr OptTensor

    Edge feature matrix.

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through the GraphLayer.

    Source code in src\\rydberggpt\\models\\graph_embedding\\layers.py
    def forward(\n    self, x: torch.Tensor, edge_index: Adj, edge_attr: OptTensor\n) -> torch.Tensor:\n    \"\"\"\n    Forward pass through the GraphLayer.\n\n    Args:\n        x (torch.Tensor): Node feature matrix.\n        edge_index (Adj): Edge indices.\n        edge_attr (OptTensor): Edge feature matrix.\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through the GraphLayer.\n    \"\"\"\n    x = self.graph_layer(x, edge_index, edge_attr)\n    x = F.relu(self.norm(x))\n    x = self.dropout(x)\n    return x\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models","title":"models","text":""},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models.GraphEmbedding","title":"GraphEmbedding","text":"

    Bases: Module

    Source code in src\\rydberggpt\\models\\graph_embedding\\models.py
    class GraphEmbedding(torch.nn.Module):\n    def __init__(\n        self,\n        graph_layer: Type[Callable],\n        in_node_dim: int,\n        d_hidden: int,\n        d_model: int,\n        num_layers: int,\n        dropout: float = 0.1,\n    ) -> None:\n        \"\"\"\n        GraphEmbedding class for creating a graph embedding with multiple layers.\n\n        Args:\n            graph_layer (Type[Callable]): The graph layer to be used in the embedding.\n            in_node_dim (int): The input node dimension. (omega, delta, beta)\n            d_hidden (int): The hidden dimension size.\n            d_model (int): The output node dimension.\n            num_layers (int): The number of layers in the graph embedding.\n            dropout (float, optional): The dropout rate. Defaults to 0.1.\n        \"\"\"\n        super(GraphEmbedding, self).__init__()\n\n        self.graph_layer = graph_layer\n        self.layers = ModuleList()\n        self.layers.append(\n            GraphLayer(\n                self.graph_layer(in_node_dim, d_hidden), LayerNorm(d_hidden), dropout\n            )\n        )\n\n        for _ in range(num_layers - 2):\n            self.layers.append(\n                GraphLayer(\n                    self.graph_layer(d_hidden, d_hidden), LayerNorm(d_hidden), dropout\n                )\n            )\n\n        self.layers.append(self.graph_layer(d_hidden, d_model))\n        self.final_norm = LayerNorm(d_model)\n\n    def forward(self, data: Data) -> Tensor:\n        \"\"\"\n        Forward pass through the graph embedding layers.\n\n        Args:\n            data (Data): The input graph data.\n\n        Returns:\n            (Tensor): The output tensor with reshaped dimensions.\n        \"\"\"\n        # [..., num_features], [2, ...] 
[...]\n        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr\n\n        for layer in self.layers[:-1]:\n            # [..., num_features]\n            x = layer(x, edge_index, edge_attr)\n\n        # [..., d_model]\n        x = self.final_norm(self.layers[-1](x, edge_index, edge_attr))\n\n        x, batch_mask = to_dense_batch(x, data.batch)\n\n        # [B, N, d_model], where N is the number of nodes or the number of atoms\n        return x, batch_mask\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models.GraphEmbedding.__init__","title":"__init__(graph_layer: Type[Callable], in_node_dim: int, d_hidden: int, d_model: int, num_layers: int, dropout: float = 0.1) -> None","text":"

    GraphEmbedding class for creating a graph embedding with multiple layers.

    Parameters:

    Name Type Description Default graph_layer Type[Callable]

    The graph layer to be used in the embedding.

    required in_node_dim int

    The input node dimension. (omega, delta, beta)

    required d_hidden int

    The hidden dimension size.

    required d_model int

    The output node dimension.

    required num_layers int

    The number of layers in the graph embedding.

    required dropout float

    The dropout rate. Defaults to 0.1.

    0.1 Source code in src\\rydberggpt\\models\\graph_embedding\\models.py
    def __init__(\n    self,\n    graph_layer: Type[Callable],\n    in_node_dim: int,\n    d_hidden: int,\n    d_model: int,\n    num_layers: int,\n    dropout: float = 0.1,\n) -> None:\n    \"\"\"\n    GraphEmbedding class for creating a graph embedding with multiple layers.\n\n    Args:\n        graph_layer (Type[Callable]): The graph layer to be used in the embedding.\n        in_node_dim (int): The input node dimension. (omega, delta, beta)\n        d_hidden (int): The hidden dimension size.\n        d_model (int): The output node dimension.\n        num_layers (int): The number of layers in the graph embedding.\n        dropout (float, optional): The dropout rate. Defaults to 0.1.\n    \"\"\"\n    super(GraphEmbedding, self).__init__()\n\n    self.graph_layer = graph_layer\n    self.layers = ModuleList()\n    self.layers.append(\n        GraphLayer(\n            self.graph_layer(in_node_dim, d_hidden), LayerNorm(d_hidden), dropout\n        )\n    )\n\n    for _ in range(num_layers - 2):\n        self.layers.append(\n            GraphLayer(\n                self.graph_layer(d_hidden, d_hidden), LayerNorm(d_hidden), dropout\n            )\n        )\n\n    self.layers.append(self.graph_layer(d_hidden, d_model))\n    self.final_norm = LayerNorm(d_model)\n
    "},{"location":"reference/models/graph/#rydberggpt.models.graph_embedding.models.GraphEmbedding.forward","title":"forward(data: Data) -> Tensor","text":"

    Forward pass through the graph embedding layers.

    Parameters:

    Name Type Description Default data Data

    The input graph data.

    required

    Returns:

    Type Description Tensor

    The output tensor with reshaped dimensions.

    Source code in src\\rydberggpt\\models\\graph_embedding\\models.py
    def forward(self, data: Data) -> Tensor:\n    \"\"\"\n    Forward pass through the graph embedding layers.\n\n    Args:\n        data (Data): The input graph data.\n\n    Returns:\n        (Tensor): The output tensor with reshaped dimensions.\n    \"\"\"\n    # [..., num_features], [2, ...] [...]\n    x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr\n\n    for layer in self.layers[:-1]:\n        # [..., num_features]\n        x = layer(x, edge_index, edge_attr)\n\n    # [..., d_model]\n    x = self.final_norm(self.layers[-1](x, edge_index, edge_attr))\n\n    x, batch_mask = to_dense_batch(x, data.batch)\n\n    # [B, N, d_model], where N is the number of nodes or the number of atoms\n    return x, batch_mask\n
    "},{"location":"reference/models/transformer/","title":"Transformer","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer","title":"rydberggpt.models.transformer","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers","title":"layers","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.DecoderLayer","title":"DecoderLayer","text":"

    Bases: Module

    Decoder is made of self-attn, src-attn, and feed forward.

    Parameters:

    Name Type Description Default size int

    The input size. (d_model)

    required self_attn MultiheadAttention

    The self-attention module.

    required src_attn MultiheadAttention

    The source-attention module.

    required feed_forward PositionwiseFeedForward

    The feed forward module.

    required dropout float

    The dropout rate.

    required Source code in src\\rydberggpt\\models\\transformer\\layers.py
    class DecoderLayer(nn.Module):\n    \"\"\"\n    Decoder is made of self-attn, src-attn, and feed forward.\n\n    Args:\n        size (int): The input size. (d_model)\n        self_attn (nn.MultiheadAttention): The self-attention module.\n        src_attn (nn.MultiheadAttention): The source-attention module.\n        feed_forward (PositionwiseFeedForward): The feed forward module.\n        dropout (float): The dropout rate.\n    \"\"\"\n\n    def __init__(\n        self,\n        size: int,\n        self_attn: nn.MultiheadAttention,\n        src_attn: nn.MultiheadAttention,\n        feed_forward: PositionwiseFeedForward,\n        dropout: float,\n    ):\n        super(DecoderLayer, self).__init__()\n        self.size = size\n        self.self_attn = self_attn\n        self.src_attn = src_attn\n        self.feed_forward = feed_forward\n        self.sublayer = clones(SublayerConnection(size, dropout), 3)\n\n    def forward(\n        self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the decoder.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n            memory (torch.Tensor): The memory tensor.\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n\n        causal_attn_mask = torch.meshgrid(\n            torch.arange(x.shape[-2], device=x.device),\n            torch.arange(x.shape[-2], device=x.device),\n            indexing=\"ij\",\n        )\n        causal_attn_mask = causal_attn_mask[0] >= causal_attn_mask[1]\n        causal_attn_mask = torch.logical_not(causal_attn_mask)\n\n        batch_key_mask = batch_mask\n        batch_key_mask = torch.logical_not(batch_key_mask)\n\n        m = memory\n        x = self.sublayer[0](\n            x, lambda x: self.self_attn(x, x, x, attn_mask=causal_attn_mask)[0]\n        )\n        x = self.sublayer[1](\n 
           x, lambda x: self.src_attn(x, m, m, key_padding_mask=batch_key_mask)[0]\n        )\n        return self.sublayer[2](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.DecoderLayer.forward","title":"forward(x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the decoder.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required memory Tensor

    The memory tensor.

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\layers.py
    def forward(\n    self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the decoder.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n        memory (torch.Tensor): The memory tensor.\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n\n    causal_attn_mask = torch.meshgrid(\n        torch.arange(x.shape[-2], device=x.device),\n        torch.arange(x.shape[-2], device=x.device),\n        indexing=\"ij\",\n    )\n    causal_attn_mask = causal_attn_mask[0] >= causal_attn_mask[1]\n    causal_attn_mask = torch.logical_not(causal_attn_mask)\n\n    batch_key_mask = batch_mask\n    batch_key_mask = torch.logical_not(batch_key_mask)\n\n    m = memory\n    x = self.sublayer[0](\n        x, lambda x: self.self_attn(x, x, x, attn_mask=causal_attn_mask)[0]\n    )\n    x = self.sublayer[1](\n        x, lambda x: self.src_attn(x, m, m, key_padding_mask=batch_key_mask)[0]\n    )\n    return self.sublayer[2](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.EncoderLayer","title":"EncoderLayer","text":"

    Bases: Module

    Encoder is made up of self-attn and feed forward.

    Parameters:

    Name Type Description Default size int

    The input size. (d_model)

    required self_attn MultiheadAttention

    The self-attention module.

    required feed_forward PositionwiseFeedForward

    The feed forward module.

    required dropout float

    The dropout rate.

    required Source code in src\\rydberggpt\\models\\transformer\\layers.py
    class EncoderLayer(nn.Module):\n    \"\"\"\n    Encoder is made up of self-attn and feed forward.\n\n    Args:\n        size (int): The input size. (d_model)\n        self_attn (nn.MultiheadAttention): The self-attention module.\n        feed_forward (PositionwiseFeedForward): The feed forward module.\n        dropout (float): The dropout rate.\n    \"\"\"\n\n    def __init__(\n        self,\n        size: int,\n        self_attn: nn.MultiheadAttention,\n        feed_forward: PositionwiseFeedForward,\n        dropout: float,\n    ):\n        super(EncoderLayer, self).__init__()\n        self.self_attn = self_attn\n        self.feed_forward = feed_forward\n        self.sublayer = clones(SublayerConnection(size, dropout), 2)\n        self.size = size\n\n    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the encoder.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n\n        batch_key_mask = batch_mask\n        batch_key_mask = torch.logical_not(batch_key_mask)\n\n        x = self.sublayer[0](\n            x,\n            lambda x: torch.nan_to_num(\n                self.self_attn(x, x, x, key_padding_mask=batch_key_mask)[0]\n            ),\n        )\n        return self.sublayer[1](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.layers.EncoderLayer.forward","title":"forward(x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the encoder.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\layers.py
    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the encoder.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n\n    batch_key_mask = batch_mask\n    batch_key_mask = torch.logical_not(batch_key_mask)\n\n    x = self.sublayer[0](\n        x,\n        lambda x: torch.nan_to_num(\n            self.self_attn(x, x, x, key_padding_mask=batch_key_mask)[0]\n        ),\n    )\n    return self.sublayer[1](x, self.feed_forward)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models","title":"models","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Decoder","title":"Decoder","text":"

    Bases: Module

    The core of the transformer, which consists of a stack of decoder layers.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class Decoder(nn.Module):\n    \"\"\"\n    The core of the transformer, which consists of a stack of decoder layers.\n    \"\"\"\n\n    def __init__(self, layer: nn.Module, n_layers: int):\n        \"\"\"\n        Initialize the Decoder class.\n\n        Args:\n            layer (nn.Module): A single instance of the decoder layer to be cloned.\n            n_layers (int): The number of decoder layers in the stack.\n        \"\"\"\n        super(Decoder, self).__init__()\n        self.layers = clones(layer, n_layers)\n        self.norm = nn.LayerNorm(layer.size)\n\n    def forward(\n        self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Pass the (masked) input through all layers of the decoder.\n\n        Args:\n            x (torch.Tensor): The input tensor to the decoder of shape (batch_size, seq_length, d_model).\n            memory (torch.Tensor): The memory tensor, typically the output of the encoder.\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through all layers of the decoder of shape (batch_size, seq_length, d_model).\n        \"\"\"\n        for layer in self.layers:\n            x = layer(x, memory, batch_mask=batch_mask)\n        return self.norm(x)  # [batch_size, seq_len, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Decoder.__init__","title":"__init__(layer: nn.Module, n_layers: int)","text":"

    Initialize the Decoder class.

    Parameters:

    Name Type Description Default layer Module

    A single instance of the decoder layer to be cloned.

    required n_layers int

    The number of decoder layers in the stack.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(self, layer: nn.Module, n_layers: int):\n    \"\"\"\n    Initialize the Decoder class.\n\n    Args:\n        layer (nn.Module): A single instance of the decoder layer to be cloned.\n        n_layers (int): The number of decoder layers in the stack.\n    \"\"\"\n    super(Decoder, self).__init__()\n    self.layers = clones(layer, n_layers)\n    self.norm = nn.LayerNorm(layer.size)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Decoder.forward","title":"forward(x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Pass the (masked) input through all layers of the decoder.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor to the decoder of shape (batch_size, seq_length, d_model).

    required memory Tensor

    The memory tensor, typically the output of the encoder.

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through all layers of the decoder of shape (batch_size, seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(\n    self, x: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n) -> torch.Tensor:\n    \"\"\"\n    Pass the (masked) input through all layers of the decoder.\n\n    Args:\n        x (torch.Tensor): The input tensor to the decoder of shape (batch_size, seq_length, d_model).\n        memory (torch.Tensor): The memory tensor, typically the output of the encoder.\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through all layers of the decoder of shape (batch_size, seq_length, d_model).\n    \"\"\"\n    for layer in self.layers:\n        x = layer(x, memory, batch_mask=batch_mask)\n    return self.norm(x)  # [batch_size, seq_len, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Encoder","title":"Encoder","text":"

    Bases: Module

    The core encoder, which consists of a stack of N layers.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class Encoder(nn.Module):\n    \"\"\"\n    The core encoder, which consists of a stack of N layers.\n    \"\"\"\n\n    def __init__(self, layer: nn.Module, N: int):\n        \"\"\"\n        Initialize the Encoder class.\n\n        Args:\n            layer (nn.Module): A single instance of the encoder layer to be cloned.\n            N (int): The number of encoder layers in the stack.\n        \"\"\"\n        super(Encoder, self).__init__()\n        self.layers = clones(layer, N)\n        self.norm = nn.LayerNorm(layer.size)\n\n    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Pass the input through each layer in turn.\n\n        Args:\n            x (torch.Tensor): The input tensor to the encoder of shape (batch_size, seq_length, d_model).\n            batch_mask (torch.Tensor): The mask tensor for batches.\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through all layers of the encoder,\n                          with the same shape as the input tensor (batch_size, seq_length, d_model).\n        \"\"\"\n        for layer in self.layers:\n            x = layer(x, batch_mask=batch_mask)\n        return self.norm(x)  # [batch_size, seq_length, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Encoder.__init__","title":"__init__(layer: nn.Module, N: int)","text":"

    Initialize the Encoder class.

    Parameters:

    Name Type Description Default layer Module

    A single instance of the encoder layer to be cloned.

    required N int

    The number of encoder layers in the stack.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(self, layer: nn.Module, N: int):\n    \"\"\"\n    Initialize the Encoder class.\n\n    Args:\n        layer (nn.Module): A single instance of the encoder layer to be cloned.\n        N (int): The number of encoder layers in the stack.\n    \"\"\"\n    super(Encoder, self).__init__()\n    self.layers = clones(layer, N)\n    self.norm = nn.LayerNorm(layer.size)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Encoder.forward","title":"forward(x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Pass the input through each layer in turn.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor to the encoder of shape (batch_size, seq_length, d_model).

    required batch_mask Tensor

    The mask tensor for batches.

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through all layers of the encoder, with the same shape as the input tensor (batch_size, seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(self, x: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Pass the input through each layer in turn.\n\n    Args:\n        x (torch.Tensor): The input tensor to the encoder of shape (batch_size, seq_length, d_model).\n        batch_mask (torch.Tensor): The mask tensor for batches.\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through all layers of the encoder,\n                      with the same shape as the input tensor (batch_size, seq_length, d_model).\n    \"\"\"\n    for layer in self.layers:\n        x = layer(x, batch_mask=batch_mask)\n    return self.norm(x)  # [batch_size, seq_length, d_model]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder","title":"EncoderDecoder","text":"

    Bases: LightningModule

    A standard Encoder-Decoder architecture. Base for this and many other models.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class EncoderDecoder(pl.LightningModule):\n    \"\"\"\n    A standard Encoder-Decoder architecture. Base for this and many other models.\n    \"\"\"\n\n    def __init__(\n        self,\n        encoder: nn.Module,\n        decoder: nn.Module,\n        src_embed: nn.Module,\n        tgt_embed: nn.Module,\n        generator: nn.Module,\n    ):\n        \"\"\"\n        Initialize the EncoderDecoder class.\n\n        Args:\n            encoder (nn.Module): The encoder module.\n            decoder (nn.Module): The decoder module.\n            tgt_embed (nn.Module): The target embedding module.\n            generator (nn.Module): The generator module.\n        \"\"\"\n        super(EncoderDecoder, self).__init__()\n        self.encoder = encoder\n        self.decoder = decoder\n        self.src_embed = src_embed\n        self.tgt_embed = tgt_embed\n        self.generator = generator\n\n    def forward(self, tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Take in and process masked src and target sequences.\n\n        Args:\n            tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n            src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n        Returns:\n            (torch.Tensor): The output tensor after passing through the encoder-decoder architecture,\n                          with shape (batch_size, tgt_seq_length, d_model).\n        \"\"\"\n\n        memory, batch_mask = self.encode(src)\n\n        return self.decode(tgt, memory, batch_mask)\n\n    def encode(self, src: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Encode the source tensor.\n\n        Args:\n            src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n        Returns:\n            (torch.Tensor): The encoded tensor of shape (batch_size, src_seq_length, d_model_tgt).\n        \"\"\"\n\n        x, batch_mask = 
self.src_embed(src)\n\n        return self.encoder(x, batch_mask=batch_mask), batch_mask\n\n    def decode(\n        self, tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n    ) -> torch.Tensor:\n        \"\"\"\n        Decode the target tensor using the memory tensor.\n\n        Args:\n            tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n            memory (torch.Tensor): The memory tensor of shape (batch_size, src_seq_length, d_model).\n\n        Returns:\n            (torch.Tensor): The decoded tensor of shape (batch_size, tgt_seq_length, d_model).\n        \"\"\"\n        return self.decoder(self.tgt_embed(tgt), memory, batch_mask=batch_mask)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.__init__","title":"__init__(encoder: nn.Module, decoder: nn.Module, src_embed: nn.Module, tgt_embed: nn.Module, generator: nn.Module)","text":"

    Initialize the EncoderDecoder class.

    Parameters:

    Name Type Description Default encoder Module

    The encoder module.

    required decoder Module

    The decoder module.

    required tgt_embed Module

    The target embedding module.

    required generator Module

    The generator module.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(\n    self,\n    encoder: nn.Module,\n    decoder: nn.Module,\n    src_embed: nn.Module,\n    tgt_embed: nn.Module,\n    generator: nn.Module,\n):\n    \"\"\"\n    Initialize the EncoderDecoder class.\n\n    Args:\n        encoder (nn.Module): The encoder module.\n        decoder (nn.Module): The decoder module.\n        tgt_embed (nn.Module): The target embedding module.\n        generator (nn.Module): The generator module.\n    \"\"\"\n    super(EncoderDecoder, self).__init__()\n    self.encoder = encoder\n    self.decoder = decoder\n    self.src_embed = src_embed\n    self.tgt_embed = tgt_embed\n    self.generator = generator\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.decode","title":"decode(tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor) -> torch.Tensor","text":"

    Decode the target tensor using the memory tensor.

    Parameters:

    Name Type Description Default tgt Tensor

    The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).

    required memory Tensor

    The memory tensor of shape (batch_size, src_seq_length, d_model).

    required

    Returns:

    Type Description Tensor

    The decoded tensor of shape (batch_size, tgt_seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def decode(\n    self, tgt: torch.Tensor, memory: torch.Tensor, batch_mask: torch.Tensor\n) -> torch.Tensor:\n    \"\"\"\n    Decode the target tensor using the memory tensor.\n\n    Args:\n        tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n        memory (torch.Tensor): The memory tensor of shape (batch_size, src_seq_length, d_model).\n\n    Returns:\n        (torch.Tensor): The decoded tensor of shape (batch_size, tgt_seq_length, d_model).\n    \"\"\"\n    return self.decoder(self.tgt_embed(tgt), memory, batch_mask=batch_mask)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.encode","title":"encode(src: torch.Tensor) -> torch.Tensor","text":"

    Encode the source tensor.

    Parameters:

    Name Type Description Default src Tensor

    The source tensor of shape (batch_size, src_seq_length, d_model_src).

    required

    Returns:

    Type Description Tensor

    The encoded tensor of shape (batch_size, src_seq_length, d_model_tgt).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def encode(self, src: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Encode the source tensor.\n\n    Args:\n        src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n    Returns:\n        (torch.Tensor): The encoded tensor of shape (batch_size, src_seq_length, d_model_tgt).\n    \"\"\"\n\n    x, batch_mask = self.src_embed(src)\n\n    return self.encoder(x, batch_mask=batch_mask), batch_mask\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.EncoderDecoder.forward","title":"forward(tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor","text":"

    Take in and process masked src and target sequences.

    Parameters:

    Name Type Description Default tgt Tensor

    The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).

    required src Tensor

    The source tensor of shape (batch_size, src_seq_length, d_model_src).

    required

    Returns:

    Type Description Tensor

    The output tensor after passing through the encoder-decoder architecture, with shape (batch_size, tgt_seq_length, d_model).

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(self, tgt: torch.Tensor, src: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Take in and process masked src and target sequences.\n\n    Args:\n        tgt (torch.Tensor): The target tensor of shape (batch_size, tgt_seq_length, d_model_tgt).\n        src (torch.Tensor): The source tensor of shape (batch_size, src_seq_length, d_model_src).\n\n    Returns:\n        (torch.Tensor): The output tensor after passing through the encoder-decoder architecture,\n                      with shape (batch_size, tgt_seq_length, d_model).\n    \"\"\"\n\n    memory, batch_mask = self.encode(src)\n\n    return self.decode(tgt, memory, batch_mask)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Generator","title":"Generator","text":"

    Bases: Module

    Linear + softmax layer for generation step. vocab_size for Rydberg is 2.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    class Generator(nn.Module):\n    \"\"\"\n    Linear + softmax layer for generation step. vocab_size for Rydberg is 2.\n    \"\"\"\n\n    def __init__(self, d_model: int, vocab_size: int):\n        \"\"\"\n        Initialize the Generator class.\n\n        Args:\n            d_model (int): The dimension of the input features (i.e., the last dimension of the input tensor).\n            vocab_size (int): The size of the vocabulary, which determines the last dimension of the output tensor.\n        \"\"\"\n        super(Generator, self).__init__()\n        self.proj = nn.Linear(d_model, vocab_size)  # [batch_size, seq_len, vocab_size]\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass of the Generator.\n\n        Args:\n            x (torch.Tensor): The input tensor of shape (batch_size, seq_length, d_model).\n\n        Returns:\n            (torch.Tensor): The output tensor of shape (batch_size, seq_length, vocab_size),\n                          with log-softmax applied along the last dimension.\n        \"\"\"\n\n        proj_offset = self.proj(x) + 1e-10\n        return F.log_softmax(proj_offset, dim=-1)  # [batch_size, seq_len, vocab_size]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Generator.__init__","title":"__init__(d_model: int, vocab_size: int)","text":"

    Initialize the Generator class.

    Parameters:

    Name Type Description Default d_model int

    The dimension of the input features (i.e., the last dimension of the input tensor).

    required vocab_size int

    The size of the vocabulary, which determines the last dimension of the output tensor.

    required Source code in src\\rydberggpt\\models\\transformer\\models.py
    def __init__(self, d_model: int, vocab_size: int):\n    \"\"\"\n    Initialize the Generator class.\n\n    Args:\n        d_model (int): The dimension of the input features (i.e., the last dimension of the input tensor).\n        vocab_size (int): The size of the vocabulary, which determines the last dimension of the output tensor.\n    \"\"\"\n    super(Generator, self).__init__()\n    self.proj = nn.Linear(d_model, vocab_size)  # [batch_size, seq_len, vocab_size]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.models.Generator.forward","title":"forward(x: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass of the Generator.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor of shape (batch_size, seq_length, d_model).

    required

    Returns:

    Type Description Tensor

    The output tensor of shape (batch_size, seq_length, vocab_size), with log-softmax applied along the last dimension.

    Source code in src\\rydberggpt\\models\\transformer\\models.py
    def forward(self, x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass of the Generator.\n\n    Args:\n        x (torch.Tensor): The input tensor of shape (batch_size, seq_length, d_model).\n\n    Returns:\n        (torch.Tensor): The output tensor of shape (batch_size, seq_length, vocab_size),\n                      with log-softmax applied along the last dimension.\n    \"\"\"\n\n    proj_offset = self.proj(x) + 1e-10\n    return F.log_softmax(proj_offset, dim=-1)  # [batch_size, seq_len, vocab_size]\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules","title":"modules","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.Embeddings","title":"Embeddings","text":"

    Bases: Module

    The embedding layer.

    Parameters:

    Name Type Description Default d_model int

    The embedding size.

    required vocab_size int

    The vocabulary size.

    required Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class Embeddings(nn.Module):\n    \"\"\"\n    The embedding layer.\n\n    Args:\n        d_model (int): The embedding size.\n        vocab_size (int): The vocabulary size.\n    \"\"\"\n\n    def __init__(self, d_model: int, vocab_size: int):\n        super(Embeddings, self).__init__()\n        self.lut = nn.Linear(vocab_size, d_model)\n        self.d_model = d_model\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the module.\n\n        Parameters:\n            x (torch.Tensor): The input tensor.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n        x = self.lut(x) * math.sqrt(self.d_model)\n        return x\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.Embeddings.forward","title":"forward(x: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the module.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    def forward(self, x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the module.\n\n    Parameters:\n        x (torch.Tensor): The input tensor.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n    x = self.lut(x) * math.sqrt(self.d_model)\n    return x\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.PositionalEncoding","title":"PositionalEncoding","text":"

    Bases: Module

    Implement the PE function.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class PositionalEncoding(nn.Module):\n    \"Implement the PE function.\"\n\n    def __init__(self, d_model, dropout, max_len=5000):\n        super(PositionalEncoding, self).__init__()\n        self.dropout = nn.Dropout(p=dropout)\n\n        # Compute the positional encodings once in log space.\n        pe = torch.zeros(max_len, d_model)\n        position = torch.arange(0, max_len).unsqueeze(1)\n        div_term = torch.exp(\n            torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)\n        )\n        pe[:, 0::2] = torch.sin(position * div_term)\n        pe[:, 1::2] = torch.cos(position * div_term)\n        pe = pe.unsqueeze(0)\n        self.register_buffer(\"pe\", pe)\n\n    def forward(self, x):\n        x = x + self.pe[:, : x.size(1)].requires_grad_(False)\n        return self.dropout(x)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.PositionwiseFeedForward","title":"PositionwiseFeedForward","text":"

    Bases: Module

    A two-layer feed-forward network.

    Parameters:

    Name Type Description Default d_model int

    The input size.

    required d_ff int

    The hidden size.

    required dropout float

    The dropout rate. Defaults to 0.1.

    0.1 Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class PositionwiseFeedForward(nn.Module):\n    \"\"\"\n    A two-layer feed-forward network.\n\n    Args:\n        d_model (int): The input size.\n        d_ff (int): The hidden size.\n        dropout (float, optional): The dropout rate. Defaults to 0.1.\n    \"\"\"\n\n    def __init__(self, d_model: int, d_ff: int, dropout: float = 0.1):\n        super(PositionwiseFeedForward, self).__init__()\n        self.w_1 = nn.Linear(d_model, d_ff)\n        self.w_2 = nn.Linear(d_ff, d_model)\n        self.dropout = nn.Dropout(p=dropout)\n\n    def forward(self, x: torch.Tensor) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the module.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n        return self.w_2(self.dropout(F.relu(self.w_1(x))))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.PositionwiseFeedForward.forward","title":"forward(x: torch.Tensor) -> torch.Tensor","text":"

    Compute the forward pass through the module.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    def forward(self, x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the module.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n    return self.w_2(self.dropout(F.relu(self.w_1(x))))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.SublayerConnection","title":"SublayerConnection","text":"

    Bases: Module

    This module implements a residual connection followed by a layer norm.

    Parameters:

    Name Type Description Default size int

    The input size.

    required dropout float

    The dropout rate.

    required Source code in src\\rydberggpt\\models\\transformer\\modules.py
    class SublayerConnection(nn.Module):\n    \"\"\"\n    This module implements a residual connection followed by a layer norm.\n\n    Args:\n        size (int): The input size.\n        dropout (float): The dropout rate.\n    \"\"\"\n\n    def __init__(self, size: int, dropout: float):\n        super(SublayerConnection, self).__init__()\n        self.layer_norm = nn.LayerNorm(size)\n        self.dropout = nn.Dropout(dropout)\n\n    def forward(self, x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor:\n        \"\"\"\n        Compute the forward pass through the module.\n\n        Args:\n            x (torch.Tensor): The input tensor.\n            sublayer (nn.Module): The sublayer module.\n\n        Returns:\n            (torch.Tensor): The output tensor.\n        \"\"\"\n        # NOTE For GPT2 the authors moved Layer normalization (Ba et al., 2016)\n        # to the input of each sub-block.\n        # see Sec. 2.3 https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf\n        return x + self.dropout(sublayer(self.layer_norm(x)))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.modules.SublayerConnection.forward","title":"forward(x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor","text":"

    Compute the forward pass through the module.

    Parameters:

    Name Type Description Default x Tensor

    The input tensor.

    required sublayer Module

    The sublayer module.

    required

    Returns:

    Type Description Tensor

    The output tensor.

    Source code in src\\rydberggpt\\models\\transformer\\modules.py
    def forward(self, x: torch.Tensor, sublayer: nn.Module) -> torch.Tensor:\n    \"\"\"\n    Compute the forward pass through the module.\n\n    Args:\n        x (torch.Tensor): The input tensor.\n        sublayer (nn.Module): The sublayer module.\n\n    Returns:\n        (torch.Tensor): The output tensor.\n    \"\"\"\n    # NOTE For GPT2 the authors moved Layer normalization (Ba et al., 2016)\n    # to the input of each sub-block.\n    # see Sec. 2.3 https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf\n    return x + self.dropout(sublayer(self.layer_norm(x)))\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils","title":"utils","text":""},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils.clones","title":"clones(module: nn.Module, n_clones: int)","text":"

    helper function which produces n_clones copies of a layer

    Source code in src\\rydberggpt\\models\\transformer\\utils.py
    def clones(module: nn.Module, n_clones: int):\n    \"\"\"helper function which produces n_clones copies of a layer\"\"\"\n    return nn.ModuleList([copy.deepcopy(module) for _ in range(n_clones)])\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils.flattened_snake_flip","title":"flattened_snake_flip(x: torch.Tensor, Lx: int, Ly: int) -> torch.Tensor","text":"

    Implements a \"snake\" flip which reorders the flattened 2D tensor into snake order.

    Parameters:

    Name Type Description Default x Tensor

    The tensor to apply the snake flip to, dimensions should be [..., Ly * Lx].

    required

    Returns:

    Type Description Tensor

    The \"snake\" flipped tensor, dimensions will be [..., Ly * Lx].

    Source code in src\\rydberggpt\\models\\transformer\\utils.py
    def flattened_snake_flip(x: torch.Tensor, Lx: int, Ly: int) -> torch.Tensor:\n    \"\"\"\n    Implements a \"snake\" flip which reorders the flattened 2D tensor into snake order.\n\n    Args:\n        x (torch.Tensor): The tensor to apply the snake flip to, dimensions should be [..., Ly * Lx].\n\n    Returns:\n        (torch.Tensor): The \"snake\" flipped tensor, dimensions will be [..., Ly * Lx].\n    \"\"\"\n    return snake_flip(x.reshape(*x.shape[:-1], Ly, Lx)).reshape(*x.shape[:-1], -1)\n
    "},{"location":"reference/models/transformer/#rydberggpt.models.transformer.utils.snake_flip","title":"snake_flip(x: torch.Tensor) -> torch.Tensor","text":"

    Implements a \"snake\" flip which reorders the 2D tensor into snake order when flattened.

    Parameters:

    Name Type Description Default x Tensor

    The tensor to apply the snake flip to, dimensions should be [..., Ly, Lx].

    required

    Returns:

    Type Description Tensor

    The \"snake\" flipped tensor, dimensions will be [..., Ly, Lx].

    Source code in src\\rydberggpt\\models\\transformer\\utils.py
    def snake_flip(x: torch.Tensor) -> torch.Tensor:\n    \"\"\"\n    Implements a \"snake\" flip which reorders the 2D tensor into snake order when flattened.\n\n    Args:\n        x (torch.Tensor): The tensor to apply the snake flip to, dimensions should be [..., Ly, Lx].\n\n    Returns:\n        (torch.Tensor): The \"snake\" flipped tensor, dimensions will be [..., Ly, Lx].\n    \"\"\"\n\n    if not isinstance(x, torch.Tensor):\n        raise TypeError(\"Function only supports torch.Tensor\")\n\n    y = x.clone()\n\n    for i in range(y.shape[-2]):\n        if i % 2 == 1:\n            y[..., i, :] = torch.flip(y[..., i, :], dims=(-1,))\n\n    return y\n
    "}]} \ No newline at end of file