From f32013565ba03cadc128230692142e9ba50a19c8 Mon Sep 17 00:00:00 2001 From: hamiltonke <88093459+hamiltonke@users.noreply.github.com> Date: Thu, 21 Sep 2023 12:42:12 -0400 Subject: [PATCH] Add files via upload updated the `rescale_features` function -- now includes an example for discretizing features (hard-wired for 2 features only) --- QNLP_tutorial_QNNs.ipynb | 1827 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 1827 insertions(+) create mode 100644 QNLP_tutorial_QNNs.ipynb diff --git a/QNLP_tutorial_QNNs.ipynb b/QNLP_tutorial_QNNs.ipynb new file mode 100644 index 0000000..e7d0ca9 --- /dev/null +++ b/QNLP_tutorial_QNNs.ipynb @@ -0,0 +1,1827 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b4f46ddb", + "metadata": {}, + "source": [ + "### QNLP Tutorial (IEEE Quantum Week 2023): A QNN Introduction\n", + "\n", + "This notebook explores the design space of QNNs trained on synthetic data or on an NLP dataset. The goal of this tutorial is to introduce several methods for feature embedding, ansatz construction, and label assignment. \n", + "\n", + "In this tutorial we will use three datasets. The first is a set of random feature vectors with random labels, constructed as in the `TwoMoons` dataset. The second dataset is derived from the `IMDB` (Internet Movie DataBase) dataset: the original data is a collection of 50K text reviews, labeled by sentiment (positive or negative). Prior to this tutorial these reviews were embedded as length-3 feature vectors using the `doc2vec` embedding (introduced in the previous session). Finally, we have an example of multiclass classification using random features with random labels." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b781a3c7", + "metadata": {}, + "outputs": [], + "source": [ + "from pennylane import numpy as np\n", + "import pennylane as qml\n", + "import copy\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "\n", + "from sklearn.metrics import confusion_matrix\n", + "import seaborn as sns\n", + "\n", + "from sklearn.datasets import make_moons,make_classification" + ] + }, + { + "cell_type": "markdown", + "id": "6e91d547", + "metadata": {}, + "source": [ + "# Construct Unstructured Data (Synthetic)\n", + "\n", + "The `TwoMoons` dataset constructs a synthetic dataset of 2-dimensional features. The two classes are half-circles. \n", + "\n", + "$200$ samples are generated and $20\\%$ are held out as the test data. \n", + "\n", + "These functions are wrappers of existing functions in `scikit-learn`. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "708980c7", + "metadata": {}, + "outputs": [], + "source": [ + "# Make a dataset of two moons\n", + "def moons(samples, noise = 0.1, random_state=2023):\n", + " \"\"\"\n", + " Args:\n", + " samples (int): number of samples to generate\n", + " center (tuple): center of the circle\n", + " radius (float: radius of the circle\n", + "\n", + " Returns:\n", + " Xvals (array[tuple]): coordinates of points\n", + " yvals (array[int]): classification labels\n", + " \"\"\"\n", + " X, y = make_moons(n_samples=samples, noise=noise,random_state=random_state)\n", + " return np.array(X, requires_grad=False), np.array(y, requires_grad=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9ddd3233", + "metadata": {}, + "outputs": [], + "source": [ + "def partition_data(X,y,test_size=0.2):\n", + " '''\n", + " since the synthetic datasets used in this tutorial are already shuffled\n", + " the partitioning is just separating the first 80% of samples for traning\n", + " and the remaining 20% are held for testing'''\n", + 
" n_train_samples = int(len(X)*(1.-test_size))\n", + " trainX = X[:n_train_samples]\n", + " trainY = y[:n_train_samples]\n", + " \n", + " testX = X[n_train_samples:]\n", + " testY = y[n_train_samples:]\n", + " return trainX,trainY,testX,testY" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "112bb0c8", + "metadata": {}, + "outputs": [], + "source": [ + "def rescale_features(X,a=-np.pi,b=np.pi,normalize=False,discretize=False,bins=20):\n", + " '''\n", + " can use either simple shift/rescale of features into a range of values [a,b]\n", + " or can use a discrete map of categorical features to rotations\n", + " (c) unique values equally spaced in [a,b]'''\n", + " \n", + "\n", + "\n", + " if not discretize:\n", + " Xvals = X.copy()\n", + " Xmin = np.min(X, axis=0).numpy()\n", + " Xmax = np.max(X, axis=0).numpy()\n", + " Xvals = (b-a) * (Xvals - Xmin) / (Xmax - Xmin) + a\n", + " elif discretize:\n", + " if len(np.unique(X))==X.shape[0]*X.shape[1]:\n", + " Xvals = []\n", + " rot_angles = np.linspace(a,b,num=bins,endpoint=True)\n", + " # pandas.cut(x, bins, right=True, labels=None, retbins=False,\n", + " X0_labels,X0_bins = pd.cut(X[:,0],bins, labels=rot_angles,retbins=True)\n", + " X1_labels,X1_bins = pd.cut(X[:,1],bins, labels=rot_angles,retbins=True)\n", + " my_corpus = []\n", + " for idx in range(len(X0_labels)):\n", + " x0=X0_labels[idx]\n", + " x1=X1_labels[idx]\n", + " Xvals.append([x0,x1])\n", + " Xvals=np.asarray(Xvals)\n", + " else:\n", + " rot_angles = np.linspace(a,b,num=len(np.unique(X)), endpoint=True)\n", + " Xvals = np.asarray([[rot_angles[y].numpy() for y in x] for x in X])\n", + " if normalize:\n", + " row_norms = np.linalg.norm(Xvals,axis=1)\n", + " Xvals = Xvals / row_norms[:, np.newaxis]\n", + " Xvals[np.isnan(Xvals)] = 0\n", + " mask = np.all(np.abs(Xvals) < 1e-10, axis=1)\n", + " pos = np.where(~(mask))\n", + " return Xvals[pos],pos\n", + " else:\n", + " return Xvals" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + 
"id": "02fd3ffb", + "metadata": {}, + "outputs": [], + "source": [ + "def one_hot_encode_labels(y,L):\n", + " '''\n", + " replace class labels with one-hot encoded labels of length L\n", + " example: if L = 2\n", + " 0 -> [1,0]\n", + " 1 -> [0,1]\n", + " '''\n", + " unique_classes = set(y)\n", + " label_map = {}\n", + " for idx,c in enumerate(unique_classes):\n", + " label_vec = [0]*L\n", + " label_vec[idx]=1\n", + " label_map[c]=label_vec.copy()\n", + " one_hot_y=[label_map[iy] for _,iy in enumerate(y)]\n", + " return one_hot_y" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5d65bd7d", + "metadata": {}, + "outputs": [], + "source": [ + "def plot2d_data(X,y,fig=None, ax=None):\n", + " SMALL_SIZE = 12\n", + " MEDIUM_SIZE = 20\n", + " BIGGER_SIZE = 24\n", + "\n", + " plt.rc('font', size=SMALL_SIZE) # controls default text sizes\n", + " plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title\n", + " plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels\n", + " plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + " plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + " plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize\n", + " plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title\n", + " color_map={0:'blue',1: 'orange',2:'cyan'}\n", + " shape_map={0:'x',1:'o',2:'^'}\n", + " if fig == None:\n", + " fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n", + " blues = y == 0\n", + " oranges = y == 1\n", + " brights = y == 2\n", + " ax.scatter(X[blues, 0], X[blues, 1], c=color_map[0], s=75,marker=shape_map[0], alpha=0.75)\n", + " ax.scatter(X[oranges, 0], X[oranges, 1], c=color_map[1], s=75,marker=shape_map[1], alpha=0.75)\n", + " ax.scatter(X[brights, 0], X[brights, 1], c=color_map[2], s=75,marker=shape_map[1], alpha=0.75)\n", + " #ax.scatter(X[:,0],X[:,1], c=y, s=75, alpha=0.5,cmap='bwr')\n", + " ax.set_xlabel(\"$x_1$\")\n", + " ax.set_ylabel(\"$x_2$\")" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 7, + "id": "b14727fd", + "metadata": {}, + "outputs": [], + "source": [ + "def plot2d_boundary_data(X,y,xx,yy,z_height,fig=None, ax=None):\n", + " SMALL_SIZE = 12\n", + " MEDIUM_SIZE = 20\n", + " BIGGER_SIZE = 24\n", + "\n", + " plt.rc('font', size=SMALL_SIZE) # controls default text sizes\n", + " plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title\n", + " plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels\n", + " plt.rc('xtick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + " plt.rc('ytick', labelsize=MEDIUM_SIZE) # fontsize of the tick labels\n", + " plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize\n", + " plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title\n", + " color_map={0:'blue',1: 'orange',2:'cyan'}\n", + " shape_map={0:'x',1:'o',2:'^'}\n", + " if fig == None:\n", + " fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n", + " blues = y == 0\n", + " oranges = y == 1\n", + " brights = y == 2\n", + " ax.scatter(X[blues, 0], X[blues, 1], c=color_map[0], s=75,marker=shape_map[0], alpha=0.75)\n", + " ax.scatter(X[oranges, 0], X[oranges, 1], c=color_map[1], s=75,marker=shape_map[1], alpha=0.75)\n", + " ax.scatter(X[brights, 0], X[brights, 1], c=color_map[2], s=75,marker=shape_map[1], alpha=0.75)\n", + " ax.contourf(xx, yy, z_height, alpha=0.2,cmap='bwr') \n", + " ax.set_xlabel(\"$x_1$\",fontsize=18)\n", + " ax.set_ylabel(\"$x_2$\",fontsize=18)" + ] + }, + { + "cell_type": "markdown", + "id": "81c185f5", + "metadata": {}, + "source": [ + "The individual features are not syntactically related -- each feature vector is a random point in a high-dimensional space. We arbitrarily shift and rescale the generated vectors so that the values of the feature vectors are in the range $[-\\pi, \\pi]$. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cb1ef163", + "metadata": {}, + "outputs": [], + "source": [ + "X, y = moons(200)\n", + "X_train,y_train,X_test,y_test = partition_data(X,y)\n", + "\n", + "X_train_discrete = rescale_features(X_train,discretize=True,bins=20)\n", + "X_test_discrete = rescale_features(X_test,discretize=True,bins=20)\n", + "\n", + "X_train = rescale_features(X_train)\n", + "X_test = rescale_features(X_test)" + ] + }, + { + "cell_type": "markdown", + "id": "b43467c1", + "metadata": {}, + "source": [ + "Just making a quick side-by-side comparison, the features of `X_train` should be rescaled to $[-\\pi,\\pi]$." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2a722c82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'discretized data')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "f, ax = plt.subplots(ncols=3,figsize=(35,12))\n", + "\n", + "plt.rc('text',usetex=False)\n", + "plt.rc('font',family='serif')\n", + "plt.rc('xtick',labelsize=18)\n", + "plt.rc('ytick',labelsize=18)\n", + "plt.rc('legend',**{'numpoints':1,'fontsize':18,'handlelength':2})\n", + "\n", + "# add a big axis, hide frame\n", + "f.add_subplot(111,frameon=False) \n", + "# hide tick and tick label of the big axis\n", + "plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)\n", + "\n", + "\n", + "f.subplots_adjust( wspace=0.1,hspace=0.2 )\n", + "\n", + "plot2d_data(X,y, fig=f, ax=ax[0])\n", + "ax[0].set_title(\"original generated data\",fontsize=24)\n", + "plot2d_data(X_train,y_train,fig=f,ax=ax[1])\n", + "ax[1].set_title(\"rescaled data\",fontsize=24)\n", + "plot2d_data(X_train_discrete,y_train,fig=f,ax=ax[2])\n", + "ax[2].set_title(\"discretized data\",fontsize=24)" + ] + }, + { + "cell_type": "markdown", + "id": "88496b7f", + "metadata": {}, + "source": [ + "## (Optional): A second synthetic dataset\n", + "\n", + "Instead of using hte `TwoMoons` (or another `scikit-learn` dataset), you can also generate a set of random features labeled with random binary labels" + ] + }, + { + "cell_type": "raw", + "id": "d89ae983", + "metadata": {}, + "source": [ + "noise_features = (2*np.pi)*np.random.random((150,2)) #generate uniform random samples on [0,2pi]\n", + "noise_labels = np.asarray([x.numpy() for x in np.random.binomial(1, 0.5,150)]) #generate random binary labels" + ] + }, + { + "cell_type": "markdown", + "id": "c7a17274", + "metadata": {}, + "source": [ + "# Define Layer Templates" + ] + }, + { + "cell_type": "markdown", + "id": "dd179e7c", + "metadata": {}, + "source": [ + "The first template is the data-encoding layer. 
This takes as input a feature vector (x) and encodes it into a single qubit using 3 gates (RY-RZ-RY). " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a383a02b", + "metadata": {}, + "outputs": [], + "source": [ + "def AngleEncodingLayer(x,dev_wires):\n", + " '''\n", + " Layer template that applies the rotation gate layer\n", + " '''\n", + " if len(x)<3*len(dev_wires):\n", + " x_ = np.reshape(np.tile(x,3*len(dev_wires)),(-1,3))\n", + " else:\n", + " x_ = np.reshape(x,(-1,3))\n", + " for idx in range(len(dev_wires)):\n", + " qml.RY(x_[idx][0],wires=dev_wires[idx])\n", + " qml.RZ(x_[idx][1],wires=dev_wires[idx])\n", + " qml.RY(x_[idx][2],wires=dev_wires[idx])\n" + ] + }, + { + "cell_type": "markdown", + "id": "256c6120", + "metadata": {}, + "source": [ + "The second template is the trainable ansatz layer -- for this tutorial we will use a bilayer ansatz built from interleaved layers of CNOTs followed by trainable rotation gates. " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f6227c44", + "metadata": {}, + "outputs": [], + "source": [ + "def EdgeListLayer(params,dev_wires,edge_list=[]):\n", + " ''' Layer template that takes an edge list and builds\n", + " entangling layer - rotation layer - ansatz\n", + "\n", + " params: list of floats (trainable parameters)\n", + "\n", + " wires: list of int\n", + " edge_list: list of lists for CNOT layouts\n", + " '''\n", + " # add entangling layer with CNOTs defined by edge_list\n", + " if len(edge_list) > 0:\n", + " for edx in edge_list:\n", + " qml.CNOT(wires=[dev_wires[edx[0]],dev_wires[edx[1]]])\n", + " for idx in dev_wires:\n", + " qml.Rot(*params[idx], wires=dev_wires[idx])\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "543530ae", + "metadata": {}, + "outputs": [], + "source": [ + "class binaryQNN(object):\n", + " '''\n", + " build a Quantum Neural Network where label extraction is done via bitstring probabilities\n", + " '''\n", + " def 
__init__(self, wires=1,layers=1,shots=None,edge_list=[],\\\n", + " max_iter=32,tol=1e-3,output_qubits=[0],\\\n", + " batch_size=16,learning_rate=0.05,coefs=None,**kwargs):\n", + " self.n_wires = wires #QNN wires\n", + " self.n_layers=layers #QNN layers\n", + " self.edge_list=edge_list #Connectivity for the entangling layers\n", + " self.dev_wires = [np.array(idx, requires_grad=True) for idx in range(self.n_wires)] #whole register\n", + " self.n_shots=shots #samples to take to generate bitstring probabilities\n", + " self.coefs_=coefs #stored model parameters\n", + " self.output_qubits = output_qubits #which qubits to measure for output\n", + " self.device = qml.device(\"default.qubit\", wires=self.dev_wires,shots=self.n_shots) #Pennylane qubit device\n", + " self.learning_rate=learning_rate # learning rate for optimizer\n", + " self.batch_size=batch_size #batch size for batched gradient descent\n", + " self.max_iter = max_iter #maximum number of epochs\n", + " self.tol=tol #tolerance to detect early stopping\n", + " self.wait_time = 10 #number of epochs with no change that triggers early stopping\n", + " \n", + " def build_circuit(self,*args, **kwds):\n", + " raise NotImplementedError\n", + " \n", + " def initialize_params(self):\n", + " params = 2.*np.pi*np.random.random(3*self.n_wires*self.n_layers)\n", + " self.coefs_=params.copy()\n", + " \n", + " def accuracy_score(self,y_true, y_pred):\n", + " \"\"\"Accuracy score.\n", + "\n", + " Args:\n", + " y_true (array[float]): 1-d array of targets\n", + " y_predicted (array[float]): 1-d array of predictions\n", + " Returns:\n", + " score (float): the fraction of correctly classified samples\n", + " \"\"\"\n", + " if (y_true.ndim==1) and (y_pred.ndim==1):\n", + " score = y_true == y_pred\n", + " elif (y_true.ndim>1) and (y_pred.ndim==1):\n", + " score = np.argmax(y_true,axis=1) == y_pred\n", + " else:\n", + " score = np.argmax(y_true,axis=1) == np.argmax(y_pred,axis=1)\n", + " return score.sum() / len(score)\n", + "\n", 
+ " def make_predictions(self,params,x):\n", + " \"\"\"\n", + " assign labels to some data features\n", + " \"\"\"\n", + " predicted = []\n", + " qnode_ = qml.QNode(self.build_circuit, self.device)\n", + " for i in range(len(x)):\n", + " P = qnode_(params,x[i])\n", + " decoded=np.argmax(P)\n", + " predicted.append(decoded)\n", + " return np.array(predicted)\n", + "\n", + " def class_probabilities(self,params,x):\n", + " predicted = []\n", + " qnode_ = qml.QNode(self.build_circuit, self.device)\n", + " for i in range(len(x)):\n", + " P = qnode_(params,x[i])\n", + " predicted.append(P)\n", + " return np.array(predicted)\n", + "\n", + " def loss_function(self,params,x, y):\n", + " \"\"\"\n", + " Cost function to be minimized.\n", + "\n", + " Args:\n", + " params (array[float]): array of parameters\n", + " x (array[float]): 2-d array of input vectors\n", + " y (array[float]): 1-d array of targets\n", + "\n", + " Returns:\n", + " float: loss value to be minimized\n", + " \"\"\"\n", + " # Compute prediction for each input in data batch\n", + " loss = 0.0\n", + " qnode_ = qml.QNode(self.build_circuit, self.device)\n", + " for i in range(len(x)):\n", + " qp = qnode_(params,x[i])\n", + " yval=y[i]\n", + " if yval==0:\n", + " yp = np.array([1, 0])\n", + " loss = loss -np.sum(yp * np.log(qp+10**-12))\n", + " else:\n", + " yp = np.array([0,1])\n", + " loss = loss -np.sum(yp * np.log(qp+10**-12))\n", + " return loss / len(x)\n", + "\n", + " def iterate_minibatches(self,inputs, targets, batch_size):\n", + " \"\"\"\n", + " A generator for batches of the input data\n", + "\n", + " Args:\n", + " inputs (array[float]): input data\n", + " targets (array[float]): targets\n", + "\n", + " Returns:\n", + " inputs (array[float]): one batch of input data of length `batch_size`\n", + " targets (array[float]): one batch of targets of length `batch_size`\n", + " \"\"\"\n", + " for start_idx in range(0, inputs.shape[0] - batch_size + 1, batch_size):\n", + " idxs = slice(start_idx, start_idx + 
batch_size)\n", + " yield inputs[idxs], targets[idxs]\n", + " \n", + " def fit(self,X,y,Xtest=None,ytest=None):\n", + " \"\"\"\n", + " implement gradient based training\n", + " \"\"\"\n", + " if (Xtest is not None) and (ytest is not None):\n", + " #if you provide the testing data that will allso be used during evaluating hte loss and accuracy curves\n", + " self.loss_curve_test = []\n", + " self.accuracy_curve_test = []\n", + " \n", + " opt = qml.optimize.AdamOptimizer(self.learning_rate, beta1=0.9, beta2=0.999)\n", + "\n", + " if self.coefs_ is None:\n", + " # initialize random weights\n", + " self.initialize_params()\n", + " params = self.coefs_.copy()\n", + " else:\n", + " params = self.coefs_\n", + "\n", + " if len(X)self.tol:\n", + " best_loss = loss\n", + " iter_count=0\n", + " if iter_count>self.wait_time:\n", + " print('early stopping ')\n", + " print(\n", + " \"Epoch: {:2d} | Loss: {:3f} | Train accuracy: {:3f}\".format(\n", + " it+1, loss, accuracy\n", + " )\n", + " )\n", + " break\n", + " else:\n", + " print(\n", + " \"Epoch: {:2d} | Loss: {:3f} | Train accuracy: {:3f}\".format(\n", + " it+1, loss, accuracy\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a5de330f", + "metadata": {}, + "outputs": [], + "source": [ + "class druQNN(binaryQNN):\n", + " '''\n", + " build a Quantum Neural Network where label extraction is done via bitstring probabilities\n", + " using data re-uploading\n", + " '''\n", + " def __init__(self, **kwargs):\n", + " super(druQNN,self).__init__(**kwargs)\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def _reset_(self):\n", + " self.coefs_=None\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def build_circuit(self,params,x=None):\n", + " shape = (-1,len(self.dev_wires),3)\n", + " params = np.asarray(params).reshape(shape)\n", + " #x = np.resize(x, params.shape[0]-1)\n", + " \n", + " for idx in range(params.shape[0]-1):\n", + " 
qml.layer(AngleEncodingLayer,1,x=x,dev_wires=self.dev_wires)\n", + " qml.layer(EdgeListLayer, 1,[params[idx]],\\\n", + " dev_wires=self.dev_wires,\\\n", + " edge_list=self.edge_list)\n", + " return qml.probs(wires=[self.dev_wires[ix] for ix in self.output_qubits])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "6e2e28f4", + "metadata": {}, + "outputs": [], + "source": [ + "class basicQNN(binaryQNN):\n", + " '''\n", + " build a Quantum Neural Network where label extraction is done via bitstring probabilities\n", + " using data re-uploading\n", + " '''\n", + " def __init__(self,**kwargs):\n", + " super(basicQNN,self).__init__(**kwargs)\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def _reset_(self):\n", + " self.coefs_=None\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def build_circuit(self,params,x=None):\n", + " shape = (-1,len(self.dev_wires),3)\n", + " params = np.asarray(params).reshape(shape)\n", + " #x = np.resize(x, params.shape[0]-1)\n", + " qml.layer(AngleEncodingLayer,1,x=x,dev_wires=self.dev_wires)\n", + " for idx in range(params.shape[0]-1):\n", + " qml.layer(EdgeListLayer, 1,[params[idx]],\\\n", + " dev_wires=self.dev_wires,\\\n", + " edge_list=self.edge_list)\n", + " return qml.probs(wires=[self.dev_wires[ix] for ix in self.output_qubits])" + ] + }, + { + "cell_type": "markdown", + "id": "cd236299", + "metadata": {}, + "source": [ + "# Example 1: train a quantum classifier for binary classification " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c35777b1", + "metadata": {}, + "outputs": [], + "source": [ + "n_layers = 6 # number of ansatz layers\n", + "n_qubits = 3 # number of qubits\n", + "max_steps= 15 # maximum number of epochs \n", + "alpha = 0.05\n", + "edge_list = [0,1],[2,1]" + ] + }, + { + "cell_type": "markdown", + "id": "35b89f41", + "metadata": {}, + "source": [ + "So for these trial runs the QNN is built with 3 qubits. 
Overall each feature vector needs to have shape (3,3). In the definition of the encoding layer, the first step is to tile the passed feature vector -- this can be used to redundantly encode a feature (x) in several qubits. \n" + ] + }, + { + "cell_type": "markdown", + "id": "d33142b4", + "metadata": {}, + "source": [ + "To ensure that the features are encoded uniformly we need to add a 3rd dimension.\n", + "\n", + "Let's add a 3rd feature that is uninformative noise" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f3787a4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.55728475, 0.16374129, 0.03862261])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_padNoise = np.asarray(np.hstack((X_train,0.1*np.random.random((len(X_train),1))-0.05)),requires_grad=False)\n", + "X_test_padNoise = np.asarray(np.hstack((X_test,0.1*np.random.random((len(X_test),1))-0.05)),requires_grad=False)\n", + "\n", + "X_train_padNoise[0].numpy()" + ] + }, + { + "cell_type": "markdown", + "id": "743ddefd", + "metadata": {}, + "source": [ + "Alternatively this 3rd dimension could have been all zero -- this will reduce the third gate in the angle encoding layer (RY) to an identity gate" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "5446fcbd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.55728475, 0.16374129, 0. 
])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_padZero = np.asarray(np.hstack((X_train,np.zeros((len(X_train),1)))),requires_grad=False)\n", + "X_test_padZero = np.asarray(np.hstack((X_test,np.zeros((len(X_test),1)))),requires_grad=False)\n", + "\n", + "X_train_padZero[0].numpy()" + ] + }, + { + "cell_type": "markdown", + "id": "1fa285eb", + "metadata": {}, + "source": [ + "Another option would be to only encode a single feature in a single qubit, and replace all remaining gates with identities" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e7c29027", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.55728475, 0.16374129, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. ])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_dilute = np.asarray(np.hstack((X_train,np.zeros((len(X_train),7)))),requires_grad=False)\n", + "X_test_dilute = np.asarray(np.hstack((X_test,np.zeros((len(X_test),7)))),requires_grad=False)\n", + "\n", + "X_train_dilute[0].numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "2c122285", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.49604095, 0.16534698, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. 
])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_discrete = np.asarray(np.hstack((X_train_discrete,np.zeros((len(X_train_discrete),7)))),requires_grad=False)\n", + "X_test_discrete = np.asarray(np.hstack((X_test_discrete,np.zeros((len(X_test_discrete),7)))),requires_grad=False)\n", + "\n", + "X_train_discrete[0].numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "eb859a93", + "metadata": {}, + "outputs": [], + "source": [ + "basic_classifier_ = basicQNN(wires=n_qubits,shots=None,\\\n", + " max_iter=max_steps,edge_list=edge_list,\\\n", + " layers=n_layers,batch_size=32,\\\n", + " learning_rate=alpha\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "26968f44", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_wires': 3,\n", + " 'n_layers': 6,\n", + " 'edge_list': ([0, 1], [2, 1]),\n", + " 'dev_wires': [tensor(0, requires_grad=True),\n", + " tensor(1, requires_grad=True),\n", + " tensor(2, requires_grad=True)],\n", + " 'n_shots': None,\n", + " 'coefs_': None,\n", + " 'output_qubits': [0],\n", + " 'device': ,\n", + " 'learning_rate': 0.05,\n", + " 'batch_size': 32,\n", + " 'max_iter': 15,\n", + " 'tol': 0.001,\n", + " 'wait_time': 10,\n", + " 'loss_curve': [],\n", + " 'accuracy_curve': []}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basic_classifier_.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4550ff80", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "initial loss and accuracy (random model): 0.9951733035724024 0.3625\n", + "Epoch: 1 | Loss: 0.605363 | Train accuracy: 0.693750\n", + "Epoch: 2 | Loss: 0.569405 | Train accuracy: 0.612500\n", + "Epoch: 3 | Loss: 0.558104 | Train accuracy: 0.693750\n", + "Epoch: 4 | Loss: 0.539677 | Train accuracy: 0.718750\n", + 
"Epoch: 5 | Loss: 0.544186 | Train accuracy: 0.718750\n", + "Epoch: 6 | Loss: 0.530777 | Train accuracy: 0.656250\n", + "Epoch: 7 | Loss: 0.532916 | Train accuracy: 0.668750\n", + "Epoch: 8 | Loss: 0.530325 | Train accuracy: 0.687500\n", + "Epoch: 9 | Loss: 0.530421 | Train accuracy: 0.693750\n", + "Epoch: 10 | Loss: 0.529168 | Train accuracy: 0.668750\n", + "Epoch: 11 | Loss: 0.529022 | Train accuracy: 0.675000\n", + "Epoch: 12 | Loss: 0.529404 | Train accuracy: 0.681250\n", + "Epoch: 13 | Loss: 0.528718 | Train accuracy: 0.668750\n", + "Epoch: 14 | Loss: 0.528669 | Train accuracy: 0.668750\n", + "Epoch: 15 | Loss: 0.528834 | Train accuracy: 0.675000\n" + ] + } + ], + "source": [ + "basic_classifier_.fit(X_train_dilute,y_train,X_test_dilute,y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6ad9a3a0", + "metadata": {}, + "outputs": [], + "source": [ + "bc_dilute_model = copy.deepcopy(basic_classifier_.__dict__)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "413e93cb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "initial loss and accuracy (random model): 0.8946561361186868 0.475\n", + "Epoch: 1 | Loss: 0.563786 | Train accuracy: 0.718750\n", + "Epoch: 2 | Loss: 0.530503 | Train accuracy: 0.662500\n", + "Epoch: 3 | Loss: 0.514228 | Train accuracy: 0.706250\n", + "Epoch: 4 | Loss: 0.503182 | Train accuracy: 0.731250\n", + "Epoch: 5 | Loss: 0.497404 | Train accuracy: 0.731250\n", + "Epoch: 6 | Loss: 0.492449 | Train accuracy: 0.706250\n", + "Epoch: 7 | Loss: 0.487364 | Train accuracy: 0.743750\n", + "Epoch: 8 | Loss: 0.486383 | Train accuracy: 0.737500\n", + "Epoch: 9 | Loss: 0.486425 | Train accuracy: 0.743750\n", + "Epoch: 10 | Loss: 0.485516 | Train accuracy: 0.750000\n", + "Epoch: 11 | Loss: 0.485106 | Train accuracy: 0.743750\n", + "Epoch: 12 | Loss: 0.484911 | Train accuracy: 0.750000\n", + "Epoch: 13 | Loss: 0.484814 | Train accuracy: 0.743750\n", + "Epoch: 
14 | Loss: 0.484794 | Train accuracy: 0.750000\n", + "Epoch: 15 | Loss: 0.484732 | Train accuracy: 0.750000\n" + ] + } + ], + "source": [ + "basic_classifier_._reset_()\n", + "basic_classifier_.fit(X_train_discrete,y_train,X_test_discrete,y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "e4d95ab8", + "metadata": {}, + "outputs": [], + "source": [ + "bc_discrete_model = copy.deepcopy(basic_classifier_.__dict__)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "b3b23811", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.0, 1.0)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "f, ax = plt.subplots(ncols=2,figsize=(24,12))\n", + "\n", + "plt.rc('text',usetex=False)\n", + "plt.rc('font',family='serif')\n", + "plt.rc('xtick',labelsize=18)\n", + "plt.rc('ytick',labelsize=18)\n", + "plt.rc('legend',**{'numpoints':1,'fontsize':18,'handlelength':2})\n", + "\n", + "ax[0].plot(bc_dilute_model['loss_curve'],'r+',ms=25,label='train')\n", + "ax[0].plot(bc_dilute_model['loss_curve_test'],'b.',ms=25,label='test')\n", + "ax[0].set_ylim(0,1)\n", + "\n", + "ax[1].plot(bc_discrete_model['loss_curve'],'r+',ms=25,label='train')\n", + "ax[1].plot(bc_discrete_model['loss_curve_test'],'b.',ms=25,label='test')\n", + "ax[1].set_ylim(0,1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "78b68022", + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = basic_classifier_.make_predictions(bc_discrete_model['coefs_'],X_train_discrete)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "82274d68", + "metadata": {}, + "outputs": [], + "source": [ + "xx, yy = np.meshgrid(np.arange(-np.pi, np.pi+0.1, 0.1), np.arange(-np.pi, np.pi+0.1, 0.1))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ae20a8a0", + "metadata": {}, + "outputs": [], + "source": [ + "BC_boundary = basic_classifier_.make_predictions(bc_discrete_model['coefs_'],np.c_[xx.ravel(), yy.ravel(),np.zeros((len(xx.ravel()),7))])\n", + "BC_boundary = BC_boundary.reshape(xx.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "1afb36ed", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "f, ax = plt.subplots(ncols=2,figsize=(24,12))\n", + "\n", + "plt.rc('text',usetex=False)\n", + "plt.rc('font',family='serif')\n", + "plt.rc('xtick',labelsize=18)\n", + "plt.rc('ytick',labelsize=18)\n", + "plt.rc('legend',**{'numpoints':1,'fontsize':18,'handlelength':2})\n", + "\n", + "# add a big axis, hide frame\n", + "f.add_subplot(111,frameon=False) \n", + "# hide tick and tick label of the big axis\n", + "plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)\n", + "\n", + "\n", + "f.subplots_adjust( wspace=0.1,hspace=0.2 )\n", + "\n", + "plot2d_data(X_train_discrete,y_train, fig=f, ax=ax[0])\n", + "plot2d_boundary_data(X_train_discrete,y_pred,xx,yy,BC_boundary, fig=f, ax=ax[1])\n" + ] + }, + { + "cell_type": "markdown", + "id": "da73a2ec", + "metadata": {}, + "source": [ + "A word of caution -- with the `QNN` class, the final trained model (i.e. the parameters) is stored in `.coefs_`. Also the loss trace and accuracies tracked during training are stored in `.loss_curve` and `.accuracy_curve`. These can be saved to a file for future use.\n", + "\n", + "If you were to train on a different set of data, the training would start from those stored values. To re-initialize the model make sure to set `.coefs_ = None`, `.loss_curve=[]`, and `.accuracy_curve=[]`. This can be done using the `._reset_()` method.\n", + "\n", + "For example, you may want to re-train the same QNN using the features padded with random noise." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "2b99b0e0", + "metadata": {}, + "outputs": [], + "source": [ + "basic_classifier_._reset_()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "dbcaf9ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "initial loss and accuracy (random model): 0.7879930232973773 0.55\n", + "Epoch: 1 | Loss: 0.529834 | Train accuracy: 0.756250\n", + "Epoch: 2 | Loss: 0.468426 | Train accuracy: 0.806250\n", + "Epoch: 3 | Loss: 0.465213 | Train accuracy: 0.800000\n", + "Epoch: 4 | Loss: 0.461767 | Train accuracy: 0.800000\n", + "Epoch: 5 | Loss: 0.456449 | Train accuracy: 0.775000\n", + "Epoch: 6 | Loss: 0.452417 | Train accuracy: 0.793750\n", + "Epoch: 7 | Loss: 0.450744 | Train accuracy: 0.781250\n", + "Epoch: 8 | Loss: 0.449373 | Train accuracy: 0.787500\n", + "Epoch: 9 | Loss: 0.448215 | Train accuracy: 0.806250\n", + "Epoch: 10 | Loss: 0.447213 | Train accuracy: 0.800000\n", + "Epoch: 11 | Loss: 0.446798 | Train accuracy: 0.793750\n", + "Epoch: 12 | Loss: 0.446379 | Train accuracy: 0.806250\n", + "Epoch: 13 | Loss: 0.445944 | Train accuracy: 0.806250\n", + "Epoch: 14 | Loss: 0.445673 | Train accuracy: 0.806250\n", + "Epoch: 15 | Loss: 0.445504 | Train accuracy: 0.800000\n" + ] + } + ], + "source": [ + "basic_classifier_.fit(X_train_padZero,y_train,X_test_padZero,y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e5871ac6", + "metadata": {}, + "outputs": [], + "source": [ + "bc_padNoise_model = copy.deepcopy(basic_classifier_.__dict__)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "f325cf74", + "metadata": {}, + "outputs": [], + "source": [ + "BC_padNoise_boundary = basic_classifier_.make_predictions(bc_padNoise_model['coefs_'],np.c_[xx.ravel(), yy.ravel(),0.1*np.random.random((len(xx.ravel()),1))-0.05])\n", + "BC_padNoise_boundary = BC_padNoise_boundary.reshape(xx.shape)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "374b3261", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "7345ed11", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "f, ax = plt.subplots(ncols=2,figsize=(24,12))\n", + "\n", + "plt.rc('text',usetex=False)\n", + "plt.rc('font',family='serif')\n", + "plt.rc('xtick',labelsize=18)\n", + "plt.rc('ytick',labelsize=18)\n", + "plt.rc('legend',**{'numpoints':1,'fontsize':18,'handlelength':2})\n", + "\n", + "ax[0].plot(basic_classifier_.loss_curve,'r+',ms=25,label='train')\n", + "ax[0].plot(basic_classifier_.loss_curve_test,'b.',ms=25,label='test')\n", + "ax[0].set_ylim(0,1)\n", + "\n", + "plot2d_boundary_data(X_train_padNoise,y_train,xx,yy,BC_padNoise_boundary, fig=f, ax=ax[1])\n" + ] + }, + { + "cell_type": "markdown", + "id": "d685b1e8", + "metadata": {}, + "source": [ + "## Using a Data Re Uploading QNN\n", + "\n", + "The examples above can also be executed using the data re-uploading QNN. The same processed features can be used. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da6d2e3f", + "metadata": {}, + "outputs": [], + "source": [ + "dru_classifier_ = druQNN(wires=n_qubits,shots=None,\\\n", + " max_iter=max_steps,edge_list=edge_list,\\\n", + " layers=n_layers,batch_size=32,\\\n", + " learning_rate=alpha\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d154b3c", + "metadata": {}, + "outputs": [], + "source": [ + "dru_classifier_.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "370bb692", + "metadata": {}, + "outputs": [], + "source": [ + "dru_classifier_.fit(X_train_padNoise,y_train,X_test_padNoise,y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e666ee1", + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots(ncols=2,figsize=(24,12))\n", + "\n", + "plt.rc('text',usetex=False)\n", + "plt.rc('font',family='serif')\n", + "plt.rc('xtick',labelsize=18)\n", + "plt.rc('ytick',labelsize=18)\n", + 
"plt.rc('legend',**{'numpoints':1,'fontsize':18,'handlelength':2})\n", + "\n", + "ax[0].plot(dru_classifier_.loss_curve,'r+',ms=25,label='train')\n", + "ax[0].plot(dru_classifier_.loss_curve_test,'b.',ms=25,label='test')\n", + "ax[0].set_title(\"Loss Curves\",fontsize=24)\n", + "ax[0].legend()\n", + "ax[1].plot(dru_classifier_.accuracy_curve,'r+',ms=25,label='train')\n", + "ax[1].plot(dru_classifier_.accuracy_curve_test,'b.',ms=25,label='test')\n", + "ax[1].set_title(\"Accuracy Curves\",fontsize=24)\n", + "ax[1].legend()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0a026ed", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "1b94fe52", + "metadata": {}, + "source": [ + "## Using a set of text features\n", + "\n", + "The previous two datasets (`TwoMoons` and `noise`). This next example uses features extracted from the IMDB dataset for a text classifiation (discerning positive versus negative reviews). The features in the external file `imdb_data_subset.csv` have been procesed using `doc2vec` to generate features of length 2. \n", + "\n", + "The full IMDB dataset contains 50K labeled examples. But due to time, and for demonstration purposes we do not train over the entire dataset. Instead, we only extract a small fraction of the dataset for training and testing. There are 1000 samples in the file `imdb_data_subset.csv` and they are shuffled already, so we can use simple slciing of the data samples. 
" + ] + }, + { + "cell_type": "markdown", + "id": "cce1fab3", + "metadata": {}, + "source": [ + "## Import IMDB features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08cea959", + "metadata": {}, + "outputs": [], + "source": [ + "imdb_data = pd.read_csv('imdb_data_subset.csv')\n", + "imdb_data = imdb_data.replace({'sentiment': {'positive': 1, 'negative': 0}}) #map the class labels to binary values\n", + "\n", + "\n", + "imdb_training_data = imdb_data.head(150)\n", + "imdb_training_features = np.asarray([np.asarray(eval(x)) for x in imdb_training_data.doc2vec.values],requires_grad=False)\n", + "imdb_training_features = rescale_features(imdb_training_features)\n", + "\n", + "imdb_training_labels = np.asarray([int(x) for x in imdb_training_data.sentiment.values])\n", + "\n", + "imdb_testing_data = imdb_data.tail(50)\n", + "imdb_testing_features = np.asarray([np.asarray(eval(x)) for x in imdb_testing_data.doc2vec.values],requires_grad=False)\n", + "imdb_testing_features = rescale_features(imdb_testing_features)\n", + "imdb_testing_labels = np.asarray([int(x) for x in imdb_testing_data.sentiment.values])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "057e681f", + "metadata": {}, + "outputs": [], + "source": [ + "dru_classifier_._reset_()\n", + "dru_classifier_.fit(imdb_training_features,imdb_training_labels,imdb_testing_features,imdb_testing_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c090bf7", + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots(ncols=2,figsize=(24,12))\n", + "\n", + "plt.rc('text',usetex=False)\n", + "plt.rc('font',family='serif')\n", + "plt.rc('xtick',labelsize=18)\n", + "plt.rc('ytick',labelsize=18)\n", + "plt.rc('legend',**{'numpoints':1,'fontsize':18,'handlelength':2})\n", + "\n", + "ax[0].plot(dru_classifier_.loss_curve,'r+',ms=25,label='train')\n", + "ax[0].plot(dru_classifier_.loss_curve_test,'b.',ms=25,label='test')\n", + 
"ax[0].set_title(\"Loss Curves\",fontsize=24)\n", + "ax[0].legend()\n", + "ax[1].plot(dru_classifier_.accuracy_curve,'r+',ms=25,label='train')\n", + "ax[1].plot(dru_classifier_.accuracy_curve_test,'b.',ms=25,label='test')\n", + "ax[1].set_title(\"Accuracy Curves\",fontsize=24)\n", + "ax[1].legend()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd793cb5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fa7ff74", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "9c30f36c", + "metadata": {}, + "source": [ + "# Multiclass Classification\n", + "\n", + "We can also use QNNs for multiclass classification. The modifications needed for our exisitng `binaryQNN` class are:\n", + "* use Categorical Cross Entropy instead of Binary Cross Entropy\n", + "* Define a way to extract more than 2 labels\n", + "\n", + "The label extraction method we are choosing to use is implemented with 3 steps: \n", + "1) measure 3 qubits and generate a distribution over bitstrings (more than just `0` and `1`) \n", + "\n", + "2) downselecting the low weight bitstrings (`001`, `010`, `100`) \n", + "\n", + "3) renormalizing these amplitudes using a sigmoid function. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72a247b5", + "metadata": {}, + "outputs": [], + "source": [ + "class multiclassQNN(object):\n", + " '''\n", + " build a Quantum Neural Network where label extraction is done via bitstring probabilities\n", + " '''\n", + " def __init__(self, wires=3,layers=1,shots=None,edge_list=[],\\\n", + " max_iter=32,tol=1e-3,output_qubits=[0,1,2],\\\n", + " batch_size=16,learning_rate=0.05,coefs=None,**kwargs):\n", + " self.n_wires = wires #QNN wires\n", + " self.n_layers=layers #QNN layers\n", + " self.edge_list=edge_list #Connectivity for the entangling layers\n", + " self.dev_wires = [np.array(idx, requires_grad=True) for idx in range(self.n_wires)] #whole register\n", + " self.n_shots=shots #samples to take to generate bitstring probabilities\n", + " self.coefs_=coefs #stored model parameters\n", + " assert wires>=len(output_qubits), 'need to add more qubits to your circuit'\n", + " self.output_qubits = output_qubits #which qubits to measure for output\n", + " self.device = qml.device(\"default.qubit\", wires=self.dev_wires,shots=self.n_shots) #Pennylane qubit device\n", + " self.learning_rate=learning_rate # learning rate for optimizer\n", + " self.batch_size=batch_size #batch size for batched gradient descent\n", + " self.max_iter = max_iter #maximum number of epochs\n", + " self.tol=tol #tolerance to detect early stopping\n", + " self.wait_time = 10 #number of epochs with no change that triggers early stopping\n", + " \n", + " def build_circuit(self,*args, **kwds):\n", + " raise NotImplementedError\n", + " \n", + " def initialize_params(self):\n", + " params = 2.*np.pi*np.random.random(3*self.n_wires*self.n_layers)\n", + " self.coefs_=params.copy()\n", + " \n", + " def accuracy_score(self,y_true, y_pred):\n", + " \"\"\"Accuracy score.\n", + "\n", + " Args:\n", + " y_true (array[float]): 1-d array of targets\n", + " y_predicted (array[float]): 1-d array of predictions\n", + " Returns:\n", + " score 
(float): the fraction of correctly classified samples\n", + " \"\"\"\n", + " if (y_true.ndim==1) and (y_pred.ndim==1):\n", + " score = y_true == y_pred\n", + " elif (y_true.ndim>1) and (y_pred.ndim==1):\n", + " score = np.argmax(y_true,axis=1) == y_pred\n", + " else:\n", + " score = np.argmax(y_true,axis=1) == np.argmax(y_pred,axis=1)\n", + " return score.sum() / len(score)\n", + "\n", + " def softmax(self,P):\n", + " \"\"\"Compute softmax values for each sets of scores in x.\"\"\"\n", + " P = np.asarray(P).astype(float)\n", + " e_x = np.exp(P - np.max(P))\n", + " return e_x / e_x.sum()\n", + "\n", + " def make_predictions(self,params,x):\n", + " \"\"\"\n", + " assign labels to some data features\n", + " \"\"\"\n", + " predicted = []\n", + " qnode_ = qml.QNode(self.build_circuit, self.device)\n", + " for i in range(len(x)):\n", + " decoded = np.zeros(len(self.output_qubits))\n", + " P = qnode_(params,x[i])\n", + " w_states = [P[idc] for idc in [1,2,4]] # downselect on low weight states\n", + " decoded[np.argmax(self.softmax(w_states))]=1\n", + " predicted.append(decoded)\n", + " return np.array(predicted)\n", + "\n", + " def class_probabilities(self,params,x):\n", + " predicted = []\n", + " qnode_ = qml.QNode(self.build_circuit, self.device)\n", + " for i in range(len(x)):\n", + " P = qnode_(params,x[i])\n", + " w_states = [P[idc] for idc in [1,2,4]] # downselect on low weight states\n", + " class_probs=self.softmax(w_states)\n", + " predicted.append(class_probs)\n", + " return np.array(predicted)\n", + "\n", + " def loss_function(self,params,x, y):\n", + " \"\"\"\n", + " Cost function to be minimized.\n", + "\n", + " Args:\n", + " params (array[float]): array of parameters\n", + " x (array[float]): 2-d array of input vectors\n", + " y (array[float]): 1-d array of targets\n", + "\n", + " Returns:\n", + " float: loss value to be minimized\n", + " \"\"\"\n", + " # Compute prediction for each input in data batch\n", + " loss = 0.0\n", + " qnode_ = 
qml.QNode(self.build_circuit, self.device)\n", + " for i in range(len(x)):\n", + " P = qnode_(params,x[i])\n", + " w_states = [P[idc] for idc in [1,2,4]] # downselect on low weight states\n", + " qp = self.softmax(w_states)\n", + " yp = y[i] # one hot encoded label\n", + " loss = loss - np.sum(yp * np.log(qp+10**-12))\n", + " return loss / len(x)\n", + "\n", + " def iterate_minibatches(self,inputs, targets, batch_size):\n", + " \"\"\"\n", + " A generator for batches of the input data\n", + "\n", + " Args:\n", + " inputs (array[float]): input data\n", + " targets (array[float]): targets\n", + "\n", + " Returns:\n", + " inputs (array[float]): one batch of input data of length `batch_size`\n", + " targets (array[float]): one batch of targets of length `batch_size`\n", + " \"\"\"\n", + " for start_idx in range(0, inputs.shape[0] - batch_size + 1, batch_size):\n", + " idxs = slice(start_idx, start_idx + batch_size)\n", + " yield inputs[idxs], targets[idxs]\n", + " \n", + " def fit(self,X,y,Xtest=None,ytest=None):\n", + " \"\"\"\n", + " implement gradient based training\n", + " \"\"\"\n", + " if (Xtest is not None) and (ytest is not None):\n", + " #if you provide the testing data that will allso be used during evaluating hte loss and accuracy curves\n", + " self.loss_curve_test = []\n", + " self.accuracy_curve_test = []\n", + " \n", + " opt = qml.optimize.AdamOptimizer(self.learning_rate, beta1=0.9, beta2=0.999)\n", + "\n", + " if self.coefs_ is None:\n", + " # initialize random weights\n", + " self.initialize_params()\n", + " params = self.coefs_.copy()\n", + " else:\n", + " params = self.coefs_\n", + "\n", + " if len(X)self.tol:\n", + " best_loss = loss\n", + " iter_count=0\n", + " if iter_count>self.wait_time:\n", + " print('early stopping ')\n", + " print(\n", + " \"Epoch: {:2d} | Loss: {:3f} | Train accuracy: {:3f}\".format(\n", + " it+1, loss, accuracy\n", + " )\n", + " )\n", + " break\n", + " else:\n", + " print(\n", + " \"Epoch: {:2d} | Loss: {:3f} | Train 
accuracy: {:3f}\".format(\n", + " it+1, loss, accuracy\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e6ea8be", + "metadata": {}, + "outputs": [], + "source": [ + "class MultiClassDruQNN(multiclassQNN):\n", + " '''\n", + " build a Quantum Neural Network where label extraction is done via bitstring probabilities\n", + " using data re-uploading\n", + " '''\n", + " def __init__(self, **kwargs):\n", + " super(MultiClassDruQNN,self).__init__(**kwargs)\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def _reset_(self):\n", + " self.coefs_=None\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def build_circuit(self,params,x=None):\n", + " shape = (-1,len(self.dev_wires),3)\n", + " params = np.asarray(params).reshape(shape)\n", + " #x = np.resize(x, params.shape[0]-1)\n", + " \n", + " for idx in range(params.shape[0]-1):\n", + " qml.layer(AngleEncodingLayer,1,x=x,dev_wires=self.dev_wires)\n", + " qml.layer(EdgeListLayer, 1,[params[idx]],\\\n", + " dev_wires=self.dev_wires,\\\n", + " edge_list=self.edge_list)\n", + " return qml.probs(wires=[self.dev_wires[ix] for ix in self.output_qubits])\n", + "\n", + "class MultiClassBasicQNN(multiclassQNN):\n", + " '''\n", + " build a Quantum Neural Network where label extraction is done via bitstring probabilities\n", + " using data re-uploading\n", + " '''\n", + " def __init__(self,**kwargs):\n", + " super(MultiClassBasicQNN,self).__init__(**kwargs)\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def _reset_(self):\n", + " self.coefs_=None\n", + " self.loss_curve=[]\n", + " self.accuracy_curve=[]\n", + " \n", + " def build_circuit(self,params,x=None):\n", + " shape = (-1,len(self.dev_wires),3)\n", + " params = np.asarray(params).reshape(shape)\n", + " #x = np.resize(x, params.shape[0]-1)\n", + " qml.layer(AngleEncodingLayer,1,x=x,dev_wires=self.dev_wires)\n", + " for idx in range(params.shape[0]-1):\n", + " 
qml.layer(EdgeListLayer, 1,[params[idx]],\\\n", + " dev_wires=self.dev_wires,\\\n", + " edge_list=self.edge_list)\n", + " return qml.probs(wires=[self.dev_wires[ix] for ix in self.output_qubits])" + ] + }, + { + "cell_type": "markdown", + "id": "ddf653b8", + "metadata": {}, + "source": [ + "## Generate the Data\n", + "\n", + "For this example, we're going to use the random features, random labels dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39fea3dc", + "metadata": {}, + "outputs": [], + "source": [ + "noise_train_features = (2*np.pi)*np.random.random((150,9)) #generate uniform random samples on [0,2pi]\n", + "noise_test_features = (2*np.pi)*np.random.random((50,9)) #generate uniform random samples on [0,2pi]\n", + "\n", + "noise_train_labels = np.asarray(np.random.choice(3, size=150, replace=True),requires_grad=False) #generate 3 random categorical labels\n", + "noise_test_labels = np.asarray(np.random.choice(3, size=50, replace=True),requires_grad=False) #generate 3 random categorical labels\n", + "\n", + "noise_onehot_train_labels = np.asarray(one_hot_encode_labels(noise_train_labels,3),requires_grad=False)\n", + "noise_onehot_test_labels = np.asarray(one_hot_encode_labels(noise_test_labels,3),requires_grad=False)" + ] + }, + { + "cell_type": "markdown", + "id": "1dd04987", + "metadata": {}, + "source": [ + "# Build a Multiclass Classifier" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee9f2e05", + "metadata": {}, + "outputs": [], + "source": [ + "n_layers = 6 # number of ansatz layers\n", + "n_qubits = 3 # number of qubits\n", + "max_steps= 15 # maximum number of epochs \n", + "alpha = 0.05\n", + "edge_list = [0,1],[2,1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11514907", + "metadata": {}, + "outputs": [], + "source": [ + "multiclass_classifier_ = MultiClassDruQNN(wires=n_qubits,shots=None,\\\n", + " max_iter=max_steps,edge_list=edge_list,\\\n", + " 
layers=n_layers,batch_size=32,\\\n", + " learning_rate=alpha,output_qubits=[0,1,2]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4661641", + "metadata": {}, + "outputs": [], + "source": [ + "multiclass_classifier_.__dict__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcf7e8fe", + "metadata": {}, + "outputs": [], + "source": [ + "multiclass_classifier_.fit(noise_train_features,noise_onehot_train_labels,noise_test_features,noise_onehot_test_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "751be9c1", + "metadata": {}, + "outputs": [], + "source": [ + "f, ax = plt.subplots(ncols=2,figsize=(24,12))\n", + "\n", + "plt.rc('text',usetex=False)\n", + "plt.rc('font',family='serif')\n", + "plt.rc('xtick',labelsize=18)\n", + "plt.rc('ytick',labelsize=18)\n", + "plt.rc('legend',**{'numpoints':1,'fontsize':18,'handlelength':2})\n", + "\n", + "ax[0].plot(multiclass_classifier_.loss_curve,'r+',ms=25,label='train')\n", + "ax[0].plot(multiclass_classifier_.loss_curve_test,'b.',ms=25,label='test')\n", + "ax[0].set_title(\"Loss Curves\",fontsize=24)\n", + "ax[0].legend()\n", + "ax[1].plot(multiclass_classifier_.accuracy_curve,'r+',ms=25,label='train')\n", + "ax[1].plot(multiclass_classifier_.accuracy_curve_test,'b.',ms=25,label='test')\n", + "ax[1].set_title(\"Accuracy Curves\",fontsize=24)\n", + "ax[1].legend()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3dea5110", + "metadata": {}, + "outputs": [], + "source": [ + "predicted_train_labels = multiclass_classifier_.make_predictions(multiclass_classifier_.coefs_,noise_train_features)\n", + "predicted_test_labels = multiclass_classifier_.make_predictions(multiclass_classifier_.coefs_,noise_test_features)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e30b1ed7", + "metadata": {}, + "outputs": [], + "source": [ + "predicted_train_categorical = np.argmax(predicted_train_labels,axis=1)\n", + 
"predicted_test_categorical = np.argmax(predicted_test_labels,axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00dac179", + "metadata": {}, + "outputs": [], + "source": [ + "temp_train = np.argmax(noise_onehot_train_labels,axis=1)\n", + "temp_test = np.argmax(noise_onehot_test_labels,axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b585c94", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(confusion_matrix(temp,temp,normalize='all'),annot=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36f01022", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(confusion_matrix(temp_train,predicted_train_categorical,normalize='all'),annot=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b09522d", + "metadata": {}, + "outputs": [], + "source": [ + "sns.heatmap(confusion_matrix(temp_test,predicted_test_categorical,normalize='all'),annot=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9c42b11", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}