|
878 | 878 | "source": [
|
879 | 879 | "import numpy as np\n",
|
880 | 880 | "import cvxpy as cp\n",
|
| 881 | + "from scipy import linalg\n", |
881 | 882 | "\n",
|
882 | 883 | "class BnBAlgorithmBase:\n",
|
883 | 884 | " def __init__(self, x=None, y=None):\n",
|
|
917 | 918 | " self.x = x\n",
|
918 | 919 | " self.y = y\n",
|
919 | 920 | "\n",
|
920 |
| - " def sync_kernel_from_smt(self):\n", |
921 |
| - " corr_map = {\n", |
922 |
| - " \"pow_exp\": \"pow_exp\", # treat as Gaussian if power=2\n", |
923 |
| - " \"squar_exp\": \"pow_exp\", # SMT sometimes uses this name for Gaussian\n", |
924 |
| - " \"abs_exp\": \"matern12\", # ν = 1/2\n", |
925 |
| - " \"matern32\": \"matern32\",\n", |
926 |
| - " \"matern52\": \"matern52\",\n", |
927 |
| - " }\n", |
| 921 | + " def sync_from_smt(self):\n", |
928 | 922 | " \n",
|
929 |
| - " corr_type = self.gpsurrogate.surrogatesmt.options[\"corr\"]\n", |
930 |
| - " self.kernel_spec = corr_map[corr_type]\n", |
931 |
| - " self.set_kernel(self.kernel_spec)\n", |
932 |
| - " self.theta = np.asarray(self.gpsurrogate.surrogatesmt.corr.theta, dtype=float)\n", |
933 |
| - "\n", |
934 |
| - " # If pow_exp, ensure it's actually Gaussian (power=2); otherwise raise.\n", |
935 |
| - " if corr_type == \"pow_exp\":\n", |
936 |
| - " power = getattr(self.gpsurrogate.surrogatesmt.options, \"pow_exp_power\", 2)\n", |
937 |
| - " if power != 2:\n", |
938 |
| - " raise ValueError(f\"pow_exp_power={power} not supported in isotropic form; need power=2 for SE.\")\n", |
939 |
| - "\n", |
940 |
| - " # Map θ (SMT) -> ℓ for ARD scaling used in d\n", |
941 |
| - " if self.kernel_spec == \"pow_exp\": # Gaussian\n", |
942 |
| - " self.ell = 1.0 / np.sqrt(self.theta)\n", |
943 |
| - " else: # Matérn family\n", |
944 |
| - " self.ell = 1.0 / self.theta\n", |
945 |
| - " \n", |
946 |
| - " def sync_smt_parameterts(self):\n", |
947 |
| - "\n", |
948 | 923 | " sm = self.gpsurrogate.surrogatesmt\n",
|
| 924 | + " par = sm.optimal_par\n", |
949 | 925 | "\n",
|
950 |
| - " # --- map SMT corr name to the 5 supported kernels ---\n", |
951 |
| - " corr_map = {\n", |
952 |
| - " \"pow_exp\": \"pow_exp\", # power-exponential (Gaussian if power=2)\n", |
953 |
| - " \"squar_exp\":\"pow_exp\", # SMT alias for Gaussian\n", |
954 |
| - " \"abs_exp\": \"matern12\", # exp(-sum θ|dx|) == Matérn ν=1/2 product\n", |
955 |
| - " \"matern32\": \"matern32\",\n", |
956 |
| - " \"matern52\": \"matern52\",\n", |
957 |
| - " }\n", |
958 |
| - "\n", |
959 |
| - " corr_type = sm.options[\"corr\"]\n", |
960 |
| - " if corr_type not in corr_map:\n", |
961 |
| - " raise ValueError(f\"Unsupported SMT corr '{corr_type}' for kernel bounds.\")\n", |
962 |
| - " self.kernel_spec = corr_map[corr_type]\n", |
| 926 | + " corr = sm.options[\"corr\"]\n", |
| 927 | + " if corr in (\"squar_exp\", \"pow_exp\"):\n", |
| 928 | + " p = float(sm.options.get(\"pow_exp_power\", 2.0))\n", |
| 929 | + " if abs(p - 2.0) > 1e-12:\n", |
| 930 | + " raise ValueError(\"Single-d bounds support pow_exp only for p=1 or p=2\")\n", |
| 931 | + " self.kernel_spec = \"pow_exp\"\n", |
| 932 | + " self.p = p # 1 (abs-exp) or 2 (SE)\n", |
| 933 | + " elif corr == \"abs_exp\":\n", |
| 934 | + " self.kernel_spec = \"pow_exp\"\n", |
| 935 | + " self.p = 1.0\n", |
| 936 | + " elif corr == \"matern32\":\n", |
| 937 | + " self.kernel_spec = \"matern32\"\n", |
| 938 | + " elif corr == \"matern52\":\n", |
| 939 | + " self.kernel_spec = \"matern52\"\n", |
| 940 | + " else:\n", |
| 941 | + " raise ValueError(f\"Unsupported SMT corr '{corr}'\")\n", |
963 | 942 | "\n",
|
964 |
| - " # --- pull trained hyperparams (prefer optimal_theta) ---\n", |
965 | 943 | " theta = getattr(sm, \"optimal_theta\", None)\n",
|
966 |
| - " if theta is None:\n", |
967 |
| - " theta = sm.corr.theta\n", |
968 |
| - " self.theta = np.asarray(theta, dtype=float).ravel()\n", |
| 944 | + " if theta is None: theta = sm.corr.theta\n", |
| 945 | + " self.theta = np.asarray(theta, float).ravel()\n", |
| 946 | + "\n", |
| 947 | + " self.X_offset, self.X_scale = sm.X_offset, sm.X_scale\n", |
| 948 | + " self.Xc = (sm.X - self.X_offset) / self.X_scale # normalized training inputs\n", |
| 949 | + " self._normalize = lambda x: (np.asarray(x, float) - self.X_offset) / self.X_scale\n", |
| 950 | + " \n", |
| 951 | + " if sm.options[\"poly\"] != \"constant\":\n", |
| 952 | + " raise NotImplementedError(\"μ-bounds assume poly='constant'\")\n", |
| 953 | + " self.beta0 = float(np.asarray(par[\"beta\"]).ravel()[0])\n", |
| 954 | + " self.gamma = np.asarray(par[\"gamma\"], float).ravel()\n", |
969 | 955 | "\n",
|
| 956 | + " \n", |
| 957 | + " # --- Correlation factor and process variance ---\n", |
| 958 | + " self.C = par[\"C\"] # Cholesky of R (lower)\n", |
| 959 | + " self.sigma2 = float(par.get(\"sigma2\", 1.0))\n", |
| 960 | + " self.sigma2_ri = float(par.get(\"sigma2_ri\", self.sigma2)) # usually same unless RI\n", |
| 961 | + "\n", |
| 962 | + " # Fast primitive: triangular solve with C (prefer this over forming R^{-1})\n", |
| 963 | + " self._solve_C = lambda v: linalg.solve_triangular(self.C, v, lower=True) \n", |
970 | 964 | "\n",
|
971 | 965 | " def set_kernel(self, kernel_spec):\n",
|
972 | 966 | "\n",
|
|
990 | 984 | " (1 + np.sqrt(5) * np.sqrt(d) + (5/3) * d) *\n",
|
991 | 985 | " np.exp(-np.sqrt(5) * np.sqrt(d))\n",
|
992 | 986 | " )\n",
|
993 |
| - "\n", |
994 |
| - "\n", |
995 |
| - " def set_covmatrix(self, x):\n", |
996 |
| - "\n", |
997 |
| - " n = x.shape[0]\n", |
998 |
| - " ell = np.asarray(self.ell, dtype=float)\n", |
999 |
| - " nugget = float(self.gpsurrogate.surrogatesmt.options['nugget'])\n", |
1000 |
| - " self.K = np.zeros((n, n))\n", |
| 987 | + " \n", |
| 988 | + " def ker_bounds(self, l, u):\n", |
1001 | 989 | " \n",
|
1002 |
| - " # Compute symmetric kernel matrix\n", |
1003 |
| - " for i in range(n):\n", |
1004 |
| - " for j in range(i, n):\n", |
1005 |
| - "\n", |
1006 |
| - " d = np.sum(((x[i] - x[j]) / ell)**2) \n", |
1007 |
| - " \n", |
1008 |
| - " self.K[i, j] = self.kernel_func(d)\n", |
1009 |
| - " self.K[j, i] = self.K[i, j]\n", |
1010 |
| - "\n", |
1011 |
| - " # Add nugget for stability\n", |
1012 |
| - " self.K += nugget * np.eye(n)\n", |
1013 |
| - "\n", |
1014 |
| - " # --- Compute inverse ---\n", |
1015 |
| - " try:\n", |
1016 |
| - " self.K_inv = np.linalg.inv(self.K)\n", |
1017 |
| - " #print(\"K_inv:\\n\", self.K_inv)\n", |
1018 |
| - " except np.linalg.LinAlgError:\n", |
1019 |
| - " print(\"ERROR: Singular K detected, cannot invert.\")\n", |
| 990 | + " \"\"\"\n", |
| 991 | + " Tight monotone bounds for k(x, X_i) over box [l,u] (original units).\n", |
| 992 | + " Returns kL, kU of shape (nt,), consistent with SMT’s kernels.\n", |
| 993 | + " \"\"\"\n", |
| 994 | + " \n", |
| 995 | + " # normalize the box\n", |
| 996 | + " l_c = self._normalize(l).ravel()\n", |
| 997 | + " u_c = self._normalize(u).ravel()\n", |
1020 | 998 | "\n",
|
1021 |
| - " def ker_bounds(self, x, l, u):\n", |
| 999 | + " Xc = self.Xc # (nt, d)\n", |
| 1000 | + " th = self.theta.ravel() # (d,)\n", |
| 1001 | + " spec = self.kernel_spec\n", |
1022 | 1002 | "\n",
|
1023 |
| - " ell = np.asarray(self.ell)\n", |
1024 |
| - " kL, kU = [], []\n", |
| 1003 | + " # per-point, per-dimension distance extremes (normalized space)\n", |
| 1004 | + " dmin = np.maximum(0.0, np.maximum(l_c - Xc, Xc - u_c)) # (nt,d)\n", |
| 1005 | + " dmax = np.maximum(np.abs(l_c - Xc), np.abs(u_c - Xc)) # (nt,d)\n", |
1025 | 1006 | "\n",
|
1026 |
| - " for xi in x:\n", |
| 1007 | + " if spec == \"pow_exp\":\n", |
| 1008 | + " # power-exponential: k = exp(-sum_j θ_j |dx_j|^p)\n", |
| 1009 | + " p = getattr(self, \"p\", 2.0)\n", |
| 1010 | + " s_min = (th * (dmin ** p)).sum(axis=1)\n", |
| 1011 | + " s_max = (th * (dmax ** p)).sum(axis=1)\n", |
| 1012 | + " kU = np.exp(-s_min) # max on box\n", |
| 1013 | + " kL = np.exp(-s_max) # min on box\n", |
1027 | 1014 | "\n",
|
1028 |
| - " # per-dim nearest/farthest distances to the box\n", |
1029 |
| - " dmin = np.maximum(0.0, np.maximum(l - xi, xi - u)) # (d,)\n", |
1030 |
| - " dmax = np.maximum(np.abs(l - xi), np.abs(u - xi)) # (d,)\n", |
| 1015 | + " elif spec == \"matern12\":\n", |
| 1016 | + " # Matérn ν=1/2 (a.k.a. abs-exp): k = exp(-sum_j θ_j |dx_j|)\n", |
| 1017 | + " s_min = (th * dmin).sum(axis=1)\n", |
| 1018 | + " s_max = (th * dmax).sum(axis=1)\n", |
| 1019 | + " kU = np.exp(-s_min)\n", |
| 1020 | + " kL = np.exp(-s_max)\n", |
1031 | 1021 | "\n",
|
1032 |
| - " d_L = np.sum((dmax / ell)**2) # largest distance -> lower kernel\n", |
1033 |
| - " d_U = np.sum((dmin / ell)**2) # smallest distance -> upper kernel\n", |
| 1022 | + " elif spec == \"matern32\":\n", |
| 1023 | + " # SMT separable form: ∏_j (1 + √3 θ_j |dx_j|) exp(-√3 θ_j |dx_j|)\n", |
| 1024 | + " a = np.sqrt(3.0) * th\n", |
| 1025 | + " gmin = (1 + a * dmin) * np.exp(-a * dmin)\n", |
| 1026 | + " gmax = (1 + a * dmax) * np.exp(-a * dmax)\n", |
| 1027 | + " kU = np.prod(gmin, axis=1)\n", |
| 1028 | + " kL = np.prod(gmax, axis=1)\n", |
1034 | 1029 | "\n",
|
1035 |
| - " kL.append(self.kernel_func(d_L))\n", |
1036 |
| - " kU.append(self.kernel_func(d_U))\n", |
| 1030 | + " elif spec == \"matern52\":\n", |
| 1031 | + " # SMT separable form: ∏_j (1 + √5 θ_j |dx_j| + (5/3) θ_j^2 dx_j^2) exp(-√5 θ_j |dx_j|)\n", |
| 1032 | + " b = np.sqrt(5.0) * th\n", |
| 1033 | + " btmin, btmax = b * dmin, b * dmax\n", |
| 1034 | + " gmin = (1 + btmin + (btmin**2)/3.0) * np.exp(-btmin)\n", |
| 1035 | + " gmax = (1 + btmax + (btmax**2)/3.0) * np.exp(-btmax)\n", |
| 1036 | + " kU = np.prod(gmin, axis=1)\n", |
| 1037 | + " kL = np.prod(gmax, axis=1)\n", |
1037 | 1038 | "\n",
|
1038 |
| - " return np.array(kL), np.array(kU)\n", |
| 1039 | + " else:\n", |
| 1040 | + " raise ValueError(f\"Unsupported kernel_spec: {spec}\")\n", |
1039 | 1041 | "\n",
|
| 1042 | + " return kL, kU\n", |
1040 | 1043 | "\n",
|
1041 |
| - " def mu_bounds(self, y, kL, kU):\n", |
| 1044 | + " def mu_bounds(self, kL, kU):\n", |
1042 | 1045 | "\n",
|
1043 |
| - " alpha = self.K_inv @ y\n", |
1044 |
| - " mu_U = np.sum(alpha * np.where(alpha >= 0, kU, kL))\n", |
1045 |
| - " mu_L = np.sum(alpha * np.where(alpha >= 0, kL, kU))\n", |
| 1046 | + " lo = np.where(self.gamma >= 0.0, kL, kU)\n", |
| 1047 | + " hi = np.where(self.gamma >= 0.0, kU, kL)\n", |
1046 | 1048 | "\n",
|
| 1049 | + " mu_L = self.beta0 + float(np.dot(self.gamma, lo))\n", |
| 1050 | + " mu_U = self.beta0 + float(np.dot(self.gamma, hi))\n", |
1047 | 1051 | " return mu_L, mu_U\n",
|
1048 | 1052 | "\n",
|
1049 |
| - " def sigma2_U(self, kL, kU):\n", |
| 1053 | + " def sigma2_bounds(self, kL, kU, lb_passes=2, clip_nonneg=True):\n", |
| 1054 | + " \n", |
| 1055 | + " \"\"\"\n", |
| 1056 | + " Variance bounds over r ∈ [kL,kU] for SMT KRG (poly='constant').\n", |
1050 | 1057 | "\n",
|
1051 |
| - " # Set up QP to solve for upper variance bound\n", |
1052 |
| - " var = cp.Variable(len(kU))\n", |
1053 |
| - " #Add constant values here constants = \n", |
1054 |
| - " obj = cp.Maximize(1 - cp.quad_form(var, self.K_inv))\n", |
1055 |
| - " constraints = [var >= kL, var <= kU]\n", |
1056 |
| - " prob = cp.Problem(obj, constraints)\n", |
1057 |
| - " prob.solve(solver=cp.OSQP)\n", |
| 1058 | + " Upper bound: exact convex QP (includes GLS + σ²).\n", |
| 1059 | + " Lower bound: small coordinate-descent heuristic on the full bracket.\n", |
1058 | 1060 | "\n",
|
1059 |
| - " sigma2_U = prob.value\n", |
1060 |
| - " return max(sigma2_U, 0)\n", |
| 1061 | + " Returns\n", |
| 1062 | + " -------\n", |
| 1063 | + " sigma2_U : float\n", |
| 1064 | + " sigma2_L : float\n", |
| 1065 | + " \"\"\"\n", |
| 1066 | + " \n", |
| 1067 | + " # ---------- sanitize / prerequisites ----------\n", |
| 1068 | + " kL = np.asarray(kL, float).ravel()\n", |
| 1069 | + " kU = np.asarray(kU, float).ravel()\n", |
| 1070 | + " n = kL.size\n", |
| 1071 | + " assert hasattr(self, \"C\") and hasattr(self, \"sigma2\"), \"Call sync_from_smt() first\"\n", |
| 1072 | + " assert kL.shape == kU.shape == (n,) and np.all(kL <= kU), \"bad kL/kU\"\n", |
1061 | 1073 | "\n",
|
1062 |
| - " def sigma2_L(self,kL, kU, epsilon = 1e-6, random_seed= 42):\n", |
| 1074 | + " C = self.C\n", |
| 1075 | + " sigma2 = float(self.sigma2)\n", |
| 1076 | + " ones = np.ones(n)\n", |
1063 | 1077 | "\n",
|
1064 |
| - " # Randomly initialize a point in the bounds\n", |
1065 |
| - " np.random.seed(random_seed)\n", |
1066 |
| - " var = np.random.uniform(kL, kU)\n", |
| 1078 | + " # a = R^{-1} 1 and S = 1^T R^{-1} 1 via two triangular solves (no explicit R^{-1})\n", |
| 1079 | + " tmp = linalg.solve_triangular(C, ones, lower=True) # C tmp = 1\n", |
| 1080 | + " a_vec = linalg.solve_triangular(C.T, tmp, lower=False) # C^T a = tmp\n", |
| 1081 | + " S = float(ones @ a_vec) # > 0\n", |
1067 | 1082 | "\n",
|
1068 |
| - " # Initialize active coordinates\n", |
1069 |
| - " active_coords = set(range(len(kL)))\n", |
| 1083 | + " # Dimensionless SMT variance bracket f(r) = 1 - ||C^{-1}r||^2 + (1 - a^T r)^2 / S\n", |
| 1084 | + " def bracket(r):\n", |
| 1085 | + " r = np.asarray(r, float).ravel()\n", |
| 1086 | + " rt = linalg.solve_triangular(C, r, lower=True) # C^{-1} r\n", |
| 1087 | + " tau = float(a_vec @ r)\n", |
| 1088 | + " return 1.0 - float(rt @ rt) + (1.0 - tau)**2 / S\n", |
1070 | 1089 | "\n",
|
1071 |
| - " # Define the function to minimize\n", |
1072 |
| - " def f(k_vec): return 1 - k_vec @ self.K_inv @ k_vec\n", |
1073 |
| - " f_curr = f(var)\n", |
| 1090 | + " # ---------- UPPER bound: one convex QP in z with r = C z ----------\n", |
| 1091 | + " A = C.T @ a_vec # shape (n,)\n", |
| 1092 | + " Qz = np.eye(n) - np.outer(A, A)/S # PSD (rank-1 update)\n", |
| 1093 | + " bz = (2.0 / S) * A\n", |
1074 | 1094 | "\n",
|
1075 |
| - " # Iteratively improve the point by evaluating each coordinate direction.\n", |
1076 |
| - " while active_coords:\n", |
1077 |
| - " improvement = False\n", |
1078 |
| - " for i in list(active_coords):\n", |
1079 |
| - " for val in [kL[i], kU[i]]:\n", |
1080 |
| - " var_new = var.copy()\n", |
1081 |
| - " var_new[i] = val\n", |
1082 |
| - " f_val = f(var_new)\n", |
1083 |
| - " if f_val < f_curr - epsilon:\n", |
1084 |
| - " var = var_new\n", |
1085 |
| - " f_curr = f_val\n", |
1086 |
| - " improvement = True\n", |
1087 |
| - " break\n", |
1088 |
| - " if not improvement:\n", |
1089 |
| - " active_coords.remove(i)\n", |
1090 |
| - " if not improvement:\n", |
1091 |
| - " break\n", |
| 1095 | + " z = cp.Variable(n)\n", |
| 1096 | + " obj = 0.5 * cp.quad_form(z, Qz) + bz @ z\n", |
| 1097 | + " cons = [C @ z >= kL, C @ z <= kU]\n", |
| 1098 | + " cp.Problem(cp.Minimize(obj), cons).solve(solver=\"OSQP\")\n", |
| 1099 | + "\n", |
| 1100 | + " r_ub = (C @ z.value).reshape(-1)\n", |
| 1101 | + " f_ub = bracket(r_ub)\n", |
| 1102 | + " if clip_nonneg: f_ub = max(f_ub, 0.0)\n", |
| 1103 | + " sigma2_U = sigma2 * f_ub\n", |
1092 | 1104 | "\n",
|
1093 |
| - " sigma2_L = f_curr\n", |
1094 |
| - " return max(sigma2_L, 0)\n", |
| 1105 | + " if self.BnB_LBmethod != \"IPOPT\":\n", |
| 1106 | + " \n", |
| 1107 | + " # ---------- LOWER bound: tiny corner-seeking pass on full bracket ----------\n", |
| 1108 | + " r = r_ub.copy()\n", |
| 1109 | + " f = bracket(r)\n", |
| 1110 | + " for _ in range(max(0, int(lb_passes))):\n", |
| 1111 | + " improved = False\n", |
| 1112 | + " for i in range(n):\n", |
| 1113 | + " # try snapping coord i to each bound\n", |
| 1114 | + " r_lo = r.copy(); r_lo[i] = kL[i]; f_lo = bracket(r_lo)\n", |
| 1115 | + " r_hi = r.copy(); r_hi[i] = kU[i]; f_hi = bracket(r_hi)\n", |
| 1116 | + " # keep best (minimizing f)\n", |
| 1117 | + " if f_lo + 1e-6 < f: r, f, improved = r_lo, f_lo, True\n", |
| 1118 | + " if f_hi + 1e-6 < f: r, f, improved = r_hi, f_hi, True\n", |
| 1119 | + " if not improved:\n", |
| 1120 | + " break\n", |
| 1121 | + " if clip_nonneg: f = max(f, 0.0)\n", |
| 1122 | + " sigma2_L = sigma2 * f\n", |
1095 | 1123 | "\n",
|
| 1124 | + " else:\n", |
| 1125 | + " sigma2_L = 0\n", |
| 1126 | + " \n", |
| 1127 | + " return sigma2_U, sigma2_L\n", |
| 1128 | + " \n", |
1096 | 1129 | " def rs_ei(self, y, mu, sigma):\n",
|
1097 | 1130 | " \n",
|
1098 | 1131 | " y_min = np.min(y)\n",
|
|
0 commit comments