Commit c5daf34

new info added from smt
1 parent 9b45fc5 commit c5daf34

1 file changed (+157, -124 lines)

src/hiopbbpy/opt/bnb/BODemoNLP.ipynb

Lines changed: 157 additions & 124 deletions
@@ -878,6 +878,7 @@
 "source": [
 "import numpy as np\n",
 "import cvxpy as cp\n",
+"from scipy import linalg\n",
 "\n",
 "class BnBAlgorithmBase:\n",
 "    def __init__(self, x=None, y=None):\n",
@@ -917,56 +918,49 @@
 "        self.x = x\n",
 "        self.y = y\n",
 "\n",
-"    def sync_kernel_from_smt(self):\n",
-"        corr_map = {\n",
-"            \"pow_exp\": \"pow_exp\", # treat as Gaussian if power=2\n",
-"            \"squar_exp\": \"pow_exp\", # SMT sometimes uses this name for Gaussian\n",
-"            \"abs_exp\": \"matern12\", # ν = 1/2\n",
-"            \"matern32\": \"matern32\",\n",
-"            \"matern52\": \"matern52\",\n",
-"        }\n",
+"    def sync_from_smt(self):\n",
 " \n",
-"        corr_type = self.gpsurrogate.surrogatesmt.options[\"corr\"]\n",
-"        self.kernel_spec = corr_map[corr_type]\n",
-"        self.set_kernel(self.kernel_spec)\n",
-"        self.theta = np.asarray(self.gpsurrogate.surrogatesmt.corr.theta, dtype=float)\n",
-"\n",
-"        # If pow_exp, ensure it's actually Gaussian (power=2); otherwise raise.\n",
-"        if corr_type == \"pow_exp\":\n",
-"            power = getattr(self.gpsurrogate.surrogatesmt.options, \"pow_exp_power\", 2)\n",
-"            if power != 2:\n",
-"                raise ValueError(f\"pow_exp_power={power} not supported in isotropic form; need power=2 for SE.\")\n",
-"\n",
-"        # Map θ (SMT) -> ℓ for ARD scaling used in d\n",
-"        if self.kernel_spec == \"pow_exp\": # Gaussian\n",
-"            self.ell = 1.0 / np.sqrt(self.theta)\n",
-"        else: # Matérn family\n",
-"            self.ell = 1.0 / self.theta\n",
-" \n",
-"    def sync_smt_parameterts(self):\n",
-"\n",
 "        sm = self.gpsurrogate.surrogatesmt\n",
+"        par = sm.optimal_par\n",
 "\n",
-"        # --- map SMT corr name to the 5 supported kernels ---\n",
-"        corr_map = {\n",
-"            \"pow_exp\": \"pow_exp\", # power-exponential (Gaussian if power=2)\n",
-"            \"squar_exp\":\"pow_exp\", # SMT alias for Gaussian\n",
-"            \"abs_exp\": \"matern12\", # exp(-sum θ|dx|) == Matérn ν=1/2 product\n",
-"            \"matern32\": \"matern32\",\n",
-"            \"matern52\": \"matern52\",\n",
-"        }\n",
-"\n",
-"        corr_type = sm.options[\"corr\"]\n",
-"        if corr_type not in corr_map:\n",
-"            raise ValueError(f\"Unsupported SMT corr '{corr_type}' for kernel bounds.\")\n",
-"        self.kernel_spec = corr_map[corr_type]\n",
+"        corr = sm.options[\"corr\"]\n",
+"        if corr in (\"squar_exp\", \"pow_exp\"):\n",
+"            p = float(sm.options.get(\"pow_exp_power\", 2.0))\n",
+"            if abs(p - 2.0) > 1e-12:\n",
+"                raise ValueError(\"Single-d bounds support pow_exp only for p=1 or p=2\")\n",
+"            self.kernel_spec = \"pow_exp\"\n",
+"            self.p = p # 1 (abs-exp) or 2 (SE)\n",
+"        elif corr == \"abs_exp\":\n",
+"            self.kernel_spec = \"pow_exp\"\n",
+"            self.p = 1.0\n",
+"        elif corr == \"matern32\":\n",
+"            self.kernel_spec = \"matern32\"\n",
+"        elif corr == \"matern52\":\n",
+"            self.kernel_spec = \"matern52\"\n",
+"        else:\n",
+"            raise ValueError(f\"Unsupported SMT corr '{corr}'\")\n",
 "\n",
-"        # --- pull trained hyperparams (prefer optimal_theta) ---\n",
 "        theta = getattr(sm, \"optimal_theta\", None)\n",
-"        if theta is None:\n",
-"            theta = sm.corr.theta\n",
-"        self.theta = np.asarray(theta, dtype=float).ravel()\n",
+"        if theta is None: theta = sm.corr.theta\n",
+"        self.theta = np.asarray(theta, float).ravel()\n",
+"\n",
+"        self.X_offset, self.X_scale = sm.X_offset, sm.X_scale\n",
+"        self.Xc = (sm.X - self.X_offset) / self.X_scale # normalized training inputs\n",
+"        self._normalize = lambda x: (np.asarray(x, float) - self.X_offset) / self.X_scale\n",
+" \n",
+"        if sm.options[\"poly\"] != \"constant\":\n",
+"            raise NotImplementedError(\"μ-bounds assume poly='constant'\")\n",
+"        self.beta0 = float(np.asarray(par[\"beta\"]).ravel()[0])\n",
+"        self.gamma = np.asarray(par[\"gamma\"], float).ravel()\n",
 "\n",
+" \n",
+"        # --- Correlation factor and process variance ---\n",
+"        self.C = par[\"C\"] # Cholesky of R (lower)\n",
+"        self.sigma2 = float(par.get(\"sigma2\", 1.0))\n",
+"        self.sigma2_ri = float(par.get(\"sigma2_ri\", self.sigma2)) # usually same unless RI\n",
+"\n",
+"        # Fast primitive: triangular solve with C (prefer this over forming R^{-1})\n",
+"        self._solve_C = lambda v: linalg.solve_triangular(self.C, v, lower=True) \n",
 "\n",
 "    def set_kernel(self, kernel_spec):\n",
 "\n",
@@ -990,109 +984,148 @@
 "                (1 + np.sqrt(5) * np.sqrt(d) + (5/3) * d) *\n",
 "                np.exp(-np.sqrt(5) * np.sqrt(d))\n",
 "            )\n",
-"\n",
-"\n",
-"    def set_covmatrix(self, x):\n",
-"\n",
-"        n = x.shape[0]\n",
-"        ell = np.asarray(self.ell, dtype=float)\n",
-"        nugget = float(self.gpsurrogate.surrogatesmt.options['nugget'])\n",
-"        self.K = np.zeros((n, n))\n",
+" \n",
+"    def ker_bounds(self, l, u):\n",
 " \n",
-"        # Compute symmetric kernel matrix\n",
-"        for i in range(n):\n",
-"            for j in range(i, n):\n",
-"\n",
-"                d = np.sum(((x[i] - x[j]) / ell)**2) \n",
-" \n",
-"                self.K[i, j] = self.kernel_func(d)\n",
-"                self.K[j, i] = self.K[i, j]\n",
-"\n",
-"        # Add nugget for stability\n",
-"        self.K += nugget * np.eye(n)\n",
-"\n",
-"        # --- Compute inverse ---\n",
-"        try:\n",
-"            self.K_inv = np.linalg.inv(self.K)\n",
-"            #print(\"K_inv:\\n\", self.K_inv)\n",
-"        except np.linalg.LinAlgError:\n",
-"            print(\"ERROR: Singular K detected, cannot invert.\")\n",
+"        \"\"\"\n",
+"        Tight monotone bounds for k(x, X_i) over box [l,u] (original units).\n",
+"        Returns kL, kU of shape (nt,), consistent with SMT’s kernels.\n",
+"        \"\"\"\n",
+" \n",
+"        # normalize the box\n",
+"        l_c = self._normalize(l).ravel()\n",
+"        u_c = self._normalize(u).ravel()\n",
 "\n",
-"    def ker_bounds(self, x, l, u):\n",
+"        Xc = self.Xc # (nt, d)\n",
+"        th = self.theta.ravel() # (d,)\n",
+"        spec = self.kernel_spec\n",
 "\n",
-"        ell = np.asarray(self.ell)\n",
-"        kL, kU = [], []\n",
+"        # per-point, per-dimension distance extremes (normalized space)\n",
+"        dmin = np.maximum(0.0, np.maximum(l_c - Xc, Xc - u_c)) # (nt,d)\n",
+"        dmax = np.maximum(np.abs(l_c - Xc), np.abs(u_c - Xc)) # (nt,d)\n",
 "\n",
-"        for xi in x:\n",
+"        if spec == \"pow_exp\":\n",
+"            # power-exponential: k = exp(-sum_j θ_j |dx_j|^p)\n",
+"            p = getattr(self, \"p\", 2.0)\n",
+"            s_min = (th * (dmin ** p)).sum(axis=1)\n",
+"            s_max = (th * (dmax ** p)).sum(axis=1)\n",
+"            kU = np.exp(-s_min) # max on box\n",
+"            kL = np.exp(-s_max) # min on box\n",
 "\n",
-"            # per-dim nearest/farthest distances to the box\n",
-"            dmin = np.maximum(0.0, np.maximum(l - xi, xi - u)) # (d,)\n",
-"            dmax = np.maximum(np.abs(l - xi), np.abs(u - xi)) # (d,)\n",
+"        elif spec == \"matern12\":\n",
+"            # Matérn ν=1/2 (a.k.a. abs-exp): k = exp(-sum_j θ_j |dx_j|)\n",
+"            s_min = (th * dmin).sum(axis=1)\n",
+"            s_max = (th * dmax).sum(axis=1)\n",
+"            kU = np.exp(-s_min)\n",
+"            kL = np.exp(-s_max)\n",
 "\n",
-"            d_L = np.sum((dmax / ell)**2) # largest distance -> lower kernel\n",
-"            d_U = np.sum((dmin / ell)**2) # smallest distance -> upper kernel\n",
+"        elif spec == \"matern32\":\n",
+"            # SMT separable form: ∏_j (1 + √3 θ_j |dx_j|) exp(-√3 θ_j |dx_j|)\n",
+"            a = np.sqrt(3.0) * th\n",
+"            gmin = (1 + a * dmin) * np.exp(-a * dmin)\n",
+"            gmax = (1 + a * dmax) * np.exp(-a * dmax)\n",
+"            kU = np.prod(gmin, axis=1)\n",
+"            kL = np.prod(gmax, axis=1)\n",
 "\n",
-"            kL.append(self.kernel_func(d_L))\n",
-"            kU.append(self.kernel_func(d_U))\n",
+"        elif spec == \"matern52\":\n",
+"            # SMT separable form: ∏_j (1 + √5 θ_j |dx_j| + (5/3) θ_j^2 dx_j^2) exp(-√5 θ_j |dx_j|)\n",
+"            b = np.sqrt(5.0) * th\n",
+"            btmin, btmax = b * dmin, b * dmax\n",
+"            gmin = (1 + btmin + (btmin**2)/3.0) * np.exp(-btmin)\n",
+"            gmax = (1 + btmax + (btmax**2)/3.0) * np.exp(-btmax)\n",
+"            kU = np.prod(gmin, axis=1)\n",
+"            kL = np.prod(gmax, axis=1)\n",
 "\n",
-"        return np.array(kL), np.array(kU)\n",
+"        else:\n",
+"            raise ValueError(f\"Unsupported kernel_spec: {spec}\")\n",
 "\n",
+"        return kL, kU\n",
 "\n",
-"    def mu_bounds(self, y, kL, kU):\n",
+"    def mu_bounds(self, kL, kU):\n",
 "\n",
-"        alpha = self.K_inv @ y\n",
-"        mu_U = np.sum(alpha * np.where(alpha >= 0, kU, kL))\n",
-"        mu_L = np.sum(alpha * np.where(alpha >= 0, kL, kU))\n",
+"        lo = np.where(self.gamma >= 0.0, kL, kU)\n",
+"        hi = np.where(self.gamma >= 0.0, kU, kL)\n",
 "\n",
+"        mu_L = self.beta0 + float(np.dot(self.gamma, lo))\n",
+"        mu_U = self.beta0 + float(np.dot(self.gamma, hi))\n",
 "        return mu_L, mu_U\n",
 "\n",
-"    def sigma2_U(self, kL, kU):\n",
+"    def sigma2_bounds(self, kL, kU, lb_passes=2, clip_nonneg=True):\n",
+" \n",
+"        \"\"\"\n",
+"        Variance bounds over r ∈ [kL,kU] for SMT KRG (poly='constant').\n",
 "\n",
-"        # Set up QP to solve for upper variance bound\n",
-"        var = cp.Variable(len(kU))\n",
-"        #Add constant values here constants = \n",
-"        obj = cp.Maximize(1 - cp.quad_form(var, self.K_inv))\n",
-"        constraints = [var >= kL, var <= kU]\n",
-"        prob = cp.Problem(obj, constraints)\n",
-"        prob.solve(solver=cp.OSQP)\n",
+"        Upper bound: exact convex QP (includes GLS + σ²).\n",
+"        Lower bound: small coordinate-descent heuristic on the full bracket.\n",
 "\n",
-"        sigma2_U = prob.value\n",
-"        return max(sigma2_U, 0)\n",
+"        Returns\n",
+"        -------\n",
+"        sigma2_U : float\n",
+"        sigma2_L : float\n",
+"        \"\"\"\n",
+" \n",
+"        # ---------- sanitize / prerequisites ----------\n",
+"        kL = np.asarray(kL, float).ravel()\n",
+"        kU = np.asarray(kU, float).ravel()\n",
+"        n = kL.size\n",
+"        assert hasattr(self, \"C\") and hasattr(self, \"sigma2\"), \"Call sync_from_smt() first\"\n",
+"        assert kL.shape == kU.shape == (n,) and np.all(kL <= kU), \"bad kL/kU\"\n",
 "\n",
-"    def sigma2_L(self,kL, kU, epsilon = 1e-6, random_seed= 42):\n",
+"        C = self.C\n",
+"        sigma2 = float(self.sigma2)\n",
+"        ones = np.ones(n)\n",
 "\n",
-"        # Randomly initialize a point in the bounds\n",
-"        np.random.seed(random_seed)\n",
-"        var = np.random.uniform(kL, kU)\n",
+"        # a = R^{-1} 1 and S = 1^T R^{-1} 1 via two triangular solves (no explicit R^{-1})\n",
+"        tmp = linalg.solve_triangular(C, ones, lower=True) # C tmp = 1\n",
+"        a_vec = linalg.solve_triangular(C.T, tmp, lower=False) # C^T a = tmp\n",
+"        S = float(ones @ a_vec) # > 0\n",
 "\n",
-"        # Initialize active coordinates\n",
-"        active_coords = set(range(len(kL)))\n",
+"        # Dimensionless SMT variance bracket f(r) = 1 - ||C^{-1}r||^2 + (1 - a^T r)^2 / S\n",
+"        def bracket(r):\n",
+"            r = np.asarray(r, float).ravel()\n",
+"            rt = linalg.solve_triangular(C, r, lower=True) # C^{-1} r\n",
+"            tau = float(a_vec @ r)\n",
+"            return 1.0 - float(rt @ rt) + (1.0 - tau)**2 / S\n",
 "\n",
-"        # Define the function to minimize\n",
-"        def f(k_vec): return 1 - k_vec @ self.K_inv @ k_vec\n",
-"        f_curr = f(var)\n",
+"        # ---------- UPPER bound: one convex QP in z with r = C z ----------\n",
+"        A = C.T @ a_vec # shape (n,)\n",
+"        Qz = np.eye(n) - np.outer(A, A)/S # PSD (rank-1 update)\n",
+"        bz = (2.0 / S) * A\n",
 "\n",
-"        # Iteratively improve the point by evaluating each coordinate direction.\n",
-"        while active_coords:\n",
-"            improvement = False\n",
-"            for i in list(active_coords):\n",
-"                for val in [kL[i], kU[i]]:\n",
-"                    var_new = var.copy()\n",
-"                    var_new[i] = val\n",
-"                    f_val = f(var_new)\n",
-"                    if f_val < f_curr - epsilon:\n",
-"                        var = var_new\n",
-"                        f_curr = f_val\n",
-"                        improvement = True\n",
-"                        break\n",
-"                if not improvement:\n",
-"                    active_coords.remove(i)\n",
-"            if not improvement:\n",
-"                break\n",
+"        z = cp.Variable(n)\n",
+"        obj = 0.5 * cp.quad_form(z, Qz) + bz @ z\n",
+"        cons = [C @ z >= kL, C @ z <= kU]\n",
+"        cp.Problem(cp.Minimize(obj), cons).solve(solver=\"OSQP\")\n",
+"\n",
+"        r_ub = (C @ z.value).reshape(-1)\n",
+"        f_ub = bracket(r_ub)\n",
+"        if clip_nonneg: f_ub = max(f_ub, 0.0)\n",
+"        sigma2_U = sigma2 * f_ub\n",
 "\n",
-"        sigma2_L = f_curr\n",
-"        return max(sigma2_L, 0)\n",
+"        if self.BnB_LBmethod != \"IPOPT\":\n",
+" \n",
+"            # ---------- LOWER bound: tiny corner-seeking pass on full bracket ----------\n",
+"            r = r_ub.copy()\n",
+"            f = bracket(r)\n",
+"            for _ in range(max(0, int(lb_passes))):\n",
+"                improved = False\n",
+"                for i in range(n):\n",
+"                    # try snapping coord i to each bound\n",
+"                    r_lo = r.copy(); r_lo[i] = kL[i]; f_lo = bracket(r_lo)\n",
+"                    r_hi = r.copy(); r_hi[i] = kU[i]; f_hi = bracket(r_hi)\n",
+"                    # keep best (minimizing f)\n",
+"                    if f_lo + 1e-6 < f: r, f, improved = r_lo, f_lo, True\n",
+"                    if f_hi + 1e-6 < f: r, f, improved = r_hi, f_hi, True\n",
+"                if not improved:\n",
+"                    break\n",
+"            if clip_nonneg: f = max(f, 0.0)\n",
+"            sigma2_L = sigma2 * f\n",
 "\n",
+"        else:\n",
+"            sigma2_L = 0\n",
+" \n",
+"        return sigma2_U, sigma2_L\n",
+" \n",
 "    def rs_ei(self, y, mu, sigma):\n",
 " \n",
 "        y_min = np.min(y)\n",

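One way to sanity-check ker_bounds, under the same assumptions (fitted SMT KRG with corr='squar_exp' or 'pow_exp', so kernel_spec resolves to "pow_exp"), is to confirm that correlations of points sampled inside the box fall within [kL, kU]. The helper name, sample count, and tolerance below are illustrative.

import numpy as np

def check_ker_bracket(bnb, node_l, node_u, n_samples=500, seed=0):
    """Illustrative only: sampled correlations should respect the [kL, kU] bracket."""
    rng = np.random.default_rng(seed)
    bnb.sync_from_smt()
    assert bnb.kernel_spec == "pow_exp"      # this sketch covers the power-exponential case only
    kL, kU = bnb.ker_bounds(node_l, node_u)
    X = rng.uniform(node_l, node_u, size=(n_samples, np.size(node_l)))
    for x in X:
        dx = np.abs(bnb._normalize(x) - bnb.Xc)             # (nt, d) distances, normalized units
        r = np.exp(-(bnb.theta * dx ** bnb.p).sum(axis=1))  # same correlation ker_bounds brackets
        assert np.all(r >= kL - 1e-10) and np.all(r <= kU + 1e-10)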