|
| 1 | +/* |
| 2 | + * Copyright 2025 okome. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | +package net.siisise.lang; |
| 17 | + |
| 18 | +import net.siisise.io.Packet; |
| 19 | +import net.siisise.io.PacketA; |
| 20 | +import net.siisise.math.Matics; |
| 21 | + |
| 22 | +/** |
| 23 | + * RFC 3492 Punycode |
| 24 | + * 参考 https://qiita.com/msmania/items/dc0e2b8c2c5de0707435 |
| 25 | + */ |
| 26 | +public class Punycode { |
| 27 | + private static final int BASE = 36; |
| 28 | + private static final int TMIN = 1; |
| 29 | + private static final int TMAX = 26; |
| 30 | + private static final int SKEW = 38; |
| 31 | + private static final int DAMP = 700; |
| 32 | + private static final int INITIAL_BIAS = 72; |
| 33 | + private static final int INITIAL_N = 128; |
| 34 | + private static final char DELIMIT = '-'; |
| 35 | + |
| 36 | + /** |
| 37 | + * 国際化ドメイン名をACE xn-- ASCII列に変換する |
| 38 | + * @param u |
| 39 | + * @return Punycode |
| 40 | + */ |
| 41 | + public static java.lang.String toASCII(java.lang.String u) { |
| 42 | + CodePoint cp = new CodePoint(u); |
| 43 | + if ( cp.length() >= 64 ) { |
| 44 | + throw new IllegalStateException(); |
| 45 | + } |
| 46 | + int[] cpch = cp.codePoints().toArray(); |
| 47 | + |
| 48 | + Packet st = new PacketA(); |
| 49 | + |
| 50 | + // 分離とソート unicodeの大きい方から code と位置に変換 |
| 51 | + do { |
| 52 | + int index = -1; |
| 53 | + int co = 0; // (n, i) |
| 54 | + for (int i = cpch.length - 1; i >= 0; i--) { |
| 55 | + if ( cpch[i] > co ) { |
| 56 | + index = i; |
| 57 | + co = cpch[i]; |
| 58 | + } |
| 59 | + } |
| 60 | + if ( co < 128 ) { |
| 61 | + break; |
| 62 | + } |
| 63 | + co = co * cpch.length + index; |
| 64 | + st.backWrite(Bin.toByte(co)); |
| 65 | + |
| 66 | + int[] tmpch = new int[cpch.length - 1]; |
| 67 | + System.arraycopy(cpch, 0, tmpch, 0, index); |
| 68 | + System.arraycopy(cpch, index+1, tmpch, index, tmpch.length - index); |
| 69 | + cpch = tmpch; |
| 70 | + } while ( true ); |
| 71 | + // 残ったのがASCII |
| 72 | + StringBuilder sb = new StringBuilder(); |
| 73 | + for (int c : cpch) { |
| 74 | + sb.append((char)c); // ASCIIのみ |
| 75 | + } |
| 76 | + if (sb.length() > 0) { |
| 77 | + // ASCII あり |
| 78 | + // xn-- を付ける場合 ASCII + 国際化両方あり |
| 79 | + sb.append('-'); |
| 80 | + } |
| 81 | + if (st.size() == 0) { // 国際化なし ASCIIのみ |
| 82 | + return sb.toString(); |
| 83 | + } |
| 84 | + |
| 85 | + // delta変換 |
| 86 | + int n = INITIAL_N; |
| 87 | + int bias = INITIAL_BIAS; |
| 88 | + |
| 89 | + byte[] dc = new byte[4]; |
| 90 | + int tn = cpch.length; |
| 91 | + int c = n * tn - 1; |
| 92 | + tn++; |
| 93 | + int d = DAMP; |
| 94 | + while (st.length() > 0) { |
| 95 | + int ostat = c + n + 1; |
| 96 | + st.read(dc); |
| 97 | + c = Bin.btoi(dc)[0]; |
| 98 | + n = c / tn; |
| 99 | + int delta = c - ostat; |
| 100 | + sb.append(toCh(delta, bias)); // delta からコード |
| 101 | + bias = adapt(delta, d, tn); |
| 102 | + d = 2; |
| 103 | + tn++; |
| 104 | + } |
| 105 | + |
| 106 | + return sb.toString(); |
| 107 | + } |
| 108 | + |
| 109 | + /** |
| 110 | + * bias の重み. |
| 111 | + * @param delta 前の差分 |
| 112 | + * @param div |
| 113 | + * @param tn n番目の文字 (1開始) |
| 114 | + * @return |
| 115 | + */ |
| 116 | + private static int adapt(int delta, int div, int tn) { |
| 117 | + // 1. |
| 118 | + delta /= div; |
| 119 | + // 2. |
| 120 | + delta += delta / tn; |
| 121 | + int n = 0; |
| 122 | + while (delta > ((BASE - TMIN) * TMAX) / 2) { |
| 123 | + delta /= BASE - TMIN; |
| 124 | + n++; |
| 125 | + } |
| 126 | + return (BASE * n) + (((BASE - TMIN + 1) * delta) / (delta + SKEW)); |
| 127 | + |
| 128 | + } |
| 129 | + |
| 130 | + /** |
| 131 | + * 下からj桁目ぐらいの閾値 thresholds |
| 132 | + * BASE(36) * (j+1) - bias |
| 133 | + * 最小 TMIN 最大 TMAX に制限 |
| 134 | + * @param j |
| 135 | + * @param bias 可変値 |
| 136 | + * @return t_j |
| 137 | + */ |
| 138 | + private static int t(int j, int bias) { |
| 139 | + return Matics.range(BASE * (j+1) - bias, TMIN, TMAX); |
| 140 | + } |
| 141 | + |
| 142 | + static final char[] CODE = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','0','1','2','3','4','5','6','7','8','9'}; |
| 143 | + |
| 144 | + /** |
| 145 | + * 1文字デルタからコード |
| 146 | + * BASE = 36 |
| 147 | + * @param n delta |
| 148 | + * @return |
| 149 | + */ |
| 150 | + private static java.lang.String toCh(int n, int bias) { |
| 151 | + StringBuilder sb = new StringBuilder(); |
| 152 | + int i = 0; |
| 153 | + int k = t(i, bias); |
| 154 | + while (n >= k) { |
| 155 | + n -= k; |
| 156 | + sb.append(CODE[k + (n % (BASE - k))]); |
| 157 | + n = n / (BASE - k); |
| 158 | + i++; |
| 159 | + k = t(i, bias); |
| 160 | + } |
| 161 | + sb.append(CODE[n]); |
| 162 | + return sb.toString(); |
| 163 | + } |
| 164 | + |
| 165 | + public static java.lang.String toUnicode(java.lang.String a) { |
| 166 | + int delimit_index = a.lastIndexOf(DELIMIT); |
| 167 | + StringBuilder sb = new StringBuilder(); |
| 168 | + if ( delimit_index >= 0) { |
| 169 | + java.lang.String ascii = a.substring(0,delimit_index); |
| 170 | + if (delimit_index == a.length() - 1) { |
| 171 | + return ascii; |
| 172 | + } |
| 173 | + sb.append(ascii); |
| 174 | + } |
| 175 | + |
| 176 | + char[] ex = a.substring(delimit_index + 1).toCharArray(); |
| 177 | + int of = 0; |
| 178 | + int w = 1; |
| 179 | + int bias = INITIAL_BIAS; |
| 180 | + int d = DAMP; |
| 181 | + int n = 0; |
| 182 | + int tn = sb.codePointCount(0, sb.length()) + 1; |
| 183 | + int c = INITIAL_N * tn; |
| 184 | + for (int i = 0; i < ex.length; i++) { |
| 185 | + int k = t(i - of, bias); |
| 186 | + int m = num(ex[i]); |
| 187 | + n += m * w; |
| 188 | + if (m >= k) { |
| 189 | + w *= BASE - k; |
| 190 | + } else { |
| 191 | + c += n; |
| 192 | + int idx = c % tn; |
| 193 | + c /= tn; |
| 194 | + char[] cp = Character.toChars(c); |
| 195 | + sb.insert(sb.offsetByCodePoints(0, idx), cp); |
| 196 | + |
| 197 | + // 次の文字 |
| 198 | + of = i+1; |
| 199 | + bias = adapt(n, d, tn); |
| 200 | + tn++; |
| 201 | + c = c*tn + idx + 1; |
| 202 | + |
| 203 | + d = 2; |
| 204 | + w = 1; |
| 205 | + n = 0; |
| 206 | + } |
| 207 | + } |
| 208 | + |
| 209 | + return sb.toString(); |
| 210 | + } |
| 211 | + |
| 212 | + static int num(char ch) { |
| 213 | + if ( ch >= 'a' && ch <= 'z') { |
| 214 | + return ch - 'a'; |
| 215 | + } else if ( ch >= 'A' && ch <= 'Z') { |
| 216 | + return ch - 'A'; |
| 217 | + } else if ( ch >= '0' && ch <= '9') { |
| 218 | + return ch - '0' + 26; |
| 219 | + } |
| 220 | + throw new IllegalStateException(); |
| 221 | + } |
| 222 | + |
| 223 | +} |
0 commit comments