-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStringUtil.java
277 lines (246 loc) · 9.92 KB
/
StringUtil.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
package org.jsoup.internal;
import org.jsoup.helper.Validate;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Stack;
/**
* A minimal String utility class. Designed for internal jsoup use only.
*/
public final class StringUtil {
// memoised padding up to 21
static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " "};
/**
* Join a collection of strings by a separator
* @param strings collection of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(Collection strings, String sep) {
return join(strings.iterator(), sep);
}
/**
* Join a collection of strings by a separator
* @param strings iterator of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(Iterator strings, String sep) {
if (!strings.hasNext())
return "";
String start = strings.next().toString();
if (!strings.hasNext()) // only one, avoid builder
return start;
StringBuilder sb = StringUtil.borrowBuilder().append(start);
while (strings.hasNext()) {
sb.append(sep);
sb.append(strings.next());
}
return StringUtil.releaseBuilder(sb);
}
/**
* Join an array of strings by a separator
* @param strings collection of string objects
* @param sep string to place between strings
* @return joined string
*/
public static String join(String[] strings, String sep) {
return join(Arrays.asList(strings), sep);
}
/**
* Returns space padding
* @param width amount of padding desired
* @return string of spaces * width
*/
public static String padding(int width) {
if (width < 0)
throw new IllegalArgumentException("width must be > 0");
if (width < padding.length)
return padding[width];
char[] out = new char[width];
for (int i = 0; i < width; i++)
out[i] = ' ';
return String.valueOf(out);
}
/**
* Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
* @param string string to test
* @return if string is blank
*/
public static boolean isBlank(String string) {
if (string == null || string.length() == 0)
return true;
int l = string.length();
for (int i = 0; i < l; i++) {
if (!StringUtil.isWhitespace(string.codePointAt(i)))
return false;
}
return true;
}
/**
* Tests if a string is numeric, i.e. contains only digit characters
* @param string string to test
* @return true if only digit chars, false if empty or null or contains non-digit chars
*/
public static boolean isNumeric(String string) {
if (string == null || string.length() == 0)
return false;
int l = string.length();
for (int i = 0; i < l; i++) {
if (!Character.isDigit(string.codePointAt(i)))
return false;
}
return true;
}
/**
* Tests if a code point is "whitespace" as defined in the HTML spec. Used for output HTML.
* @param c code point to test
* @return true if code point is whitespace, false otherwise
* @see #isActuallyWhitespace(int)
*/
public static boolean isWhitespace(int c){
return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
}
/**
* Tests if a code point is "whitespace" as defined by what it looks like. Used for Element.text etc.
* @param c code point to test
* @return true if code point is whitespace, false otherwise
*/
public static boolean isActuallyWhitespace(int c){
return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == 160;
// 160 is (non-breaking space). Not in the spec but expected.
}
public static boolean isInvisibleChar(int c) {
return Character.getType(c) == 16 && (c == 8203 || c == 8204 || c == 8205 || c == 173);
// zero width sp, zw non join, zw join, soft hyphen
}
/**
* Normalise the whitespace within this string; multiple spaces collapse to a single, and all whitespace characters
* (e.g. newline, tab) convert to a simple space
* @param string content to normalise
* @return normalised string
*/
public static String normaliseWhitespace(String string) {
StringBuilder sb = StringUtil.borrowBuilder();
appendNormalisedWhitespace(sb, string, false);
return StringUtil.releaseBuilder(sb);
}
/**
* After normalizing the whitespace within a string, appends it to a string builder.
* @param accum builder to append to
* @param string string to normalize whitespace within
* @param stripLeading set to true if you wish to remove any leading whitespace
*/
public static void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) {
boolean lastWasWhite = false;
boolean reachedNonWhite = false;
int len = string.length();
int c;
for (int i = 0; i < len; i+= Character.charCount(c)) {
c = string.codePointAt(i);
if (isActuallyWhitespace(c)) {
if ((stripLeading && !reachedNonWhite) || lastWasWhite)
continue;
accum.append(' ');
lastWasWhite = true;
}
else if (!isInvisibleChar(c)) {
accum.appendCodePoint(c);
lastWasWhite = false;
reachedNonWhite = true;
}
}
}
public static boolean in(final String needle, final String... haystack) {
final int len = haystack.length;
for (int i = 0; i < len; i++) {
if (haystack[i].equals(needle))
return true;
}
return false;
}
public static boolean inSorted(String needle, String[] haystack) {
return Arrays.binarySearch(haystack, needle) >= 0;
}
/**
* Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
* @param base the existing absolute base URL
* @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
* @return the resolved absolute URL
* @throws MalformedURLException if an error occurred generating the URL
*/
public static URL resolve(URL base, String relUrl) throws MalformedURLException {
// workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
if (relUrl.startsWith("?"))
relUrl = base.getPath() + relUrl;
// workaround: //example.com + ./foo = //example.com/./foo, not //example.com/foo
if (relUrl.indexOf('.') == 0 && base.getFile().indexOf('/') != 0) {
base = new URL(base.getProtocol(), base.getHost(), base.getPort(), "/" + base.getFile());
}
return new URL(base, relUrl);
}
/**
* Create a new absolute URL, from a provided existing absolute URL and a relative URL component.
* @param baseUrl the existing absolute base URL
* @param relUrl the relative URL to resolve. (If it's already absolute, it will be returned)
* @return an absolute URL if one was able to be generated, or the empty string if not
*/
public static String resolve(final String baseUrl, final String relUrl) {
URL base;
try {
try {
base = new URL(baseUrl);
} catch (MalformedURLException e) {
// the base is unsuitable, but the attribute/rel may be abs on its own, so try that
URL abs = new URL(relUrl);
return abs.toExternalForm();
}
return resolve(base, relUrl).toExternalForm();
} catch (MalformedURLException e) {
return "";
}
}
private static final Stack<StringBuilder> builders = new Stack<>();
/**
* Maintains cached StringBuilders in a flyweight pattern, to minimize new StringBuilder GCs. The StringBuilder is
* prevented from growing too large.
* <p>
* Care must be taken to release the builder once its work has been completed, with {@see #releaseBuilder}
* @return an empty StringBuilder
* @
*/
public static StringBuilder borrowBuilder() {
synchronized (builders) {
return builders.empty() ?
new StringBuilder(MaxCachedBuilderSize) :
builders.pop();
}
}
/**
* Release a borrowed builder. Care must be taken not to use the builder after it has been returned, as its
* contents may be changed by this method, or by a concurrent thread.
* @param sb the StringBuilder to release.
* @return the string value of the released String Builder (as an incentive to release it!).
*/
public static String releaseBuilder(StringBuilder sb) {
Validate.notNull(sb);
String string = sb.toString();
if (sb.length() > MaxCachedBuilderSize)
sb = new StringBuilder(MaxCachedBuilderSize); // make sure it hasn't grown too big
else
sb.delete(0, sb.length()); // make sure it's emptied on release
synchronized (builders) {
builders.push(sb);
while (builders.size() > MaxIdleBuilders) {
builders.pop();
}
}
return string;
}
private static final int MaxCachedBuilderSize = 8 * 1024;
private static final int MaxIdleBuilders = 8;
}