1 /**
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 */
4 package net.sourceforge.pmd.util;
5
6 import java.util.ArrayList;
7 import java.util.Iterator;
8 import java.util.List;
9
10 public class StringUtil {
11
12 public static final String[] EMPTY_STRINGS = new String[0];
13 private static final boolean supportsUTF8 = System.getProperty("net.sourceforge.pmd.supportUTF8", "no").equals("yes");
14 private static final String[] ENTITIES;
15
16 static {
17 ENTITIES = new String[256 - 126];
18 for (int i = 126; i <= 255; i++) {
19 ENTITIES[i - 126] = "&#" + i + ';';
20 }
21 }
22
23 public static String replaceString(String original, char oldChar, String newString) {
24
25 String fixedNew = newString == null ? "" : newString;
26
27 StringBuffer desc = new StringBuffer();
28 int index = original.indexOf(oldChar);
29 int last = 0;
30 while (index != -1) {
31 desc.append(original.substring(last, index));
32 desc.append(fixedNew);
33 last = index + 1;
34 index = original.indexOf(oldChar, last);
35 }
36 desc.append(original.substring(last));
37 return desc.toString();
38 }
39
40 public static String replaceString(String original, String oldString, String newString) {
41
42 String fixedNew = newString == null ? "" : newString;
43
44 StringBuffer desc = new StringBuffer();
45 int index = original.indexOf(oldString);
46 int last = 0;
47 while (index != -1) {
48 desc.append(original.substring(last, index));
49 desc.append(fixedNew);
50 last = index + oldString.length();
51 index = original.indexOf(oldString, last);
52 }
53 desc.append(original.substring(last));
54 return desc.toString();
55 }
56
57 /**
58 * Appends to a StringBuffer the String src where non-ASCII and
59 * XML special chars are escaped.
60 *
61 * @param buf The destination XML stream
62 * @param src The String to append to the stream
63 */
64 public static void appendXmlEscaped(StringBuffer buf, String src) {
65 appendXmlEscaped(buf, src, supportsUTF8);
66 }
67
68 public static String htmlEncode(String string) {
69 String encoded = StringUtil.replaceString(string, '&', "&");
70 encoded = StringUtil.replaceString(encoded, '<', "<");
71 return StringUtil.replaceString(encoded, '>', ">");
72 }
73
74
75
76 private static void appendXmlEscaped(StringBuffer buf, String src, boolean supportUTF8) {
77 char c;
78 for (int i = 0; i < src.length(); i++) {
79 c = src.charAt(i);
80 if (c > '~') {
81 if (!supportUTF8) {
82 if (c <= 255) {
83 buf.append(ENTITIES[c - 126]);
84 } else {
85 buf.append("&u").append(Integer.toHexString(c)).append(';');
86 }
87 } else {
88 buf.append(c);
89 }
90 } else if (c == '&')
91 buf.append("&");
92 else if (c == '"')
93 buf.append(""");
94 else if (c == '<')
95 buf.append("<");
96 else if (c == '>')
97 buf.append(">");
98 else
99 buf.append(c);
100 }
101 }
102
103 /**
104 * Parses the input source using the delimiter specified. This method is much
105 * faster than using the StringTokenizer or String.split(char) approach and
106 * serves as a replacement for String.split() for JDK1.3 that doesn't have it.
107 *
108 * FIXME - we're on JDK 1.4 now, can we replace this with String.split?
109 *
110 * @param source String
111 * @param delimiter char
112 * @return String[]
113 */
114 public static String[] substringsOf(String source, char delimiter) {
115
116 if (source == null || source.length() == 0) {
117 return EMPTY_STRINGS;
118 }
119
120 int delimiterCount = 0;
121 int length = source.length();
122 char[] chars = source.toCharArray();
123
124 for (int i=0; i<length; i++) {
125 if (chars[i] == delimiter) delimiterCount++;
126 }
127
128 if (delimiterCount == 0) return new String[] { source };
129
130 String results[] = new String[delimiterCount+1];
131
132 int i = 0;
133 int offset = 0;
134
135 while (offset <= length) {
136 int pos = source.indexOf(delimiter, offset);
137 if (pos < 0) pos = length;
138 results[i++] = pos == offset ? "" : source.substring(offset, pos);
139 offset = pos + 1;
140 }
141
142 return results;
143 }
144
145 /**
146 * Much more efficient than StringTokenizer.
147 *
148 * @param str String
149 * @param separator char
150 * @return String[]
151 */
152 public static String[] substringsOf(String str, String separator) {
153
154 if (str == null || str.length() == 0) {
155 return EMPTY_STRINGS;
156 }
157
158 int index = str.indexOf(separator);
159 if (index == -1) {
160 return new String[]{str};
161 }
162
163 List<String> list = new ArrayList<String>();
164 int currPos = 0;
165 int len = separator.length();
166 while (index != -1) {
167 list.add(str.substring(currPos, index));
168 currPos = index + len;
169 index = str.indexOf(separator, currPos);
170 }
171 list.add(str.substring(currPos));
172 return list.toArray(new String[list.size()]);
173 }
174
175
176 /**
177 * Copies the elements returned by the iterator onto the string buffer
178 * each delimited by the separator.
179 *
180 * @param sb StringBuffer
181 * @param iter Iterator
182 * @param separator String
183 */
184 public static void asStringOn(StringBuffer sb, Iterator iter, String separator) {
185
186 if (!iter.hasNext()) return;
187
188 sb.append(iter.next());
189
190 while (iter.hasNext()) {
191 sb.append(separator);
192 sb.append(iter.next());
193 }
194 }
195 /**
196 * Return the length of the shortest string in the array.
197 * If any one of them is null then it returns 0.
198 *
199 * @param strings String[]
200 * @return int
201 */
202 public static int lengthOfShortestIn(String[] strings) {
203
204 int minLength = Integer.MAX_VALUE;
205
206 for (int i=0; i<strings.length; i++) {
207 if (strings[i] == null) return 0;
208 minLength = Math.min(minLength, strings[i].length());
209 }
210
211 return minLength;
212 }
213
214 /**
215 * Determine the maximum number of common leading whitespace characters
216 * the strings share in the same sequence. Useful for determining how
217 * many leading characters can be removed to shift all the text in the
218 * strings to the left without misaligning them.
219 *
220 * @param strings String[]
221 * @return int
222 */
223 public static int maxCommonLeadingWhitespaceForAll(String[] strings) {
224
225 int shortest = lengthOfShortestIn(strings);
226 if (shortest == 0) return 0;
227
228 char[] matches = new char[shortest];
229
230 String str;
231 for (int m=0; m<matches.length; m++) {
232 matches[m] = strings[0].charAt(m);
233 if (!Character.isWhitespace(matches[m])) return m;
234 for (int i=0; i<strings.length; i++) {
235 str = strings[i];
236 if (str.charAt(m) != matches[m]) return m;
237 }
238 }
239
240 return shortest;
241 }
242
243 /**
244 * Trims off the leading characters off the strings up to the trimDepth
245 * specified. Returns the same strings if trimDepth = 0
246 *
247 * @param strings
248 * @param trimDepth
249 * @return String[]
250 */
251 public static String[] trimStartOn(String[] strings, int trimDepth) {
252
253 if (trimDepth == 0) return strings;
254
255 String[] results = new String[strings.length];
256 for (int i=0; i<strings.length; i++) {
257 results[i] = strings[i].substring(trimDepth);
258 }
259 return results;
260 }
261
262 /**
263 * Left pads a string.
264 * @param s The String to pad
265 * @param length The desired minimum length of the resulting padded String
266 * @return The resulting left padded String
267 */
268 public static String lpad(String s, int length) {
269 String res = s;
270 if (length - s.length() > 0) {
271 char [] arr = new char[length - s.length()];
272 java.util.Arrays.fill(arr, ' ');
273 res = new StringBuffer(length).append(arr).append(s).toString();
274 }
275 return res;
276 }
277
278 /**
279 * Are the two String values the same.
280 * The Strings can be optionally trimmed before checking.
281 * The Strings can be optionally compared ignoring case.
282 * The Strings can be have embedded whitespace standardized before comparing.
283 * Two null values are treated as equal.
284 *
285 * @param s1 The first String.
286 * @param s2 The second String.
287 * @param trim Indicates if the Strings should be trimmed before comparison.
288 * @param ignoreCase Indicates if the case of the Strings should ignored during comparison.
289 * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
290 * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise.
291 */
292 public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase, boolean standardizeWhitespace) {
293 if (s1 == s2) {
294 return true;
295 } else if (s1 == null || s2 == null) {
296 return false;
297 } else {
298 if (trim) {
299 s1 = s1.trim();
300 s2 = s2.trim();
301 }
302 if (standardizeWhitespace) {
303
304 s1 = s1.replaceAll("\\s+", " ");
305 s2 = s2.replaceAll("\\s+", " ");
306 }
307 return ignoreCase ? s1.equalsIgnoreCase(s2) : s1.equals(s2);
308 }
309 }
310 }