001/*
002 * Copyright 2007-2020 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright 2007-2020 Ping Identity Corporation
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *    http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020/*
021 * Copyright (C) 2008-2020 Ping Identity Corporation
022 *
023 * This program is free software; you can redistribute it and/or modify
024 * it under the terms of the GNU General Public License (GPLv2 only)
025 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
026 * as published by the Free Software Foundation.
027 *
028 * This program is distributed in the hope that it will be useful,
029 * but WITHOUT ANY WARRANTY; without even the implied warranty of
030 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
031 * GNU General Public License for more details.
032 *
033 * You should have received a copy of the GNU General Public License
034 * along with this program; if not, see <http://www.gnu.org/licenses>.
035 */
036package com.unboundid.ldap.matchingrules;
037
038
039
040import com.unboundid.asn1.ASN1OctetString;
041import com.unboundid.util.StaticUtils;
042import com.unboundid.util.ThreadSafety;
043import com.unboundid.util.ThreadSafetyLevel;
044
045
046
047/**
048 * This class provides an implementation of a matching rule that uses
049 * case-insensitive matching that also treats multiple consecutive (non-escaped)
050 * spaces as a single space.
051 */
052@ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
053public final class CaseIgnoreStringMatchingRule
054       extends AcceptAllSimpleMatchingRule
055{
056  /**
057   * The singleton instance that will be returned from the {@code getInstance}
058   * method.
059   */
060  private static final CaseIgnoreStringMatchingRule INSTANCE =
061       new CaseIgnoreStringMatchingRule();
062
063
064
065  /**
066   * The name for the caseIgnoreMatch equality matching rule.
067   */
068  public static final String EQUALITY_RULE_NAME = "caseIgnoreMatch";
069
070
071
072  /**
073   * The name for the caseIgnoreMatch equality matching rule, formatted in all
074   * lowercase characters.
075   */
076  static final String LOWER_EQUALITY_RULE_NAME =
077       StaticUtils.toLowerCase(EQUALITY_RULE_NAME);
078
079
080
081  /**
082   * The OID for the caseIgnoreMatch equality matching rule.
083   */
084  public static final String EQUALITY_RULE_OID = "2.5.13.2";
085
086
087
088  /**
089   * The name for the caseIgnoreOrderingMatch ordering matching rule.
090   */
091  public static final String ORDERING_RULE_NAME = "caseIgnoreOrderingMatch";
092
093
094
095  /**
096   * The name for the caseIgnoreOrderingMatch ordering matching rule, formatted
097   * in all lowercase characters.
098   */
099  static final String LOWER_ORDERING_RULE_NAME =
100       StaticUtils.toLowerCase(ORDERING_RULE_NAME);
101
102
103
104  /**
105   * The OID for the caseIgnoreOrderingMatch ordering matching rule.
106   */
107  public static final String ORDERING_RULE_OID = "2.5.13.3";
108
109
110
111  /**
112   * The name for the caseIgnoreSubstringsMatch substring matching rule.
113   */
114  public static final String SUBSTRING_RULE_NAME = "caseIgnoreSubstringsMatch";
115
116
117
118  /**
119   * The name for the caseIgnoreSubstringsMatch substring matching rule,
120   * formatted in all lowercase characters.
121   */
122  static final String LOWER_SUBSTRING_RULE_NAME =
123       StaticUtils.toLowerCase(SUBSTRING_RULE_NAME);
124
125
126
127  /**
128   * The OID for the caseIgnoreSubstringsMatch substring matching rule.
129   */
130  public static final String SUBSTRING_RULE_OID = "2.5.13.4";
131
132
133
134  /**
135   * The serial version UID for this serializable class.
136   */
137  private static final long serialVersionUID = -1293370922676445525L;
138
139
140
141  /**
142   * Creates a new instance of this case ignore string matching rule.
143   */
144  public CaseIgnoreStringMatchingRule()
145  {
146    // No implementation is required.
147  }
148
149
150
151  /**
152   * Retrieves a singleton instance of this matching rule.
153   *
154   * @return  A singleton instance of this matching rule.
155   */
156  public static CaseIgnoreStringMatchingRule getInstance()
157  {
158    return INSTANCE;
159  }
160
161
162
163  /**
164   * {@inheritDoc}
165   */
166  @Override()
167  public String getEqualityMatchingRuleName()
168  {
169    return EQUALITY_RULE_NAME;
170  }
171
172
173
174  /**
175   * {@inheritDoc}
176   */
177  @Override()
178  public String getEqualityMatchingRuleOID()
179  {
180    return EQUALITY_RULE_OID;
181  }
182
183
184
185  /**
186   * {@inheritDoc}
187   */
188  @Override()
189  public String getOrderingMatchingRuleName()
190  {
191    return ORDERING_RULE_NAME;
192  }
193
194
195
196  /**
197   * {@inheritDoc}
198   */
199  @Override()
200  public String getOrderingMatchingRuleOID()
201  {
202    return ORDERING_RULE_OID;
203  }
204
205
206
207  /**
208   * {@inheritDoc}
209   */
210  @Override()
211  public String getSubstringMatchingRuleName()
212  {
213    return SUBSTRING_RULE_NAME;
214  }
215
216
217
218  /**
219   * {@inheritDoc}
220   */
221  @Override()
222  public String getSubstringMatchingRuleOID()
223  {
224    return SUBSTRING_RULE_OID;
225  }
226
227
228
229  /**
230   * {@inheritDoc}
231   */
232  @Override()
233  public boolean valuesMatch(final ASN1OctetString value1,
234                             final ASN1OctetString value2)
235  {
236    // Try to use a quick, no-copy determination if possible.  If this fails,
237    // then we'll fall back on a more thorough, but more costly, approach.
238    final byte[] value1Bytes = value1.getValue();
239    final byte[] value2Bytes = value2.getValue();
240    if (value1Bytes.length == value2Bytes.length)
241    {
242      for (int i=0; i< value1Bytes.length; i++)
243      {
244        final byte b1 = value1Bytes[i];
245        final byte b2 = value2Bytes[i];
246
247        if (((b1 & 0x7F) != (b1 & 0xFF)) ||
248            ((b2 & 0x7F) != (b2 & 0xFF)))
249        {
250          return normalize(value1).equals(normalize(value2));
251        }
252        else if (b1 != b2)
253        {
254          if ((b1 == ' ') || (b2 == ' '))
255          {
256            return normalize(value1).equals(normalize(value2));
257          }
258          else if (Character.isUpperCase((char) b1))
259          {
260            final char c = Character.toLowerCase((char) b1);
261            if (c != ((char) b2))
262            {
263              return false;
264            }
265          }
266          else if (Character.isUpperCase((char) b2))
267          {
268            final char c = Character.toLowerCase((char) b2);
269            if (c != ((char) b1))
270            {
271              return false;
272            }
273          }
274          else
275          {
276            return false;
277          }
278        }
279      }
280
281      // If we've gotten to this point, then the values must be equal.
282      return true;
283    }
284    else
285    {
286      return normalizeInternal(value1, false, (byte) 0x00).equals(
287                  normalizeInternal(value2, false, (byte) 0x00));
288    }
289  }
290
291
292
293  /**
294   * {@inheritDoc}
295   */
296  @Override()
297  public ASN1OctetString normalize(final ASN1OctetString value)
298  {
299    return normalizeInternal(value, false, (byte) 0x00);
300  }
301
302
303
304  /**
305   * {@inheritDoc}
306   */
307  @Override()
308  public ASN1OctetString normalizeSubstring(final ASN1OctetString value,
309                                            final byte substringType)
310  {
311    return normalizeInternal(value, true, substringType);
312  }
313
314
315
316  /**
317   * Normalizes the provided value for use in either an equality or substring
318   * matching operation.
319   *
320   * @param  value          The value to be normalized.
321   * @param  isSubstring    Indicates whether the value should be normalized as
322   *                        part of a substring assertion rather than an
323   *                        equality assertion.
324   * @param  substringType  The substring type for the element, if it is to be
325   *                        part of a substring assertion.
326   *
327   * @return  The appropriately normalized form of the provided value.
328   */
329  private static ASN1OctetString normalizeInternal(final ASN1OctetString value,
330                                                   final boolean isSubstring,
331                                                   final byte substringType)
332  {
333    final byte[] valueBytes = value.getValue();
334    if (valueBytes.length == 0)
335    {
336      return value;
337    }
338
339    final boolean trimInitial;
340    final boolean trimFinal;
341    if (isSubstring)
342    {
343      switch (substringType)
344      {
345        case SUBSTRING_TYPE_SUBINITIAL:
346          trimInitial = true;
347          trimFinal   = false;
348          break;
349
350        case SUBSTRING_TYPE_SUBFINAL:
351          trimInitial = false;
352          trimFinal   = true;
353          break;
354
355        default:
356          trimInitial = false;
357          trimFinal   = false;
358          break;
359      }
360    }
361    else
362    {
363      trimInitial = true;
364      trimFinal   = true;
365    }
366
367    // Count the number of duplicate spaces in the value, and determine whether
368    // there are any non-space characters.  Also, see if there are any non-ASCII
369    // characters.
370    boolean containsNonSpace = false;
371    boolean lastWasSpace = trimInitial;
372    int numDuplicates = 0;
373    for (final byte b : valueBytes)
374    {
375      if ((b & 0x7F) != (b & 0xFF))
376      {
377        return normalizeNonASCII(value, trimInitial, trimFinal);
378      }
379
380      if (b == ' ')
381      {
382        if (lastWasSpace)
383        {
384          numDuplicates++;
385        }
386        else
387        {
388          lastWasSpace = true;
389        }
390      }
391      else
392      {
393        containsNonSpace = true;
394        lastWasSpace = false;
395      }
396    }
397
398    if (! containsNonSpace)
399    {
400      return new ASN1OctetString(" ");
401    }
402
403    if (lastWasSpace && trimFinal)
404    {
405      numDuplicates++;
406    }
407
408
409    // Create a new byte array to hold the normalized value.
410    lastWasSpace = trimInitial;
411    int targetPos = 0;
412    final byte[] normalizedBytes = new byte[valueBytes.length - numDuplicates];
413    for (int i=0; i < valueBytes.length; i++)
414    {
415      switch (valueBytes[i])
416      {
417        case ' ':
418          if (lastWasSpace || (trimFinal && (i == (valueBytes.length - 1))))
419          {
420            // No action is required.
421          }
422          else
423          {
424            // This condition is needed to handle the special case in which
425            // there are multiple spaces at the end of the value.
426            if (targetPos < normalizedBytes.length)
427            {
428              normalizedBytes[targetPos++] = ' ';
429              lastWasSpace = true;
430            }
431          }
432
433          break;
434        case 'A':
435          normalizedBytes[targetPos++] = 'a';
436          lastWasSpace = false;
437          break;
438        case 'B':
439          normalizedBytes[targetPos++] = 'b';
440          lastWasSpace = false;
441          break;
442        case 'C':
443          normalizedBytes[targetPos++] = 'c';
444          lastWasSpace = false;
445          break;
446        case 'D':
447          normalizedBytes[targetPos++] = 'd';
448          lastWasSpace = false;
449          break;
450        case 'E':
451          normalizedBytes[targetPos++] = 'e';
452          lastWasSpace = false;
453          break;
454        case 'F':
455          normalizedBytes[targetPos++] = 'f';
456          lastWasSpace = false;
457          break;
458        case 'G':
459          normalizedBytes[targetPos++] = 'g';
460          lastWasSpace = false;
461          break;
462        case 'H':
463          normalizedBytes[targetPos++] = 'h';
464          lastWasSpace = false;
465          break;
466        case 'I':
467          normalizedBytes[targetPos++] = 'i';
468          lastWasSpace = false;
469          break;
470        case 'J':
471          normalizedBytes[targetPos++] = 'j';
472          lastWasSpace = false;
473          break;
474        case 'K':
475          normalizedBytes[targetPos++] = 'k';
476          lastWasSpace = false;
477          break;
478        case 'L':
479          normalizedBytes[targetPos++] = 'l';
480          lastWasSpace = false;
481          break;
482        case 'M':
483          normalizedBytes[targetPos++] = 'm';
484          lastWasSpace = false;
485          break;
486        case 'N':
487          normalizedBytes[targetPos++] = 'n';
488          lastWasSpace = false;
489          break;
490        case 'O':
491          normalizedBytes[targetPos++] = 'o';
492          lastWasSpace = false;
493          break;
494        case 'P':
495          normalizedBytes[targetPos++] = 'p';
496          lastWasSpace = false;
497          break;
498        case 'Q':
499          normalizedBytes[targetPos++] = 'q';
500          lastWasSpace = false;
501          break;
502        case 'R':
503          normalizedBytes[targetPos++] = 'r';
504          lastWasSpace = false;
505          break;
506        case 'S':
507          normalizedBytes[targetPos++] = 's';
508          lastWasSpace = false;
509          break;
510        case 'T':
511          normalizedBytes[targetPos++] = 't';
512          lastWasSpace = false;
513          break;
514        case 'U':
515          normalizedBytes[targetPos++] = 'u';
516          lastWasSpace = false;
517          break;
518        case 'V':
519          normalizedBytes[targetPos++] = 'v';
520          lastWasSpace = false;
521          break;
522        case 'W':
523          normalizedBytes[targetPos++] = 'w';
524          lastWasSpace = false;
525          break;
526        case 'X':
527          normalizedBytes[targetPos++] = 'x';
528          lastWasSpace = false;
529          break;
530        case 'Y':
531          normalizedBytes[targetPos++] = 'y';
532          lastWasSpace = false;
533          break;
534        case 'Z':
535          normalizedBytes[targetPos++] = 'z';
536          lastWasSpace = false;
537          break;
538        default:
539          normalizedBytes[targetPos++] = valueBytes[i];
540          lastWasSpace = false;
541          break;
542      }
543    }
544
545
546    return new ASN1OctetString(normalizedBytes);
547  }
548
549
550
551  /**
552   * Normalizes the provided value a string representation, properly handling
553   * any non-ASCII characters.
554   *
555   * @param  value        The value to be normalized.
556   * @param  trimInitial  Indicates whether to trim off all leading spaces at
557   *                      the beginning of the value.
558   * @param  trimFinal    Indicates whether to trim off all trailing spaces at
559   *                      the end of the value.
560   *
561   * @return  The normalized form of the value.
562   */
563  private static ASN1OctetString normalizeNonASCII(final ASN1OctetString value,
564                                                   final boolean trimInitial,
565                                                   final boolean trimFinal)
566  {
567    final StringBuilder buffer = new StringBuilder(value.stringValue());
568
569    int pos = 0;
570    boolean lastWasSpace = trimInitial;
571    while (pos < buffer.length())
572    {
573      final char c = buffer.charAt(pos++);
574      if (c == ' ')
575      {
576        if (lastWasSpace || (trimFinal && (pos >= buffer.length())))
577        {
578          buffer.deleteCharAt(--pos);
579        }
580        else
581        {
582          lastWasSpace = true;
583        }
584      }
585      else
586      {
587        if (Character.isHighSurrogate(c))
588        {
589          if (pos < buffer.length())
590          {
591            final char c2 = buffer.charAt(pos++);
592            if (Character.isLowSurrogate(c2))
593            {
594              final int codePoint = Character.toCodePoint(c, c2);
595              if (Character.isUpperCase(codePoint))
596              {
597                final int lowerCaseCodePoint = Character.toLowerCase(codePoint);
598                buffer.setCharAt((pos-2),
599                     Character.highSurrogate(lowerCaseCodePoint));
600                buffer.setCharAt((pos-1),
601                     Character.lowSurrogate(lowerCaseCodePoint));
602              }
603            }
604          }
605        }
606        else if (Character.isUpperCase(c))
607        {
608          buffer.setCharAt((pos-1), Character.toLowerCase(c));
609        }
610
611        lastWasSpace = false;
612      }
613    }
614
615    // It is possible that there could be an extra space at the end.  If that's
616    // the case, then remove it.
617    if (trimFinal && (buffer.length() > 0) &&
618        (buffer.charAt(buffer.length() - 1) == ' '))
619    {
620      buffer.deleteCharAt(buffer.length() - 1);
621    }
622
623    return new ASN1OctetString(buffer.toString());
624  }
625}