001/* 002 * Copyright 2007-2019 Ping Identity Corporation 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright (C) 2008-2019 Ping Identity Corporation 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021package com.unboundid.ldif; 022 023 024 025import java.io.BufferedReader; 026import java.io.Closeable; 027import java.io.File; 028import java.io.FileInputStream; 029import java.io.InputStream; 030import java.io.InputStreamReader; 031import java.io.IOException; 032import java.nio.charset.StandardCharsets; 033import java.text.ParseException; 034import java.util.ArrayList; 035import java.util.Collection; 036import java.util.Iterator; 037import java.util.HashSet; 038import java.util.LinkedHashMap; 039import java.util.List; 040import java.util.Set; 041import java.util.concurrent.BlockingQueue; 042import java.util.concurrent.ArrayBlockingQueue; 043import java.util.concurrent.TimeUnit; 044import java.util.concurrent.atomic.AtomicBoolean; 045import java.nio.charset.Charset; 046 047import com.unboundid.asn1.ASN1OctetString; 048import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule; 049import com.unboundid.ldap.matchingrules.MatchingRule; 050import com.unboundid.ldap.sdk.Attribute; 051import com.unboundid.ldap.sdk.Control; 052import com.unboundid.ldap.sdk.Entry; 053import com.unboundid.ldap.sdk.Modification; 054import com.unboundid.ldap.sdk.ModificationType; 055import com.unboundid.ldap.sdk.LDAPException; 056import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition; 057import com.unboundid.ldap.sdk.schema.Schema; 058import com.unboundid.util.AggregateInputStream; 059import com.unboundid.util.Base64; 060import com.unboundid.util.Debug; 061import com.unboundid.util.LDAPSDKThreadFactory; 062import com.unboundid.util.StaticUtils; 063import com.unboundid.util.ThreadSafety; 064import com.unboundid.util.ThreadSafetyLevel; 065import com.unboundid.util.Validator; 066import com.unboundid.util.parallel.AsynchronousParallelProcessor; 067import com.unboundid.util.parallel.Result; 068import com.unboundid.util.parallel.ParallelProcessor; 069import com.unboundid.util.parallel.Processor; 070 071import static com.unboundid.ldif.LDIFMessages.*; 072 073/** 074 * This class provides an LDIF reader, which can be used to read and decode 075 * entries and change records from a data source using the LDAP Data Interchange 076 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>. 077 * <BR> 078 * This class is not synchronized. If multiple threads read from the 079 * LDIFReader, they must be synchronized externally. 080 * <BR><BR> 081 * <H2>Example</H2> 082 * The following example iterates through all entries contained in an LDIF file 083 * and attempts to add them to a directory server: 084 * <PRE> 085 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile); 086 * 087 * int entriesRead = 0; 088 * int entriesAdded = 0; 089 * int errorsEncountered = 0; 090 * while (true) 091 * { 092 * Entry entry; 093 * try 094 * { 095 * entry = ldifReader.readEntry(); 096 * if (entry == null) 097 * { 098 * // All entries have been read. 099 * break; 100 * } 101 * 102 * entriesRead++; 103 * } 104 * catch (LDIFException le) 105 * { 106 * errorsEncountered++; 107 * if (le.mayContinueReading()) 108 * { 109 * // A recoverable error occurred while attempting to read a change 110 * // record, at or near line number le.getLineNumber() 111 * // The entry will be skipped, but we'll try to keep reading from the 112 * // LDIF file. 113 * continue; 114 * } 115 * else 116 * { 117 * // An unrecoverable error occurred while attempting to read an entry 118 * // at or near line number le.getLineNumber() 119 * // No further LDIF processing will be performed. 120 * break; 121 * } 122 * } 123 * catch (IOException ioe) 124 * { 125 * // An I/O error occurred while attempting to read from the LDIF file. 126 * // No further LDIF processing will be performed. 127 * errorsEncountered++; 128 * break; 129 * } 130 * 131 * LDAPResult addResult; 132 * try 133 * { 134 * addResult = connection.add(entry); 135 * // If we got here, then the change should have been processed 136 * // successfully. 137 * entriesAdded++; 138 * } 139 * catch (LDAPException le) 140 * { 141 * // If we got here, then the change attempt failed. 142 * addResult = le.toLDAPResult(); 143 * errorsEncountered++; 144 * } 145 * } 146 * 147 * ldifReader.close(); 148 * </PRE> 149 */ 150@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE) 151public final class LDIFReader 152 implements Closeable 153{ 154 /** 155 * The default buffer size (128KB) that will be used when reading from the 156 * data source. 157 */ 158 public static final int DEFAULT_BUFFER_SIZE = 128 * 1024; 159 160 161 162 /* 163 * When processing asynchronously, this determines how many of the allocated 164 * worker threads are used to parse each batch of read entries. 165 */ 166 private static final int ASYNC_MIN_PER_PARSING_THREAD = 3; 167 168 169 170 /** 171 * When processing asynchronously, this specifies the size of the pending and 172 * completed queues. 173 */ 174 private static final int ASYNC_QUEUE_SIZE = 500; 175 176 177 178 /** 179 * Special entry used internally to signal that the LDIFReaderEntryTranslator 180 * has signalled that a read Entry should be skipped by returning null, 181 * which normally implies EOF. 182 */ 183 private static final Entry SKIP_ENTRY = new Entry("cn=skipped"); 184 185 186 187 /** 188 * The default base path that will be prepended to relative paths. It will 189 * end with a trailing slash. 190 */ 191 private static final String DEFAULT_RELATIVE_BASE_PATH; 192 static 193 { 194 final File currentDir; 195 final String currentDirString = System.getProperty("user.dir"); 196 if (currentDirString == null) 197 { 198 currentDir = new File("."); 199 } 200 else 201 { 202 currentDir = new File(currentDirString); 203 } 204 205 final String currentDirAbsolutePath = currentDir.getAbsolutePath(); 206 if (currentDirAbsolutePath.endsWith(File.separator)) 207 { 208 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath; 209 } 210 else 211 { 212 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath + File.separator; 213 } 214 } 215 216 217 218 // The buffered reader that will be used to read LDIF data. 219 private final BufferedReader reader; 220 221 // The behavior that should be exhibited when encountering duplicate attribute 222 // values. 223 private volatile DuplicateValueBehavior duplicateValueBehavior; 224 225 // A line number counter. 226 private long lineNumberCounter = 0; 227 228 // The change record translator to use, if any. 229 private final LDIFReaderChangeRecordTranslator changeRecordTranslator; 230 231 // The entry translator to use, if any. 232 private final LDIFReaderEntryTranslator entryTranslator; 233 234 // The schema that will be used when processing, if applicable. 235 private Schema schema; 236 237 // Specifies the base path that will be prepended to relative paths for file 238 // URLs. 239 private volatile String relativeBasePath; 240 241 // The behavior that should be exhibited with regard to illegal trailing 242 // spaces in attribute values. 243 private volatile TrailingSpaceBehavior trailingSpaceBehavior; 244 245 // True iff we are processing asynchronously. 246 private final boolean isAsync; 247 248 // 249 // The following only apply to asynchronous processing. 250 // 251 252 // Parses entries asynchronously. 253 private final AsynchronousParallelProcessor<UnparsedLDIFRecord,LDIFRecord> 254 asyncParser; 255 256 // Set to true when the end of the input is reached. 257 private final AtomicBoolean asyncParsingComplete; 258 259 // The records that have been read and parsed. 260 private final BlockingQueue<Result<UnparsedLDIFRecord,LDIFRecord>> 261 asyncParsedRecords; 262 263 264 265 /** 266 * Creates a new LDIF reader that will read data from the specified file. 267 * 268 * @param path The path to the file from which the data is to be read. It 269 * must not be {@code null}. 270 * 271 * @throws IOException If a problem occurs while opening the file for 272 * reading. 273 */ 274 public LDIFReader(final String path) 275 throws IOException 276 { 277 this(new FileInputStream(path)); 278 } 279 280 281 282 /** 283 * Creates a new LDIF reader that will read data from the specified file 284 * and parses the LDIF records asynchronously using the specified number of 285 * threads. 286 * 287 * @param path The path to the file from which the data is to be read. It 288 * must not be {@code null}. 289 * @param numParseThreads If this value is greater than zero, then the 290 * specified number of threads will be used to 291 * asynchronously read and parse the LDIF file. 292 * 293 * @throws IOException If a problem occurs while opening the file for 294 * reading. 295 * 296 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 297 * constructor for more details about asynchronous processing. 298 */ 299 public LDIFReader(final String path, final int numParseThreads) 300 throws IOException 301 { 302 this(new FileInputStream(path), numParseThreads); 303 } 304 305 306 307 /** 308 * Creates a new LDIF reader that will read data from the specified file. 309 * 310 * @param file The file from which the data is to be read. It must not be 311 * {@code null}. 312 * 313 * @throws IOException If a problem occurs while opening the file for 314 * reading. 315 */ 316 public LDIFReader(final File file) 317 throws IOException 318 { 319 this(new FileInputStream(file)); 320 } 321 322 323 324 /** 325 * Creates a new LDIF reader that will read data from the specified file 326 * and optionally parses the LDIF records asynchronously using the specified 327 * number of threads. 328 * 329 * @param file The file from which the data is to be read. It 330 * must not be {@code null}. 331 * @param numParseThreads If this value is greater than zero, then the 332 * specified number of threads will be used to 333 * asynchronously read and parse the LDIF file. 334 * 335 * @throws IOException If a problem occurs while opening the file for 336 * reading. 337 */ 338 public LDIFReader(final File file, final int numParseThreads) 339 throws IOException 340 { 341 this(new FileInputStream(file), numParseThreads); 342 } 343 344 345 346 /** 347 * Creates a new LDIF reader that will read data from the specified files in 348 * the order in which they are provided and optionally parses the LDIF records 349 * asynchronously using the specified number of threads. 350 * 351 * @param files The files from which the data is to be read. It 352 * must not be {@code null} or empty. 353 * @param numParseThreads If this value is greater than zero, then the 354 * specified number of threads will be used to 355 * asynchronously read and parse the LDIF file. 356 * @param entryTranslator The LDIFReaderEntryTranslator to apply to entries 357 * before they are returned. This is normally 358 * {@code null}, which causes entries to be returned 359 * unaltered. This is particularly useful when 360 * parsing the input file in parallel because the 361 * entry translation is also done in parallel. 362 * 363 * @throws IOException If a problem occurs while opening the file for 364 * reading. 365 */ 366 public LDIFReader(final File[] files, final int numParseThreads, 367 final LDIFReaderEntryTranslator entryTranslator) 368 throws IOException 369 { 370 this(files, numParseThreads, entryTranslator, null); 371 } 372 373 374 375 /** 376 * Creates a new LDIF reader that will read data from the specified files in 377 * the order in which they are provided and optionally parses the LDIF records 378 * asynchronously using the specified number of threads. 379 * 380 * @param files The files from which the data is to be 381 * read. It must not be {@code null} or 382 * empty. 383 * @param numParseThreads If this value is greater than zero, then 384 * the specified number of threads will be 385 * used to asynchronously read and parse the 386 * LDIF file. 387 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 388 * entries before they are returned. This is 389 * normally {@code null}, which causes entries 390 * to be returned unaltered. This is 391 * particularly useful when parsing the input 392 * file in parallel because the entry 393 * translation is also done in parallel. 394 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 395 * apply to change records before they are 396 * returned. This is normally {@code null}, 397 * which causes change records to be returned 398 * unaltered. This is particularly useful 399 * when parsing the input file in parallel 400 * because the change record translation is 401 * also done in parallel. 402 * 403 * @throws IOException If a problem occurs while opening the file for 404 * reading. 405 */ 406 public LDIFReader(final File[] files, final int numParseThreads, 407 final LDIFReaderEntryTranslator entryTranslator, 408 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 409 throws IOException 410 { 411 this(files, numParseThreads, entryTranslator, changeRecordTranslator, 412 "UTF-8"); 413 } 414 415 416 417 /** 418 * Creates a new LDIF reader that will read data from the specified files in 419 * the order in which they are provided and optionally parses the LDIF records 420 * asynchronously using the specified number of threads. 421 * 422 * @param files The files from which the data is to be 423 * read. It must not be {@code null} or 424 * empty. 425 * @param numParseThreads If this value is greater than zero, then 426 * the specified number of threads will be 427 * used to asynchronously read and parse the 428 * LDIF file. 429 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 430 * entries before they are returned. This is 431 * normally {@code null}, which causes entries 432 * to be returned unaltered. This is 433 * particularly useful when parsing the input 434 * file in parallel because the entry 435 * translation is also done in parallel. 436 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 437 * apply to change records before they are 438 * returned. This is normally {@code null}, 439 * which causes change records to be returned 440 * unaltered. This is particularly useful 441 * when parsing the input file in parallel 442 * because the change record translation is 443 * also done in parallel. 444 * @param characterSet The character set to use when reading from 445 * the input stream. It must not be 446 * {@code null}. 447 * 448 * @throws IOException If a problem occurs while opening the file for 449 * reading. 450 */ 451 public LDIFReader(final File[] files, final int numParseThreads, 452 final LDIFReaderEntryTranslator entryTranslator, 453 final LDIFReaderChangeRecordTranslator changeRecordTranslator, 454 final String characterSet) 455 throws IOException 456 { 457 this(createAggregateInputStream(files), numParseThreads, entryTranslator, 458 changeRecordTranslator, characterSet); 459 } 460 461 462 463 /** 464 * Creates a new aggregate input stream that will read data from the specified 465 * files. If there are multiple files, then a "padding" file will be inserted 466 * between them to ensure that there is at least one blank line between the 467 * end of one file and the beginning of another. 468 * 469 * @param files The files from which the data is to be read. It must not be 470 * {@code null} or empty. 471 * 472 * @return The input stream to use to read data from the provided files. 473 * 474 * @throws IOException If a problem is encountered while attempting to 475 * create the input stream. 476 */ 477 private static InputStream createAggregateInputStream(final File... files) 478 throws IOException 479 { 480 if (files.length == 0) 481 { 482 throw new IOException(ERR_READ_NO_LDIF_FILES.get()); 483 } 484 else 485 { 486 return new AggregateInputStream(true, files); 487 } 488 } 489 490 491 492 /** 493 * Creates a new LDIF reader that will read data from the provided input 494 * stream. 495 * 496 * @param inputStream The input stream from which the data is to be read. 497 * It must not be {@code null}. 498 */ 499 public LDIFReader(final InputStream inputStream) 500 { 501 this(inputStream, 0); 502 } 503 504 505 506 /** 507 * Creates a new LDIF reader that will read data from the specified stream 508 * and parses the LDIF records asynchronously using the specified number of 509 * threads. 510 * 511 * @param inputStream The input stream from which the data is to be read. 512 * It must not be {@code null}. 513 * @param numParseThreads If this value is greater than zero, then the 514 * specified number of threads will be used to 515 * asynchronously read and parse the LDIF file. 516 * 517 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 518 * constructor for more details about asynchronous processing. 519 */ 520 public LDIFReader(final InputStream inputStream, final int numParseThreads) 521 { 522 // UTF-8 is required by RFC 2849. Java guarantees it's always available. 523 this(new BufferedReader( 524 new InputStreamReader(inputStream, StandardCharsets.UTF_8), 525 DEFAULT_BUFFER_SIZE), 526 numParseThreads); 527 } 528 529 530 531 /** 532 * Creates a new LDIF reader that will read data from the specified stream 533 * and parses the LDIF records asynchronously using the specified number of 534 * threads. 535 * 536 * @param inputStream The input stream from which the data is to be read. 537 * It must not be {@code null}. 538 * @param numParseThreads If this value is greater than zero, then the 539 * specified number of threads will be used to 540 * asynchronously read and parse the LDIF file. 541 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read 542 * entries before they are returned. This is normally 543 * {@code null}, which causes entries to be returned 544 * unaltered. This is particularly useful when parsing 545 * the input file in parallel because the entry 546 * translation is also done in parallel. 547 * 548 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 549 * constructor for more details about asynchronous processing. 550 */ 551 public LDIFReader(final InputStream inputStream, final int numParseThreads, 552 final LDIFReaderEntryTranslator entryTranslator) 553 { 554 this(inputStream, numParseThreads, entryTranslator, null); 555 } 556 557 558 559 /** 560 * Creates a new LDIF reader that will read data from the specified stream 561 * and parses the LDIF records asynchronously using the specified number of 562 * threads. 563 * 564 * @param inputStream The input stream from which the data is to 565 * be read. It must not be {@code null}. 566 * @param numParseThreads If this value is greater than zero, then 567 * the specified number of threads will be 568 * used to asynchronously read and parse the 569 * LDIF file. 570 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 571 * entries before they are returned. This is 572 * normally {@code null}, which causes entries 573 * to be returned unaltered. This is 574 * particularly useful when parsing the input 575 * file in parallel because the entry 576 * translation is also done in parallel. 577 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 578 * apply to change records before they are 579 * returned. This is normally {@code null}, 580 * which causes change records to be returned 581 * unaltered. This is particularly useful 582 * when parsing the input file in parallel 583 * because the change record translation is 584 * also done in parallel. 585 * 586 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 587 * constructor for more details about asynchronous processing. 588 */ 589 public LDIFReader(final InputStream inputStream, final int numParseThreads, 590 final LDIFReaderEntryTranslator entryTranslator, 591 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 592 { 593 // UTF-8 is required by RFC 2849. Java guarantees it's always available. 594 this(inputStream, numParseThreads, entryTranslator, changeRecordTranslator, 595 "UTF-8"); 596 } 597 598 599 600 /** 601 * Creates a new LDIF reader that will read data from the specified stream 602 * and parses the LDIF records asynchronously using the specified number of 603 * threads. 604 * 605 * @param inputStream The input stream from which the data is to 606 * be read. It must not be {@code null}. 607 * @param numParseThreads If this value is greater than zero, then 608 * the specified number of threads will be 609 * used to asynchronously read and parse the 610 * LDIF file. 611 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 612 * entries before they are returned. This is 613 * normally {@code null}, which causes entries 614 * to be returned unaltered. This is 615 * particularly useful when parsing the input 616 * file in parallel because the entry 617 * translation is also done in parallel. 618 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 619 * apply to change records before they are 620 * returned. This is normally {@code null}, 621 * which causes change records to be returned 622 * unaltered. This is particularly useful 623 * when parsing the input file in parallel 624 * because the change record translation is 625 * also done in parallel. 626 * @param characterSet The character set to use when reading from 627 * the input stream. It must not be 628 * {@code null}. 629 * 630 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 631 * constructor for more details about asynchronous processing. 632 */ 633 public LDIFReader(final InputStream inputStream, final int numParseThreads, 634 final LDIFReaderEntryTranslator entryTranslator, 635 final LDIFReaderChangeRecordTranslator changeRecordTranslator, 636 final String characterSet) 637 { 638 this(new BufferedReader( 639 new InputStreamReader(inputStream, Charset.forName(characterSet)), 640 DEFAULT_BUFFER_SIZE), 641 numParseThreads, entryTranslator, changeRecordTranslator); 642 } 643 644 645 646 /** 647 * Creates a new LDIF reader that will use the provided buffered reader to 648 * read the LDIF data. The encoding of the underlying Reader must be set to 649 * "UTF-8" as required by RFC 2849. 650 * 651 * @param reader The buffered reader that will be used to read the LDIF 652 * data. It must not be {@code null}. 653 */ 654 public LDIFReader(final BufferedReader reader) 655 { 656 this(reader, 0); 657 } 658 659 660 661 /** 662 * Creates a new LDIF reader that will read data from the specified buffered 663 * reader and parses the LDIF records asynchronously using the specified 664 * number of threads. The encoding of the underlying Reader must be set to 665 * "UTF-8" as required by RFC 2849. 666 * 667 * @param reader The buffered reader that will be used to read the LDIF data. 668 * It must not be {@code null}. 669 * @param numParseThreads If this value is greater than zero, then the 670 * specified number of threads will be used to 671 * asynchronously read and parse the LDIF file. 672 * 673 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 674 * constructor for more details about asynchronous processing. 675 */ 676 public LDIFReader(final BufferedReader reader, final int numParseThreads) 677 { 678 this(reader, numParseThreads, null); 679 } 680 681 682 683 /** 684 * Creates a new LDIF reader that will read data from the specified buffered 685 * reader and parses the LDIF records asynchronously using the specified 686 * number of threads. The encoding of the underlying Reader must be set to 687 * "UTF-8" as required by RFC 2849. 688 * 689 * @param reader The buffered reader that will be used to read the LDIF data. 690 * It must not be {@code null}. 691 * @param numParseThreads If this value is greater than zero, then the 692 * specified number of threads will be used to 693 * asynchronously read and parse the LDIF file. 694 * This should only be set to greater than zero when 695 * performance analysis has demonstrated that reading 696 * and parsing the LDIF is a bottleneck. The default 697 * synchronous processing is normally fast enough. 698 * There is little benefit in passing in a value 699 * greater than four (unless there is an 700 * LDIFReaderEntryTranslator that does time-consuming 701 * processing). A value of zero implies the 702 * default behavior of reading and parsing LDIF 703 * records synchronously when one of the read 704 * methods is called. 705 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read 706 * entries before they are returned. This is normally 707 * {@code null}, which causes entries to be returned 708 * unaltered. This is particularly useful when parsing 709 * the input file in parallel because the entry 710 * translation is also done in parallel. 711 */ 712 public LDIFReader(final BufferedReader reader, 713 final int numParseThreads, 714 final LDIFReaderEntryTranslator entryTranslator) 715 { 716 this(reader, numParseThreads, entryTranslator, null); 717 } 718 719 720 721 /** 722 * Creates a new LDIF reader that will read data from the specified buffered 723 * reader and parses the LDIF records asynchronously using the specified 724 * number of threads. The encoding of the underlying Reader must be set to 725 * "UTF-8" as required by RFC 2849. 726 * 727 * @param reader The buffered reader that will be used to 728 * read the LDIF data. It must not be 729 * {@code null}. 730 * @param numParseThreads If this value is greater than zero, then 731 * the specified number of threads will be 732 * used to asynchronously read and parse the 733 * LDIF file. 734 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 735 * entries before they are returned. This is 736 * normally {@code null}, which causes entries 737 * to be returned unaltered. This is 738 * particularly useful when parsing the input 739 * file in parallel because the entry 740 * translation is also done in parallel. 741 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 742 * apply to change records before they are 743 * returned. This is normally {@code null}, 744 * which causes change records to be returned 745 * unaltered. This is particularly useful 746 * when parsing the input file in parallel 747 * because the change record translation is 748 * also done in parallel. 749 */ 750 public LDIFReader(final BufferedReader reader, final int numParseThreads, 751 final LDIFReaderEntryTranslator entryTranslator, 752 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 753 { 754 Validator.ensureNotNull(reader); 755 Validator.ensureTrue(numParseThreads >= 0, 756 "LDIFReader.numParseThreads must not be negative."); 757 758 this.reader = reader; 759 this.entryTranslator = entryTranslator; 760 this.changeRecordTranslator = changeRecordTranslator; 761 762 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 763 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 764 765 relativeBasePath = DEFAULT_RELATIVE_BASE_PATH; 766 767 if (numParseThreads == 0) 768 { 769 isAsync = false; 770 asyncParser = null; 771 asyncParsingComplete = null; 772 asyncParsedRecords = null; 773 } 774 else 775 { 776 isAsync = true; 777 asyncParsingComplete = new AtomicBoolean(false); 778 779 // Decodes entries in parallel. 780 final LDAPSDKThreadFactory threadFactory = 781 new LDAPSDKThreadFactory("LDIFReader Worker", true, null); 782 final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser = 783 new ParallelProcessor<>( 784 new RecordParser(), threadFactory, numParseThreads, 785 ASYNC_MIN_PER_PARSING_THREAD); 786 787 final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new 788 ArrayBlockingQueue<>(ASYNC_QUEUE_SIZE); 789 790 // The output queue must be a little more than twice as big as the input 791 // queue to more easily handle being shutdown in the middle of processing 792 // when the queues are full and threads are blocked. 793 asyncParsedRecords = new ArrayBlockingQueue<>(2 * ASYNC_QUEUE_SIZE + 100); 794 795 asyncParser = new AsynchronousParallelProcessor<>(pendingQueue, 796 parallelParser, asyncParsedRecords); 797 798 final LineReaderThread lineReaderThread = new LineReaderThread(); 799 lineReaderThread.start(); 800 } 801 } 802 803 804 805 /** 806 * Reads entries from the LDIF file with the specified path and returns them 807 * as a {@code List}. This is a convenience method that should only be used 808 * for data sets that are small enough so that running out of memory isn't a 809 * concern. 810 * 811 * @param path The path to the LDIF file containing the entries to be read. 812 * 813 * @return A list of the entries read from the given LDIF file. 814 * 815 * @throws IOException If a problem occurs while attempting to read data 816 * from the specified file. 817 * 818 * @throws LDIFException If a problem is encountered while attempting to 819 * decode data read as LDIF. 820 */ 821 public static List<Entry> readEntries(final String path) 822 throws IOException, LDIFException 823 { 824 return readEntries(new LDIFReader(path)); 825 } 826 827 828 829 /** 830 * Reads entries from the specified LDIF file and returns them as a 831 * {@code List}. This is a convenience method that should only be used for 832 * data sets that are small enough so that running out of memory isn't a 833 * concern. 834 * 835 * @param file A reference to the LDIF file containing the entries to be 836 * read. 837 * 838 * @return A list of the entries read from the given LDIF file. 839 * 840 * @throws IOException If a problem occurs while attempting to read data 841 * from the specified file. 842 * 843 * @throws LDIFException If a problem is encountered while attempting to 844 * decode data read as LDIF. 845 */ 846 public static List<Entry> readEntries(final File file) 847 throws IOException, LDIFException 848 { 849 return readEntries(new LDIFReader(file)); 850 } 851 852 853 854 /** 855 * Reads and decodes LDIF entries from the provided input stream and 856 * returns them as a {@code List}. This is a convenience method that should 857 * only be used for data sets that are small enough so that running out of 858 * memory isn't a concern. 859 * 860 * @param inputStream The input stream from which the entries should be 861 * read. The input stream will be closed before 862 * returning. 863 * 864 * @return A list of the entries read from the given input stream. 865 * 866 * @throws IOException If a problem occurs while attempting to read data 867 * from the input stream. 868 * 869 * @throws LDIFException If a problem is encountered while attempting to 870 * decode data read as LDIF. 871 */ 872 public static List<Entry> readEntries(final InputStream inputStream) 873 throws IOException, LDIFException 874 { 875 return readEntries(new LDIFReader(inputStream)); 876 } 877 878 879 880 /** 881 * Reads entries from the provided LDIF reader and returns them as a list. 882 * 883 * @param reader The reader from which the entries should be read. It will 884 * be closed before returning. 885 * 886 * @return A list of the entries read from the provided reader. 887 * 888 * @throws IOException If a problem was encountered while attempting to read 889 * data from the LDIF data source. 890 * 891 * @throws LDIFException If a problem is encountered while attempting to 892 * decode data read as LDIF. 893 */ 894 private static List<Entry> readEntries(final LDIFReader reader) 895 throws IOException, LDIFException 896 { 897 try 898 { 899 final ArrayList<Entry> entries = new ArrayList<>(10); 900 while (true) 901 { 902 final Entry e = reader.readEntry(); 903 if (e == null) 904 { 905 break; 906 } 907 908 entries.add(e); 909 } 910 911 return entries; 912 } 913 finally 914 { 915 reader.close(); 916 } 917 } 918 919 920 921 /** 922 * Closes this LDIF reader and the underlying LDIF source. 923 * 924 * @throws IOException If a problem occurs while closing the underlying LDIF 925 * source. 926 */ 927 @Override() 928 public void close() 929 throws IOException 930 { 931 reader.close(); 932 933 if (isAsync()) 934 { 935 // Closing the reader will trigger the LineReaderThread to complete, but 936 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid 937 // this, we clear out the completed output queue, which is larger than 938 // the input queue, so the LineReaderThread will stop reading and 939 // shutdown the asyncParser. 940 asyncParsedRecords.clear(); 941 } 942 } 943 944 945 946 /** 947 * Indicates whether to ignore any duplicate values encountered while reading 948 * LDIF records. 949 * 950 * @return {@code true} if duplicate values should be ignored, or 951 * {@code false} if any LDIF records containing duplicate values 952 * should be rejected. 953 * 954 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead. 955 */ 956 @Deprecated() 957 public boolean ignoreDuplicateValues() 958 { 959 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP); 960 } 961 962 963 964 /** 965 * Specifies whether to ignore any duplicate values encountered while reading 966 * LDIF records. 967 * 968 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 969 * attribute values encountered while reading 970 * LDIF records. 971 * 972 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead. 973 */ 974 @Deprecated() 975 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues) 976 { 977 if (ignoreDuplicateValues) 978 { 979 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 980 } 981 else 982 { 983 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 984 } 985 } 986 987 988 989 /** 990 * Retrieves the behavior that should be exhibited if the LDIF reader 991 * encounters an entry with duplicate values. 992 * 993 * @return The behavior that should be exhibited if the LDIF reader 994 * encounters an entry with duplicate values. 995 */ 996 public DuplicateValueBehavior getDuplicateValueBehavior() 997 { 998 return duplicateValueBehavior; 999 } 1000 1001 1002 1003 /** 1004 * Specifies the behavior that should be exhibited if the LDIF reader 1005 * encounters an entry with duplicate values. 1006 * 1007 * @param duplicateValueBehavior The behavior that should be exhibited if 1008 * the LDIF reader encounters an entry with 1009 * duplicate values. 1010 */ 1011 public void setDuplicateValueBehavior( 1012 final DuplicateValueBehavior duplicateValueBehavior) 1013 { 1014 this.duplicateValueBehavior = duplicateValueBehavior; 1015 } 1016 1017 1018 1019 /** 1020 * Indicates whether to strip off any illegal trailing spaces that may appear 1021 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 1022 * specification strongly recommends that any value which legitimately 1023 * contains trailing spaces be base64-encoded, and any spaces which appear 1024 * after the end of non-base64-encoded values may therefore be considered 1025 * invalid. If any such trailing spaces are encountered in an LDIF record and 1026 * they are not to be stripped, then an {@link LDIFException} will be thrown 1027 * for that record. 1028 * <BR><BR> 1029 * Note that this applies only to spaces after the end of a value, and not to 1030 * spaces which may appear at the end of a line for a value that is wrapped 1031 * and continued on the next line. 1032 * 1033 * @return {@code true} if illegal trailing spaces should be stripped off, or 1034 * {@code false} if LDIF records containing illegal trailing spaces 1035 * should be rejected. 1036 * 1037 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead. 1038 */ 1039 @Deprecated() 1040 public boolean stripTrailingSpaces() 1041 { 1042 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP); 1043 } 1044 1045 1046 1047 /** 1048 * Specifies whether to strip off any illegal trailing spaces that may appear 1049 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 1050 * specification strongly recommends that any value which legitimately 1051 * contains trailing spaces be base64-encoded, and any spaces which appear 1052 * after the end of non-base64-encoded values may therefore be considered 1053 * invalid. If any such trailing spaces are encountered in an LDIF record and 1054 * they are not to be stripped, then an {@link LDIFException} will be thrown 1055 * for that record. 1056 * <BR><BR> 1057 * Note that this applies only to spaces after the end of a value, and not to 1058 * spaces which may appear at the end of a line for a value that is wrapped 1059 * and continued on the next line. 1060 * 1061 * @param stripTrailingSpaces Indicates whether to strip off any illegal 1062 * trailing spaces, or {@code false} if LDIF 1063 * records containing them should be rejected. 1064 * 1065 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead. 1066 */ 1067 @Deprecated() 1068 public void setStripTrailingSpaces(final boolean stripTrailingSpaces) 1069 { 1070 trailingSpaceBehavior = stripTrailingSpaces 1071 ? TrailingSpaceBehavior.STRIP 1072 : TrailingSpaceBehavior.REJECT; 1073 } 1074 1075 1076 1077 /** 1078 * Retrieves the behavior that should be exhibited when encountering attribute 1079 * values which are not base64-encoded but contain trailing spaces. The LDIF 1080 * specification strongly recommends that any value which legitimately 1081 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1082 * may be configured to automatically strip these spaces, to preserve them, or 1083 * to reject any entry or change record containing them. 1084 * 1085 * @return The behavior that should be exhibited when encountering attribute 1086 * values which are not base64-encoded but contain trailing spaces. 1087 */ 1088 public TrailingSpaceBehavior getTrailingSpaceBehavior() 1089 { 1090 return trailingSpaceBehavior; 1091 } 1092 1093 1094 1095 /** 1096 * Specifies the behavior that should be exhibited when encountering attribute 1097 * values which are not base64-encoded but contain trailing spaces. The LDIF 1098 * specification strongly recommends that any value which legitimately 1099 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1100 * may be configured to automatically strip these spaces, to preserve them, or 1101 * to reject any entry or change record containing them. 1102 * 1103 * @param trailingSpaceBehavior The behavior that should be exhibited when 1104 * encountering attribute values which are not 1105 * base64-encoded but contain trailing spaces. 1106 */ 1107 public void setTrailingSpaceBehavior( 1108 final TrailingSpaceBehavior trailingSpaceBehavior) 1109 { 1110 this.trailingSpaceBehavior = trailingSpaceBehavior; 1111 } 1112 1113 1114 1115 /** 1116 * Retrieves the base path that will be prepended to relative paths in order 1117 * to obtain an absolute path. This will only be used for "file:" URLs that 1118 * have paths which do not begin with a slash. 1119 * 1120 * @return The base path that will be prepended to relative paths in order to 1121 * obtain an absolute path. 1122 */ 1123 public String getRelativeBasePath() 1124 { 1125 return relativeBasePath; 1126 } 1127 1128 1129 1130 /** 1131 * Specifies the base path that will be prepended to relative paths in order 1132 * to obtain an absolute path. This will only be used for "file:" URLs that 1133 * have paths which do not begin with a space. 1134 * 1135 * @param relativeBasePath The base path that will be prepended to relative 1136 * paths in order to obtain an absolute path. 1137 */ 1138 public void setRelativeBasePath(final String relativeBasePath) 1139 { 1140 setRelativeBasePath(new File(relativeBasePath)); 1141 } 1142 1143 1144 1145 /** 1146 * Specifies the base path that will be prepended to relative paths in order 1147 * to obtain an absolute path. This will only be used for "file:" URLs that 1148 * have paths which do not begin with a space. 1149 * 1150 * @param relativeBasePath The base path that will be prepended to relative 1151 * paths in order to obtain an absolute path. 1152 */ 1153 public void setRelativeBasePath(final File relativeBasePath) 1154 { 1155 final String path = relativeBasePath.getAbsolutePath(); 1156 if (path.endsWith(File.separator)) 1157 { 1158 this.relativeBasePath = path; 1159 } 1160 else 1161 { 1162 this.relativeBasePath = path + File.separator; 1163 } 1164 } 1165 1166 1167 1168 /** 1169 * Retrieves the schema that will be used when reading LDIF records, if 1170 * defined. 1171 * 1172 * @return The schema that will be used when reading LDIF records, or 1173 * {@code null} if no schema should be used and all attributes should 1174 * be treated as case-insensitive strings. 1175 */ 1176 public Schema getSchema() 1177 { 1178 return schema; 1179 } 1180 1181 1182 1183 /** 1184 * Specifies the schema that should be used when reading LDIF records. 1185 * 1186 * @param schema The schema that should be used when reading LDIF records, 1187 * or {@code null} if no schema should be used and all 1188 * attributes should be treated as case-insensitive strings. 1189 */ 1190 public void setSchema(final Schema schema) 1191 { 1192 this.schema = schema; 1193 } 1194 1195 1196 1197 /** 1198 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1199 * change record. 1200 * 1201 * @return The record read from the LDIF source, or {@code null} if there are 1202 * no more entries to be read. 1203 * 1204 * @throws IOException If a problem occurs while trying to read from the 1205 * LDIF source. 1206 * 1207 * @throws LDIFException If the data read could not be parsed as an entry or 1208 * an LDIF change record. 1209 */ 1210 public LDIFRecord readLDIFRecord() 1211 throws IOException, LDIFException 1212 { 1213 if (isAsync()) 1214 { 1215 return readLDIFRecordAsync(); 1216 } 1217 else 1218 { 1219 return readLDIFRecordInternal(); 1220 } 1221 } 1222 1223 1224 1225 /** 1226 * Reads an entry from the LDIF source. 1227 * 1228 * @return The entry read from the LDIF source, or {@code null} if there are 1229 * no more entries to be read. 1230 * 1231 * @throws IOException If a problem occurs while attempting to read from the 1232 * LDIF source. 1233 * 1234 * @throws LDIFException If the data read could not be parsed as an entry. 1235 */ 1236 public Entry readEntry() 1237 throws IOException, LDIFException 1238 { 1239 if (isAsync()) 1240 { 1241 return readEntryAsync(); 1242 } 1243 else 1244 { 1245 return readEntryInternal(); 1246 } 1247 } 1248 1249 1250 1251 /** 1252 * Reads an LDIF change record from the LDIF source. The LDIF record must 1253 * have a changetype. 1254 * 1255 * @return The change record read from the LDIF source, or {@code null} if 1256 * there are no more records to be read. 1257 * 1258 * @throws IOException If a problem occurs while attempting to read from the 1259 * LDIF source. 1260 * 1261 * @throws LDIFException If the data read could not be parsed as an LDIF 1262 * change record. 1263 */ 1264 public LDIFChangeRecord readChangeRecord() 1265 throws IOException, LDIFException 1266 { 1267 return readChangeRecord(false); 1268 } 1269 1270 1271 1272 /** 1273 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1274 * record does not have a changetype, then it may be assumed to be an add 1275 * change record. 1276 * 1277 * @param defaultAdd Indicates whether an LDIF record not containing a 1278 * changetype should be retrieved as an add change record. 1279 * If this is {@code false} and the record read does not 1280 * include a changetype, then an {@link LDIFException} 1281 * will be thrown. 1282 * 1283 * @return The change record read from the LDIF source, or {@code null} if 1284 * there are no more records to be read. 1285 * 1286 * @throws IOException If a problem occurs while attempting to read from the 1287 * LDIF source. 1288 * 1289 * @throws LDIFException If the data read could not be parsed as an LDIF 1290 * change record. 1291 */ 1292 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd) 1293 throws IOException, LDIFException 1294 { 1295 if (isAsync()) 1296 { 1297 return readChangeRecordAsync(defaultAdd); 1298 } 1299 else 1300 { 1301 return readChangeRecordInternal(defaultAdd); 1302 } 1303 } 1304 1305 1306 1307 /** 1308 * Reads the next {@code LDIFRecord}, which was read and parsed by a different 1309 * thread. 1310 * 1311 * @return The next parsed record or {@code null} if there are no more 1312 * records to read. 1313 * 1314 * @throws IOException If IOException was thrown when reading or parsing 1315 * the record. 1316 * 1317 * @throws LDIFException If LDIFException was thrown parsing the record. 1318 */ 1319 private LDIFRecord readLDIFRecordAsync() 1320 throws IOException, LDIFException 1321 { 1322 Result<UnparsedLDIFRecord, LDIFRecord> result; 1323 LDIFRecord record = null; 1324 while (record == null) 1325 { 1326 result = readLDIFRecordResultAsync(); 1327 if (result == null) 1328 { 1329 return null; 1330 } 1331 1332 record = result.getOutput(); 1333 1334 // This is a special value that means we should skip this Entry. We have 1335 // to use something different than null because null means EOF. 1336 if (record == SKIP_ENTRY) 1337 { 1338 record = null; 1339 } 1340 } 1341 return record; 1342 } 1343 1344 1345 1346 /** 1347 * Reads an entry asynchronously from the LDIF source. 1348 * 1349 * @return The entry read from the LDIF source, or {@code null} if there are 1350 * no more entries to be read. 1351 * 1352 * @throws IOException If a problem occurs while attempting to read from the 1353 * LDIF source. 1354 * @throws LDIFException If the data read could not be parsed as an entry. 1355 */ 1356 private Entry readEntryAsync() 1357 throws IOException, LDIFException 1358 { 1359 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1360 LDIFRecord record = null; 1361 while (record == null) 1362 { 1363 result = readLDIFRecordResultAsync(); 1364 if (result == null) 1365 { 1366 return null; 1367 } 1368 1369 record = result.getOutput(); 1370 1371 // This is a special value that means we should skip this Entry. We have 1372 // to use something different than null because null means EOF. 1373 if (record == SKIP_ENTRY) 1374 { 1375 record = null; 1376 } 1377 } 1378 1379 if (record instanceof Entry) 1380 { 1381 return (Entry) record; 1382 } 1383 else if (record instanceof LDIFChangeRecord) 1384 { 1385 try 1386 { 1387 // Some LDIFChangeRecord can be converted to an Entry. This is really 1388 // an edge case though. 1389 return ((LDIFChangeRecord)record).toEntry(); 1390 } 1391 catch (final LDIFException e) 1392 { 1393 Debug.debugException(e); 1394 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1395 throw new LDIFException(e.getExceptionMessage(), 1396 firstLineNumber, true, e); 1397 } 1398 } 1399 1400 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1401 "LDIFChangeRecord"); 1402 } 1403 1404 1405 1406 /** 1407 * Reads an LDIF change record from the LDIF source asynchronously. 1408 * Optionally, if the LDIF record does not have a changetype, then it may be 1409 * assumed to be an add change record. 1410 * 1411 * @param defaultAdd Indicates whether an LDIF record not containing a 1412 * changetype should be retrieved as an add change record. 1413 * If this is {@code false} and the record read does not 1414 * include a changetype, then an {@link LDIFException} will 1415 * be thrown. 1416 * 1417 * @return The change record read from the LDIF source, or {@code null} if 1418 * there are no more records to be read. 1419 * 1420 * @throws IOException If a problem occurs while attempting to read from the 1421 * LDIF source. 1422 * @throws LDIFException If the data read could not be parsed as an LDIF 1423 * change record. 1424 */ 1425 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd) 1426 throws IOException, LDIFException 1427 { 1428 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1429 LDIFRecord record = null; 1430 while (record == null) 1431 { 1432 result = readLDIFRecordResultAsync(); 1433 if (result == null) 1434 { 1435 return null; 1436 } 1437 1438 record = result.getOutput(); 1439 1440 // This is a special value that means we should skip this Entry. We have 1441 // to use something different than null because null means EOF. 1442 if (record == SKIP_ENTRY) 1443 { 1444 record = null; 1445 } 1446 } 1447 1448 if (record instanceof LDIFChangeRecord) 1449 { 1450 return (LDIFChangeRecord) record; 1451 } 1452 else if (record instanceof Entry) 1453 { 1454 if (defaultAdd) 1455 { 1456 return new LDIFAddChangeRecord((Entry) record); 1457 } 1458 else 1459 { 1460 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1461 throw new LDIFException( 1462 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber, 1463 true); 1464 } 1465 } 1466 1467 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1468 "LDIFChangeRecord"); 1469 } 1470 1471 1472 1473 /** 1474 * Reads the next LDIF record, which was read and parsed asynchronously by 1475 * separate threads. 1476 * 1477 * @return The next LDIF record or {@code null} if there are no more records. 1478 * 1479 * @throws IOException If a problem occurs while attempting to read from the 1480 * LDIF source. 1481 * 1482 * @throws LDIFException If the data read could not be parsed as an entry. 1483 */ 1484 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync() 1485 throws IOException, LDIFException 1486 { 1487 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1488 1489 // If the asynchronous reading and parsing is complete, then we don't have 1490 // to block waiting for the next record to show up on the queue. If there 1491 // isn't a record there, then return null (EOF) right away. 1492 if (asyncParsingComplete.get()) 1493 { 1494 result = asyncParsedRecords.poll(); 1495 } 1496 else 1497 { 1498 try 1499 { 1500 // We probably could just do a asyncParsedRecords.take() here, but 1501 // there are some edge case error scenarios where 1502 // asyncParsingComplete might be set without a special EOF sentinel 1503 // Result enqueued. So to guard against this, we have a very cautious 1504 // polling interval of 1 second. During normal processing, we never 1505 // have to wait for this to expire, when there is something to do 1506 // (like shutdown). 1507 while ((result == null) && (!asyncParsingComplete.get())) 1508 { 1509 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS); 1510 } 1511 1512 // There's a very small chance that we missed the value, so double-check 1513 if (result == null) 1514 { 1515 result = asyncParsedRecords.poll(); 1516 } 1517 } 1518 catch (final InterruptedException e) 1519 { 1520 Debug.debugException(e); 1521 Thread.currentThread().interrupt(); 1522 throw new IOException(e); 1523 } 1524 } 1525 if (result == null) 1526 { 1527 return null; 1528 } 1529 1530 rethrow(result.getFailureCause()); 1531 1532 // Check if we reached the end of the input 1533 final UnparsedLDIFRecord unparsedRecord = result.getInput(); 1534 if (unparsedRecord.isEOF()) 1535 { 1536 // This might have been set already by the LineReaderThread, but 1537 // just in case it hasn't gotten to it yet, do so here. 1538 asyncParsingComplete.set(true); 1539 1540 // Enqueue this EOF result again for any other thread that might be 1541 // blocked in asyncParsedRecords.take() even though having multiple 1542 // threads call this method concurrently breaks the contract of this 1543 // class. 1544 try 1545 { 1546 asyncParsedRecords.put(result); 1547 } 1548 catch (final InterruptedException e) 1549 { 1550 // We shouldn't ever get interrupted because the put won't ever block. 1551 // Once we are done reading, this is the only item left in the queue, 1552 // so we should always be able to re-enqueue it. 1553 Debug.debugException(e); 1554 Thread.currentThread().interrupt(); 1555 } 1556 return null; 1557 } 1558 1559 return result; 1560 } 1561 1562 1563 1564 /** 1565 * Indicates whether this LDIF reader was constructed to perform asynchronous 1566 * processing. 1567 * 1568 * @return {@code true} if this LDIFReader was constructed to perform 1569 * asynchronous processing, or {@code false} if not. 1570 */ 1571 private boolean isAsync() 1572 { 1573 return isAsync; 1574 } 1575 1576 1577 1578 /** 1579 * If not {@code null}, rethrows the specified Throwable as either an 1580 * IOException or LDIFException. 1581 * 1582 * @param t The exception to rethrow. If it's {@code null}, then nothing 1583 * is thrown. 1584 * 1585 * @throws IOException If t is an IOException or a checked Exception that 1586 * is not an LDIFException. 1587 * @throws LDIFException If t is an LDIFException. 1588 */ 1589 static void rethrow(final Throwable t) 1590 throws IOException, LDIFException 1591 { 1592 if (t == null) 1593 { 1594 return; 1595 } 1596 1597 if (t instanceof IOException) 1598 { 1599 throw (IOException) t; 1600 } 1601 else if (t instanceof LDIFException) 1602 { 1603 throw (LDIFException) t; 1604 } 1605 else if (t instanceof RuntimeException) 1606 { 1607 throw (RuntimeException) t; 1608 } 1609 else if (t instanceof Error) 1610 { 1611 throw (Error) t; 1612 } 1613 else 1614 { 1615 throw new IOException(t); 1616 } 1617 } 1618 1619 1620 1621 /** 1622 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1623 * change record. 1624 * 1625 * @return The record read from the LDIF source, or {@code null} if there are 1626 * no more entries to be read. 1627 * 1628 * @throws IOException If a problem occurs while trying to read from the 1629 * LDIF source. 1630 * @throws LDIFException If the data read could not be parsed as an entry or 1631 * an LDIF change record. 1632 */ 1633 private LDIFRecord readLDIFRecordInternal() 1634 throws IOException, LDIFException 1635 { 1636 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1637 return decodeRecord(unparsedRecord, relativeBasePath, schema); 1638 } 1639 1640 1641 1642 /** 1643 * Reads an entry from the LDIF source. 1644 * 1645 * @return The entry read from the LDIF source, or {@code null} if there are 1646 * no more entries to be read. 1647 * 1648 * @throws IOException If a problem occurs while attempting to read from the 1649 * LDIF source. 1650 * @throws LDIFException If the data read could not be parsed as an entry. 1651 */ 1652 private Entry readEntryInternal() 1653 throws IOException, LDIFException 1654 { 1655 Entry e = null; 1656 while (e == null) 1657 { 1658 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1659 if (unparsedRecord.isEOF()) 1660 { 1661 return null; 1662 } 1663 1664 e = decodeEntry(unparsedRecord, relativeBasePath); 1665 Debug.debugLDIFRead(e); 1666 1667 if (entryTranslator != null) 1668 { 1669 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber()); 1670 } 1671 } 1672 return e; 1673 } 1674 1675 1676 1677 /** 1678 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1679 * record does not have a changetype, then it may be assumed to be an add 1680 * change record. 1681 * 1682 * @param defaultAdd Indicates whether an LDIF record not containing a 1683 * changetype should be retrieved as an add change record. 1684 * If this is {@code false} and the record read does not 1685 * include a changetype, then an {@link LDIFException} will 1686 * be thrown. 1687 * 1688 * @return The change record read from the LDIF source, or {@code null} if 1689 * there are no more records to be read. 1690 * 1691 * @throws IOException If a problem occurs while attempting to read from the 1692 * LDIF source. 1693 * @throws LDIFException If the data read could not be parsed as an LDIF 1694 * change record. 1695 */ 1696 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd) 1697 throws IOException, LDIFException 1698 { 1699 LDIFChangeRecord r = null; 1700 while (r == null) 1701 { 1702 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1703 if (unparsedRecord.isEOF()) 1704 { 1705 return null; 1706 } 1707 1708 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd, 1709 schema); 1710 Debug.debugLDIFRead(r); 1711 1712 if (changeRecordTranslator != null) 1713 { 1714 r = changeRecordTranslator.translate(r, 1715 unparsedRecord.getFirstLineNumber()); 1716 } 1717 } 1718 return r; 1719 } 1720 1721 1722 1723 /** 1724 * Reads a record (either an entry or a change record) from the LDIF source 1725 * and places it in the line list. 1726 * 1727 * @return The line number for the first line of the entry that was read. 1728 * 1729 * @throws IOException If a problem occurs while attempting to read from the 1730 * LDIF source. 1731 * 1732 * @throws LDIFException If the data read could not be parsed as a valid 1733 * LDIF record. 1734 */ 1735 private UnparsedLDIFRecord readUnparsedRecord() 1736 throws IOException, LDIFException 1737 { 1738 final ArrayList<StringBuilder> lineList = new ArrayList<>(20); 1739 boolean lastWasComment = false; 1740 long firstLineNumber = lineNumberCounter + 1; 1741 while (true) 1742 { 1743 final String line = reader.readLine(); 1744 lineNumberCounter++; 1745 1746 if (line == null) 1747 { 1748 // We've hit the end of the LDIF source. If we haven't read any entry 1749 // data, then return null. Otherwise, the last entry wasn't followed by 1750 // a blank line, which is OK, and we should decode that entry. 1751 if (lineList.isEmpty()) 1752 { 1753 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0), 1754 duplicateValueBehavior, trailingSpaceBehavior, schema, -1); 1755 } 1756 else 1757 { 1758 break; 1759 } 1760 } 1761 1762 if (line.isEmpty()) 1763 { 1764 // It's a blank line. If we have read entry data, then this signals the 1765 // end of the entry. Otherwise, it's an extra space between entries, 1766 // which is OK. 1767 lastWasComment = false; 1768 if (lineList.isEmpty()) 1769 { 1770 firstLineNumber++; 1771 continue; 1772 } 1773 else 1774 { 1775 break; 1776 } 1777 } 1778 1779 if (line.charAt(0) == ' ') 1780 { 1781 // The line starts with a space, which means that it must be a 1782 // continuation of the previous line. This is true even if the last 1783 // line was a comment. 1784 if (lastWasComment) 1785 { 1786 // What we've read is part of a comment, so we don't care about its 1787 // content. 1788 } 1789 else if (lineList.isEmpty()) 1790 { 1791 throw new LDIFException( 1792 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter), 1793 lineNumberCounter, false); 1794 } 1795 else 1796 { 1797 lineList.get(lineList.size() - 1).append(line.substring(1)); 1798 lastWasComment = false; 1799 } 1800 } 1801 else if (line.charAt(0) == '#') 1802 { 1803 lastWasComment = true; 1804 } 1805 else 1806 { 1807 // We want to make sure that we skip over the "version:" line if it 1808 // exists, but that should only occur at the beginning of an entry where 1809 // it can't be confused with a possible "version" attribute. 1810 if (lineList.isEmpty() && line.startsWith("version:")) 1811 { 1812 lastWasComment = true; 1813 } 1814 else 1815 { 1816 lineList.add(new StringBuilder(line)); 1817 lastWasComment = false; 1818 } 1819 } 1820 } 1821 1822 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 1823 trailingSpaceBehavior, schema, firstLineNumber); 1824 } 1825 1826 1827 1828 /** 1829 * Decodes the provided set of LDIF lines as an entry. The provided set of 1830 * lines must contain exactly one entry. Long lines may be wrapped as per the 1831 * LDIF specification, and it is acceptable to have one or more blank lines 1832 * following the entry. A default trailing space behavior of 1833 * {@link TrailingSpaceBehavior#REJECT} will be used. 1834 * 1835 * @param ldifLines The set of lines that comprise the LDIF representation 1836 * of the entry. It must not be {@code null} or empty. 1837 * 1838 * @return The entry read from LDIF. 1839 * 1840 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1841 * entry. 1842 */ 1843 public static Entry decodeEntry(final String... ldifLines) 1844 throws LDIFException 1845 { 1846 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP, 1847 TrailingSpaceBehavior.REJECT, null, ldifLines), 1848 DEFAULT_RELATIVE_BASE_PATH); 1849 Debug.debugLDIFRead(e); 1850 return e; 1851 } 1852 1853 1854 1855 /** 1856 * Decodes the provided set of LDIF lines as an entry. The provided set of 1857 * lines must contain exactly one entry. Long lines may be wrapped as per the 1858 * LDIF specification, and it is acceptable to have one or more blank lines 1859 * following the entry. A default trailing space behavior of 1860 * {@link TrailingSpaceBehavior#REJECT} will be used. 1861 * 1862 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1863 * attribute values encountered while parsing. 1864 * @param schema The schema to use when parsing the record, 1865 * if applicable. 1866 * @param ldifLines The set of lines that comprise the LDIF 1867 * representation of the entry. It must not be 1868 * {@code null} or empty. 1869 * 1870 * @return The entry read from LDIF. 1871 * 1872 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1873 * entry. 1874 */ 1875 public static Entry decodeEntry(final boolean ignoreDuplicateValues, 1876 final Schema schema, 1877 final String... ldifLines) 1878 throws LDIFException 1879 { 1880 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT, 1881 schema, ldifLines); 1882 } 1883 1884 1885 1886 /** 1887 * Decodes the provided set of LDIF lines as an entry. The provided set of 1888 * lines must contain exactly one entry. Long lines may be wrapped as per the 1889 * LDIF specification, and it is acceptable to have one or more blank lines 1890 * following the entry. 1891 * 1892 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1893 * attribute values encountered while parsing. 1894 * @param trailingSpaceBehavior The behavior that should be exhibited when 1895 * encountering attribute values which are not 1896 * base64-encoded but contain trailing spaces. 1897 * It must not be {@code null}. 1898 * @param schema The schema to use when parsing the record, 1899 * if applicable. 1900 * @param ldifLines The set of lines that comprise the LDIF 1901 * representation of the entry. It must not be 1902 * {@code null} or empty. 1903 * 1904 * @return The entry read from LDIF. 1905 * 1906 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1907 * entry. 1908 */ 1909 public static Entry decodeEntry( 1910 final boolean ignoreDuplicateValues, 1911 final TrailingSpaceBehavior trailingSpaceBehavior, 1912 final Schema schema, 1913 final String... ldifLines) throws LDIFException 1914 { 1915 final Entry e = decodeEntry(prepareRecord( 1916 (ignoreDuplicateValues 1917 ? DuplicateValueBehavior.STRIP 1918 : DuplicateValueBehavior.REJECT), 1919 trailingSpaceBehavior, schema, ldifLines), 1920 DEFAULT_RELATIVE_BASE_PATH); 1921 Debug.debugLDIFRead(e); 1922 return e; 1923 } 1924 1925 1926 1927 /** 1928 * Decodes the provided set of LDIF lines as an LDIF change record. The 1929 * provided set of lines must contain exactly one change record and it must 1930 * include a changetype. Long lines may be wrapped as per the LDIF 1931 * specification, and it is acceptable to have one or more blank lines 1932 * following the entry. 1933 * 1934 * @param ldifLines The set of lines that comprise the LDIF representation 1935 * of the change record. It must not be {@code null} or 1936 * empty. 1937 * 1938 * @return The change record read from LDIF. 1939 * 1940 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1941 * change record. 1942 */ 1943 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines) 1944 throws LDIFException 1945 { 1946 return decodeChangeRecord(false, ldifLines); 1947 } 1948 1949 1950 1951 /** 1952 * Decodes the provided set of LDIF lines as an LDIF change record. The 1953 * provided set of lines must contain exactly one change record. Long lines 1954 * may be wrapped as per the LDIF specification, and it is acceptable to have 1955 * one or more blank lines following the entry. 1956 * 1957 * @param defaultAdd Indicates whether an LDIF record not containing a 1958 * changetype should be retrieved as an add change record. 1959 * If this is {@code false} and the record read does not 1960 * include a changetype, then an {@link LDIFException} 1961 * will be thrown. 1962 * @param ldifLines The set of lines that comprise the LDIF representation 1963 * of the change record. It must not be {@code null} or 1964 * empty. 1965 * 1966 * @return The change record read from LDIF. 1967 * 1968 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1969 * change record. 1970 */ 1971 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd, 1972 final String... ldifLines) 1973 throws LDIFException 1974 { 1975 final LDIFChangeRecord r = 1976 decodeChangeRecord( 1977 prepareRecord(DuplicateValueBehavior.STRIP, 1978 TrailingSpaceBehavior.REJECT, null, ldifLines), 1979 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 1980 Debug.debugLDIFRead(r); 1981 return r; 1982 } 1983 1984 1985 1986 /** 1987 * Decodes the provided set of LDIF lines as an LDIF change record. The 1988 * provided set of lines must contain exactly one change record. Long lines 1989 * may be wrapped as per the LDIF specification, and it is acceptable to have 1990 * one or more blank lines following the entry. 1991 * 1992 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1993 * attribute values encountered while parsing. 1994 * @param schema The schema to use when processing the change 1995 * record, or {@code null} if no schema should 1996 * be used and all values should be treated as 1997 * case-insensitive strings. 1998 * @param defaultAdd Indicates whether an LDIF record not 1999 * containing a changetype should be retrieved 2000 * as an add change record. If this is 2001 * {@code false} and the record read does not 2002 * include a changetype, then an 2003 * {@link LDIFException} will be thrown. 2004 * @param ldifLines The set of lines that comprise the LDIF 2005 * representation of the change record. It 2006 * must not be {@code null} or empty. 2007 * 2008 * @return The change record read from LDIF. 2009 * 2010 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2011 * change record. 2012 */ 2013 public static LDIFChangeRecord decodeChangeRecord( 2014 final boolean ignoreDuplicateValues, 2015 final Schema schema, 2016 final boolean defaultAdd, 2017 final String... ldifLines) 2018 throws LDIFException 2019 { 2020 return decodeChangeRecord(ignoreDuplicateValues, 2021 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines); 2022 } 2023 2024 2025 2026 /** 2027 * Decodes the provided set of LDIF lines as an LDIF change record. The 2028 * provided set of lines must contain exactly one change record. Long lines 2029 * may be wrapped as per the LDIF specification, and it is acceptable to have 2030 * one or more blank lines following the entry. 2031 * 2032 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 2033 * attribute values encountered while parsing. 2034 * @param trailingSpaceBehavior The behavior that should be exhibited when 2035 * encountering attribute values which are not 2036 * base64-encoded but contain trailing spaces. 2037 * It must not be {@code null}. 2038 * @param schema The schema to use when processing the change 2039 * record, or {@code null} if no schema should 2040 * be used and all values should be treated as 2041 * case-insensitive strings. 2042 * @param defaultAdd Indicates whether an LDIF record not 2043 * containing a changetype should be retrieved 2044 * as an add change record. If this is 2045 * {@code false} and the record read does not 2046 * include a changetype, then an 2047 * {@link LDIFException} will be thrown. 2048 * @param ldifLines The set of lines that comprise the LDIF 2049 * representation of the change record. It 2050 * must not be {@code null} or empty. 2051 * 2052 * @return The change record read from LDIF. 2053 * 2054 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2055 * change record. 2056 */ 2057 public static LDIFChangeRecord decodeChangeRecord( 2058 final boolean ignoreDuplicateValues, 2059 final TrailingSpaceBehavior trailingSpaceBehavior, 2060 final Schema schema, 2061 final boolean defaultAdd, 2062 final String... ldifLines) 2063 throws LDIFException 2064 { 2065 final LDIFChangeRecord r = decodeChangeRecord( 2066 prepareRecord( 2067 (ignoreDuplicateValues 2068 ? DuplicateValueBehavior.STRIP 2069 : DuplicateValueBehavior.REJECT), 2070 trailingSpaceBehavior, schema, ldifLines), 2071 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 2072 Debug.debugLDIFRead(r); 2073 return r; 2074 } 2075 2076 2077 2078 /** 2079 * Parses the provided set of lines into a list of {@code StringBuilder} 2080 * objects suitable for decoding into an entry or LDIF change record. 2081 * Comments will be ignored and wrapped lines will be unwrapped. 2082 * 2083 * @param duplicateValueBehavior The behavior that should be exhibited if 2084 * the LDIF reader encounters an entry with 2085 * duplicate values. 2086 * @param trailingSpaceBehavior The behavior that should be exhibited when 2087 * encountering attribute values which are not 2088 * base64-encoded but contain trailing spaces. 2089 * @param schema The schema to use when parsing the record, 2090 * if applicable. 2091 * @param ldifLines The set of lines that comprise the record 2092 * to decode. It must not be {@code null} or 2093 * empty. 2094 * 2095 * @return The prepared list of {@code StringBuilder} objects ready to be 2096 * decoded. 2097 * 2098 * @throws LDIFException If the provided lines do not contain valid LDIF 2099 * content. 2100 */ 2101 private static UnparsedLDIFRecord prepareRecord( 2102 final DuplicateValueBehavior duplicateValueBehavior, 2103 final TrailingSpaceBehavior trailingSpaceBehavior, 2104 final Schema schema, final String... ldifLines) 2105 throws LDIFException 2106 { 2107 Validator.ensureNotNull(ldifLines); 2108 Validator.ensureFalse(ldifLines.length == 0, 2109 "LDIFReader.prepareRecord.ldifLines must not be empty."); 2110 2111 boolean lastWasComment = false; 2112 final ArrayList<StringBuilder> lineList = new ArrayList<>(ldifLines.length); 2113 for (int i=0; i < ldifLines.length; i++) 2114 { 2115 final String line = ldifLines[i]; 2116 if (line.isEmpty()) 2117 { 2118 // This is only acceptable if there are no more non-empty lines in the 2119 // array. 2120 for (int j=i+1; j < ldifLines.length; j++) 2121 { 2122 if (! ldifLines[j].isEmpty()) 2123 { 2124 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true, 2125 ldifLines, null); 2126 } 2127 2128 // If we've gotten here, then we know that we're at the end of the 2129 // entry. If we have read data, then we can decode it as an entry. 2130 // Otherwise, there was no real data in the provided LDIF lines. 2131 if (lineList.isEmpty()) 2132 { 2133 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true, 2134 ldifLines, null); 2135 } 2136 else 2137 { 2138 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2139 trailingSpaceBehavior, schema, 0); 2140 } 2141 } 2142 } 2143 2144 if (line.charAt(0) == ' ') 2145 { 2146 if (i > 0) 2147 { 2148 if (! lastWasComment) 2149 { 2150 lineList.get(lineList.size() - 1).append(line.substring(1)); 2151 } 2152 } 2153 else 2154 { 2155 throw new LDIFException( 2156 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0, 2157 true, ldifLines, null); 2158 } 2159 } 2160 else if (line.charAt(0) == '#') 2161 { 2162 lastWasComment = true; 2163 } 2164 else 2165 { 2166 lineList.add(new StringBuilder(line)); 2167 lastWasComment = false; 2168 } 2169 } 2170 2171 if (lineList.isEmpty()) 2172 { 2173 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null); 2174 } 2175 else 2176 { 2177 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2178 trailingSpaceBehavior, schema, 0); 2179 } 2180 } 2181 2182 2183 2184 /** 2185 * Decodes the unparsed record that was read from the LDIF source. It may be 2186 * either an entry or an LDIF change record. 2187 * 2188 * @param unparsedRecord The unparsed LDIF record that was read from the 2189 * input. It must not be {@code null} or empty. 2190 * @param relativeBasePath The base path that will be prepended to relative 2191 * paths in order to obtain an absolute path. 2192 * @param schema The schema to use when parsing. 2193 * 2194 * @return The parsed record, or {@code null} if there are no more entries to 2195 * be read. 2196 * 2197 * @throws LDIFException If the data read could not be parsed as an entry or 2198 * an LDIF change record. 2199 */ 2200 private static LDIFRecord decodeRecord( 2201 final UnparsedLDIFRecord unparsedRecord, 2202 final String relativeBasePath, 2203 final Schema schema) 2204 throws LDIFException 2205 { 2206 // If there was an error reading from the input, then we rethrow it here. 2207 final Exception readError = unparsedRecord.getFailureCause(); 2208 if (readError != null) 2209 { 2210 if (readError instanceof LDIFException) 2211 { 2212 // If the error was an LDIFException, which will normally be the case, 2213 // then rethrow it with all of the same state. We could just 2214 // throw (LDIFException) readError; 2215 // but that's considered bad form. 2216 final LDIFException ldifEx = (LDIFException) readError; 2217 throw new LDIFException(ldifEx.getMessage(), 2218 ldifEx.getLineNumber(), 2219 ldifEx.mayContinueReading(), 2220 ldifEx.getDataLines(), 2221 ldifEx.getCause()); 2222 } 2223 else 2224 { 2225 throw new LDIFException(StaticUtils.getExceptionMessage(readError), 2226 -1, true, readError); 2227 } 2228 } 2229 2230 if (unparsedRecord.isEOF()) 2231 { 2232 return null; 2233 } 2234 2235 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList(); 2236 if (unparsedRecord.getLineList() == null) 2237 { 2238 return null; // We can get here if there was an error reading the lines. 2239 } 2240 2241 final LDIFRecord r; 2242 if (lineList.size() == 1) 2243 { 2244 r = decodeEntry(unparsedRecord, relativeBasePath); 2245 } 2246 else 2247 { 2248 final String lowerSecondLine = 2249 StaticUtils.toLowerCase(lineList.get(1).toString()); 2250 if (lowerSecondLine.startsWith("control:") || 2251 lowerSecondLine.startsWith("changetype:")) 2252 { 2253 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema); 2254 } 2255 else 2256 { 2257 r = decodeEntry(unparsedRecord, relativeBasePath); 2258 } 2259 } 2260 2261 Debug.debugLDIFRead(r); 2262 return r; 2263 } 2264 2265 2266 2267 /** 2268 * Decodes the provided set of LDIF lines as an entry. The provided list must 2269 * not contain any blank lines or comments, and lines are not allowed to be 2270 * wrapped. 2271 * 2272 * @param unparsedRecord The unparsed LDIF record that was read from the 2273 * input. It must not be {@code null} or empty. 2274 * @param relativeBasePath The base path that will be prepended to relative 2275 * paths in order to obtain an absolute path. 2276 * 2277 * @return The entry read from LDIF. 2278 * 2279 * @throws LDIFException If the provided LDIF data cannot be read as an 2280 * entry. 2281 */ 2282 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord, 2283 final String relativeBasePath) 2284 throws LDIFException 2285 { 2286 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2287 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2288 2289 final Iterator<StringBuilder> iterator = ldifLines.iterator(); 2290 2291 // The first line must start with either "version:" or "dn:". If the first 2292 // line starts with "version:" then the second must start with "dn:". 2293 StringBuilder line = iterator.next(); 2294 handleTrailingSpaces(line, null, firstLineNumber, 2295 unparsedRecord.getTrailingSpaceBehavior()); 2296 int colonPos = line.indexOf(":"); 2297 if ((colonPos > 0) && 2298 line.substring(0, colonPos).equalsIgnoreCase("version")) 2299 { 2300 // The first line is "version:". Under most conditions, this will be 2301 // handled by the LDIF reader, but this can happen if you call 2302 // decodeEntry with a set of data that includes a version. At any rate, 2303 // read the next line, which must specify the DN. 2304 line = iterator.next(); 2305 handleTrailingSpaces(line, null, firstLineNumber, 2306 unparsedRecord.getTrailingSpaceBehavior()); 2307 } 2308 2309 colonPos = line.indexOf(":"); 2310 if ((colonPos < 0) || 2311 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2312 { 2313 throw new LDIFException( 2314 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2315 firstLineNumber, true, ldifLines, null); 2316 } 2317 2318 final String dn; 2319 final int length = line.length(); 2320 if (length == (colonPos+1)) 2321 { 2322 // The colon was the last character on the line. This is acceptable and 2323 // indicates that the entry has the null DN. 2324 dn = ""; 2325 } 2326 else if (line.charAt(colonPos+1) == ':') 2327 { 2328 // Skip over any spaces leading up to the value, and then the rest of the 2329 // string is the base64-encoded DN. 2330 int pos = colonPos+2; 2331 while ((pos < length) && (line.charAt(pos) == ' ')) 2332 { 2333 pos++; 2334 } 2335 2336 try 2337 { 2338 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2339 dn = new String(dnBytes, StandardCharsets.UTF_8); 2340 } 2341 catch (final ParseException pe) 2342 { 2343 Debug.debugException(pe); 2344 throw new LDIFException( 2345 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2346 pe.getMessage()), 2347 firstLineNumber, true, ldifLines, pe); 2348 } 2349 catch (final Exception e) 2350 { 2351 Debug.debugException(e); 2352 throw new LDIFException( 2353 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e), 2354 firstLineNumber, true, ldifLines, e); 2355 } 2356 } 2357 else 2358 { 2359 // Skip over any spaces leading up to the value, and then the rest of the 2360 // string is the DN. 2361 int pos = colonPos+1; 2362 while ((pos < length) && (line.charAt(pos) == ' ')) 2363 { 2364 pos++; 2365 } 2366 2367 dn = line.substring(pos); 2368 } 2369 2370 2371 // The remaining lines must be the attributes for the entry. However, we 2372 // will allow the case in which an entry does not have any attributes, to be 2373 // able to support reading search result entries in which no attributes were 2374 // returned. 2375 if (! iterator.hasNext()) 2376 { 2377 return new Entry(dn, unparsedRecord.getSchema()); 2378 } 2379 2380 return new Entry(dn, unparsedRecord.getSchema(), 2381 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2382 unparsedRecord.getTrailingSpaceBehavior(), 2383 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath, 2384 firstLineNumber)); 2385 } 2386 2387 2388 2389 /** 2390 * Decodes the provided set of LDIF lines as a change record. The provided 2391 * list must not contain any blank lines or comments, and lines are not 2392 * allowed to be wrapped. 2393 * 2394 * @param unparsedRecord The unparsed LDIF record that was read from the 2395 * input. It must not be {@code null} or empty. 2396 * @param relativeBasePath The base path that will be prepended to relative 2397 * paths in order to obtain an absolute path. 2398 * @param defaultAdd Indicates whether an LDIF record not containing a 2399 * changetype should be retrieved as an add change 2400 * record. If this is {@code false} and the record 2401 * read does not include a changetype, then an 2402 * {@link LDIFException} will be thrown. 2403 * @param schema The schema to use in parsing. 2404 * 2405 * @return The change record read from LDIF. 2406 * 2407 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2408 * change record. 2409 */ 2410 private static LDIFChangeRecord decodeChangeRecord( 2411 final UnparsedLDIFRecord unparsedRecord, 2412 final String relativeBasePath, 2413 final boolean defaultAdd, 2414 final Schema schema) 2415 throws LDIFException 2416 { 2417 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2418 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2419 2420 Iterator<StringBuilder> iterator = ldifLines.iterator(); 2421 2422 // The first line must start with either "version:" or "dn:". If the first 2423 // line starts with "version:" then the second must start with "dn:". 2424 StringBuilder line = iterator.next(); 2425 handleTrailingSpaces(line, null, firstLineNumber, 2426 unparsedRecord.getTrailingSpaceBehavior()); 2427 int colonPos = line.indexOf(":"); 2428 int linesRead = 1; 2429 if ((colonPos > 0) && 2430 line.substring(0, colonPos).equalsIgnoreCase("version")) 2431 { 2432 // The first line is "version:". Under most conditions, this will be 2433 // handled by the LDIF reader, but this can happen if you call 2434 // decodeEntry with a set of data that includes a version. At any rate, 2435 // read the next line, which must specify the DN. 2436 line = iterator.next(); 2437 linesRead++; 2438 handleTrailingSpaces(line, null, firstLineNumber, 2439 unparsedRecord.getTrailingSpaceBehavior()); 2440 } 2441 2442 colonPos = line.indexOf(":"); 2443 if ((colonPos < 0) || 2444 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2445 { 2446 throw new LDIFException( 2447 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2448 firstLineNumber, true, ldifLines, null); 2449 } 2450 2451 final String dn; 2452 final int length = line.length(); 2453 if (length == (colonPos+1)) 2454 { 2455 // The colon was the last character on the line. This is acceptable and 2456 // indicates that the entry has the null DN. 2457 dn = ""; 2458 } 2459 else if (line.charAt(colonPos+1) == ':') 2460 { 2461 // Skip over any spaces leading up to the value, and then the rest of the 2462 // string is the base64-encoded DN. 2463 int pos = colonPos+2; 2464 while ((pos < length) && (line.charAt(pos) == ' ')) 2465 { 2466 pos++; 2467 } 2468 2469 try 2470 { 2471 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2472 dn = new String(dnBytes, StandardCharsets.UTF_8); 2473 } 2474 catch (final ParseException pe) 2475 { 2476 Debug.debugException(pe); 2477 throw new LDIFException( 2478 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2479 pe.getMessage()), 2480 firstLineNumber, true, ldifLines, pe); 2481 } 2482 catch (final Exception e) 2483 { 2484 Debug.debugException(e); 2485 throw new LDIFException( 2486 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2487 e), 2488 firstLineNumber, true, ldifLines, e); 2489 } 2490 } 2491 else 2492 { 2493 // Skip over any spaces leading up to the value, and then the rest of the 2494 // string is the DN. 2495 int pos = colonPos+1; 2496 while ((pos < length) && (line.charAt(pos) == ' ')) 2497 { 2498 pos++; 2499 } 2500 2501 dn = line.substring(pos); 2502 } 2503 2504 2505 // An LDIF change record may contain zero or more controls, with the end of 2506 // the controls signified by the changetype. The changetype element must be 2507 // present, unless defaultAdd is true in which case the first thing that is 2508 // neither control or changetype will trigger the start of add attribute 2509 // parsing. 2510 if (! iterator.hasNext()) 2511 { 2512 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber), 2513 firstLineNumber, true, ldifLines, null); 2514 } 2515 2516 String changeType; 2517 ArrayList<Control> controls = null; 2518 while (true) 2519 { 2520 line = iterator.next(); 2521 handleTrailingSpaces(line, dn, firstLineNumber, 2522 unparsedRecord.getTrailingSpaceBehavior()); 2523 colonPos = line.indexOf(":"); 2524 if (colonPos < 0) 2525 { 2526 throw new LDIFException( 2527 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber), 2528 firstLineNumber, true, ldifLines, null); 2529 } 2530 2531 final String token = StaticUtils.toLowerCase(line.substring(0, colonPos)); 2532 if (token.equals("control")) 2533 { 2534 if (controls == null) 2535 { 2536 controls = new ArrayList<>(5); 2537 } 2538 2539 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines, 2540 relativeBasePath)); 2541 } 2542 else if (token.equals("changetype")) 2543 { 2544 changeType = 2545 decodeChangeType(line, colonPos, firstLineNumber, ldifLines); 2546 break; 2547 } 2548 else if (defaultAdd) 2549 { 2550 // The line we read wasn't a control or changetype declaration, so we'll 2551 // assume it's an attribute in an add record. However, we're not ready 2552 // for that yet, and since we can't rewind an iterator we'll create a 2553 // new one that hasn't yet gotten to this line. 2554 changeType = "add"; 2555 iterator = ldifLines.iterator(); 2556 for (int i=0; i < linesRead; i++) 2557 { 2558 iterator.next(); 2559 } 2560 break; 2561 } 2562 else 2563 { 2564 throw new LDIFException( 2565 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get( 2566 firstLineNumber), 2567 firstLineNumber, true, ldifLines, null); 2568 } 2569 2570 linesRead++; 2571 } 2572 2573 2574 // Make sure that the change type is acceptable and then decode the rest of 2575 // the change record accordingly. 2576 final String lowerChangeType = StaticUtils.toLowerCase(changeType); 2577 if (lowerChangeType.equals("add")) 2578 { 2579 // There must be at least one more line. If not, then that's an error. 2580 // Otherwise, parse the rest of the data as attribute-value pairs. 2581 if (iterator.hasNext()) 2582 { 2583 final Collection<Attribute> attrs = 2584 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2585 unparsedRecord.getTrailingSpaceBehavior(), 2586 unparsedRecord.getSchema(), ldifLines, iterator, 2587 relativeBasePath, firstLineNumber); 2588 final Attribute[] attributes = new Attribute[attrs.size()]; 2589 final Iterator<Attribute> attrIterator = attrs.iterator(); 2590 for (int i=0; i < attributes.length; i++) 2591 { 2592 attributes[i] = attrIterator.next(); 2593 } 2594 2595 return new LDIFAddChangeRecord(dn, attributes, controls); 2596 } 2597 else 2598 { 2599 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber), 2600 firstLineNumber, true, ldifLines, null); 2601 } 2602 } 2603 else if (lowerChangeType.equals("delete")) 2604 { 2605 // There shouldn't be any more data. If there is, then that's an error. 2606 // Otherwise, we can just return the delete change record with what we 2607 // already know. 2608 if (iterator.hasNext()) 2609 { 2610 throw new LDIFException( 2611 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber), 2612 firstLineNumber, true, ldifLines, null); 2613 } 2614 else 2615 { 2616 return new LDIFDeleteChangeRecord(dn, controls); 2617 } 2618 } 2619 else if (lowerChangeType.equals("modify")) 2620 { 2621 // There must be at least one more line. If not, then that's an error. 2622 // Otherwise, parse the rest of the data as a set of modifications. 2623 if (iterator.hasNext()) 2624 { 2625 final Modification[] mods = parseModifications(dn, 2626 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator, 2627 firstLineNumber, schema); 2628 return new LDIFModifyChangeRecord(dn, mods, controls); 2629 } 2630 else 2631 { 2632 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber), 2633 firstLineNumber, true, ldifLines, null); 2634 } 2635 } 2636 else if (lowerChangeType.equals("moddn") || 2637 lowerChangeType.equals("modrdn")) 2638 { 2639 // There must be at least one more line. If not, then that's an error. 2640 // Otherwise, parse the rest of the data as a set of modifications. 2641 if (iterator.hasNext()) 2642 { 2643 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls, 2644 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber); 2645 } 2646 else 2647 { 2648 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber), 2649 firstLineNumber, true, ldifLines, null); 2650 } 2651 } 2652 else 2653 { 2654 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType, 2655 firstLineNumber), 2656 firstLineNumber, true, ldifLines, null); 2657 } 2658 } 2659 2660 2661 2662 /** 2663 * Decodes information about a control from the provided line. 2664 * 2665 * @param line The line to process. 2666 * @param colonPos The position of the colon that separates the 2667 * control token string from tbe encoded control. 2668 * @param firstLineNumber The line number for the start of the record. 2669 * @param ldifLines The lines that comprise the LDIF representation 2670 * of the full record being parsed. 2671 * @param relativeBasePath The base path that will be prepended to relative 2672 * paths in order to obtain an absolute path. 2673 * 2674 * @return The decoded control. 2675 * 2676 * @throws LDIFException If a problem is encountered while trying to decode 2677 * the changetype. 2678 */ 2679 private static Control decodeControl(final StringBuilder line, 2680 final int colonPos, 2681 final long firstLineNumber, 2682 final ArrayList<StringBuilder> ldifLines, 2683 final String relativeBasePath) 2684 throws LDIFException 2685 { 2686 final String controlString; 2687 int length = line.length(); 2688 if (length == (colonPos+1)) 2689 { 2690 // The colon was the last character on the line. This is not 2691 // acceptable. 2692 throw new LDIFException( 2693 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2694 firstLineNumber, true, ldifLines, null); 2695 } 2696 else if (line.charAt(colonPos+1) == ':') 2697 { 2698 // Skip over any spaces leading up to the value, and then the rest of 2699 // the string is the base64-encoded control representation. This is 2700 // unusual and unnecessary, but is nevertheless acceptable. 2701 int pos = colonPos+2; 2702 while ((pos < length) && (line.charAt(pos) == ' ')) 2703 { 2704 pos++; 2705 } 2706 2707 try 2708 { 2709 final byte[] controlBytes = Base64.decode(line.substring(pos)); 2710 controlString = new String(controlBytes, StandardCharsets.UTF_8); 2711 } 2712 catch (final ParseException pe) 2713 { 2714 Debug.debugException(pe); 2715 throw new LDIFException( 2716 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get( 2717 firstLineNumber, pe.getMessage()), 2718 firstLineNumber, true, ldifLines, pe); 2719 } 2720 catch (final Exception e) 2721 { 2722 Debug.debugException(e); 2723 throw new LDIFException( 2724 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e), 2725 firstLineNumber, true, ldifLines, e); 2726 } 2727 } 2728 else 2729 { 2730 // Skip over any spaces leading up to the value, and then the rest of 2731 // the string is the encoded control. 2732 int pos = colonPos+1; 2733 while ((pos < length) && (line.charAt(pos) == ' ')) 2734 { 2735 pos++; 2736 } 2737 2738 controlString = line.substring(pos); 2739 } 2740 2741 // If the resulting control definition is empty, then that's invalid. 2742 if (controlString.isEmpty()) 2743 { 2744 throw new LDIFException( 2745 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2746 firstLineNumber, true, ldifLines, null); 2747 } 2748 2749 2750 // The first element of the control must be the OID, and it must be followed 2751 // by a space (to separate it from the criticality), a colon (to separate it 2752 // from the value and indicate a default criticality of false), or the end 2753 // of the line (to indicate a default criticality of false and no value). 2754 String oid = null; 2755 boolean hasCriticality = false; 2756 boolean hasValue = false; 2757 int pos = 0; 2758 length = controlString.length(); 2759 while (pos < length) 2760 { 2761 final char c = controlString.charAt(pos); 2762 if (c == ':') 2763 { 2764 // This indicates that there is no criticality and that the value 2765 // immediately follows the OID. 2766 oid = controlString.substring(0, pos++); 2767 hasValue = true; 2768 break; 2769 } 2770 else if (c == ' ') 2771 { 2772 // This indicates that there is a criticality. We don't know anything 2773 // about the presence of a value yet. 2774 oid = controlString.substring(0, pos++); 2775 hasCriticality = true; 2776 break; 2777 } 2778 else 2779 { 2780 pos++; 2781 } 2782 } 2783 2784 if (oid == null) 2785 { 2786 // This indicates that the string representation of the control is only 2787 // the OID. 2788 return new Control(controlString, false); 2789 } 2790 2791 2792 // See if we need to read the criticality. If so, then do so now. 2793 // Otherwise, assume a default criticality of false. 2794 final boolean isCritical; 2795 if (hasCriticality) 2796 { 2797 // Skip over any spaces before the criticality. 2798 while (controlString.charAt(pos) == ' ') 2799 { 2800 pos++; 2801 } 2802 2803 // Read until we find a colon or the end of the string. 2804 final int criticalityStartPos = pos; 2805 while (pos < length) 2806 { 2807 final char c = controlString.charAt(pos); 2808 if (c == ':') 2809 { 2810 hasValue = true; 2811 break; 2812 } 2813 else 2814 { 2815 pos++; 2816 } 2817 } 2818 2819 final String criticalityString = 2820 StaticUtils.toLowerCase(controlString.substring(criticalityStartPos, 2821 pos)); 2822 if (criticalityString.equals("true")) 2823 { 2824 isCritical = true; 2825 } 2826 else if (criticalityString.equals("false")) 2827 { 2828 isCritical = false; 2829 } 2830 else 2831 { 2832 throw new LDIFException( 2833 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString, 2834 firstLineNumber), 2835 firstLineNumber, true, ldifLines, null); 2836 } 2837 2838 if (hasValue) 2839 { 2840 pos++; 2841 } 2842 } 2843 else 2844 { 2845 isCritical = false; 2846 } 2847 2848 // See if we need to read the value. If so, then do so now. It may be 2849 // a string, or it may be base64-encoded. It could conceivably even be read 2850 // from a URL. 2851 final ASN1OctetString value; 2852 if (hasValue) 2853 { 2854 // The character immediately after the colon that precedes the value may 2855 // be one of the following: 2856 // - A second colon (optionally followed by a single space) to indicate 2857 // that the value is base64-encoded. 2858 // - A less-than symbol to indicate that the value should be read from a 2859 // location specified by a URL. 2860 // - A single space that precedes the non-base64-encoded value. 2861 // - The first character of the non-base64-encoded value. 2862 switch (controlString.charAt(pos)) 2863 { 2864 case ':': 2865 try 2866 { 2867 if (controlString.length() == (pos+1)) 2868 { 2869 value = new ASN1OctetString(); 2870 } 2871 else if (controlString.charAt(pos+1) == ' ') 2872 { 2873 value = new ASN1OctetString( 2874 Base64.decode(controlString.substring(pos+2))); 2875 } 2876 else 2877 { 2878 value = new ASN1OctetString( 2879 Base64.decode(controlString.substring(pos+1))); 2880 } 2881 } 2882 catch (final Exception e) 2883 { 2884 Debug.debugException(e); 2885 throw new LDIFException( 2886 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get( 2887 firstLineNumber, StaticUtils.getExceptionMessage(e)), 2888 firstLineNumber, true, ldifLines, e); 2889 } 2890 break; 2891 case '<': 2892 try 2893 { 2894 final String urlString; 2895 if (controlString.charAt(pos+1) == ' ') 2896 { 2897 urlString = controlString.substring(pos+2); 2898 } 2899 else 2900 { 2901 urlString = controlString.substring(pos+1); 2902 } 2903 value = new ASN1OctetString(retrieveURLBytes(urlString, 2904 relativeBasePath, firstLineNumber)); 2905 } 2906 catch (final Exception e) 2907 { 2908 Debug.debugException(e); 2909 throw new LDIFException( 2910 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get( 2911 firstLineNumber, StaticUtils.getExceptionMessage(e)), 2912 firstLineNumber, true, ldifLines, e); 2913 } 2914 break; 2915 case ' ': 2916 value = new ASN1OctetString(controlString.substring(pos+1)); 2917 break; 2918 default: 2919 value = new ASN1OctetString(controlString.substring(pos)); 2920 break; 2921 } 2922 } 2923 else 2924 { 2925 value = null; 2926 } 2927 2928 return new Control(oid, isCritical, value); 2929 } 2930 2931 2932 2933 /** 2934 * Decodes the changetype element from the provided line. 2935 * 2936 * @param line The line to process. 2937 * @param colonPos The position of the colon that separates the 2938 * changetype string from its value. 2939 * @param firstLineNumber The line number for the start of the record. 2940 * @param ldifLines The lines that comprise the LDIF representation of 2941 * the full record being parsed. 2942 * 2943 * @return The decoded changetype string. 2944 * 2945 * @throws LDIFException If a problem is encountered while trying to decode 2946 * the changetype. 2947 */ 2948 private static String decodeChangeType(final StringBuilder line, 2949 final int colonPos, final long firstLineNumber, 2950 final ArrayList<StringBuilder> ldifLines) 2951 throws LDIFException 2952 { 2953 final int length = line.length(); 2954 if (length == (colonPos+1)) 2955 { 2956 // The colon was the last character on the line. This is not 2957 // acceptable. 2958 throw new LDIFException( 2959 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber, 2960 true, ldifLines, null); 2961 } 2962 else if (line.charAt(colonPos+1) == ':') 2963 { 2964 // Skip over any spaces leading up to the value, and then the rest of 2965 // the string is the base64-encoded changetype. This is unusual and 2966 // unnecessary, but is nevertheless acceptable. 2967 int pos = colonPos+2; 2968 while ((pos < length) && (line.charAt(pos) == ' ')) 2969 { 2970 pos++; 2971 } 2972 2973 try 2974 { 2975 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 2976 return new String(changeTypeBytes, StandardCharsets.UTF_8); 2977 } 2978 catch (final ParseException pe) 2979 { 2980 Debug.debugException(pe); 2981 throw new LDIFException( 2982 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, 2983 pe.getMessage()), 2984 firstLineNumber, true, ldifLines, pe); 2985 } 2986 catch (final Exception e) 2987 { 2988 Debug.debugException(e); 2989 throw new LDIFException( 2990 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e), 2991 firstLineNumber, true, ldifLines, e); 2992 } 2993 } 2994 else 2995 { 2996 // Skip over any spaces leading up to the value, and then the rest of 2997 // the string is the changetype. 2998 int pos = colonPos+1; 2999 while ((pos < length) && (line.charAt(pos) == ' ')) 3000 { 3001 pos++; 3002 } 3003 3004 return line.substring(pos); 3005 } 3006 } 3007 3008 3009 3010 /** 3011 * Parses the data available through the provided iterator as a collection of 3012 * attributes suitable for use in an entry or an add change record. 3013 * 3014 * @param dn The DN of the record being read. 3015 * @param duplicateValueBehavior The behavior that should be exhibited if 3016 * the LDIF reader encounters an entry with 3017 * duplicate values. 3018 * @param trailingSpaceBehavior The behavior that should be exhibited when 3019 * encountering attribute values which are not 3020 * base64-encoded but contain trailing spaces. 3021 * @param schema The schema to use when parsing the 3022 * attributes, or {@code null} if none is 3023 * needed. 3024 * @param ldifLines The lines that comprise the LDIF 3025 * representation of the full record being 3026 * parsed. 3027 * @param iterator The iterator to use to access the attribute 3028 * lines. 3029 * @param relativeBasePath The base path that will be prepended to 3030 * relative paths in order to obtain an 3031 * absolute path. 3032 * @param firstLineNumber The line number for the start of the 3033 * record. 3034 * 3035 * @return The collection of attributes that were read. 3036 * 3037 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3038 * set of attributes. 3039 */ 3040 private static ArrayList<Attribute> parseAttributes(final String dn, 3041 final DuplicateValueBehavior duplicateValueBehavior, 3042 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema, 3043 final ArrayList<StringBuilder> ldifLines, 3044 final Iterator<StringBuilder> iterator, final String relativeBasePath, 3045 final long firstLineNumber) 3046 throws LDIFException 3047 { 3048 final LinkedHashMap<String,Object> attributes = 3049 new LinkedHashMap<>(StaticUtils.computeMapCapacity(ldifLines.size())); 3050 while (iterator.hasNext()) 3051 { 3052 final StringBuilder line = iterator.next(); 3053 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3054 final int colonPos = line.indexOf(":"); 3055 if (colonPos <= 0) 3056 { 3057 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3058 firstLineNumber, true, ldifLines, null); 3059 } 3060 3061 final String attributeName = line.substring(0, colonPos); 3062 final String lowerName = StaticUtils.toLowerCase(attributeName); 3063 3064 final MatchingRule matchingRule; 3065 if (schema == null) 3066 { 3067 matchingRule = CaseIgnoreStringMatchingRule.getInstance(); 3068 } 3069 else 3070 { 3071 matchingRule = 3072 MatchingRule.selectEqualityMatchingRule(attributeName, schema); 3073 } 3074 3075 Attribute attr; 3076 final LDIFAttribute ldifAttr; 3077 final Object attrObject = attributes.get(lowerName); 3078 if (attrObject == null) 3079 { 3080 attr = null; 3081 ldifAttr = null; 3082 } 3083 else 3084 { 3085 if (attrObject instanceof Attribute) 3086 { 3087 attr = (Attribute) attrObject; 3088 ldifAttr = new LDIFAttribute(attr.getName(), matchingRule, 3089 attr.getRawValues()[0]); 3090 attributes.put(lowerName, ldifAttr); 3091 } 3092 else 3093 { 3094 attr = null; 3095 ldifAttr = (LDIFAttribute) attrObject; 3096 } 3097 } 3098 3099 final int length = line.length(); 3100 if (length == (colonPos+1)) 3101 { 3102 // This means that the attribute has a zero-length value, which is 3103 // acceptable. 3104 if (attrObject == null) 3105 { 3106 attr = new Attribute(attributeName, matchingRule, ""); 3107 attributes.put(lowerName, attr); 3108 } 3109 else 3110 { 3111 try 3112 { 3113 if (! ldifAttr.addValue(new ASN1OctetString(), 3114 duplicateValueBehavior)) 3115 { 3116 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3117 { 3118 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3119 firstLineNumber, attributeName), firstLineNumber, true, 3120 ldifLines, null); 3121 } 3122 } 3123 } 3124 catch (final LDAPException le) 3125 { 3126 throw new LDIFException( 3127 ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, firstLineNumber, 3128 attributeName, StaticUtils.getExceptionMessage(le)), 3129 firstLineNumber, true, ldifLines, le); 3130 } 3131 } 3132 } 3133 else if (line.charAt(colonPos+1) == ':') 3134 { 3135 // Skip over any spaces leading up to the value, and then the rest of 3136 // the string is the base64-encoded attribute value. 3137 int pos = colonPos+2; 3138 while ((pos < length) && (line.charAt(pos) == ' ')) 3139 { 3140 pos++; 3141 } 3142 3143 try 3144 { 3145 final byte[] valueBytes = Base64.decode(line.substring(pos)); 3146 if (attrObject == null) 3147 { 3148 attr = new Attribute(attributeName, matchingRule, valueBytes); 3149 attributes.put(lowerName, attr); 3150 } 3151 else 3152 { 3153 try 3154 { 3155 if (! ldifAttr.addValue(new ASN1OctetString(valueBytes), 3156 duplicateValueBehavior)) 3157 { 3158 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3159 { 3160 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3161 firstLineNumber, attributeName), firstLineNumber, true, 3162 ldifLines, null); 3163 } 3164 } 3165 } 3166 catch (final LDAPException le) 3167 { 3168 throw new LDIFException( 3169 ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, firstLineNumber, 3170 attributeName, StaticUtils.getExceptionMessage(le)), 3171 firstLineNumber, true, ldifLines, le); 3172 } 3173 } 3174 } 3175 catch (final ParseException pe) 3176 { 3177 Debug.debugException(pe); 3178 throw new LDIFException( 3179 ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(attributeName, 3180 firstLineNumber, pe.getMessage()), 3181 firstLineNumber, true, ldifLines, pe); 3182 } 3183 } 3184 else if (line.charAt(colonPos+1) == '<') 3185 { 3186 // Skip over any spaces leading up to the value, and then the rest of 3187 // the string is a URL that indicates where to get the real content. 3188 // At the present time, we'll only support the file URLs. 3189 int pos = colonPos+2; 3190 while ((pos < length) && (line.charAt(pos) == ' ')) 3191 { 3192 pos++; 3193 } 3194 3195 final byte[] urlBytes; 3196 final String urlString = line.substring(pos); 3197 try 3198 { 3199 urlBytes = 3200 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber); 3201 } 3202 catch (final Exception e) 3203 { 3204 Debug.debugException(e); 3205 throw new LDIFException( 3206 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3207 firstLineNumber, e), 3208 firstLineNumber, true, ldifLines, e); 3209 } 3210 3211 if (attrObject == null) 3212 { 3213 attr = new Attribute(attributeName, matchingRule, urlBytes); 3214 attributes.put(lowerName, attr); 3215 } 3216 else 3217 { 3218 try 3219 { 3220 if (! ldifAttr.addValue(new ASN1OctetString(urlBytes), 3221 duplicateValueBehavior)) 3222 { 3223 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3224 { 3225 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3226 firstLineNumber, attributeName), firstLineNumber, true, 3227 ldifLines, null); 3228 } 3229 } 3230 } 3231 catch (final LDIFException le) 3232 { 3233 Debug.debugException(le); 3234 throw le; 3235 } 3236 catch (final Exception e) 3237 { 3238 Debug.debugException(e); 3239 throw new LDIFException( 3240 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3241 firstLineNumber, e), 3242 firstLineNumber, true, ldifLines, e); 3243 } 3244 } 3245 } 3246 else 3247 { 3248 // Skip over any spaces leading up to the value, and then the rest of 3249 // the string is the value. 3250 int pos = colonPos+1; 3251 while ((pos < length) && (line.charAt(pos) == ' ')) 3252 { 3253 pos++; 3254 } 3255 3256 final String valueString = line.substring(pos); 3257 if (attrObject == null) 3258 { 3259 attr = new Attribute(attributeName, matchingRule, valueString); 3260 attributes.put(lowerName, attr); 3261 } 3262 else 3263 { 3264 try 3265 { 3266 if (! ldifAttr.addValue(new ASN1OctetString(valueString), 3267 duplicateValueBehavior)) 3268 { 3269 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3270 { 3271 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3272 firstLineNumber, attributeName), firstLineNumber, true, 3273 ldifLines, null); 3274 } 3275 } 3276 } 3277 catch (final LDAPException le) 3278 { 3279 throw new LDIFException( 3280 ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, firstLineNumber, 3281 attributeName, StaticUtils.getExceptionMessage(le)), 3282 firstLineNumber, true, ldifLines, le); 3283 } 3284 } 3285 } 3286 } 3287 3288 final ArrayList<Attribute> attrList = new ArrayList<>(attributes.size()); 3289 for (final Object o : attributes.values()) 3290 { 3291 if (o instanceof Attribute) 3292 { 3293 attrList.add((Attribute) o); 3294 } 3295 else 3296 { 3297 attrList.add(((LDIFAttribute) o).toAttribute()); 3298 } 3299 } 3300 3301 return attrList; 3302 } 3303 3304 3305 3306 /** 3307 * Retrieves the bytes that make up the file referenced by the given URL. 3308 * 3309 * @param urlString The string representation of the URL to retrieve. 3310 * @param relativeBasePath The base path that will be prepended to relative 3311 * paths in order to obtain an absolute path. 3312 * @param firstLineNumber The line number for the start of the record. 3313 * 3314 * @return The bytes contained in the specified file, or an empty array if 3315 * the specified file is empty. 3316 * 3317 * @throws LDIFException If the provided URL is malformed or references a 3318 * nonexistent file. 3319 * 3320 * @throws IOException If a problem is encountered while attempting to read 3321 * from the target file. 3322 */ 3323 private static byte[] retrieveURLBytes(final String urlString, 3324 final String relativeBasePath, 3325 final long firstLineNumber) 3326 throws LDIFException, IOException 3327 { 3328 int pos; 3329 final String path; 3330 final String lowerURLString = StaticUtils.toLowerCase(urlString); 3331 if (lowerURLString.startsWith("file:/")) 3332 { 3333 pos = 6; 3334 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/')) 3335 { 3336 pos++; 3337 } 3338 3339 path = urlString.substring(pos-1); 3340 } 3341 else if (lowerURLString.startsWith("file:")) 3342 { 3343 // A file: URL that doesn't include a slash will be interpreted as a 3344 // relative path. 3345 path = relativeBasePath + urlString.substring(5); 3346 } 3347 else 3348 { 3349 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString), 3350 firstLineNumber, true); 3351 } 3352 3353 final File f = new File(path); 3354 if (! f.exists()) 3355 { 3356 throw new LDIFException( 3357 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()), 3358 firstLineNumber, true); 3359 } 3360 3361 // In order to conserve memory, we'll only allow values to be read from 3362 // files no larger than 10 megabytes. 3363 final long fileSize = f.length(); 3364 if (fileSize > (10 * 1024 * 1024)) 3365 { 3366 throw new LDIFException( 3367 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(), 3368 (10*1024*1024)), 3369 firstLineNumber, true); 3370 } 3371 3372 int fileBytesRemaining = (int) fileSize; 3373 final byte[] fileData = new byte[(int) fileSize]; 3374 final FileInputStream fis = new FileInputStream(f); 3375 try 3376 { 3377 int fileBytesRead = 0; 3378 while (fileBytesRead < fileSize) 3379 { 3380 final int bytesRead = 3381 fis.read(fileData, fileBytesRead, fileBytesRemaining); 3382 if (bytesRead < 0) 3383 { 3384 // We hit the end of the file before we expected to. This shouldn't 3385 // happen unless the file size changed since we first looked at it, 3386 // which we won't allow. 3387 throw new LDIFException( 3388 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, 3389 f.getAbsolutePath()), 3390 firstLineNumber, true); 3391 } 3392 3393 fileBytesRead += bytesRead; 3394 fileBytesRemaining -= bytesRead; 3395 } 3396 3397 if (fis.read() != -1) 3398 { 3399 // There is still more data to read. This shouldn't happen unless the 3400 // file size changed since we first looked at it, which we won't allow. 3401 throw new LDIFException( 3402 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()), 3403 firstLineNumber, true); 3404 } 3405 } 3406 finally 3407 { 3408 fis.close(); 3409 } 3410 3411 return fileData; 3412 } 3413 3414 3415 3416 /** 3417 * Parses the data available through the provided iterator into an array of 3418 * modifications suitable for use in a modify change record. 3419 * 3420 * @param dn The DN of the entry being parsed. 3421 * @param trailingSpaceBehavior The behavior that should be exhibited when 3422 * encountering attribute values which are not 3423 * base64-encoded but contain trailing spaces. 3424 * @param ldifLines The lines that comprise the LDIF 3425 * representation of the full record being 3426 * parsed. 3427 * @param iterator The iterator to use to access the 3428 * modification data. 3429 * @param firstLineNumber The line number for the start of the record. 3430 * @param schema The schema to use in processing. 3431 * 3432 * @return An array containing the modifications that were read. 3433 * 3434 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3435 * set of modifications. 3436 */ 3437 private static Modification[] parseModifications(final String dn, 3438 final TrailingSpaceBehavior trailingSpaceBehavior, 3439 final ArrayList<StringBuilder> ldifLines, 3440 final Iterator<StringBuilder> iterator, 3441 final long firstLineNumber, final Schema schema) 3442 throws LDIFException 3443 { 3444 final ArrayList<Modification> modList = new ArrayList<>(ldifLines.size()); 3445 3446 while (iterator.hasNext()) 3447 { 3448 // The first line must start with "add:", "delete:", "replace:", or 3449 // "increment:" followed by an attribute name. 3450 StringBuilder line = iterator.next(); 3451 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3452 int colonPos = line.indexOf(":"); 3453 if (colonPos < 0) 3454 { 3455 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber), 3456 firstLineNumber, true, ldifLines, null); 3457 } 3458 3459 final ModificationType modType; 3460 final String modTypeStr = 3461 StaticUtils.toLowerCase(line.substring(0, colonPos)); 3462 if (modTypeStr.equals("add")) 3463 { 3464 modType = ModificationType.ADD; 3465 } 3466 else if (modTypeStr.equals("delete")) 3467 { 3468 modType = ModificationType.DELETE; 3469 } 3470 else if (modTypeStr.equals("replace")) 3471 { 3472 modType = ModificationType.REPLACE; 3473 } 3474 else if (modTypeStr.equals("increment")) 3475 { 3476 modType = ModificationType.INCREMENT; 3477 } 3478 else 3479 { 3480 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr, 3481 firstLineNumber), 3482 firstLineNumber, true, ldifLines, null); 3483 } 3484 3485 String attributeName; 3486 int length = line.length(); 3487 if (length == (colonPos+1)) 3488 { 3489 // The colon was the last character on the line. This is not 3490 // acceptable. 3491 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3492 firstLineNumber), 3493 firstLineNumber, true, ldifLines, null); 3494 } 3495 else if (line.charAt(colonPos+1) == ':') 3496 { 3497 // Skip over any spaces leading up to the value, and then the rest of 3498 // the string is the base64-encoded attribute name. 3499 int pos = colonPos+2; 3500 while ((pos < length) && (line.charAt(pos) == ' ')) 3501 { 3502 pos++; 3503 } 3504 3505 try 3506 { 3507 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3508 attributeName = new String(dnBytes, StandardCharsets.UTF_8); 3509 } 3510 catch (final ParseException pe) 3511 { 3512 Debug.debugException(pe); 3513 throw new LDIFException( 3514 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3515 firstLineNumber, pe.getMessage()), 3516 firstLineNumber, true, ldifLines, pe); 3517 } 3518 catch (final Exception e) 3519 { 3520 Debug.debugException(e); 3521 throw new LDIFException( 3522 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3523 firstLineNumber, e), 3524 firstLineNumber, true, ldifLines, e); 3525 } 3526 } 3527 else 3528 { 3529 // Skip over any spaces leading up to the value, and then the rest of 3530 // the string is the attribute name. 3531 int pos = colonPos+1; 3532 while ((pos < length) && (line.charAt(pos) == ' ')) 3533 { 3534 pos++; 3535 } 3536 3537 attributeName = line.substring(pos); 3538 } 3539 3540 if (attributeName.isEmpty()) 3541 { 3542 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3543 firstLineNumber), 3544 firstLineNumber, true, ldifLines, null); 3545 } 3546 3547 3548 // The next zero or more lines may be the set of attribute values. Keep 3549 // reading until we reach the end of the iterator or until we find a line 3550 // with just a "-". 3551 final ArrayList<ASN1OctetString> valueList = 3552 new ArrayList<>(ldifLines.size()); 3553 while (iterator.hasNext()) 3554 { 3555 line = iterator.next(); 3556 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3557 if (line.toString().equals("-")) 3558 { 3559 break; 3560 } 3561 3562 colonPos = line.indexOf(":"); 3563 if (colonPos < 0) 3564 { 3565 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3566 firstLineNumber, true, ldifLines, null); 3567 } 3568 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName)) 3569 { 3570 // There are a couple of cases in which this might be acceptable: 3571 // - If the two names are logically equivalent, but have an alternate 3572 // name (or OID) for the target attribute type, or if there are 3573 // attribute options and the options are just in a different order. 3574 // - If this is the first value for the target attribute and the 3575 // alternate name includes a "binary" option that the original 3576 // attribute name did not have. In this case, all subsequent values 3577 // will also be required to have the binary option. 3578 final String alternateName = line.substring(0, colonPos); 3579 3580 3581 // Check to see if the base names are equivalent. 3582 boolean baseNameEquivalent = false; 3583 final String expectedBaseName = Attribute.getBaseName(attributeName); 3584 final String alternateBaseName = Attribute.getBaseName(alternateName); 3585 if (alternateBaseName.equalsIgnoreCase(expectedBaseName)) 3586 { 3587 baseNameEquivalent = true; 3588 } 3589 else 3590 { 3591 if (schema != null) 3592 { 3593 final AttributeTypeDefinition expectedAT = 3594 schema.getAttributeType(expectedBaseName); 3595 final AttributeTypeDefinition alternateAT = 3596 schema.getAttributeType(alternateBaseName); 3597 if ((expectedAT != null) && (alternateAT != null) && 3598 expectedAT.equals(alternateAT)) 3599 { 3600 baseNameEquivalent = true; 3601 } 3602 } 3603 } 3604 3605 3606 // Check to see if the attribute options are equivalent. 3607 final Set<String> expectedOptions = 3608 Attribute.getOptions(attributeName); 3609 final Set<String> lowerExpectedOptions = new HashSet<>( 3610 StaticUtils.computeMapCapacity(expectedOptions.size())); 3611 for (final String s : expectedOptions) 3612 { 3613 lowerExpectedOptions.add(StaticUtils.toLowerCase(s)); 3614 } 3615 3616 final Set<String> alternateOptions = 3617 Attribute.getOptions(alternateName); 3618 final Set<String> lowerAlternateOptions = new HashSet<>( 3619 StaticUtils.computeMapCapacity(alternateOptions.size())); 3620 for (final String s : alternateOptions) 3621 { 3622 lowerAlternateOptions.add(StaticUtils.toLowerCase(s)); 3623 } 3624 3625 final boolean optionsEquivalent = 3626 lowerAlternateOptions.equals(lowerExpectedOptions); 3627 3628 3629 if (baseNameEquivalent && optionsEquivalent) 3630 { 3631 // This is fine. The two attribute descriptions are logically 3632 // equivalent. We'll continue using the attribute description that 3633 // was provided first. 3634 } 3635 else if (valueList.isEmpty() && baseNameEquivalent && 3636 lowerAlternateOptions.remove("binary") && 3637 lowerAlternateOptions.equals(lowerExpectedOptions)) 3638 { 3639 // This means that the provided value is the first value for the 3640 // attribute, and that the only significant difference is that the 3641 // provided attribute description included an unexpected "binary" 3642 // option. We'll accept this, but will require any additional 3643 // values for this modification to also include the binary option, 3644 // and we'll use the binary option in the attribute that is 3645 // eventually created. 3646 attributeName = alternateName; 3647 } 3648 else 3649 { 3650 // This means that either the base names are different or the sets 3651 // of options are incompatible. This is not acceptable. 3652 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get( 3653 firstLineNumber, 3654 line.substring(0, colonPos), 3655 attributeName), 3656 firstLineNumber, true, ldifLines, null); 3657 } 3658 } 3659 3660 length = line.length(); 3661 final ASN1OctetString value; 3662 if (length == (colonPos+1)) 3663 { 3664 // The colon was the last character on the line. This is fine. 3665 value = new ASN1OctetString(); 3666 } 3667 else if (line.charAt(colonPos+1) == ':') 3668 { 3669 // Skip over any spaces leading up to the value, and then the rest of 3670 // the string is the base64-encoded value. This is unusual and 3671 // unnecessary, but is nevertheless acceptable. 3672 int pos = colonPos+2; 3673 while ((pos < length) && (line.charAt(pos) == ' ')) 3674 { 3675 pos++; 3676 } 3677 3678 try 3679 { 3680 value = new ASN1OctetString(Base64.decode(line.substring(pos))); 3681 } 3682 catch (final ParseException pe) 3683 { 3684 Debug.debugException(pe); 3685 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3686 attributeName, firstLineNumber, pe.getMessage()), 3687 firstLineNumber, true, ldifLines, pe); 3688 } 3689 catch (final Exception e) 3690 { 3691 Debug.debugException(e); 3692 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3693 firstLineNumber, e), 3694 firstLineNumber, true, ldifLines, e); 3695 } 3696 } 3697 else 3698 { 3699 // Skip over any spaces leading up to the value, and then the rest of 3700 // the string is the value. 3701 int pos = colonPos+1; 3702 while ((pos < length) && (line.charAt(pos) == ' ')) 3703 { 3704 pos++; 3705 } 3706 3707 value = new ASN1OctetString(line.substring(pos)); 3708 } 3709 3710 valueList.add(value); 3711 } 3712 3713 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()]; 3714 valueList.toArray(values); 3715 3716 // If it's an add modification type, then there must be at least one 3717 // value. 3718 if ((modType.intValue() == ModificationType.ADD.intValue()) && 3719 (values.length == 0)) 3720 { 3721 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName, 3722 firstLineNumber), 3723 firstLineNumber, true, ldifLines, null); 3724 } 3725 3726 // If it's an increment modification type, then there must be exactly one 3727 // value. 3728 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) && 3729 (values.length != 1)) 3730 { 3731 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get( 3732 firstLineNumber, attributeName), 3733 firstLineNumber, true, ldifLines, null); 3734 } 3735 3736 modList.add(new Modification(modType, attributeName, values)); 3737 } 3738 3739 final Modification[] mods = new Modification[modList.size()]; 3740 modList.toArray(mods); 3741 return mods; 3742 } 3743 3744 3745 3746 /** 3747 * Parses the data available through the provided iterator as the body of a 3748 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional 3749 * newsuperior lines). 3750 * 3751 * @param ldifLines The lines that comprise the LDIF 3752 * representation of the full record being 3753 * parsed. 3754 * @param iterator The iterator to use to access the modify DN 3755 * data. 3756 * @param dn The current DN of the entry. 3757 * @param controls The set of controls to include in the change 3758 * record. 3759 * @param trailingSpaceBehavior The behavior that should be exhibited when 3760 * encountering attribute values which are not 3761 * base64-encoded but contain trailing spaces. 3762 * @param firstLineNumber The line number for the start of the record. 3763 * 3764 * @return The decoded modify DN change record. 3765 * 3766 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3767 * modify DN change record. 3768 */ 3769 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord( 3770 final ArrayList<StringBuilder> ldifLines, 3771 final Iterator<StringBuilder> iterator, final String dn, 3772 final List<Control> controls, 3773 final TrailingSpaceBehavior trailingSpaceBehavior, 3774 final long firstLineNumber) 3775 throws LDIFException 3776 { 3777 // The next line must be the new RDN, and it must start with "newrdn:". 3778 StringBuilder line = iterator.next(); 3779 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3780 int colonPos = line.indexOf(":"); 3781 if ((colonPos < 0) || 3782 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn"))) 3783 { 3784 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get( 3785 firstLineNumber), 3786 firstLineNumber, true, ldifLines, null); 3787 } 3788 3789 final String newRDN; 3790 int length = line.length(); 3791 if (length == (colonPos+1)) 3792 { 3793 // The colon was the last character on the line. This is not acceptable. 3794 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3795 firstLineNumber), 3796 firstLineNumber, true, ldifLines, null); 3797 } 3798 else if (line.charAt(colonPos+1) == ':') 3799 { 3800 // Skip over any spaces leading up to the value, and then the rest of the 3801 // string is the base64-encoded new RDN. 3802 int pos = colonPos+2; 3803 while ((pos < length) && (line.charAt(pos) == ' ')) 3804 { 3805 pos++; 3806 } 3807 3808 try 3809 { 3810 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3811 newRDN = new String(dnBytes, StandardCharsets.UTF_8); 3812 } 3813 catch (final ParseException pe) 3814 { 3815 Debug.debugException(pe); 3816 throw new LDIFException( 3817 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3818 pe.getMessage()), 3819 firstLineNumber, true, ldifLines, pe); 3820 } 3821 catch (final Exception e) 3822 { 3823 Debug.debugException(e); 3824 throw new LDIFException( 3825 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3826 e), 3827 firstLineNumber, true, ldifLines, e); 3828 } 3829 } 3830 else 3831 { 3832 // Skip over any spaces leading up to the value, and then the rest of the 3833 // string is the new RDN. 3834 int pos = colonPos+1; 3835 while ((pos < length) && (line.charAt(pos) == ' ')) 3836 { 3837 pos++; 3838 } 3839 3840 newRDN = line.substring(pos); 3841 } 3842 3843 if (newRDN.isEmpty()) 3844 { 3845 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3846 firstLineNumber), 3847 firstLineNumber, true, ldifLines, null); 3848 } 3849 3850 3851 // The next line must be the deleteOldRDN flag, and it must start with 3852 // 'deleteoldrdn:'. 3853 if (! iterator.hasNext()) 3854 { 3855 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3856 firstLineNumber), 3857 firstLineNumber, true, ldifLines, null); 3858 } 3859 3860 line = iterator.next(); 3861 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3862 colonPos = line.indexOf(":"); 3863 if ((colonPos < 0) || 3864 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn"))) 3865 { 3866 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3867 firstLineNumber), 3868 firstLineNumber, true, ldifLines, null); 3869 } 3870 3871 final String deleteOldRDNStr; 3872 length = line.length(); 3873 if (length == (colonPos+1)) 3874 { 3875 // The colon was the last character on the line. This is not acceptable. 3876 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get( 3877 firstLineNumber), 3878 firstLineNumber, true, ldifLines, null); 3879 } 3880 else if (line.charAt(colonPos+1) == ':') 3881 { 3882 // Skip over any spaces leading up to the value, and then the rest of the 3883 // string is the base64-encoded value. This is unusual and 3884 // unnecessary, but is nevertheless acceptable. 3885 int pos = colonPos+2; 3886 while ((pos < length) && (line.charAt(pos) == ' ')) 3887 { 3888 pos++; 3889 } 3890 3891 try 3892 { 3893 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 3894 deleteOldRDNStr = new String(changeTypeBytes, StandardCharsets.UTF_8); 3895 } 3896 catch (final ParseException pe) 3897 { 3898 Debug.debugException(pe); 3899 throw new LDIFException( 3900 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3901 firstLineNumber, pe.getMessage()), 3902 firstLineNumber, true, ldifLines, pe); 3903 } 3904 catch (final Exception e) 3905 { 3906 Debug.debugException(e); 3907 throw new LDIFException( 3908 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3909 firstLineNumber, e), 3910 firstLineNumber, true, ldifLines, e); 3911 } 3912 } 3913 else 3914 { 3915 // Skip over any spaces leading up to the value, and then the rest of the 3916 // string is the value. 3917 int pos = colonPos+1; 3918 while ((pos < length) && (line.charAt(pos) == ' ')) 3919 { 3920 pos++; 3921 } 3922 3923 deleteOldRDNStr = line.substring(pos); 3924 } 3925 3926 final boolean deleteOldRDN; 3927 if (deleteOldRDNStr.equals("0")) 3928 { 3929 deleteOldRDN = false; 3930 } 3931 else if (deleteOldRDNStr.equals("1")) 3932 { 3933 deleteOldRDN = true; 3934 } 3935 else if (deleteOldRDNStr.equalsIgnoreCase("false") || 3936 deleteOldRDNStr.equalsIgnoreCase("no")) 3937 { 3938 // This is technically illegal, but we'll allow it. 3939 deleteOldRDN = false; 3940 } 3941 else if (deleteOldRDNStr.equalsIgnoreCase("true") || 3942 deleteOldRDNStr.equalsIgnoreCase("yes")) 3943 { 3944 // This is also technically illegal, but we'll allow it. 3945 deleteOldRDN = false; 3946 } 3947 else 3948 { 3949 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get( 3950 deleteOldRDNStr, firstLineNumber), 3951 firstLineNumber, true, ldifLines, null); 3952 } 3953 3954 3955 // If there is another line, then it must be the new superior DN and it must 3956 // start with "newsuperior:". If this is absent, then it's fine. 3957 final String newSuperiorDN; 3958 if (iterator.hasNext()) 3959 { 3960 line = iterator.next(); 3961 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3962 colonPos = line.indexOf(":"); 3963 if ((colonPos < 0) || 3964 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior"))) 3965 { 3966 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get( 3967 firstLineNumber), 3968 firstLineNumber, true, ldifLines, null); 3969 } 3970 3971 length = line.length(); 3972 if (length == (colonPos+1)) 3973 { 3974 // The colon was the last character on the line. This is fine. 3975 newSuperiorDN = ""; 3976 } 3977 else if (line.charAt(colonPos+1) == ':') 3978 { 3979 // Skip over any spaces leading up to the value, and then the rest of 3980 // the string is the base64-encoded new superior DN. 3981 int pos = colonPos+2; 3982 while ((pos < length) && (line.charAt(pos) == ' ')) 3983 { 3984 pos++; 3985 } 3986 3987 try 3988 { 3989 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3990 newSuperiorDN = new String(dnBytes, StandardCharsets.UTF_8); 3991 } 3992 catch (final ParseException pe) 3993 { 3994 Debug.debugException(pe); 3995 throw new LDIFException( 3996 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 3997 firstLineNumber, pe.getMessage()), 3998 firstLineNumber, true, ldifLines, pe); 3999 } 4000 catch (final Exception e) 4001 { 4002 Debug.debugException(e); 4003 throw new LDIFException( 4004 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 4005 firstLineNumber, e), 4006 firstLineNumber, true, ldifLines, e); 4007 } 4008 } 4009 else 4010 { 4011 // Skip over any spaces leading up to the value, and then the rest of 4012 // the string is the new superior DN. 4013 int pos = colonPos+1; 4014 while ((pos < length) && (line.charAt(pos) == ' ')) 4015 { 4016 pos++; 4017 } 4018 4019 newSuperiorDN = line.substring(pos); 4020 } 4021 } 4022 else 4023 { 4024 newSuperiorDN = null; 4025 } 4026 4027 4028 // There must not be any more lines. 4029 if (iterator.hasNext()) 4030 { 4031 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber), 4032 firstLineNumber, true, ldifLines, null); 4033 } 4034 4035 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN, 4036 newSuperiorDN, controls); 4037 } 4038 4039 4040 4041 /** 4042 * Examines the line contained in the provided buffer to determine whether it 4043 * may contain one or more illegal trailing spaces. If it does, then those 4044 * spaces will either be stripped out or an exception will be thrown to 4045 * indicate that they are illegal. 4046 * 4047 * @param buffer The buffer to be examined. 4048 * @param dn The DN of the LDIF record being parsed. It 4049 * may be {@code null} if the DN is not yet 4050 * known (e.g., because the provided line is 4051 * expected to contain that DN). 4052 * @param firstLineNumber The approximate line number in the LDIF 4053 * source on which the LDIF record begins. 4054 * @param trailingSpaceBehavior The behavior that should be exhibited when 4055 * encountering attribute values which are not 4056 * base64-encoded but contain trailing spaces. 4057 * 4058 * @throws LDIFException If the line contained in the provided buffer ends 4059 * with one or more illegal trailing spaces and 4060 * {@code stripTrailingSpaces} was provided with a 4061 * value of {@code false}. 4062 */ 4063 private static void handleTrailingSpaces(final StringBuilder buffer, 4064 final String dn, final long firstLineNumber, 4065 final TrailingSpaceBehavior trailingSpaceBehavior) 4066 throws LDIFException 4067 { 4068 int pos = buffer.length() - 1; 4069 boolean trailingFound = false; 4070 while ((pos >= 0) && (buffer.charAt(pos) == ' ')) 4071 { 4072 trailingFound = true; 4073 pos--; 4074 } 4075 4076 if (trailingFound && (buffer.charAt(pos) != ':')) 4077 { 4078 switch (trailingSpaceBehavior) 4079 { 4080 case STRIP: 4081 buffer.setLength(pos+1); 4082 break; 4083 4084 case REJECT: 4085 if (dn == null) 4086 { 4087 throw new LDIFException( 4088 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber, 4089 buffer.toString()), 4090 firstLineNumber, true); 4091 } 4092 else 4093 { 4094 throw new LDIFException( 4095 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn, 4096 firstLineNumber, buffer.toString()), 4097 firstLineNumber, true); 4098 } 4099 4100 case RETAIN: 4101 default: 4102 // No action will be taken. 4103 break; 4104 } 4105 } 4106 } 4107 4108 4109 4110 /** 4111 * This represents an unparsed LDIFRecord. It stores the line number of the 4112 * first line of the record and each line of the record. 4113 */ 4114 private static final class UnparsedLDIFRecord 4115 { 4116 private final ArrayList<StringBuilder> lineList; 4117 private final long firstLineNumber; 4118 private final Exception failureCause; 4119 private final boolean isEOF; 4120 private final DuplicateValueBehavior duplicateValueBehavior; 4121 private final Schema schema; 4122 private final TrailingSpaceBehavior trailingSpaceBehavior; 4123 4124 4125 4126 /** 4127 * Constructor. 4128 * 4129 * @param lineList The lines that comprise the LDIF record. 4130 * @param duplicateValueBehavior The behavior to exhibit if the entry 4131 * contains duplicate attribute values. 4132 * @param trailingSpaceBehavior Specifies the behavior to exhibit when 4133 * encountering trailing spaces in 4134 * non-base64-encoded attribute values. 4135 * @param schema The schema to use when parsing, if 4136 * applicable. 4137 * @param firstLineNumber The first line number of the LDIF record. 4138 */ 4139 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList, 4140 final DuplicateValueBehavior duplicateValueBehavior, 4141 final TrailingSpaceBehavior trailingSpaceBehavior, 4142 final Schema schema, final long firstLineNumber) 4143 { 4144 this.lineList = lineList; 4145 this.firstLineNumber = firstLineNumber; 4146 this.duplicateValueBehavior = duplicateValueBehavior; 4147 this.trailingSpaceBehavior = trailingSpaceBehavior; 4148 this.schema = schema; 4149 4150 failureCause = null; 4151 isEOF = 4152 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty()); 4153 } 4154 4155 4156 4157 /** 4158 * Constructor. 4159 * 4160 * @param failureCause The Exception thrown when reading from the input. 4161 */ 4162 private UnparsedLDIFRecord(final Exception failureCause) 4163 { 4164 this.failureCause = failureCause; 4165 4166 lineList = null; 4167 firstLineNumber = 0; 4168 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 4169 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 4170 schema = null; 4171 isEOF = false; 4172 } 4173 4174 4175 4176 /** 4177 * Return the lines that comprise the LDIF record. 4178 * 4179 * @return The lines that comprise the LDIF record. 4180 */ 4181 private ArrayList<StringBuilder> getLineList() 4182 { 4183 return lineList; 4184 } 4185 4186 4187 4188 /** 4189 * Retrieves the behavior to exhibit when encountering duplicate attribute 4190 * values. 4191 * 4192 * @return The behavior to exhibit when encountering duplicate attribute 4193 * values. 4194 */ 4195 private DuplicateValueBehavior getDuplicateValueBehavior() 4196 { 4197 return duplicateValueBehavior; 4198 } 4199 4200 4201 4202 /** 4203 * Retrieves the behavior that should be exhibited when encountering 4204 * attribute values which are not base64-encoded but contain trailing 4205 * spaces. The LDIF specification strongly recommends that any value which 4206 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK 4207 * LDIF parser may be configured to automatically strip these spaces, to 4208 * preserve them, or to reject any entry or change record containing them. 4209 * 4210 * @return The behavior that should be exhibited when encountering 4211 * attribute values which are not base64-encoded but contain 4212 * trailing spaces. 4213 */ 4214 private TrailingSpaceBehavior getTrailingSpaceBehavior() 4215 { 4216 return trailingSpaceBehavior; 4217 } 4218 4219 4220 4221 /** 4222 * Retrieves the schema that should be used when parsing the record, if 4223 * applicable. 4224 * 4225 * @return The schema that should be used when parsing the record, or 4226 * {@code null} if none should be used. 4227 */ 4228 private Schema getSchema() 4229 { 4230 return schema; 4231 } 4232 4233 4234 4235 /** 4236 * Return the first line number of the LDIF record. 4237 * 4238 * @return The first line number of the LDIF record. 4239 */ 4240 private long getFirstLineNumber() 4241 { 4242 return firstLineNumber; 4243 } 4244 4245 4246 4247 /** 4248 * Return {@code true} iff the end of the input was reached. 4249 * 4250 * @return {@code true} iff the end of the input was reached. 4251 */ 4252 private boolean isEOF() 4253 { 4254 return isEOF; 4255 } 4256 4257 4258 4259 /** 4260 * Returns the reason that reading the record lines failed. This normally 4261 * is only non-null if something bad happened to the input stream (like 4262 * a disk read error). 4263 * 4264 * @return The reason that reading the record lines failed. 4265 */ 4266 private Exception getFailureCause() 4267 { 4268 return failureCause; 4269 } 4270 } 4271 4272 4273 /** 4274 * When processing in asynchronous mode, this thread is responsible for 4275 * reading the raw unparsed records from the input and submitting them for 4276 * processing. 4277 */ 4278 private final class LineReaderThread 4279 extends Thread 4280 { 4281 /** 4282 * Constructor. 4283 */ 4284 private LineReaderThread() 4285 { 4286 super("Asynchronous LDIF line reader"); 4287 setDaemon(true); 4288 } 4289 4290 4291 4292 /** 4293 * Reads raw, unparsed records from the input and submits them for 4294 * processing until the input is finished or closed. 4295 */ 4296 @Override() 4297 public void run() 4298 { 4299 try 4300 { 4301 boolean stopProcessing = false; 4302 while (!stopProcessing) 4303 { 4304 UnparsedLDIFRecord unparsedRecord; 4305 try 4306 { 4307 unparsedRecord = readUnparsedRecord(); 4308 } 4309 catch (final IOException e) 4310 { 4311 Debug.debugException(e); 4312 unparsedRecord = new UnparsedLDIFRecord(e); 4313 stopProcessing = true; 4314 } 4315 catch (final Exception e) 4316 { 4317 Debug.debugException(e); 4318 unparsedRecord = new UnparsedLDIFRecord(e); 4319 } 4320 4321 try 4322 { 4323 asyncParser.submit(unparsedRecord); 4324 } 4325 catch (final InterruptedException e) 4326 { 4327 Debug.debugException(e); 4328 // If this thread is interrupted, then someone wants us to stop 4329 // processing, so that's what we'll do. 4330 Thread.currentThread().interrupt(); 4331 stopProcessing = true; 4332 } 4333 4334 if ((unparsedRecord == null) || unparsedRecord.isEOF()) 4335 { 4336 stopProcessing = true; 4337 } 4338 } 4339 } 4340 finally 4341 { 4342 try 4343 { 4344 asyncParser.shutdown(); 4345 } 4346 catch (final InterruptedException e) 4347 { 4348 Debug.debugException(e); 4349 Thread.currentThread().interrupt(); 4350 } 4351 finally 4352 { 4353 asyncParsingComplete.set(true); 4354 } 4355 } 4356 } 4357 } 4358 4359 4360 4361 /** 4362 * Used to parse Records asynchronously. 4363 */ 4364 private final class RecordParser implements Processor<UnparsedLDIFRecord, 4365 LDIFRecord> 4366 { 4367 /** 4368 * {@inheritDoc} 4369 */ 4370 @Override() 4371 public LDIFRecord process(final UnparsedLDIFRecord input) 4372 throws LDIFException 4373 { 4374 LDIFRecord record = decodeRecord(input, relativeBasePath, schema); 4375 4376 if ((record instanceof Entry) && (entryTranslator != null)) 4377 { 4378 record = entryTranslator.translate((Entry) record, 4379 input.getFirstLineNumber()); 4380 4381 if (record == null) 4382 { 4383 record = SKIP_ENTRY; 4384 } 4385 } 4386 if ((record instanceof LDIFChangeRecord) && 4387 (changeRecordTranslator != null)) 4388 { 4389 record = changeRecordTranslator.translate((LDIFChangeRecord) record, 4390 input.getFirstLineNumber()); 4391 4392 if (record == null) 4393 { 4394 record = SKIP_ENTRY; 4395 } 4396 } 4397 return record; 4398 } 4399 } 4400}