001/* 002 * Copyright 2007-2018 Ping Identity Corporation 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright (C) 2008-2018 Ping Identity Corporation 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021package com.unboundid.ldif; 022 023 024 025import java.io.BufferedReader; 026import java.io.Closeable; 027import java.io.File; 028import java.io.FileInputStream; 029import java.io.InputStream; 030import java.io.InputStreamReader; 031import java.io.IOException; 032import java.nio.charset.StandardCharsets; 033import java.text.ParseException; 034import java.util.ArrayList; 035import java.util.Collection; 036import java.util.Iterator; 037import java.util.HashSet; 038import java.util.LinkedHashMap; 039import java.util.List; 040import java.util.Set; 041import java.util.concurrent.BlockingQueue; 042import java.util.concurrent.ArrayBlockingQueue; 043import java.util.concurrent.TimeUnit; 044import java.util.concurrent.atomic.AtomicBoolean; 045import java.nio.charset.Charset; 046 047import com.unboundid.asn1.ASN1OctetString; 048import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule; 049import com.unboundid.ldap.matchingrules.MatchingRule; 050import com.unboundid.ldap.sdk.Attribute; 051import com.unboundid.ldap.sdk.Control; 052import com.unboundid.ldap.sdk.Entry; 053import com.unboundid.ldap.sdk.Modification; 054import 
com.unboundid.ldap.sdk.ModificationType; 055import com.unboundid.ldap.sdk.LDAPException; 056import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition; 057import com.unboundid.ldap.sdk.schema.Schema; 058import com.unboundid.util.AggregateInputStream; 059import com.unboundid.util.Base64; 060import com.unboundid.util.LDAPSDKThreadFactory; 061import com.unboundid.util.ThreadSafety; 062import com.unboundid.util.ThreadSafetyLevel; 063import com.unboundid.util.parallel.AsynchronousParallelProcessor; 064import com.unboundid.util.parallel.Result; 065import com.unboundid.util.parallel.ParallelProcessor; 066import com.unboundid.util.parallel.Processor; 067 068import static com.unboundid.ldif.LDIFMessages.*; 069import static com.unboundid.util.Debug.*; 070import static com.unboundid.util.StaticUtils.*; 071import static com.unboundid.util.Validator.*; 072 073/** 074 * This class provides an LDIF reader, which can be used to read and decode 075 * entries and change records from a data source using the LDAP Data Interchange 076 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>. 077 * <BR> 078 * This class is not synchronized. If multiple threads read from the 079 * LDIFReader, they must be synchronized externally. 080 * <BR><BR> 081 * <H2>Example</H2> 082 * The following example iterates through all entries contained in an LDIF file 083 * and attempts to add them to a directory server: 084 * <PRE> 085 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile); 086 * 087 * int entriesRead = 0; 088 * int entriesAdded = 0; 089 * int errorsEncountered = 0; 090 * while (true) 091 * { 092 * Entry entry; 093 * try 094 * { 095 * entry = ldifReader.readEntry(); 096 * if (entry == null) 097 * { 098 * // All entries have been read. 
 *       break;
 *     }
 *
 *     entriesRead++;
 *   }
 *   catch (LDIFException le)
 *   {
 *     errorsEncountered++;
 *     if (le.mayContinueReading())
 *     {
 *       // A recoverable error occurred while attempting to read an entry,
 *       // at or near line number le.getLineNumber()
 *       // The entry will be skipped, but we'll try to keep reading from the
 *       // LDIF file.
 *       continue;
 *     }
 *     else
 *     {
 *       // An unrecoverable error occurred while attempting to read an entry
 *       // at or near line number le.getLineNumber()
 *       // No further LDIF processing will be performed.
 *       break;
 *     }
 *   }
 *   catch (IOException ioe)
 *   {
 *     // An I/O error occurred while attempting to read from the LDIF file.
 *     // No further LDIF processing will be performed.
 *     errorsEncountered++;
 *     break;
 *   }
 *
 *   LDAPResult addResult;
 *   try
 *   {
 *     addResult = connection.add(entry);
 *     // If we got here, then the change should have been processed
 *     // successfully.
 *     entriesAdded++;
 *   }
 *   catch (LDAPException le)
 *   {
 *     // If we got here, then the change attempt failed.
 *     addResult = le.toLDAPResult();
 *     errorsEncountered++;
 *   }
 * }
 *
 * ldifReader.close();
 * </PRE>
 */
@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
public final class LDIFReader
       implements Closeable
{
  /**
   * The default buffer size (128KB) that will be used when reading from the
   * data source.
   */
  public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;



  /**
   * When processing asynchronously, this determines how many of the allocated
   * worker threads are used to parse each batch of read entries.
   */
  private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;



  /**
   * When processing asynchronously, this specifies the size of the pending and
   * completed queues.
173 */ 174 private static final int ASYNC_QUEUE_SIZE = 500; 175 176 177 178 /** 179 * Special entry used internally to signal that the LDIFReaderEntryTranslator 180 * has signalled that a read Entry should be skipped by returning null, 181 * which normally implies EOF. 182 */ 183 private static final Entry SKIP_ENTRY = new Entry("cn=skipped"); 184 185 186 187 /** 188 * The default base path that will be prepended to relative paths. It will 189 * end with a trailing slash. 190 */ 191 private static final String DEFAULT_RELATIVE_BASE_PATH; 192 static 193 { 194 final File currentDir; 195 final String currentDirString = System.getProperty("user.dir"); 196 if (currentDirString == null) 197 { 198 currentDir = new File("."); 199 } 200 else 201 { 202 currentDir = new File(currentDirString); 203 } 204 205 final String currentDirAbsolutePath = currentDir.getAbsolutePath(); 206 if (currentDirAbsolutePath.endsWith(File.separator)) 207 { 208 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath; 209 } 210 else 211 { 212 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath + File.separator; 213 } 214 } 215 216 217 218 // The buffered reader that will be used to read LDIF data. 219 private final BufferedReader reader; 220 221 // The behavior that should be exhibited when encountering duplicate attribute 222 // values. 223 private volatile DuplicateValueBehavior duplicateValueBehavior; 224 225 // A line number counter. 226 private long lineNumberCounter = 0; 227 228 // The change record translator to use, if any. 229 private final LDIFReaderChangeRecordTranslator changeRecordTranslator; 230 231 // The entry translator to use, if any. 232 private final LDIFReaderEntryTranslator entryTranslator; 233 234 // The schema that will be used when processing, if applicable. 235 private Schema schema; 236 237 // Specifies the base path that will be prepended to relative paths for file 238 // URLs. 
  private volatile String relativeBasePath;

  // The behavior that should be exhibited with regard to illegal trailing
  // spaces in attribute values.
  private volatile TrailingSpaceBehavior trailingSpaceBehavior;

  // True iff we are processing asynchronously.
  private final boolean isAsync;

  //
  // The following only apply to asynchronous processing. The constructor sets
  // all three of them to null when the reader is synchronous.
  //

  // Parses entries asynchronously.
  private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
       asyncParser;

  // Set to true when the end of the input is reached.
  private final AtomicBoolean asyncParsingComplete;

  // The records that have been read and parsed.
  private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
       asyncParsedRecords;



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * Records will be read and parsed synchronously.
   *
   * @param  path  The path to the file from which the data is to be read. It
   *               must not be {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final String path)
         throws IOException
  {
    this(new FileInputStream(path));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and parses the LDIF records asynchronously using the specified number of
   * threads.
   *
   * @param  path             The path to the file from which the data is to be
   *                          read. It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final String path, final int numParseThreads)
         throws IOException
  {
    // The stream-based constructor takes care of decoding the data as UTF-8.
    this(new FileInputStream(path), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file.
   * Records will be read and parsed synchronously.
   *
   * @param  file  The file from which the data is to be read. It must not be
   *               {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File file)
         throws IOException
  {
    this(new FileInputStream(file));
  }



  /**
   * Creates a new LDIF reader that will read data from the specified file
   * and optionally parses the LDIF records asynchronously using the specified
   * number of threads.
   *
   * @param  file             The file from which the data is to be read. It
   *                          must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File file, final int numParseThreads)
         throws IOException
  {
    // The stream-based constructor takes care of decoding the data as UTF-8.
    this(new FileInputStream(file), numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param  files            The files from which the data is to be read. It
   *                          must not be {@code null} or empty.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   * @param  entryTranslator  The LDIFReaderEntryTranslator to apply to entries
   *                          before they are returned.
   *                          This is normally {@code null}, which causes
   *                          entries to be returned unaltered. This is
   *                          particularly useful when parsing the input file
   *                          in parallel because the entry translation is also
   *                          done in parallel.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File[] files, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
         throws IOException
  {
    // No change record translator is used by this variant.
    this(files, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param  files                   The files from which the data is to be
   *                                 read. It must not be {@code null} or
   *                                 empty.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned. This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered. This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned. This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered. This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
   */
  public LDIFReader(final File[] files, final int numParseThreads,
              final LDIFReaderEntryTranslator entryTranslator,
              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
         throws IOException
  {
    // This variant always reads the files using the UTF-8 character set.
    this(files, numParseThreads, entryTranslator, changeRecordTranslator,
         "UTF-8");
  }



  /**
   * Creates a new LDIF reader that will read data from the specified files in
   * the order in which they are provided and optionally parses the LDIF records
   * asynchronously using the specified number of threads.
   *
   * @param  files                   The files from which the data is to be
   *                                 read. It must not be {@code null} or
   *                                 empty.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned. This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered. This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned. This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered. This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   * @param  characterSet            The character set to use when reading from
   *                                 the input stream. It must not be
   *                                 {@code null}.
   *
   * @throws  IOException  If a problem occurs while opening the file for
   *                       reading.
450 */ 451 public LDIFReader(final File[] files, final int numParseThreads, 452 final LDIFReaderEntryTranslator entryTranslator, 453 final LDIFReaderChangeRecordTranslator changeRecordTranslator, 454 final String characterSet) 455 throws IOException 456 { 457 this(createAggregateInputStream(files), numParseThreads, entryTranslator, 458 changeRecordTranslator, characterSet); 459 } 460 461 462 463 /** 464 * Creates a new aggregate input stream that will read data from the specified 465 * files. If there are multiple files, then a "padding" file will be inserted 466 * between them to ensure that there is at least one blank line between the 467 * end of one file and the beginning of another. 468 * 469 * @param files The files from which the data is to be read. It must not be 470 * {@code null} or empty. 471 * 472 * @return The input stream to use to read data from the provided files. 473 * 474 * @throws IOException If a problem is encountered while attempting to 475 * create the input stream. 476 */ 477 private static InputStream createAggregateInputStream(final File... files) 478 throws IOException 479 { 480 if (files.length == 0) 481 { 482 throw new IOException(ERR_READ_NO_LDIF_FILES.get()); 483 } 484 else 485 { 486 return new AggregateInputStream(true, files); 487 } 488 } 489 490 491 492 /** 493 * Creates a new LDIF reader that will read data from the provided input 494 * stream. 495 * 496 * @param inputStream The input stream from which the data is to be read. 497 * It must not be {@code null}. 498 */ 499 public LDIFReader(final InputStream inputStream) 500 { 501 this(inputStream, 0); 502 } 503 504 505 506 /** 507 * Creates a new LDIF reader that will read data from the specified stream 508 * and parses the LDIF records asynchronously using the specified number of 509 * threads. 510 * 511 * @param inputStream The input stream from which the data is to be read. 512 * It must not be {@code null}. 
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads)
  {
    // UTF-8 is required by RFC 2849. Java guarantees it's always available.
    // The stream is wrapped in a buffered reader that uses the default buffer
    // size before delegating to the reader-based constructor.
    this(new BufferedReader(
              new InputStreamReader(inputStream, StandardCharsets.UTF_8),
              DEFAULT_BUFFER_SIZE),
         numParseThreads);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified stream
   * and parses the LDIF records asynchronously using the specified number of
   * threads.
   *
   * @param  inputStream      The input stream from which the data is to be read.
   *                          It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   * @param  entryTranslator  The LDIFReaderEntryTranslator to apply to read
   *                          entries before they are returned. This is normally
   *                          {@code null}, which causes entries to be returned
   *                          unaltered. This is particularly useful when parsing
   *                          the input file in parallel because the entry
   *                          translation is also done in parallel.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
  {
    // No change record translator is used by this variant.
    this(inputStream, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified stream
   * and parses the LDIF records asynchronously using the specified number of
   * threads. The data will be read using the UTF-8 character set.
   *
   * @param  inputStream             The input stream from which the data is to
   *                                 be read. It must not be {@code null}.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned. This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered. This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned. This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered. This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
              final LDIFReaderEntryTranslator entryTranslator,
              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
  {
    // UTF-8 is required by RFC 2849. Java guarantees it's always available.
    this(inputStream, numParseThreads, entryTranslator, changeRecordTranslator,
         "UTF-8");
  }



  /**
   * Creates a new LDIF reader that will read data from the specified stream
   * and parses the LDIF records asynchronously using the specified number of
   * threads.
   *
   * @param  inputStream             The input stream from which the data is to
   *                                 be read. It must not be {@code null}.
   * @param  numParseThreads         If this value is greater than zero, then
   *                                 the specified number of threads will be
   *                                 used to asynchronously read and parse the
   *                                 LDIF file.
   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
   *                                 entries before they are returned. This is
   *                                 normally {@code null}, which causes entries
   *                                 to be returned unaltered. This is
   *                                 particularly useful when parsing the input
   *                                 file in parallel because the entry
   *                                 translation is also done in parallel.
   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
   *                                 apply to change records before they are
   *                                 returned. This is normally {@code null},
   *                                 which causes change records to be returned
   *                                 unaltered. This is particularly useful
   *                                 when parsing the input file in parallel
   *                                 because the change record translation is
   *                                 also done in parallel.
   * @param  characterSet            The name of the character set to use when
   *                                 reading from the input stream. It must not
   *                                 be {@code null}, and because it is resolved
   *                                 with {@code Charset.forName} it must be the
   *                                 legal name of a supported character set.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final InputStream inputStream, final int numParseThreads,
              final LDIFReaderEntryTranslator entryTranslator,
              final LDIFReaderChangeRecordTranslator changeRecordTranslator,
              final String characterSet)
  {
    // The character set name is resolved with Charset.forName, and the stream
    // is wrapped in a buffered reader that uses the default buffer size.
    this(new BufferedReader(
              new InputStreamReader(inputStream, Charset.forName(characterSet)),
              DEFAULT_BUFFER_SIZE),
         numParseThreads, entryTranslator, changeRecordTranslator);
  }



  /**
   * Creates a new LDIF reader that will use the provided buffered reader to
   * read the LDIF data. The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849. Records will be read and parsed
   * synchronously.
   *
   * @param  reader  The buffered reader that will be used to read the LDIF
   *                 data. It must not be {@code null}.
   */
  public LDIFReader(final BufferedReader reader)
  {
    this(reader, 0);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads. The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader           The buffered reader that will be used to read the
   *                          LDIF data. It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *
   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
   *      constructor for more details about asynchronous processing.
   */
  public LDIFReader(final BufferedReader reader, final int numParseThreads)
  {
    // No entry translator is used by this variant.
    this(reader, numParseThreads, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads.
   * The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader           The buffered reader that will be used to read the
   *                          LDIF data. It must not be {@code null}.
   * @param  numParseThreads  If this value is greater than zero, then the
   *                          specified number of threads will be used to
   *                          asynchronously read and parse the LDIF file.
   *                          This should only be set to greater than zero when
   *                          performance analysis has demonstrated that reading
   *                          and parsing the LDIF is a bottleneck. The default
   *                          synchronous processing is normally fast enough.
   *                          There is little benefit in passing in a value
   *                          greater than four (unless there is an
   *                          LDIFReaderEntryTranslator that does time-consuming
   *                          processing). A value of zero implies the
   *                          default behavior of reading and parsing LDIF
   *                          records synchronously when one of the read
   *                          methods is called. It must not be negative.
   * @param  entryTranslator  The LDIFReaderEntryTranslator to apply to read
   *                          entries before they are returned. This is normally
   *                          {@code null}, which causes entries to be returned
   *                          unaltered. This is particularly useful when parsing
   *                          the input file in parallel because the entry
   *                          translation is also done in parallel.
   */
  public LDIFReader(final BufferedReader reader,
                    final int numParseThreads,
                    final LDIFReaderEntryTranslator entryTranslator)
  {
    // No change record translator is used by this variant.
    this(reader, numParseThreads, entryTranslator, null);
  }



  /**
   * Creates a new LDIF reader that will read data from the specified buffered
   * reader and parses the LDIF records asynchronously using the specified
   * number of threads. The encoding of the underlying Reader must be set to
   * "UTF-8" as required by RFC 2849.
   *
   * @param  reader                  The buffered reader that will be used to
   *                                 read the LDIF data. It must not be
   *                                 {@code null}.
730 * @param numParseThreads If this value is greater than zero, then 731 * the specified number of threads will be 732 * used to asynchronously read and parse the 733 * LDIF file. 734 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 735 * entries before they are returned. This is 736 * normally {@code null}, which causes entries 737 * to be returned unaltered. This is 738 * particularly useful when parsing the input 739 * file in parallel because the entry 740 * translation is also done in parallel. 741 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 742 * apply to change records before they are 743 * returned. This is normally {@code null}, 744 * which causes change records to be returned 745 * unaltered. This is particularly useful 746 * when parsing the input file in parallel 747 * because the change record translation is 748 * also done in parallel. 749 */ 750 public LDIFReader(final BufferedReader reader, final int numParseThreads, 751 final LDIFReaderEntryTranslator entryTranslator, 752 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 753 { 754 ensureNotNull(reader); 755 ensureTrue(numParseThreads >= 0, 756 "LDIFReader.numParseThreads must not be negative."); 757 758 this.reader = reader; 759 this.entryTranslator = entryTranslator; 760 this.changeRecordTranslator = changeRecordTranslator; 761 762 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 763 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 764 765 relativeBasePath = DEFAULT_RELATIVE_BASE_PATH; 766 767 if (numParseThreads == 0) 768 { 769 isAsync = false; 770 asyncParser = null; 771 asyncParsingComplete = null; 772 asyncParsedRecords = null; 773 } 774 else 775 { 776 isAsync = true; 777 asyncParsingComplete = new AtomicBoolean(false); 778 779 // Decodes entries in parallel. 
780 final LDAPSDKThreadFactory threadFactory = 781 new LDAPSDKThreadFactory("LDIFReader Worker", true, null); 782 final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser = 783 new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>( 784 new RecordParser(), threadFactory, numParseThreads, 785 ASYNC_MIN_PER_PARSING_THREAD); 786 787 final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new 788 ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE); 789 790 // The output queue must be a little more than twice as big as the input 791 // queue to more easily handle being shutdown in the middle of processing 792 // when the queues are full and threads are blocked. 793 asyncParsedRecords = new ArrayBlockingQueue 794 <Result<UnparsedLDIFRecord, LDIFRecord>>(2 * ASYNC_QUEUE_SIZE + 100); 795 796 asyncParser = new AsynchronousParallelProcessor 797 <UnparsedLDIFRecord, LDIFRecord>(pendingQueue, parallelParser, 798 asyncParsedRecords); 799 800 final LineReaderThread lineReaderThread = new LineReaderThread(); 801 lineReaderThread.start(); 802 } 803 } 804 805 806 807 /** 808 * Reads entries from the LDIF file with the specified path and returns them 809 * as a {@code List}. This is a convenience method that should only be used 810 * for data sets that are small enough so that running out of memory isn't a 811 * concern. 812 * 813 * @param path The path to the LDIF file containing the entries to be read. 814 * 815 * @return A list of the entries read from the given LDIF file. 816 * 817 * @throws IOException If a problem occurs while attempting to read data 818 * from the specified file. 819 * 820 * @throws LDIFException If a problem is encountered while attempting to 821 * decode data read as LDIF. 
   */
  public static List<Entry> readEntries(final String path)
         throws IOException, LDIFException
  {
    // The reader created here is closed by the private readEntries method.
    return readEntries(new LDIFReader(path));
  }



  /**
   * Reads entries from the specified LDIF file and returns them as a
   * {@code List}. This is a convenience method that should only be used for
   * data sets that are small enough so that running out of memory isn't a
   * concern.
   *
   * @param  file  A reference to the LDIF file containing the entries to be
   *               read.
   *
   * @return  A list of the entries read from the given LDIF file.
   *
   * @throws  IOException  If a problem occurs while attempting to read data
   *                       from the specified file.
   *
   * @throws  LDIFException  If a problem is encountered while attempting to
   *                         decode data read as LDIF.
   */
  public static List<Entry> readEntries(final File file)
         throws IOException, LDIFException
  {
    // The reader created here is closed by the private readEntries method.
    return readEntries(new LDIFReader(file));
  }



  /**
   * Reads and decodes LDIF entries from the provided input stream and
   * returns them as a {@code List}. This is a convenience method that should
   * only be used for data sets that are small enough so that running out of
   * memory isn't a concern.
   *
   * @param  inputStream  The input stream from which the entries should be
   *                      read. The input stream will be closed before
   *                      returning.
   *
   * @return  A list of the entries read from the given input stream.
   *
   * @throws  IOException  If a problem occurs while attempting to read data
   *                       from the input stream.
   *
   * @throws  LDIFException  If a problem is encountered while attempting to
   *                         decode data read as LDIF.
   */
  public static List<Entry> readEntries(final InputStream inputStream)
         throws IOException, LDIFException
  {
    // The reader created here is closed by the private readEntries method.
    return readEntries(new LDIFReader(inputStream));
  }



  /**
   * Reads entries from the provided LDIF reader and returns them as a list.
884 * 885 * @param reader The reader from which the entries should be read. It will 886 * be closed before returning. 887 * 888 * @return A list of the entries read from the provided reader. 889 * 890 * @throws IOException If a problem was encountered while attempting to read 891 * data from the LDIF data source. 892 * 893 * @throws LDIFException If a problem is encountered while attempting to 894 * decode data read as LDIF. 895 */ 896 private static List<Entry> readEntries(final LDIFReader reader) 897 throws IOException, LDIFException 898 { 899 try 900 { 901 final ArrayList<Entry> entries = new ArrayList<Entry>(10); 902 while (true) 903 { 904 final Entry e = reader.readEntry(); 905 if (e == null) 906 { 907 break; 908 } 909 910 entries.add(e); 911 } 912 913 return entries; 914 } 915 finally 916 { 917 reader.close(); 918 } 919 } 920 921 922 923 /** 924 * Closes this LDIF reader and the underlying LDIF source. 925 * 926 * @throws IOException If a problem occurs while closing the underlying LDIF 927 * source. 928 */ 929 public void close() 930 throws IOException 931 { 932 reader.close(); 933 934 if (isAsync()) 935 { 936 // Closing the reader will trigger the LineReaderThread to complete, but 937 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid 938 // this, we clear out the completed output queue, which is larger than 939 // the input queue, so the LineReaderThread will stop reading and 940 // shutdown the asyncParser. 941 asyncParsedRecords.clear(); 942 } 943 } 944 945 946 947 /** 948 * Indicates whether to ignore any duplicate values encountered while reading 949 * LDIF records. 950 * 951 * @return {@code true} if duplicate values should be ignored, or 952 * {@code false} if any LDIF records containing duplicate values 953 * should be rejected. 954 * 955 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead. 
956 */ 957 @Deprecated() 958 public boolean ignoreDuplicateValues() 959 { 960 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP); 961 } 962 963 964 965 /** 966 * Specifies whether to ignore any duplicate values encountered while reading 967 * LDIF records. 968 * 969 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 970 * attribute values encountered while reading 971 * LDIF records. 972 * 973 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead. 974 */ 975 @Deprecated() 976 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues) 977 { 978 if (ignoreDuplicateValues) 979 { 980 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 981 } 982 else 983 { 984 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 985 } 986 } 987 988 989 990 /** 991 * Retrieves the behavior that should be exhibited if the LDIF reader 992 * encounters an entry with duplicate values. 993 * 994 * @return The behavior that should be exhibited if the LDIF reader 995 * encounters an entry with duplicate values. 996 */ 997 public DuplicateValueBehavior getDuplicateValueBehavior() 998 { 999 return duplicateValueBehavior; 1000 } 1001 1002 1003 1004 /** 1005 * Specifies the behavior that should be exhibited if the LDIF reader 1006 * encounters an entry with duplicate values. 1007 * 1008 * @param duplicateValueBehavior The behavior that should be exhibited if 1009 * the LDIF reader encounters an entry with 1010 * duplicate values. 1011 */ 1012 public void setDuplicateValueBehavior( 1013 final DuplicateValueBehavior duplicateValueBehavior) 1014 { 1015 this.duplicateValueBehavior = duplicateValueBehavior; 1016 } 1017 1018 1019 1020 /** 1021 * Indicates whether to strip off any illegal trailing spaces that may appear 1022 * in LDIF records (e.g., after an entry DN or attribute value). 
The LDIF 1023 * specification strongly recommends that any value which legitimately 1024 * contains trailing spaces be base64-encoded, and any spaces which appear 1025 * after the end of non-base64-encoded values may therefore be considered 1026 * invalid. If any such trailing spaces are encountered in an LDIF record and 1027 * they are not to be stripped, then an {@link LDIFException} will be thrown 1028 * for that record. 1029 * <BR><BR> 1030 * Note that this applies only to spaces after the end of a value, and not to 1031 * spaces which may appear at the end of a line for a value that is wrapped 1032 * and continued on the next line. 1033 * 1034 * @return {@code true} if illegal trailing spaces should be stripped off, or 1035 * {@code false} if LDIF records containing illegal trailing spaces 1036 * should be rejected. 1037 * 1038 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead. 1039 */ 1040 @Deprecated() 1041 public boolean stripTrailingSpaces() 1042 { 1043 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP); 1044 } 1045 1046 1047 1048 /** 1049 * Specifies whether to strip off any illegal trailing spaces that may appear 1050 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 1051 * specification strongly recommends that any value which legitimately 1052 * contains trailing spaces be base64-encoded, and any spaces which appear 1053 * after the end of non-base64-encoded values may therefore be considered 1054 * invalid. If any such trailing spaces are encountered in an LDIF record and 1055 * they are not to be stripped, then an {@link LDIFException} will be thrown 1056 * for that record. 1057 * <BR><BR> 1058 * Note that this applies only to spaces after the end of a value, and not to 1059 * spaces which may appear at the end of a line for a value that is wrapped 1060 * and continued on the next line. 
1061 * 1062 * @param stripTrailingSpaces Indicates whether to strip off any illegal 1063 * trailing spaces, or {@code false} if LDIF 1064 * records containing them should be rejected. 1065 * 1066 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead. 1067 */ 1068 @Deprecated() 1069 public void setStripTrailingSpaces(final boolean stripTrailingSpaces) 1070 { 1071 trailingSpaceBehavior = stripTrailingSpaces 1072 ? TrailingSpaceBehavior.STRIP 1073 : TrailingSpaceBehavior.REJECT; 1074 } 1075 1076 1077 1078 /** 1079 * Retrieves the behavior that should be exhibited when encountering attribute 1080 * values which are not base64-encoded but contain trailing spaces. The LDIF 1081 * specification strongly recommends that any value which legitimately 1082 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1083 * may be configured to automatically strip these spaces, to preserve them, or 1084 * to reject any entry or change record containing them. 1085 * 1086 * @return The behavior that should be exhibited when encountering attribute 1087 * values which are not base64-encoded but contain trailing spaces. 1088 */ 1089 public TrailingSpaceBehavior getTrailingSpaceBehavior() 1090 { 1091 return trailingSpaceBehavior; 1092 } 1093 1094 1095 1096 /** 1097 * Specifies the behavior that should be exhibited when encountering attribute 1098 * values which are not base64-encoded but contain trailing spaces. The LDIF 1099 * specification strongly recommends that any value which legitimately 1100 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1101 * may be configured to automatically strip these spaces, to preserve them, or 1102 * to reject any entry or change record containing them. 1103 * 1104 * @param trailingSpaceBehavior The behavior that should be exhibited when 1105 * encountering attribute values which are not 1106 * base64-encoded but contain trailing spaces. 
1107 */ 1108 public void setTrailingSpaceBehavior( 1109 final TrailingSpaceBehavior trailingSpaceBehavior) 1110 { 1111 this.trailingSpaceBehavior = trailingSpaceBehavior; 1112 } 1113 1114 1115 1116 /** 1117 * Retrieves the base path that will be prepended to relative paths in order 1118 * to obtain an absolute path. This will only be used for "file:" URLs that 1119 * have paths which do not begin with a slash. 1120 * 1121 * @return The base path that will be prepended to relative paths in order to 1122 * obtain an absolute path. 1123 */ 1124 public String getRelativeBasePath() 1125 { 1126 return relativeBasePath; 1127 } 1128 1129 1130 1131 /** 1132 * Specifies the base path that will be prepended to relative paths in order 1133 * to obtain an absolute path. This will only be used for "file:" URLs that 1134 * have paths which do not begin with a space. 1135 * 1136 * @param relativeBasePath The base path that will be prepended to relative 1137 * paths in order to obtain an absolute path. 1138 */ 1139 public void setRelativeBasePath(final String relativeBasePath) 1140 { 1141 setRelativeBasePath(new File(relativeBasePath)); 1142 } 1143 1144 1145 1146 /** 1147 * Specifies the base path that will be prepended to relative paths in order 1148 * to obtain an absolute path. This will only be used for "file:" URLs that 1149 * have paths which do not begin with a space. 1150 * 1151 * @param relativeBasePath The base path that will be prepended to relative 1152 * paths in order to obtain an absolute path. 1153 */ 1154 public void setRelativeBasePath(final File relativeBasePath) 1155 { 1156 final String path = relativeBasePath.getAbsolutePath(); 1157 if (path.endsWith(File.separator)) 1158 { 1159 this.relativeBasePath = path; 1160 } 1161 else 1162 { 1163 this.relativeBasePath = path + File.separator; 1164 } 1165 } 1166 1167 1168 1169 /** 1170 * Retrieves the schema that will be used when reading LDIF records, if 1171 * defined. 
1172 * 1173 * @return The schema that will be used when reading LDIF records, or 1174 * {@code null} if no schema should be used and all attributes should 1175 * be treated as case-insensitive strings. 1176 */ 1177 public Schema getSchema() 1178 { 1179 return schema; 1180 } 1181 1182 1183 1184 /** 1185 * Specifies the schema that should be used when reading LDIF records. 1186 * 1187 * @param schema The schema that should be used when reading LDIF records, 1188 * or {@code null} if no schema should be used and all 1189 * attributes should be treated as case-insensitive strings. 1190 */ 1191 public void setSchema(final Schema schema) 1192 { 1193 this.schema = schema; 1194 } 1195 1196 1197 1198 /** 1199 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1200 * change record. 1201 * 1202 * @return The record read from the LDIF source, or {@code null} if there are 1203 * no more entries to be read. 1204 * 1205 * @throws IOException If a problem occurs while trying to read from the 1206 * LDIF source. 1207 * 1208 * @throws LDIFException If the data read could not be parsed as an entry or 1209 * an LDIF change record. 1210 */ 1211 public LDIFRecord readLDIFRecord() 1212 throws IOException, LDIFException 1213 { 1214 if (isAsync()) 1215 { 1216 return readLDIFRecordAsync(); 1217 } 1218 else 1219 { 1220 return readLDIFRecordInternal(); 1221 } 1222 } 1223 1224 1225 1226 /** 1227 * Reads an entry from the LDIF source. 1228 * 1229 * @return The entry read from the LDIF source, or {@code null} if there are 1230 * no more entries to be read. 1231 * 1232 * @throws IOException If a problem occurs while attempting to read from the 1233 * LDIF source. 1234 * 1235 * @throws LDIFException If the data read could not be parsed as an entry. 
1236 */ 1237 public Entry readEntry() 1238 throws IOException, LDIFException 1239 { 1240 if (isAsync()) 1241 { 1242 return readEntryAsync(); 1243 } 1244 else 1245 { 1246 return readEntryInternal(); 1247 } 1248 } 1249 1250 1251 1252 /** 1253 * Reads an LDIF change record from the LDIF source. The LDIF record must 1254 * have a changetype. 1255 * 1256 * @return The change record read from the LDIF source, or {@code null} if 1257 * there are no more records to be read. 1258 * 1259 * @throws IOException If a problem occurs while attempting to read from the 1260 * LDIF source. 1261 * 1262 * @throws LDIFException If the data read could not be parsed as an LDIF 1263 * change record. 1264 */ 1265 public LDIFChangeRecord readChangeRecord() 1266 throws IOException, LDIFException 1267 { 1268 return readChangeRecord(false); 1269 } 1270 1271 1272 1273 /** 1274 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1275 * record does not have a changetype, then it may be assumed to be an add 1276 * change record. 1277 * 1278 * @param defaultAdd Indicates whether an LDIF record not containing a 1279 * changetype should be retrieved as an add change record. 1280 * If this is {@code false} and the record read does not 1281 * include a changetype, then an {@link LDIFException} 1282 * will be thrown. 1283 * 1284 * @return The change record read from the LDIF source, or {@code null} if 1285 * there are no more records to be read. 1286 * 1287 * @throws IOException If a problem occurs while attempting to read from the 1288 * LDIF source. 1289 * 1290 * @throws LDIFException If the data read could not be parsed as an LDIF 1291 * change record. 
1292 */ 1293 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd) 1294 throws IOException, LDIFException 1295 { 1296 if (isAsync()) 1297 { 1298 return readChangeRecordAsync(defaultAdd); 1299 } 1300 else 1301 { 1302 return readChangeRecordInternal(defaultAdd); 1303 } 1304 } 1305 1306 1307 1308 /** 1309 * Reads the next {@code LDIFRecord}, which was read and parsed by a different 1310 * thread. 1311 * 1312 * @return The next parsed record or {@code null} if there are no more 1313 * records to read. 1314 * 1315 * @throws IOException If IOException was thrown when reading or parsing 1316 * the record. 1317 * 1318 * @throws LDIFException If LDIFException was thrown parsing the record. 1319 */ 1320 private LDIFRecord readLDIFRecordAsync() 1321 throws IOException, LDIFException 1322 { 1323 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1324 LDIFRecord record = null; 1325 while (record == null) 1326 { 1327 result = readLDIFRecordResultAsync(); 1328 if (result == null) 1329 { 1330 return null; 1331 } 1332 1333 record = result.getOutput(); 1334 1335 // This is a special value that means we should skip this Entry. We have 1336 // to use something different than null because null means EOF. 1337 if (record == SKIP_ENTRY) 1338 { 1339 record = null; 1340 } 1341 } 1342 return record; 1343 } 1344 1345 1346 1347 /** 1348 * Reads an entry asynchronously from the LDIF source. 1349 * 1350 * @return The entry read from the LDIF source, or {@code null} if there are 1351 * no more entries to be read. 1352 * 1353 * @throws IOException If a problem occurs while attempting to read from the 1354 * LDIF source. 1355 * @throws LDIFException If the data read could not be parsed as an entry. 
1356 */ 1357 private Entry readEntryAsync() 1358 throws IOException, LDIFException 1359 { 1360 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1361 LDIFRecord record = null; 1362 while (record == null) 1363 { 1364 result = readLDIFRecordResultAsync(); 1365 if (result == null) 1366 { 1367 return null; 1368 } 1369 1370 record = result.getOutput(); 1371 1372 // This is a special value that means we should skip this Entry. We have 1373 // to use something different than null because null means EOF. 1374 if (record == SKIP_ENTRY) 1375 { 1376 record = null; 1377 } 1378 } 1379 1380 if (record instanceof Entry) 1381 { 1382 return (Entry) record; 1383 } 1384 else if (record instanceof LDIFChangeRecord) 1385 { 1386 try 1387 { 1388 // Some LDIFChangeRecord can be converted to an Entry. This is really 1389 // an edge case though. 1390 return ((LDIFChangeRecord)record).toEntry(); 1391 } 1392 catch (final LDIFException e) 1393 { 1394 debugException(e); 1395 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1396 throw new LDIFException(e.getExceptionMessage(), 1397 firstLineNumber, true, e); 1398 } 1399 } 1400 1401 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1402 "LDIFChangeRecord"); 1403 } 1404 1405 1406 1407 /** 1408 * Reads an LDIF change record from the LDIF source asynchronously. 1409 * Optionally, if the LDIF record does not have a changetype, then it may be 1410 * assumed to be an add change record. 1411 * 1412 * @param defaultAdd Indicates whether an LDIF record not containing a 1413 * changetype should be retrieved as an add change record. 1414 * If this is {@code false} and the record read does not 1415 * include a changetype, then an {@link LDIFException} will 1416 * be thrown. 1417 * 1418 * @return The change record read from the LDIF source, or {@code null} if 1419 * there are no more records to be read. 1420 * 1421 * @throws IOException If a problem occurs while attempting to read from the 1422 * LDIF source. 
1423 * @throws LDIFException If the data read could not be parsed as an LDIF 1424 * change record. 1425 */ 1426 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd) 1427 throws IOException, LDIFException 1428 { 1429 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1430 LDIFRecord record = null; 1431 while (record == null) 1432 { 1433 result = readLDIFRecordResultAsync(); 1434 if (result == null) 1435 { 1436 return null; 1437 } 1438 1439 record = result.getOutput(); 1440 1441 // This is a special value that means we should skip this Entry. We have 1442 // to use something different than null because null means EOF. 1443 if (record == SKIP_ENTRY) 1444 { 1445 record = null; 1446 } 1447 } 1448 1449 if (record instanceof LDIFChangeRecord) 1450 { 1451 return (LDIFChangeRecord) record; 1452 } 1453 else if (record instanceof Entry) 1454 { 1455 if (defaultAdd) 1456 { 1457 return new LDIFAddChangeRecord((Entry) record); 1458 } 1459 else 1460 { 1461 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1462 throw new LDIFException( 1463 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber, 1464 true); 1465 } 1466 } 1467 1468 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1469 "LDIFChangeRecord"); 1470 } 1471 1472 1473 1474 /** 1475 * Reads the next LDIF record, which was read and parsed asynchronously by 1476 * separate threads. 1477 * 1478 * @return The next LDIF record or {@code null} if there are no more records. 1479 * 1480 * @throws IOException If a problem occurs while attempting to read from the 1481 * LDIF source. 1482 * 1483 * @throws LDIFException If the data read could not be parsed as an entry. 
1484 */ 1485 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync() 1486 throws IOException, LDIFException 1487 { 1488 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1489 1490 // If the asynchronous reading and parsing is complete, then we don't have 1491 // to block waiting for the next record to show up on the queue. If there 1492 // isn't a record there, then return null (EOF) right away. 1493 if (asyncParsingComplete.get()) 1494 { 1495 result = asyncParsedRecords.poll(); 1496 } 1497 else 1498 { 1499 try 1500 { 1501 // We probably could just do a asyncParsedRecords.take() here, but 1502 // there are some edge case error scenarios where 1503 // asyncParsingComplete might be set without a special EOF sentinel 1504 // Result enqueued. So to guard against this, we have a very cautious 1505 // polling interval of 1 second. During normal processing, we never 1506 // have to wait for this to expire, when there is something to do 1507 // (like shutdown). 1508 while ((result == null) && (!asyncParsingComplete.get())) 1509 { 1510 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS); 1511 } 1512 1513 // There's a very small chance that we missed the value, so double-check 1514 if (result == null) 1515 { 1516 result = asyncParsedRecords.poll(); 1517 } 1518 } 1519 catch (final InterruptedException e) 1520 { 1521 debugException(e); 1522 Thread.currentThread().interrupt(); 1523 throw new IOException(e); 1524 } 1525 } 1526 if (result == null) 1527 { 1528 return null; 1529 } 1530 1531 rethrow(result.getFailureCause()); 1532 1533 // Check if we reached the end of the input 1534 final UnparsedLDIFRecord unparsedRecord = result.getInput(); 1535 if (unparsedRecord.isEOF()) 1536 { 1537 // This might have been set already by the LineReaderThread, but 1538 // just in case it hasn't gotten to it yet, do so here. 
1539 asyncParsingComplete.set(true); 1540 1541 // Enqueue this EOF result again for any other thread that might be 1542 // blocked in asyncParsedRecords.take() even though having multiple 1543 // threads call this method concurrently breaks the contract of this 1544 // class. 1545 try 1546 { 1547 asyncParsedRecords.put(result); 1548 } 1549 catch (final InterruptedException e) 1550 { 1551 // We shouldn't ever get interrupted because the put won't ever block. 1552 // Once we are done reading, this is the only item left in the queue, 1553 // so we should always be able to re-enqueue it. 1554 debugException(e); 1555 Thread.currentThread().interrupt(); 1556 } 1557 return null; 1558 } 1559 1560 return result; 1561 } 1562 1563 1564 1565 /** 1566 * Indicates whether this LDIF reader was constructed to perform asynchronous 1567 * processing. 1568 * 1569 * @return {@code true} if this LDIFReader was constructed to perform 1570 * asynchronous processing, or {@code false} if not. 1571 */ 1572 private boolean isAsync() 1573 { 1574 return isAsync; 1575 } 1576 1577 1578 1579 /** 1580 * If not {@code null}, rethrows the specified Throwable as either an 1581 * IOException or LDIFException. 1582 * 1583 * @param t The exception to rethrow. If it's {@code null}, then nothing 1584 * is thrown. 1585 * 1586 * @throws IOException If t is an IOException or a checked Exception that 1587 * is not an LDIFException. 1588 * @throws LDIFException If t is an LDIFException. 
1589 */ 1590 static void rethrow(final Throwable t) 1591 throws IOException, LDIFException 1592 { 1593 if (t == null) 1594 { 1595 return; 1596 } 1597 1598 if (t instanceof IOException) 1599 { 1600 throw (IOException) t; 1601 } 1602 else if (t instanceof LDIFException) 1603 { 1604 throw (LDIFException) t; 1605 } 1606 else if (t instanceof RuntimeException) 1607 { 1608 throw (RuntimeException) t; 1609 } 1610 else if (t instanceof Error) 1611 { 1612 throw (Error) t; 1613 } 1614 else 1615 { 1616 throw new IOException(t); 1617 } 1618 } 1619 1620 1621 1622 /** 1623 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1624 * change record. 1625 * 1626 * @return The record read from the LDIF source, or {@code null} if there are 1627 * no more entries to be read. 1628 * 1629 * @throws IOException If a problem occurs while trying to read from the 1630 * LDIF source. 1631 * @throws LDIFException If the data read could not be parsed as an entry or 1632 * an LDIF change record. 1633 */ 1634 private LDIFRecord readLDIFRecordInternal() 1635 throws IOException, LDIFException 1636 { 1637 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1638 return decodeRecord(unparsedRecord, relativeBasePath, schema); 1639 } 1640 1641 1642 1643 /** 1644 * Reads an entry from the LDIF source. 1645 * 1646 * @return The entry read from the LDIF source, or {@code null} if there are 1647 * no more entries to be read. 1648 * 1649 * @throws IOException If a problem occurs while attempting to read from the 1650 * LDIF source. 1651 * @throws LDIFException If the data read could not be parsed as an entry. 
1652 */ 1653 private Entry readEntryInternal() 1654 throws IOException, LDIFException 1655 { 1656 Entry e = null; 1657 while (e == null) 1658 { 1659 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1660 if (unparsedRecord.isEOF()) 1661 { 1662 return null; 1663 } 1664 1665 e = decodeEntry(unparsedRecord, relativeBasePath); 1666 debugLDIFRead(e); 1667 1668 if (entryTranslator != null) 1669 { 1670 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber()); 1671 } 1672 } 1673 return e; 1674 } 1675 1676 1677 1678 /** 1679 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1680 * record does not have a changetype, then it may be assumed to be an add 1681 * change record. 1682 * 1683 * @param defaultAdd Indicates whether an LDIF record not containing a 1684 * changetype should be retrieved as an add change record. 1685 * If this is {@code false} and the record read does not 1686 * include a changetype, then an {@link LDIFException} will 1687 * be thrown. 1688 * 1689 * @return The change record read from the LDIF source, or {@code null} if 1690 * there are no more records to be read. 1691 * 1692 * @throws IOException If a problem occurs while attempting to read from the 1693 * LDIF source. 1694 * @throws LDIFException If the data read could not be parsed as an LDIF 1695 * change record. 
1696 */ 1697 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd) 1698 throws IOException, LDIFException 1699 { 1700 LDIFChangeRecord r = null; 1701 while (r == null) 1702 { 1703 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1704 if (unparsedRecord.isEOF()) 1705 { 1706 return null; 1707 } 1708 1709 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd, 1710 schema); 1711 debugLDIFRead(r); 1712 1713 if (changeRecordTranslator != null) 1714 { 1715 r = changeRecordTranslator.translate(r, 1716 unparsedRecord.getFirstLineNumber()); 1717 } 1718 } 1719 return r; 1720 } 1721 1722 1723 1724 /** 1725 * Reads a record (either an entry or a change record) from the LDIF source 1726 * and places it in the line list. 1727 * 1728 * @return The line number for the first line of the entry that was read. 1729 * 1730 * @throws IOException If a problem occurs while attempting to read from the 1731 * LDIF source. 1732 * 1733 * @throws LDIFException If the data read could not be parsed as a valid 1734 * LDIF record. 1735 */ 1736 private UnparsedLDIFRecord readUnparsedRecord() 1737 throws IOException, LDIFException 1738 { 1739 final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20); 1740 boolean lastWasComment = false; 1741 long firstLineNumber = lineNumberCounter + 1; 1742 while (true) 1743 { 1744 final String line = reader.readLine(); 1745 lineNumberCounter++; 1746 1747 if (line == null) 1748 { 1749 // We've hit the end of the LDIF source. If we haven't read any entry 1750 // data, then return null. Otherwise, the last entry wasn't followed by 1751 // a blank line, which is OK, and we should decode that entry. 1752 if (lineList.isEmpty()) 1753 { 1754 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0), 1755 duplicateValueBehavior, trailingSpaceBehavior, schema, -1); 1756 } 1757 else 1758 { 1759 break; 1760 } 1761 } 1762 1763 if (line.length() == 0) 1764 { 1765 // It's a blank line. 
If we have read entry data, then this signals the 1766 // end of the entry. Otherwise, it's an extra space between entries, 1767 // which is OK. 1768 lastWasComment = false; 1769 if (lineList.isEmpty()) 1770 { 1771 firstLineNumber++; 1772 continue; 1773 } 1774 else 1775 { 1776 break; 1777 } 1778 } 1779 1780 if (line.charAt(0) == ' ') 1781 { 1782 // The line starts with a space, which means that it must be a 1783 // continuation of the previous line. This is true even if the last 1784 // line was a comment. 1785 if (lastWasComment) 1786 { 1787 // What we've read is part of a comment, so we don't care about its 1788 // content. 1789 } 1790 else if (lineList.isEmpty()) 1791 { 1792 throw new LDIFException( 1793 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter), 1794 lineNumberCounter, false); 1795 } 1796 else 1797 { 1798 lineList.get(lineList.size() - 1).append(line.substring(1)); 1799 lastWasComment = false; 1800 } 1801 } 1802 else if (line.charAt(0) == '#') 1803 { 1804 lastWasComment = true; 1805 } 1806 else 1807 { 1808 // We want to make sure that we skip over the "version:" line if it 1809 // exists, but that should only occur at the beginning of an entry where 1810 // it can't be confused with a possible "version" attribute. 1811 if (lineList.isEmpty() && line.startsWith("version:")) 1812 { 1813 lastWasComment = true; 1814 } 1815 else 1816 { 1817 lineList.add(new StringBuilder(line)); 1818 lastWasComment = false; 1819 } 1820 } 1821 } 1822 1823 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 1824 trailingSpaceBehavior, schema, firstLineNumber); 1825 } 1826 1827 1828 1829 /** 1830 * Decodes the provided set of LDIF lines as an entry. The provided set of 1831 * lines must contain exactly one entry. Long lines may be wrapped as per the 1832 * LDIF specification, and it is acceptable to have one or more blank lines 1833 * following the entry. A default trailing space behavior of 1834 * {@link TrailingSpaceBehavior#REJECT} will be used. 
1835 * 1836 * @param ldifLines The set of lines that comprise the LDIF representation 1837 * of the entry. It must not be {@code null} or empty. 1838 * 1839 * @return The entry read from LDIF. 1840 * 1841 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1842 * entry. 1843 */ 1844 public static Entry decodeEntry(final String... ldifLines) 1845 throws LDIFException 1846 { 1847 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP, 1848 TrailingSpaceBehavior.REJECT, null, ldifLines), 1849 DEFAULT_RELATIVE_BASE_PATH); 1850 debugLDIFRead(e); 1851 return e; 1852 } 1853 1854 1855 1856 /** 1857 * Decodes the provided set of LDIF lines as an entry. The provided set of 1858 * lines must contain exactly one entry. Long lines may be wrapped as per the 1859 * LDIF specification, and it is acceptable to have one or more blank lines 1860 * following the entry. A default trailing space behavior of 1861 * {@link TrailingSpaceBehavior#REJECT} will be used. 1862 * 1863 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1864 * attribute values encountered while parsing. 1865 * @param schema The schema to use when parsing the record, 1866 * if applicable. 1867 * @param ldifLines The set of lines that comprise the LDIF 1868 * representation of the entry. It must not be 1869 * {@code null} or empty. 1870 * 1871 * @return The entry read from LDIF. 1872 * 1873 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1874 * entry. 1875 */ 1876 public static Entry decodeEntry(final boolean ignoreDuplicateValues, 1877 final Schema schema, 1878 final String... ldifLines) 1879 throws LDIFException 1880 { 1881 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT, 1882 schema, ldifLines); 1883 } 1884 1885 1886 1887 /** 1888 * Decodes the provided set of LDIF lines as an entry. The provided set of 1889 * lines must contain exactly one entry. 
Long lines may be wrapped as per the 1890 * LDIF specification, and it is acceptable to have one or more blank lines 1891 * following the entry. 1892 * 1893 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1894 * attribute values encountered while parsing. 1895 * @param trailingSpaceBehavior The behavior that should be exhibited when 1896 * encountering attribute values which are not 1897 * base64-encoded but contain trailing spaces. 1898 * It must not be {@code null}. 1899 * @param schema The schema to use when parsing the record, 1900 * if applicable. 1901 * @param ldifLines The set of lines that comprise the LDIF 1902 * representation of the entry. It must not be 1903 * {@code null} or empty. 1904 * 1905 * @return The entry read from LDIF. 1906 * 1907 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1908 * entry. 1909 */ 1910 public static Entry decodeEntry( 1911 final boolean ignoreDuplicateValues, 1912 final TrailingSpaceBehavior trailingSpaceBehavior, 1913 final Schema schema, 1914 final String... ldifLines) throws LDIFException 1915 { 1916 final Entry e = decodeEntry(prepareRecord( 1917 (ignoreDuplicateValues 1918 ? DuplicateValueBehavior.STRIP 1919 : DuplicateValueBehavior.REJECT), 1920 trailingSpaceBehavior, schema, ldifLines), 1921 DEFAULT_RELATIVE_BASE_PATH); 1922 debugLDIFRead(e); 1923 return e; 1924 } 1925 1926 1927 1928 /** 1929 * Decodes the provided set of LDIF lines as an LDIF change record. The 1930 * provided set of lines must contain exactly one change record and it must 1931 * include a changetype. Long lines may be wrapped as per the LDIF 1932 * specification, and it is acceptable to have one or more blank lines 1933 * following the entry. 1934 * 1935 * @param ldifLines The set of lines that comprise the LDIF representation 1936 * of the change record. It must not be {@code null} or 1937 * empty. 1938 * 1939 * @return The change record read from LDIF. 
1940 * 1941 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1942 * change record. 1943 */ 1944 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines) 1945 throws LDIFException 1946 { 1947 return decodeChangeRecord(false, ldifLines); 1948 } 1949 1950 1951 1952 /** 1953 * Decodes the provided set of LDIF lines as an LDIF change record. The 1954 * provided set of lines must contain exactly one change record. Long lines 1955 * may be wrapped as per the LDIF specification, and it is acceptable to have 1956 * one or more blank lines following the entry. 1957 * 1958 * @param defaultAdd Indicates whether an LDIF record not containing a 1959 * changetype should be retrieved as an add change record. 1960 * If this is {@code false} and the record read does not 1961 * include a changetype, then an {@link LDIFException} 1962 * will be thrown. 1963 * @param ldifLines The set of lines that comprise the LDIF representation 1964 * of the change record. It must not be {@code null} or 1965 * empty. 1966 * 1967 * @return The change record read from LDIF. 1968 * 1969 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1970 * change record. 1971 */ 1972 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd, 1973 final String... ldifLines) 1974 throws LDIFException 1975 { 1976 final LDIFChangeRecord r = 1977 decodeChangeRecord( 1978 prepareRecord(DuplicateValueBehavior.STRIP, 1979 TrailingSpaceBehavior.REJECT, null, ldifLines), 1980 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 1981 debugLDIFRead(r); 1982 return r; 1983 } 1984 1985 1986 1987 /** 1988 * Decodes the provided set of LDIF lines as an LDIF change record. The 1989 * provided set of lines must contain exactly one change record. Long lines 1990 * may be wrapped as per the LDIF specification, and it is acceptable to have 1991 * one or more blank lines following the entry. 
1992 * 1993 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1994 * attribute values encountered while parsing. 1995 * @param schema The schema to use when processing the change 1996 * record, or {@code null} if no schema should 1997 * be used and all values should be treated as 1998 * case-insensitive strings. 1999 * @param defaultAdd Indicates whether an LDIF record not 2000 * containing a changetype should be retrieved 2001 * as an add change record. If this is 2002 * {@code false} and the record read does not 2003 * include a changetype, then an 2004 * {@link LDIFException} will be thrown. 2005 * @param ldifLines The set of lines that comprise the LDIF 2006 * representation of the change record. It 2007 * must not be {@code null} or empty. 2008 * 2009 * @return The change record read from LDIF. 2010 * 2011 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2012 * change record. 2013 */ 2014 public static LDIFChangeRecord decodeChangeRecord( 2015 final boolean ignoreDuplicateValues, 2016 final Schema schema, 2017 final boolean defaultAdd, 2018 final String... ldifLines) 2019 throws LDIFException 2020 { 2021 return decodeChangeRecord(ignoreDuplicateValues, 2022 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines); 2023 } 2024 2025 2026 2027 /** 2028 * Decodes the provided set of LDIF lines as an LDIF change record. The 2029 * provided set of lines must contain exactly one change record. Long lines 2030 * may be wrapped as per the LDIF specification, and it is acceptable to have 2031 * one or more blank lines following the entry. 2032 * 2033 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 2034 * attribute values encountered while parsing. 2035 * @param trailingSpaceBehavior The behavior that should be exhibited when 2036 * encountering attribute values which are not 2037 * base64-encoded but contain trailing spaces. 2038 * It must not be {@code null}. 
2039 * @param schema The schema to use when processing the change 2040 * record, or {@code null} if no schema should 2041 * be used and all values should be treated as 2042 * case-insensitive strings. 2043 * @param defaultAdd Indicates whether an LDIF record not 2044 * containing a changetype should be retrieved 2045 * as an add change record. If this is 2046 * {@code false} and the record read does not 2047 * include a changetype, then an 2048 * {@link LDIFException} will be thrown. 2049 * @param ldifLines The set of lines that comprise the LDIF 2050 * representation of the change record. It 2051 * must not be {@code null} or empty. 2052 * 2053 * @return The change record read from LDIF. 2054 * 2055 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2056 * change record. 2057 */ 2058 public static LDIFChangeRecord decodeChangeRecord( 2059 final boolean ignoreDuplicateValues, 2060 final TrailingSpaceBehavior trailingSpaceBehavior, 2061 final Schema schema, 2062 final boolean defaultAdd, 2063 final String... ldifLines) 2064 throws LDIFException 2065 { 2066 final LDIFChangeRecord r = decodeChangeRecord( 2067 prepareRecord( 2068 (ignoreDuplicateValues 2069 ? DuplicateValueBehavior.STRIP 2070 : DuplicateValueBehavior.REJECT), 2071 trailingSpaceBehavior, schema, ldifLines), 2072 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 2073 debugLDIFRead(r); 2074 return r; 2075 } 2076 2077 2078 2079 /** 2080 * Parses the provided set of lines into a list of {@code StringBuilder} 2081 * objects suitable for decoding into an entry or LDIF change record. 2082 * Comments will be ignored and wrapped lines will be unwrapped. 2083 * 2084 * @param duplicateValueBehavior The behavior that should be exhibited if 2085 * the LDIF reader encounters an entry with 2086 * duplicate values. 2087 * @param trailingSpaceBehavior The behavior that should be exhibited when 2088 * encountering attribute values which are not 2089 * base64-encoded but contain trailing spaces. 
2090 * @param schema The schema to use when parsing the record, 2091 * if applicable. 2092 * @param ldifLines The set of lines that comprise the record 2093 * to decode. It must not be {@code null} or 2094 * empty. 2095 * 2096 * @return The prepared list of {@code StringBuilder} objects ready to be 2097 * decoded. 2098 * 2099 * @throws LDIFException If the provided lines do not contain valid LDIF 2100 * content. 2101 */ 2102 private static UnparsedLDIFRecord prepareRecord( 2103 final DuplicateValueBehavior duplicateValueBehavior, 2104 final TrailingSpaceBehavior trailingSpaceBehavior, 2105 final Schema schema, final String... ldifLines) 2106 throws LDIFException 2107 { 2108 ensureNotNull(ldifLines); 2109 ensureFalse(ldifLines.length == 0, 2110 "LDIFReader.prepareRecord.ldifLines must not be empty."); 2111 2112 boolean lastWasComment = false; 2113 final ArrayList<StringBuilder> lineList = 2114 new ArrayList<StringBuilder>(ldifLines.length); 2115 for (int i=0; i < ldifLines.length; i++) 2116 { 2117 final String line = ldifLines[i]; 2118 if (line.length() == 0) 2119 { 2120 // This is only acceptable if there are no more non-empty lines in the 2121 // array. 2122 for (int j=i+1; j < ldifLines.length; j++) 2123 { 2124 if (ldifLines[j].length() > 0) 2125 { 2126 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true, 2127 ldifLines, null); 2128 } 2129 2130 // If we've gotten here, then we know that we're at the end of the 2131 // entry. If we have read data, then we can decode it as an entry. 2132 // Otherwise, there was no real data in the provided LDIF lines. 2133 if (lineList.isEmpty()) 2134 { 2135 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true, 2136 ldifLines, null); 2137 } 2138 else 2139 { 2140 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2141 trailingSpaceBehavior, schema, 0); 2142 } 2143 } 2144 } 2145 2146 if (line.charAt(0) == ' ') 2147 { 2148 if (i > 0) 2149 { 2150 if (! 
lastWasComment) 2151 { 2152 lineList.get(lineList.size() - 1).append(line.substring(1)); 2153 } 2154 } 2155 else 2156 { 2157 throw new LDIFException( 2158 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0, 2159 true, ldifLines, null); 2160 } 2161 } 2162 else if (line.charAt(0) == '#') 2163 { 2164 lastWasComment = true; 2165 } 2166 else 2167 { 2168 lineList.add(new StringBuilder(line)); 2169 lastWasComment = false; 2170 } 2171 } 2172 2173 if (lineList.isEmpty()) 2174 { 2175 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null); 2176 } 2177 else 2178 { 2179 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2180 trailingSpaceBehavior, schema, 0); 2181 } 2182 } 2183 2184 2185 2186 /** 2187 * Decodes the unparsed record that was read from the LDIF source. It may be 2188 * either an entry or an LDIF change record. 2189 * 2190 * @param unparsedRecord The unparsed LDIF record that was read from the 2191 * input. It must not be {@code null} or empty. 2192 * @param relativeBasePath The base path that will be prepended to relative 2193 * paths in order to obtain an absolute path. 2194 * @param schema The schema to use when parsing. 2195 * 2196 * @return The parsed record, or {@code null} if there are no more entries to 2197 * be read. 2198 * 2199 * @throws LDIFException If the data read could not be parsed as an entry or 2200 * an LDIF change record. 2201 */ 2202 private static LDIFRecord decodeRecord( 2203 final UnparsedLDIFRecord unparsedRecord, 2204 final String relativeBasePath, 2205 final Schema schema) 2206 throws LDIFException 2207 { 2208 // If there was an error reading from the input, then we rethrow it here. 2209 final Exception readError = unparsedRecord.getFailureCause(); 2210 if (readError != null) 2211 { 2212 if (readError instanceof LDIFException) 2213 { 2214 // If the error was an LDIFException, which will normally be the case, 2215 // then rethrow it with all of the same state. 
We could just 2216 // throw (LDIFException) readError; 2217 // but that's considered bad form. 2218 final LDIFException ldifEx = (LDIFException) readError; 2219 throw new LDIFException(ldifEx.getMessage(), 2220 ldifEx.getLineNumber(), 2221 ldifEx.mayContinueReading(), 2222 ldifEx.getDataLines(), 2223 ldifEx.getCause()); 2224 } 2225 else 2226 { 2227 throw new LDIFException(getExceptionMessage(readError), 2228 -1, true, readError); 2229 } 2230 } 2231 2232 if (unparsedRecord.isEOF()) 2233 { 2234 return null; 2235 } 2236 2237 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList(); 2238 if (unparsedRecord.getLineList() == null) 2239 { 2240 return null; // We can get here if there was an error reading the lines. 2241 } 2242 2243 final LDIFRecord r; 2244 if (lineList.size() == 1) 2245 { 2246 r = decodeEntry(unparsedRecord, relativeBasePath); 2247 } 2248 else 2249 { 2250 final String lowerSecondLine = toLowerCase(lineList.get(1).toString()); 2251 if (lowerSecondLine.startsWith("control:") || 2252 lowerSecondLine.startsWith("changetype:")) 2253 { 2254 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema); 2255 } 2256 else 2257 { 2258 r = decodeEntry(unparsedRecord, relativeBasePath); 2259 } 2260 } 2261 2262 debugLDIFRead(r); 2263 return r; 2264 } 2265 2266 2267 2268 /** 2269 * Decodes the provided set of LDIF lines as an entry. The provided list must 2270 * not contain any blank lines or comments, and lines are not allowed to be 2271 * wrapped. 2272 * 2273 * @param unparsedRecord The unparsed LDIF record that was read from the 2274 * input. It must not be {@code null} or empty. 2275 * @param relativeBasePath The base path that will be prepended to relative 2276 * paths in order to obtain an absolute path. 2277 * 2278 * @return The entry read from LDIF. 2279 * 2280 * @throws LDIFException If the provided LDIF data cannot be read as an 2281 * entry. 
2282 */ 2283 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord, 2284 final String relativeBasePath) 2285 throws LDIFException 2286 { 2287 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2288 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2289 2290 final Iterator<StringBuilder> iterator = ldifLines.iterator(); 2291 2292 // The first line must start with either "version:" or "dn:". If the first 2293 // line starts with "version:" then the second must start with "dn:". 2294 StringBuilder line = iterator.next(); 2295 handleTrailingSpaces(line, null, firstLineNumber, 2296 unparsedRecord.getTrailingSpaceBehavior()); 2297 int colonPos = line.indexOf(":"); 2298 if ((colonPos > 0) && 2299 line.substring(0, colonPos).equalsIgnoreCase("version")) 2300 { 2301 // The first line is "version:". Under most conditions, this will be 2302 // handled by the LDIF reader, but this can happen if you call 2303 // decodeEntry with a set of data that includes a version. At any rate, 2304 // read the next line, which must specify the DN. 2305 line = iterator.next(); 2306 handleTrailingSpaces(line, null, firstLineNumber, 2307 unparsedRecord.getTrailingSpaceBehavior()); 2308 } 2309 2310 colonPos = line.indexOf(":"); 2311 if ((colonPos < 0) || 2312 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2313 { 2314 throw new LDIFException( 2315 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2316 firstLineNumber, true, ldifLines, null); 2317 } 2318 2319 final String dn; 2320 final int length = line.length(); 2321 if (length == (colonPos+1)) 2322 { 2323 // The colon was the last character on the line. This is acceptable and 2324 // indicates that the entry has the null DN. 2325 dn = ""; 2326 } 2327 else if (line.charAt(colonPos+1) == ':') 2328 { 2329 // Skip over any spaces leading up to the value, and then the rest of the 2330 // string is the base64-encoded DN. 
2331 int pos = colonPos+2; 2332 while ((pos < length) && (line.charAt(pos) == ' ')) 2333 { 2334 pos++; 2335 } 2336 2337 try 2338 { 2339 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2340 dn = new String(dnBytes, StandardCharsets.UTF_8); 2341 } 2342 catch (final ParseException pe) 2343 { 2344 debugException(pe); 2345 throw new LDIFException( 2346 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2347 pe.getMessage()), 2348 firstLineNumber, true, ldifLines, pe); 2349 } 2350 catch (final Exception e) 2351 { 2352 debugException(e); 2353 throw new LDIFException( 2354 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e), 2355 firstLineNumber, true, ldifLines, e); 2356 } 2357 } 2358 else 2359 { 2360 // Skip over any spaces leading up to the value, and then the rest of the 2361 // string is the DN. 2362 int pos = colonPos+1; 2363 while ((pos < length) && (line.charAt(pos) == ' ')) 2364 { 2365 pos++; 2366 } 2367 2368 dn = line.substring(pos); 2369 } 2370 2371 2372 // The remaining lines must be the attributes for the entry. However, we 2373 // will allow the case in which an entry does not have any attributes, to be 2374 // able to support reading search result entries in which no attributes were 2375 // returned. 2376 if (! iterator.hasNext()) 2377 { 2378 return new Entry(dn, unparsedRecord.getSchema()); 2379 } 2380 2381 return new Entry(dn, unparsedRecord.getSchema(), 2382 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2383 unparsedRecord.getTrailingSpaceBehavior(), 2384 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath, 2385 firstLineNumber)); 2386 } 2387 2388 2389 2390 /** 2391 * Decodes the provided set of LDIF lines as a change record. The provided 2392 * list must not contain any blank lines or comments, and lines are not 2393 * allowed to be wrapped. 2394 * 2395 * @param unparsedRecord The unparsed LDIF record that was read from the 2396 * input. It must not be {@code null} or empty. 
2397 * @param relativeBasePath The base path that will be prepended to relative 2398 * paths in order to obtain an absolute path. 2399 * @param defaultAdd Indicates whether an LDIF record not containing a 2400 * changetype should be retrieved as an add change 2401 * record. If this is {@code false} and the record 2402 * read does not include a changetype, then an 2403 * {@link LDIFException} will be thrown. 2404 * @param schema The schema to use in parsing. 2405 * 2406 * @return The change record read from LDIF. 2407 * 2408 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2409 * change record. 2410 */ 2411 private static LDIFChangeRecord decodeChangeRecord( 2412 final UnparsedLDIFRecord unparsedRecord, 2413 final String relativeBasePath, 2414 final boolean defaultAdd, 2415 final Schema schema) 2416 throws LDIFException 2417 { 2418 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2419 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2420 2421 Iterator<StringBuilder> iterator = ldifLines.iterator(); 2422 2423 // The first line must start with either "version:" or "dn:". If the first 2424 // line starts with "version:" then the second must start with "dn:". 2425 StringBuilder line = iterator.next(); 2426 handleTrailingSpaces(line, null, firstLineNumber, 2427 unparsedRecord.getTrailingSpaceBehavior()); 2428 int colonPos = line.indexOf(":"); 2429 int linesRead = 1; 2430 if ((colonPos > 0) && 2431 line.substring(0, colonPos).equalsIgnoreCase("version")) 2432 { 2433 // The first line is "version:". Under most conditions, this will be 2434 // handled by the LDIF reader, but this can happen if you call 2435 // decodeEntry with a set of data that includes a version. At any rate, 2436 // read the next line, which must specify the DN. 
2437 line = iterator.next(); 2438 linesRead++; 2439 handleTrailingSpaces(line, null, firstLineNumber, 2440 unparsedRecord.getTrailingSpaceBehavior()); 2441 } 2442 2443 colonPos = line.indexOf(":"); 2444 if ((colonPos < 0) || 2445 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2446 { 2447 throw new LDIFException( 2448 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2449 firstLineNumber, true, ldifLines, null); 2450 } 2451 2452 final String dn; 2453 final int length = line.length(); 2454 if (length == (colonPos+1)) 2455 { 2456 // The colon was the last character on the line. This is acceptable and 2457 // indicates that the entry has the null DN. 2458 dn = ""; 2459 } 2460 else if (line.charAt(colonPos+1) == ':') 2461 { 2462 // Skip over any spaces leading up to the value, and then the rest of the 2463 // string is the base64-encoded DN. 2464 int pos = colonPos+2; 2465 while ((pos < length) && (line.charAt(pos) == ' ')) 2466 { 2467 pos++; 2468 } 2469 2470 try 2471 { 2472 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2473 dn = new String(dnBytes, StandardCharsets.UTF_8); 2474 } 2475 catch (final ParseException pe) 2476 { 2477 debugException(pe); 2478 throw new LDIFException( 2479 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2480 pe.getMessage()), 2481 firstLineNumber, true, ldifLines, pe); 2482 } 2483 catch (final Exception e) 2484 { 2485 debugException(e); 2486 throw new LDIFException( 2487 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2488 e), 2489 firstLineNumber, true, ldifLines, e); 2490 } 2491 } 2492 else 2493 { 2494 // Skip over any spaces leading up to the value, and then the rest of the 2495 // string is the DN. 2496 int pos = colonPos+1; 2497 while ((pos < length) && (line.charAt(pos) == ' ')) 2498 { 2499 pos++; 2500 } 2501 2502 dn = line.substring(pos); 2503 } 2504 2505 2506 // An LDIF change record may contain zero or more controls, with the end of 2507 // the controls signified by the changetype. 
The changetype element must be 2508 // present, unless defaultAdd is true in which case the first thing that is 2509 // neither control or changetype will trigger the start of add attribute 2510 // parsing. 2511 if (! iterator.hasNext()) 2512 { 2513 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber), 2514 firstLineNumber, true, ldifLines, null); 2515 } 2516 2517 String changeType = null; 2518 ArrayList<Control> controls = null; 2519 while (true) 2520 { 2521 line = iterator.next(); 2522 handleTrailingSpaces(line, dn, firstLineNumber, 2523 unparsedRecord.getTrailingSpaceBehavior()); 2524 colonPos = line.indexOf(":"); 2525 if (colonPos < 0) 2526 { 2527 throw new LDIFException( 2528 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber), 2529 firstLineNumber, true, ldifLines, null); 2530 } 2531 2532 final String token = toLowerCase(line.substring(0, colonPos)); 2533 if (token.equals("control")) 2534 { 2535 if (controls == null) 2536 { 2537 controls = new ArrayList<Control>(5); 2538 } 2539 2540 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines, 2541 relativeBasePath)); 2542 } 2543 else if (token.equals("changetype")) 2544 { 2545 changeType = 2546 decodeChangeType(line, colonPos, firstLineNumber, ldifLines); 2547 break; 2548 } 2549 else if (defaultAdd) 2550 { 2551 // The line we read wasn't a control or changetype declaration, so we'll 2552 // assume it's an attribute in an add record. However, we're not ready 2553 // for that yet, and since we can't rewind an iterator we'll create a 2554 // new one that hasn't yet gotten to this line. 
2555 changeType = "add"; 2556 iterator = ldifLines.iterator(); 2557 for (int i=0; i < linesRead; i++) 2558 { 2559 iterator.next(); 2560 } 2561 break; 2562 } 2563 else 2564 { 2565 throw new LDIFException( 2566 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get( 2567 firstLineNumber), 2568 firstLineNumber, true, ldifLines, null); 2569 } 2570 2571 linesRead++; 2572 } 2573 2574 2575 // Make sure that the change type is acceptable and then decode the rest of 2576 // the change record accordingly. 2577 final String lowerChangeType = toLowerCase(changeType); 2578 if (lowerChangeType.equals("add")) 2579 { 2580 // There must be at least one more line. If not, then that's an error. 2581 // Otherwise, parse the rest of the data as attribute-value pairs. 2582 if (iterator.hasNext()) 2583 { 2584 final Collection<Attribute> attrs = 2585 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2586 unparsedRecord.getTrailingSpaceBehavior(), 2587 unparsedRecord.getSchema(), ldifLines, iterator, 2588 relativeBasePath, firstLineNumber); 2589 final Attribute[] attributes = new Attribute[attrs.size()]; 2590 final Iterator<Attribute> attrIterator = attrs.iterator(); 2591 for (int i=0; i < attributes.length; i++) 2592 { 2593 attributes[i] = attrIterator.next(); 2594 } 2595 2596 return new LDIFAddChangeRecord(dn, attributes, controls); 2597 } 2598 else 2599 { 2600 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber), 2601 firstLineNumber, true, ldifLines, null); 2602 } 2603 } 2604 else if (lowerChangeType.equals("delete")) 2605 { 2606 // There shouldn't be any more data. If there is, then that's an error. 2607 // Otherwise, we can just return the delete change record with what we 2608 // already know. 
2609 if (iterator.hasNext()) 2610 { 2611 throw new LDIFException( 2612 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber), 2613 firstLineNumber, true, ldifLines, null); 2614 } 2615 else 2616 { 2617 return new LDIFDeleteChangeRecord(dn, controls); 2618 } 2619 } 2620 else if (lowerChangeType.equals("modify")) 2621 { 2622 // There must be at least one more line. If not, then that's an error. 2623 // Otherwise, parse the rest of the data as a set of modifications. 2624 if (iterator.hasNext()) 2625 { 2626 final Modification[] mods = parseModifications(dn, 2627 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator, 2628 firstLineNumber, schema); 2629 return new LDIFModifyChangeRecord(dn, mods, controls); 2630 } 2631 else 2632 { 2633 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber), 2634 firstLineNumber, true, ldifLines, null); 2635 } 2636 } 2637 else if (lowerChangeType.equals("moddn") || 2638 lowerChangeType.equals("modrdn")) 2639 { 2640 // There must be at least one more line. If not, then that's an error. 2641 // Otherwise, parse the rest of the data as a set of modifications. 2642 if (iterator.hasNext()) 2643 { 2644 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls, 2645 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber); 2646 } 2647 else 2648 { 2649 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber), 2650 firstLineNumber, true, ldifLines, null); 2651 } 2652 } 2653 else 2654 { 2655 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType, 2656 firstLineNumber), 2657 firstLineNumber, true, ldifLines, null); 2658 } 2659 } 2660 2661 2662 2663 /** 2664 * Decodes information about a control from the provided line. 2665 * 2666 * @param line The line to process. 2667 * @param colonPos The position of the colon that separates the 2668 * control token string from tbe encoded control. 2669 * @param firstLineNumber The line number for the start of the record. 
2670 * @param ldifLines The lines that comprise the LDIF representation 2671 * of the full record being parsed. 2672 * @param relativeBasePath The base path that will be prepended to relative 2673 * paths in order to obtain an absolute path. 2674 * 2675 * @return The decoded control. 2676 * 2677 * @throws LDIFException If a problem is encountered while trying to decode 2678 * the changetype. 2679 */ 2680 private static Control decodeControl(final StringBuilder line, 2681 final int colonPos, 2682 final long firstLineNumber, 2683 final ArrayList<StringBuilder> ldifLines, 2684 final String relativeBasePath) 2685 throws LDIFException 2686 { 2687 final String controlString; 2688 int length = line.length(); 2689 if (length == (colonPos+1)) 2690 { 2691 // The colon was the last character on the line. This is not 2692 // acceptable. 2693 throw new LDIFException( 2694 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2695 firstLineNumber, true, ldifLines, null); 2696 } 2697 else if (line.charAt(colonPos+1) == ':') 2698 { 2699 // Skip over any spaces leading up to the value, and then the rest of 2700 // the string is the base64-encoded control representation. This is 2701 // unusual and unnecessary, but is nevertheless acceptable. 
2702 int pos = colonPos+2; 2703 while ((pos < length) && (line.charAt(pos) == ' ')) 2704 { 2705 pos++; 2706 } 2707 2708 try 2709 { 2710 final byte[] controlBytes = Base64.decode(line.substring(pos)); 2711 controlString = new String(controlBytes, StandardCharsets.UTF_8); 2712 } 2713 catch (final ParseException pe) 2714 { 2715 debugException(pe); 2716 throw new LDIFException( 2717 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get( 2718 firstLineNumber, pe.getMessage()), 2719 firstLineNumber, true, ldifLines, pe); 2720 } 2721 catch (final Exception e) 2722 { 2723 debugException(e); 2724 throw new LDIFException( 2725 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e), 2726 firstLineNumber, true, ldifLines, e); 2727 } 2728 } 2729 else 2730 { 2731 // Skip over any spaces leading up to the value, and then the rest of 2732 // the string is the encoded control. 2733 int pos = colonPos+1; 2734 while ((pos < length) && (line.charAt(pos) == ' ')) 2735 { 2736 pos++; 2737 } 2738 2739 controlString = line.substring(pos); 2740 } 2741 2742 // If the resulting control definition is empty, then that's invalid. 2743 if (controlString.length() == 0) 2744 { 2745 throw new LDIFException( 2746 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2747 firstLineNumber, true, ldifLines, null); 2748 } 2749 2750 2751 // The first element of the control must be the OID, and it must be followed 2752 // by a space (to separate it from the criticality), a colon (to separate it 2753 // from the value and indicate a default criticality of false), or the end 2754 // of the line (to indicate a default criticality of false and no value). 
2755 String oid = null; 2756 boolean hasCriticality = false; 2757 boolean hasValue = false; 2758 int pos = 0; 2759 length = controlString.length(); 2760 while (pos < length) 2761 { 2762 final char c = controlString.charAt(pos); 2763 if (c == ':') 2764 { 2765 // This indicates that there is no criticality and that the value 2766 // immediately follows the OID. 2767 oid = controlString.substring(0, pos++); 2768 hasValue = true; 2769 break; 2770 } 2771 else if (c == ' ') 2772 { 2773 // This indicates that there is a criticality. We don't know anything 2774 // about the presence of a value yet. 2775 oid = controlString.substring(0, pos++); 2776 hasCriticality = true; 2777 break; 2778 } 2779 else 2780 { 2781 pos++; 2782 } 2783 } 2784 2785 if (oid == null) 2786 { 2787 // This indicates that the string representation of the control is only 2788 // the OID. 2789 return new Control(controlString, false); 2790 } 2791 2792 2793 // See if we need to read the criticality. If so, then do so now. 2794 // Otherwise, assume a default criticality of false. 2795 final boolean isCritical; 2796 if (hasCriticality) 2797 { 2798 // Skip over any spaces before the criticality. 2799 while (controlString.charAt(pos) == ' ') 2800 { 2801 pos++; 2802 } 2803 2804 // Read until we find a colon or the end of the string. 
2805 final int criticalityStartPos = pos; 2806 while (pos < length) 2807 { 2808 final char c = controlString.charAt(pos); 2809 if (c == ':') 2810 { 2811 hasValue = true; 2812 break; 2813 } 2814 else 2815 { 2816 pos++; 2817 } 2818 } 2819 2820 final String criticalityString = 2821 toLowerCase(controlString.substring(criticalityStartPos, pos)); 2822 if (criticalityString.equals("true")) 2823 { 2824 isCritical = true; 2825 } 2826 else if (criticalityString.equals("false")) 2827 { 2828 isCritical = false; 2829 } 2830 else 2831 { 2832 throw new LDIFException( 2833 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString, 2834 firstLineNumber), 2835 firstLineNumber, true, ldifLines, null); 2836 } 2837 2838 if (hasValue) 2839 { 2840 pos++; 2841 } 2842 } 2843 else 2844 { 2845 isCritical = false; 2846 } 2847 2848 // See if we need to read the value. If so, then do so now. It may be 2849 // a string, or it may be base64-encoded. It could conceivably even be read 2850 // from a URL. 2851 final ASN1OctetString value; 2852 if (hasValue) 2853 { 2854 // The character immediately after the colon that precedes the value may 2855 // be one of the following: 2856 // - A second colon (optionally followed by a single space) to indicate 2857 // that the value is base64-encoded. 2858 // - A less-than symbol to indicate that the value should be read from a 2859 // location specified by a URL. 2860 // - A single space that precedes the non-base64-encoded value. 2861 // - The first character of the non-base64-encoded value. 
2862 switch (controlString.charAt(pos)) 2863 { 2864 case ':': 2865 try 2866 { 2867 if (controlString.length() == (pos+1)) 2868 { 2869 value = new ASN1OctetString(); 2870 } 2871 else if (controlString.charAt(pos+1) == ' ') 2872 { 2873 value = new ASN1OctetString( 2874 Base64.decode(controlString.substring(pos+2))); 2875 } 2876 else 2877 { 2878 value = new ASN1OctetString( 2879 Base64.decode(controlString.substring(pos+1))); 2880 } 2881 } 2882 catch (final Exception e) 2883 { 2884 debugException(e); 2885 throw new LDIFException( 2886 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get( 2887 firstLineNumber, getExceptionMessage(e)), 2888 firstLineNumber, true, ldifLines, e); 2889 } 2890 break; 2891 case '<': 2892 try 2893 { 2894 final String urlString; 2895 if (controlString.charAt(pos+1) == ' ') 2896 { 2897 urlString = controlString.substring(pos+2); 2898 } 2899 else 2900 { 2901 urlString = controlString.substring(pos+1); 2902 } 2903 value = new ASN1OctetString(retrieveURLBytes(urlString, 2904 relativeBasePath, firstLineNumber)); 2905 } 2906 catch (final Exception e) 2907 { 2908 debugException(e); 2909 throw new LDIFException( 2910 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get( 2911 firstLineNumber, getExceptionMessage(e)), 2912 firstLineNumber, true, ldifLines, e); 2913 } 2914 break; 2915 case ' ': 2916 value = new ASN1OctetString(controlString.substring(pos+1)); 2917 break; 2918 default: 2919 value = new ASN1OctetString(controlString.substring(pos)); 2920 break; 2921 } 2922 } 2923 else 2924 { 2925 value = null; 2926 } 2927 2928 return new Control(oid, isCritical, value); 2929 } 2930 2931 2932 2933 /** 2934 * Decodes the changetype element from the provided line. 2935 * 2936 * @param line The line to process. 2937 * @param colonPos The position of the colon that separates the 2938 * changetype string from its value. 2939 * @param firstLineNumber The line number for the start of the record. 
2940 * @param ldifLines The lines that comprise the LDIF representation of 2941 * the full record being parsed. 2942 * 2943 * @return The decoded changetype string. 2944 * 2945 * @throws LDIFException If a problem is encountered while trying to decode 2946 * the changetype. 2947 */ 2948 private static String decodeChangeType(final StringBuilder line, 2949 final int colonPos, final long firstLineNumber, 2950 final ArrayList<StringBuilder> ldifLines) 2951 throws LDIFException 2952 { 2953 final int length = line.length(); 2954 if (length == (colonPos+1)) 2955 { 2956 // The colon was the last character on the line. This is not 2957 // acceptable. 2958 throw new LDIFException( 2959 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber, 2960 true, ldifLines, null); 2961 } 2962 else if (line.charAt(colonPos+1) == ':') 2963 { 2964 // Skip over any spaces leading up to the value, and then the rest of 2965 // the string is the base64-encoded changetype. This is unusual and 2966 // unnecessary, but is nevertheless acceptable. 2967 int pos = colonPos+2; 2968 while ((pos < length) && (line.charAt(pos) == ' ')) 2969 { 2970 pos++; 2971 } 2972 2973 try 2974 { 2975 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 2976 return new String(changeTypeBytes, StandardCharsets.UTF_8); 2977 } 2978 catch (final ParseException pe) 2979 { 2980 debugException(pe); 2981 throw new LDIFException( 2982 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, 2983 pe.getMessage()), 2984 firstLineNumber, true, ldifLines, pe); 2985 } 2986 catch (final Exception e) 2987 { 2988 debugException(e); 2989 throw new LDIFException( 2990 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e), 2991 firstLineNumber, true, ldifLines, e); 2992 } 2993 } 2994 else 2995 { 2996 // Skip over any spaces leading up to the value, and then the rest of 2997 // the string is the changetype. 
2998 int pos = colonPos+1; 2999 while ((pos < length) && (line.charAt(pos) == ' ')) 3000 { 3001 pos++; 3002 } 3003 3004 return line.substring(pos); 3005 } 3006 } 3007 3008 3009 3010 /** 3011 * Parses the data available through the provided iterator as a collection of 3012 * attributes suitable for use in an entry or an add change record. 3013 * 3014 * @param dn The DN of the record being read. 3015 * @param duplicateValueBehavior The behavior that should be exhibited if 3016 * the LDIF reader encounters an entry with 3017 * duplicate values. 3018 * @param trailingSpaceBehavior The behavior that should be exhibited when 3019 * encountering attribute values which are not 3020 * base64-encoded but contain trailing spaces. 3021 * @param schema The schema to use when parsing the 3022 * attributes, or {@code null} if none is 3023 * needed. 3024 * @param ldifLines The lines that comprise the LDIF 3025 * representation of the full record being 3026 * parsed. 3027 * @param iterator The iterator to use to access the attribute 3028 * lines. 3029 * @param relativeBasePath The base path that will be prepended to 3030 * relative paths in order to obtain an 3031 * absolute path. 3032 * @param firstLineNumber The line number for the start of the 3033 * record. 3034 * 3035 * @return The collection of attributes that were read. 3036 * 3037 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3038 * set of attributes. 
3039 */ 3040 private static ArrayList<Attribute> parseAttributes(final String dn, 3041 final DuplicateValueBehavior duplicateValueBehavior, 3042 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema, 3043 final ArrayList<StringBuilder> ldifLines, 3044 final Iterator<StringBuilder> iterator, final String relativeBasePath, 3045 final long firstLineNumber) 3046 throws LDIFException 3047 { 3048 final LinkedHashMap<String,Object> attributes = 3049 new LinkedHashMap<String,Object>(ldifLines.size()); 3050 while (iterator.hasNext()) 3051 { 3052 final StringBuilder line = iterator.next(); 3053 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3054 final int colonPos = line.indexOf(":"); 3055 if (colonPos <= 0) 3056 { 3057 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3058 firstLineNumber, true, ldifLines, null); 3059 } 3060 3061 final String attributeName = line.substring(0, colonPos); 3062 final String lowerName = toLowerCase(attributeName); 3063 3064 final MatchingRule matchingRule; 3065 if (schema == null) 3066 { 3067 matchingRule = CaseIgnoreStringMatchingRule.getInstance(); 3068 } 3069 else 3070 { 3071 matchingRule = 3072 MatchingRule.selectEqualityMatchingRule(attributeName, schema); 3073 } 3074 3075 Attribute attr; 3076 final LDIFAttribute ldifAttr; 3077 final Object attrObject = attributes.get(lowerName); 3078 if (attrObject == null) 3079 { 3080 attr = null; 3081 ldifAttr = null; 3082 } 3083 else 3084 { 3085 if (attrObject instanceof Attribute) 3086 { 3087 attr = (Attribute) attrObject; 3088 ldifAttr = new LDIFAttribute(attr.getName(), matchingRule, 3089 attr.getRawValues()[0]); 3090 attributes.put(lowerName, ldifAttr); 3091 } 3092 else 3093 { 3094 attr = null; 3095 ldifAttr = (LDIFAttribute) attrObject; 3096 } 3097 } 3098 3099 final int length = line.length(); 3100 if (length == (colonPos+1)) 3101 { 3102 // This means that the attribute has a zero-length value, which is 3103 // acceptable. 
3104 if (attrObject == null) 3105 { 3106 attr = new Attribute(attributeName, matchingRule, ""); 3107 attributes.put(lowerName, attr); 3108 } 3109 else 3110 { 3111 try 3112 { 3113 if (! ldifAttr.addValue(new ASN1OctetString(), 3114 duplicateValueBehavior)) 3115 { 3116 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3117 { 3118 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3119 firstLineNumber, attributeName), firstLineNumber, true, 3120 ldifLines, null); 3121 } 3122 } 3123 } 3124 catch (final LDAPException le) 3125 { 3126 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3127 firstLineNumber, attributeName, getExceptionMessage(le)), 3128 firstLineNumber, true, ldifLines, le); 3129 } 3130 } 3131 } 3132 else if (line.charAt(colonPos+1) == ':') 3133 { 3134 // Skip over any spaces leading up to the value, and then the rest of 3135 // the string is the base64-encoded attribute value. 3136 int pos = colonPos+2; 3137 while ((pos < length) && (line.charAt(pos) == ' ')) 3138 { 3139 pos++; 3140 } 3141 3142 try 3143 { 3144 final byte[] valueBytes = Base64.decode(line.substring(pos)); 3145 if (attrObject == null) 3146 { 3147 attr = new Attribute(attributeName, matchingRule, valueBytes); 3148 attributes.put(lowerName, attr); 3149 } 3150 else 3151 { 3152 try 3153 { 3154 if (! 
ldifAttr.addValue(new ASN1OctetString(valueBytes), 3155 duplicateValueBehavior)) 3156 { 3157 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3158 { 3159 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3160 firstLineNumber, attributeName), firstLineNumber, true, 3161 ldifLines, null); 3162 } 3163 } 3164 } 3165 catch (final LDAPException le) 3166 { 3167 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3168 firstLineNumber, attributeName, getExceptionMessage(le)), 3169 firstLineNumber, true, ldifLines, le); 3170 } 3171 } 3172 } 3173 catch (final ParseException pe) 3174 { 3175 debugException(pe); 3176 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3177 attributeName, firstLineNumber, 3178 pe.getMessage()), 3179 firstLineNumber, true, ldifLines, pe); 3180 } 3181 } 3182 else if (line.charAt(colonPos+1) == '<') 3183 { 3184 // Skip over any spaces leading up to the value, and then the rest of 3185 // the string is a URL that indicates where to get the real content. 3186 // At the present time, we'll only support the file URLs. 3187 int pos = colonPos+2; 3188 while ((pos < length) && (line.charAt(pos) == ' ')) 3189 { 3190 pos++; 3191 } 3192 3193 final byte[] urlBytes; 3194 final String urlString = line.substring(pos); 3195 try 3196 { 3197 urlBytes = 3198 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber); 3199 } 3200 catch (final Exception e) 3201 { 3202 debugException(e); 3203 throw new LDIFException( 3204 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3205 firstLineNumber, e), 3206 firstLineNumber, true, ldifLines, e); 3207 } 3208 3209 if (attrObject == null) 3210 { 3211 attr = new Attribute(attributeName, matchingRule, urlBytes); 3212 attributes.put(lowerName, attr); 3213 } 3214 else 3215 { 3216 try 3217 { 3218 if (! 
ldifAttr.addValue(new ASN1OctetString(urlBytes), 3219 duplicateValueBehavior)) 3220 { 3221 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3222 { 3223 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3224 firstLineNumber, attributeName), firstLineNumber, true, 3225 ldifLines, null); 3226 } 3227 } 3228 } 3229 catch (final LDIFException le) 3230 { 3231 debugException(le); 3232 throw le; 3233 } 3234 catch (final Exception e) 3235 { 3236 debugException(e); 3237 throw new LDIFException( 3238 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3239 firstLineNumber, e), 3240 firstLineNumber, true, ldifLines, e); 3241 } 3242 } 3243 } 3244 else 3245 { 3246 // Skip over any spaces leading up to the value, and then the rest of 3247 // the string is the value. 3248 int pos = colonPos+1; 3249 while ((pos < length) && (line.charAt(pos) == ' ')) 3250 { 3251 pos++; 3252 } 3253 3254 final String valueString = line.substring(pos); 3255 if (attrObject == null) 3256 { 3257 attr = new Attribute(attributeName, matchingRule, valueString); 3258 attributes.put(lowerName, attr); 3259 } 3260 else 3261 { 3262 try 3263 { 3264 if (! 
ldifAttr.addValue(new ASN1OctetString(valueString), 3265 duplicateValueBehavior)) 3266 { 3267 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3268 { 3269 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3270 firstLineNumber, attributeName), firstLineNumber, true, 3271 ldifLines, null); 3272 } 3273 } 3274 } 3275 catch (final LDAPException le) 3276 { 3277 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3278 firstLineNumber, attributeName, getExceptionMessage(le)), 3279 firstLineNumber, true, ldifLines, le); 3280 } 3281 } 3282 } 3283 } 3284 3285 final ArrayList<Attribute> attrList = 3286 new ArrayList<Attribute>(attributes.size()); 3287 for (final Object o : attributes.values()) 3288 { 3289 if (o instanceof Attribute) 3290 { 3291 attrList.add((Attribute) o); 3292 } 3293 else 3294 { 3295 attrList.add(((LDIFAttribute) o).toAttribute()); 3296 } 3297 } 3298 3299 return attrList; 3300 } 3301 3302 3303 3304 /** 3305 * Retrieves the bytes that make up the file referenced by the given URL. 3306 * 3307 * @param urlString The string representation of the URL to retrieve. 3308 * @param relativeBasePath The base path that will be prepended to relative 3309 * paths in order to obtain an absolute path. 3310 * @param firstLineNumber The line number for the start of the record. 3311 * 3312 * @return The bytes contained in the specified file, or an empty array if 3313 * the specified file is empty. 3314 * 3315 * @throws LDIFException If the provided URL is malformed or references a 3316 * nonexistent file. 3317 * 3318 * @throws IOException If a problem is encountered while attempting to read 3319 * from the target file. 
3320 */ 3321 private static byte[] retrieveURLBytes(final String urlString, 3322 final String relativeBasePath, 3323 final long firstLineNumber) 3324 throws LDIFException, IOException 3325 { 3326 int pos; 3327 final String path; 3328 final String lowerURLString = toLowerCase(urlString); 3329 if (lowerURLString.startsWith("file:/")) 3330 { 3331 pos = 6; 3332 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/')) 3333 { 3334 pos++; 3335 } 3336 3337 path = urlString.substring(pos-1); 3338 } 3339 else if (lowerURLString.startsWith("file:")) 3340 { 3341 // A file: URL that doesn't include a slash will be interpreted as a 3342 // relative path. 3343 path = relativeBasePath + urlString.substring(5); 3344 } 3345 else 3346 { 3347 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString), 3348 firstLineNumber, true); 3349 } 3350 3351 final File f = new File(path); 3352 if (! f.exists()) 3353 { 3354 throw new LDIFException( 3355 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()), 3356 firstLineNumber, true); 3357 } 3358 3359 // In order to conserve memory, we'll only allow values to be read from 3360 // files no larger than 10 megabytes. 3361 final long fileSize = f.length(); 3362 if (fileSize > (10 * 1024 * 1024)) 3363 { 3364 throw new LDIFException( 3365 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(), 3366 (10*1024*1024)), 3367 firstLineNumber, true); 3368 } 3369 3370 int fileBytesRemaining = (int) fileSize; 3371 final byte[] fileData = new byte[(int) fileSize]; 3372 final FileInputStream fis = new FileInputStream(f); 3373 try 3374 { 3375 int fileBytesRead = 0; 3376 while (fileBytesRead < fileSize) 3377 { 3378 final int bytesRead = 3379 fis.read(fileData, fileBytesRead, fileBytesRemaining); 3380 if (bytesRead < 0) 3381 { 3382 // We hit the end of the file before we expected to. This shouldn't 3383 // happen unless the file size changed since we first looked at it, 3384 // which we won't allow. 
3385 throw new LDIFException( 3386 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, 3387 f.getAbsolutePath()), 3388 firstLineNumber, true); 3389 } 3390 3391 fileBytesRead += bytesRead; 3392 fileBytesRemaining -= bytesRead; 3393 } 3394 3395 if (fis.read() != -1) 3396 { 3397 // There is still more data to read. This shouldn't happen unless the 3398 // file size changed since we first looked at it, which we won't allow. 3399 throw new LDIFException( 3400 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()), 3401 firstLineNumber, true); 3402 } 3403 } 3404 finally 3405 { 3406 fis.close(); 3407 } 3408 3409 return fileData; 3410 } 3411 3412 3413 3414 /** 3415 * Parses the data available through the provided iterator into an array of 3416 * modifications suitable for use in a modify change record. 3417 * 3418 * @param dn The DN of the entry being parsed. 3419 * @param trailingSpaceBehavior The behavior that should be exhibited when 3420 * encountering attribute values which are not 3421 * base64-encoded but contain trailing spaces. 3422 * @param ldifLines The lines that comprise the LDIF 3423 * representation of the full record being 3424 * parsed. 3425 * @param iterator The iterator to use to access the 3426 * modification data. 3427 * @param firstLineNumber The line number for the start of the record. 3428 * @param schema The schema to use in processing. 3429 * 3430 * @return An array containing the modifications that were read. 3431 * 3432 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3433 * set of modifications. 
3434 */ 3435 private static Modification[] parseModifications(final String dn, 3436 final TrailingSpaceBehavior trailingSpaceBehavior, 3437 final ArrayList<StringBuilder> ldifLines, 3438 final Iterator<StringBuilder> iterator, 3439 final long firstLineNumber, final Schema schema) 3440 throws LDIFException 3441 { 3442 final ArrayList<Modification> modList = 3443 new ArrayList<Modification>(ldifLines.size()); 3444 3445 while (iterator.hasNext()) 3446 { 3447 // The first line must start with "add:", "delete:", "replace:", or 3448 // "increment:" followed by an attribute name. 3449 StringBuilder line = iterator.next(); 3450 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3451 int colonPos = line.indexOf(":"); 3452 if (colonPos < 0) 3453 { 3454 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber), 3455 firstLineNumber, true, ldifLines, null); 3456 } 3457 3458 final ModificationType modType; 3459 final String modTypeStr = toLowerCase(line.substring(0, colonPos)); 3460 if (modTypeStr.equals("add")) 3461 { 3462 modType = ModificationType.ADD; 3463 } 3464 else if (modTypeStr.equals("delete")) 3465 { 3466 modType = ModificationType.DELETE; 3467 } 3468 else if (modTypeStr.equals("replace")) 3469 { 3470 modType = ModificationType.REPLACE; 3471 } 3472 else if (modTypeStr.equals("increment")) 3473 { 3474 modType = ModificationType.INCREMENT; 3475 } 3476 else 3477 { 3478 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr, 3479 firstLineNumber), 3480 firstLineNumber, true, ldifLines, null); 3481 } 3482 3483 String attributeName; 3484 int length = line.length(); 3485 if (length == (colonPos+1)) 3486 { 3487 // The colon was the last character on the line. This is not 3488 // acceptable. 
3489 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3490 firstLineNumber), 3491 firstLineNumber, true, ldifLines, null); 3492 } 3493 else if (line.charAt(colonPos+1) == ':') 3494 { 3495 // Skip over any spaces leading up to the value, and then the rest of 3496 // the string is the base64-encoded attribute name. 3497 int pos = colonPos+2; 3498 while ((pos < length) && (line.charAt(pos) == ' ')) 3499 { 3500 pos++; 3501 } 3502 3503 try 3504 { 3505 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3506 attributeName = new String(dnBytes, StandardCharsets.UTF_8); 3507 } 3508 catch (final ParseException pe) 3509 { 3510 debugException(pe); 3511 throw new LDIFException( 3512 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3513 firstLineNumber, pe.getMessage()), 3514 firstLineNumber, true, ldifLines, pe); 3515 } 3516 catch (final Exception e) 3517 { 3518 debugException(e); 3519 throw new LDIFException( 3520 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3521 firstLineNumber, e), 3522 firstLineNumber, true, ldifLines, e); 3523 } 3524 } 3525 else 3526 { 3527 // Skip over any spaces leading up to the value, and then the rest of 3528 // the string is the attribute name. 3529 int pos = colonPos+1; 3530 while ((pos < length) && (line.charAt(pos) == ' ')) 3531 { 3532 pos++; 3533 } 3534 3535 attributeName = line.substring(pos); 3536 } 3537 3538 if (attributeName.length() == 0) 3539 { 3540 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3541 firstLineNumber), 3542 firstLineNumber, true, ldifLines, null); 3543 } 3544 3545 3546 // The next zero or more lines may be the set of attribute values. Keep 3547 // reading until we reach the end of the iterator or until we find a line 3548 // with just a "-". 
3549 final ArrayList<ASN1OctetString> valueList = 3550 new ArrayList<ASN1OctetString>(ldifLines.size()); 3551 while (iterator.hasNext()) 3552 { 3553 line = iterator.next(); 3554 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3555 if (line.toString().equals("-")) 3556 { 3557 break; 3558 } 3559 3560 colonPos = line.indexOf(":"); 3561 if (colonPos < 0) 3562 { 3563 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3564 firstLineNumber, true, ldifLines, null); 3565 } 3566 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName)) 3567 { 3568 // There are a couple of cases in which this might be acceptable: 3569 // - If the two names are logically equivalent, but have an alternate 3570 // name (or OID) for the target attribute type, or if there are 3571 // attribute options and the options are just in a different order. 3572 // - If this is the first value for the target attribute and the 3573 // alternate name includes a "binary" option that the original 3574 // attribute name did not have. In this case, all subsequent values 3575 // will also be required to have the binary option. 3576 final String alternateName = line.substring(0, colonPos); 3577 3578 3579 // Check to see if the base names are equivalent. 
3580 boolean baseNameEquivalent = false; 3581 final String expectedBaseName = Attribute.getBaseName(attributeName); 3582 final String alternateBaseName = Attribute.getBaseName(alternateName); 3583 if (alternateBaseName.equalsIgnoreCase(expectedBaseName)) 3584 { 3585 baseNameEquivalent = true; 3586 } 3587 else 3588 { 3589 if (schema != null) 3590 { 3591 final AttributeTypeDefinition expectedAT = 3592 schema.getAttributeType(expectedBaseName); 3593 final AttributeTypeDefinition alternateAT = 3594 schema.getAttributeType(alternateBaseName); 3595 if ((expectedAT != null) && (alternateAT != null) && 3596 expectedAT.equals(alternateAT)) 3597 { 3598 baseNameEquivalent = true; 3599 } 3600 } 3601 } 3602 3603 3604 // Check to see if the attribute options are equivalent. 3605 final Set<String> expectedOptions = 3606 Attribute.getOptions(attributeName); 3607 final Set<String> lowerExpectedOptions = 3608 new HashSet<String>(expectedOptions.size()); 3609 for (final String s : expectedOptions) 3610 { 3611 lowerExpectedOptions.add(toLowerCase(s)); 3612 } 3613 3614 final Set<String> alternateOptions = 3615 Attribute.getOptions(alternateName); 3616 final Set<String> lowerAlternateOptions = 3617 new HashSet<String>(alternateOptions.size()); 3618 for (final String s : alternateOptions) 3619 { 3620 lowerAlternateOptions.add(toLowerCase(s)); 3621 } 3622 3623 final boolean optionsEquivalent = 3624 lowerAlternateOptions.equals(lowerExpectedOptions); 3625 3626 3627 if (baseNameEquivalent && optionsEquivalent) 3628 { 3629 // This is fine. The two attribute descriptions are logically 3630 // equivalent. We'll continue using the attribute description that 3631 // was provided first. 
3632 } 3633 else if (valueList.isEmpty() && baseNameEquivalent && 3634 lowerAlternateOptions.remove("binary") && 3635 lowerAlternateOptions.equals(lowerExpectedOptions)) 3636 { 3637 // This means that the provided value is the first value for the 3638 // attribute, and that the only significant difference is that the 3639 // provided attribute description included an unexpected "binary" 3640 // option. We'll accept this, but will require any additional 3641 // values for this modification to also include the binary option, 3642 // and we'll use the binary option in the attribute that is 3643 // eventually created. 3644 attributeName = alternateName; 3645 } 3646 else 3647 { 3648 // This means that either the base names are different or the sets 3649 // of options are incompatible. This is not acceptable. 3650 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get( 3651 firstLineNumber, 3652 line.substring(0, colonPos), 3653 attributeName), 3654 firstLineNumber, true, ldifLines, null); 3655 } 3656 } 3657 3658 length = line.length(); 3659 final ASN1OctetString value; 3660 if (length == (colonPos+1)) 3661 { 3662 // The colon was the last character on the line. This is fine. 3663 value = new ASN1OctetString(); 3664 } 3665 else if (line.charAt(colonPos+1) == ':') 3666 { 3667 // Skip over any spaces leading up to the value, and then the rest of 3668 // the string is the base64-encoded value. This is unusual and 3669 // unnecessary, but is nevertheless acceptable. 
3670 int pos = colonPos+2; 3671 while ((pos < length) && (line.charAt(pos) == ' ')) 3672 { 3673 pos++; 3674 } 3675 3676 try 3677 { 3678 value = new ASN1OctetString(Base64.decode(line.substring(pos))); 3679 } 3680 catch (final ParseException pe) 3681 { 3682 debugException(pe); 3683 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3684 attributeName, firstLineNumber, pe.getMessage()), 3685 firstLineNumber, true, ldifLines, pe); 3686 } 3687 catch (final Exception e) 3688 { 3689 debugException(e); 3690 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3691 firstLineNumber, e), 3692 firstLineNumber, true, ldifLines, e); 3693 } 3694 } 3695 else 3696 { 3697 // Skip over any spaces leading up to the value, and then the rest of 3698 // the string is the value. 3699 int pos = colonPos+1; 3700 while ((pos < length) && (line.charAt(pos) == ' ')) 3701 { 3702 pos++; 3703 } 3704 3705 value = new ASN1OctetString(line.substring(pos)); 3706 } 3707 3708 valueList.add(value); 3709 } 3710 3711 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()]; 3712 valueList.toArray(values); 3713 3714 // If it's an add modification type, then there must be at least one 3715 // value. 3716 if ((modType.intValue() == ModificationType.ADD.intValue()) && 3717 (values.length == 0)) 3718 { 3719 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName, 3720 firstLineNumber), 3721 firstLineNumber, true, ldifLines, null); 3722 } 3723 3724 // If it's an increment modification type, then there must be exactly one 3725 // value. 
3726 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) && 3727 (values.length != 1)) 3728 { 3729 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get( 3730 firstLineNumber, attributeName), 3731 firstLineNumber, true, ldifLines, null); 3732 } 3733 3734 modList.add(new Modification(modType, attributeName, values)); 3735 } 3736 3737 final Modification[] mods = new Modification[modList.size()]; 3738 modList.toArray(mods); 3739 return mods; 3740 } 3741 3742 3743 3744 /** 3745 * Parses the data available through the provided iterator as the body of a 3746 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional 3747 * newsuperior lines). 3748 * 3749 * @param ldifLines The lines that comprise the LDIF 3750 * representation of the full record being 3751 * parsed. 3752 * @param iterator The iterator to use to access the modify DN 3753 * data. 3754 * @param dn The current DN of the entry. 3755 * @param controls The set of controls to include in the change 3756 * record. 3757 * @param trailingSpaceBehavior The behavior that should be exhibited when 3758 * encountering attribute values which are not 3759 * base64-encoded but contain trailing spaces. 3760 * @param firstLineNumber The line number for the start of the record. 3761 * 3762 * @return The decoded modify DN change record. 3763 * 3764 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3765 * modify DN change record. 3766 */ 3767 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord( 3768 final ArrayList<StringBuilder> ldifLines, 3769 final Iterator<StringBuilder> iterator, final String dn, 3770 final List<Control> controls, 3771 final TrailingSpaceBehavior trailingSpaceBehavior, 3772 final long firstLineNumber) 3773 throws LDIFException 3774 { 3775 // The next line must be the new RDN, and it must start with "newrdn:". 
3776 StringBuilder line = iterator.next(); 3777 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3778 int colonPos = line.indexOf(":"); 3779 if ((colonPos < 0) || 3780 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn"))) 3781 { 3782 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get( 3783 firstLineNumber), 3784 firstLineNumber, true, ldifLines, null); 3785 } 3786 3787 final String newRDN; 3788 int length = line.length(); 3789 if (length == (colonPos+1)) 3790 { 3791 // The colon was the last character on the line. This is not acceptable. 3792 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3793 firstLineNumber), 3794 firstLineNumber, true, ldifLines, null); 3795 } 3796 else if (line.charAt(colonPos+1) == ':') 3797 { 3798 // Skip over any spaces leading up to the value, and then the rest of the 3799 // string is the base64-encoded new RDN. 3800 int pos = colonPos+2; 3801 while ((pos < length) && (line.charAt(pos) == ' ')) 3802 { 3803 pos++; 3804 } 3805 3806 try 3807 { 3808 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3809 newRDN = new String(dnBytes, StandardCharsets.UTF_8); 3810 } 3811 catch (final ParseException pe) 3812 { 3813 debugException(pe); 3814 throw new LDIFException( 3815 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3816 pe.getMessage()), 3817 firstLineNumber, true, ldifLines, pe); 3818 } 3819 catch (final Exception e) 3820 { 3821 debugException(e); 3822 throw new LDIFException( 3823 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3824 e), 3825 firstLineNumber, true, ldifLines, e); 3826 } 3827 } 3828 else 3829 { 3830 // Skip over any spaces leading up to the value, and then the rest of the 3831 // string is the new RDN. 
3832 int pos = colonPos+1; 3833 while ((pos < length) && (line.charAt(pos) == ' ')) 3834 { 3835 pos++; 3836 } 3837 3838 newRDN = line.substring(pos); 3839 } 3840 3841 if (newRDN.length() == 0) 3842 { 3843 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3844 firstLineNumber), 3845 firstLineNumber, true, ldifLines, null); 3846 } 3847 3848 3849 // The next line must be the deleteOldRDN flag, and it must start with 3850 // 'deleteoldrdn:'. 3851 if (! iterator.hasNext()) 3852 { 3853 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3854 firstLineNumber), 3855 firstLineNumber, true, ldifLines, null); 3856 } 3857 3858 line = iterator.next(); 3859 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3860 colonPos = line.indexOf(":"); 3861 if ((colonPos < 0) || 3862 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn"))) 3863 { 3864 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3865 firstLineNumber), 3866 firstLineNumber, true, ldifLines, null); 3867 } 3868 3869 final String deleteOldRDNStr; 3870 length = line.length(); 3871 if (length == (colonPos+1)) 3872 { 3873 // The colon was the last character on the line. This is not acceptable. 3874 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get( 3875 firstLineNumber), 3876 firstLineNumber, true, ldifLines, null); 3877 } 3878 else if (line.charAt(colonPos+1) == ':') 3879 { 3880 // Skip over any spaces leading up to the value, and then the rest of the 3881 // string is the base64-encoded value. This is unusual and 3882 // unnecessary, but is nevertheless acceptable. 
3883 int pos = colonPos+2; 3884 while ((pos < length) && (line.charAt(pos) == ' ')) 3885 { 3886 pos++; 3887 } 3888 3889 try 3890 { 3891 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 3892 deleteOldRDNStr = new String(changeTypeBytes, StandardCharsets.UTF_8); 3893 } 3894 catch (final ParseException pe) 3895 { 3896 debugException(pe); 3897 throw new LDIFException( 3898 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3899 firstLineNumber, pe.getMessage()), 3900 firstLineNumber, true, ldifLines, pe); 3901 } 3902 catch (final Exception e) 3903 { 3904 debugException(e); 3905 throw new LDIFException( 3906 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3907 firstLineNumber, e), 3908 firstLineNumber, true, ldifLines, e); 3909 } 3910 } 3911 else 3912 { 3913 // Skip over any spaces leading up to the value, and then the rest of the 3914 // string is the value. 3915 int pos = colonPos+1; 3916 while ((pos < length) && (line.charAt(pos) == ' ')) 3917 { 3918 pos++; 3919 } 3920 3921 deleteOldRDNStr = line.substring(pos); 3922 } 3923 3924 final boolean deleteOldRDN; 3925 if (deleteOldRDNStr.equals("0")) 3926 { 3927 deleteOldRDN = false; 3928 } 3929 else if (deleteOldRDNStr.equals("1")) 3930 { 3931 deleteOldRDN = true; 3932 } 3933 else if (deleteOldRDNStr.equalsIgnoreCase("false") || 3934 deleteOldRDNStr.equalsIgnoreCase("no")) 3935 { 3936 // This is technically illegal, but we'll allow it. 3937 deleteOldRDN = false; 3938 } 3939 else if (deleteOldRDNStr.equalsIgnoreCase("true") || 3940 deleteOldRDNStr.equalsIgnoreCase("yes")) 3941 { 3942 // This is also technically illegal, but we'll allow it. 3943 deleteOldRDN = false; 3944 } 3945 else 3946 { 3947 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get( 3948 deleteOldRDNStr, firstLineNumber), 3949 firstLineNumber, true, ldifLines, null); 3950 } 3951 3952 3953 // If there is another line, then it must be the new superior DN and it must 3954 // start with "newsuperior:". 
If this is absent, then it's fine. 3955 final String newSuperiorDN; 3956 if (iterator.hasNext()) 3957 { 3958 line = iterator.next(); 3959 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3960 colonPos = line.indexOf(":"); 3961 if ((colonPos < 0) || 3962 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior"))) 3963 { 3964 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get( 3965 firstLineNumber), 3966 firstLineNumber, true, ldifLines, null); 3967 } 3968 3969 length = line.length(); 3970 if (length == (colonPos+1)) 3971 { 3972 // The colon was the last character on the line. This is fine. 3973 newSuperiorDN = ""; 3974 } 3975 else if (line.charAt(colonPos+1) == ':') 3976 { 3977 // Skip over any spaces leading up to the value, and then the rest of 3978 // the string is the base64-encoded new superior DN. 3979 int pos = colonPos+2; 3980 while ((pos < length) && (line.charAt(pos) == ' ')) 3981 { 3982 pos++; 3983 } 3984 3985 try 3986 { 3987 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3988 newSuperiorDN = new String(dnBytes, StandardCharsets.UTF_8); 3989 } 3990 catch (final ParseException pe) 3991 { 3992 debugException(pe); 3993 throw new LDIFException( 3994 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 3995 firstLineNumber, pe.getMessage()), 3996 firstLineNumber, true, ldifLines, pe); 3997 } 3998 catch (final Exception e) 3999 { 4000 debugException(e); 4001 throw new LDIFException( 4002 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 4003 firstLineNumber, e), 4004 firstLineNumber, true, ldifLines, e); 4005 } 4006 } 4007 else 4008 { 4009 // Skip over any spaces leading up to the value, and then the rest of 4010 // the string is the new superior DN. 
4011 int pos = colonPos+1; 4012 while ((pos < length) && (line.charAt(pos) == ' ')) 4013 { 4014 pos++; 4015 } 4016 4017 newSuperiorDN = line.substring(pos); 4018 } 4019 } 4020 else 4021 { 4022 newSuperiorDN = null; 4023 } 4024 4025 4026 // There must not be any more lines. 4027 if (iterator.hasNext()) 4028 { 4029 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber), 4030 firstLineNumber, true, ldifLines, null); 4031 } 4032 4033 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN, 4034 newSuperiorDN, controls); 4035 } 4036 4037 4038 4039 /** 4040 * Examines the line contained in the provided buffer to determine whether it 4041 * may contain one or more illegal trailing spaces. If it does, then those 4042 * spaces will either be stripped out or an exception will be thrown to 4043 * indicate that they are illegal. 4044 * 4045 * @param buffer The buffer to be examined. 4046 * @param dn The DN of the LDIF record being parsed. It 4047 * may be {@code null} if the DN is not yet 4048 * known (e.g., because the provided line is 4049 * expected to contain that DN). 4050 * @param firstLineNumber The approximate line number in the LDIF 4051 * source on which the LDIF record begins. 4052 * @param trailingSpaceBehavior The behavior that should be exhibited when 4053 * encountering attribute values which are not 4054 * base64-encoded but contain trailing spaces. 4055 * 4056 * @throws LDIFException If the line contained in the provided buffer ends 4057 * with one or more illegal trailing spaces and 4058 * {@code stripTrailingSpaces} was provided with a 4059 * value of {@code false}. 
4060 */ 4061 private static void handleTrailingSpaces(final StringBuilder buffer, 4062 final String dn, final long firstLineNumber, 4063 final TrailingSpaceBehavior trailingSpaceBehavior) 4064 throws LDIFException 4065 { 4066 int pos = buffer.length() - 1; 4067 boolean trailingFound = false; 4068 while ((pos >= 0) && (buffer.charAt(pos) == ' ')) 4069 { 4070 trailingFound = true; 4071 pos--; 4072 } 4073 4074 if (trailingFound && (buffer.charAt(pos) != ':')) 4075 { 4076 switch (trailingSpaceBehavior) 4077 { 4078 case STRIP: 4079 buffer.setLength(pos+1); 4080 break; 4081 4082 case REJECT: 4083 if (dn == null) 4084 { 4085 throw new LDIFException( 4086 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber, 4087 buffer.toString()), 4088 firstLineNumber, true); 4089 } 4090 else 4091 { 4092 throw new LDIFException( 4093 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn, 4094 firstLineNumber, buffer.toString()), 4095 firstLineNumber, true); 4096 } 4097 4098 case RETAIN: 4099 default: 4100 // No action will be taken. 4101 break; 4102 } 4103 } 4104 } 4105 4106 4107 4108 /** 4109 * This represents an unparsed LDIFRecord. It stores the line number of the 4110 * first line of the record and each line of the record. 4111 */ 4112 private static final class UnparsedLDIFRecord 4113 { 4114 private final ArrayList<StringBuilder> lineList; 4115 private final long firstLineNumber; 4116 private final Exception failureCause; 4117 private final boolean isEOF; 4118 private final DuplicateValueBehavior duplicateValueBehavior; 4119 private final Schema schema; 4120 private final TrailingSpaceBehavior trailingSpaceBehavior; 4121 4122 4123 4124 /** 4125 * Constructor. 4126 * 4127 * @param lineList The lines that comprise the LDIF record. 4128 * @param duplicateValueBehavior The behavior to exhibit if the entry 4129 * contains duplicate attribute values. 
4130 * @param trailingSpaceBehavior Specifies the behavior to exhibit when 4131 * encountering trailing spaces in 4132 * non-base64-encoded attribute values. 4133 * @param schema The schema to use when parsing, if 4134 * applicable. 4135 * @param firstLineNumber The first line number of the LDIF record. 4136 */ 4137 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList, 4138 final DuplicateValueBehavior duplicateValueBehavior, 4139 final TrailingSpaceBehavior trailingSpaceBehavior, 4140 final Schema schema, final long firstLineNumber) 4141 { 4142 this.lineList = lineList; 4143 this.firstLineNumber = firstLineNumber; 4144 this.duplicateValueBehavior = duplicateValueBehavior; 4145 this.trailingSpaceBehavior = trailingSpaceBehavior; 4146 this.schema = schema; 4147 4148 failureCause = null; 4149 isEOF = 4150 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty()); 4151 } 4152 4153 4154 4155 /** 4156 * Constructor. 4157 * 4158 * @param failureCause The Exception thrown when reading from the input. 4159 */ 4160 private UnparsedLDIFRecord(final Exception failureCause) 4161 { 4162 this.failureCause = failureCause; 4163 4164 lineList = null; 4165 firstLineNumber = 0; 4166 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 4167 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 4168 schema = null; 4169 isEOF = false; 4170 } 4171 4172 4173 4174 /** 4175 * Return the lines that comprise the LDIF record. 4176 * 4177 * @return The lines that comprise the LDIF record. 4178 */ 4179 private ArrayList<StringBuilder> getLineList() 4180 { 4181 return lineList; 4182 } 4183 4184 4185 4186 /** 4187 * Retrieves the behavior to exhibit when encountering duplicate attribute 4188 * values. 4189 * 4190 * @return The behavior to exhibit when encountering duplicate attribute 4191 * values. 
4192 */ 4193 private DuplicateValueBehavior getDuplicateValueBehavior() 4194 { 4195 return duplicateValueBehavior; 4196 } 4197 4198 4199 4200 /** 4201 * Retrieves the behavior that should be exhibited when encountering 4202 * attribute values which are not base64-encoded but contain trailing 4203 * spaces. The LDIF specification strongly recommends that any value which 4204 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK 4205 * LDIF parser may be configured to automatically strip these spaces, to 4206 * preserve them, or to reject any entry or change record containing them. 4207 * 4208 * @return The behavior that should be exhibited when encountering 4209 * attribute values which are not base64-encoded but contain 4210 * trailing spaces. 4211 */ 4212 private TrailingSpaceBehavior getTrailingSpaceBehavior() 4213 { 4214 return trailingSpaceBehavior; 4215 } 4216 4217 4218 4219 /** 4220 * Retrieves the schema that should be used when parsing the record, if 4221 * applicable. 4222 * 4223 * @return The schema that should be used when parsing the record, or 4224 * {@code null} if none should be used. 4225 */ 4226 private Schema getSchema() 4227 { 4228 return schema; 4229 } 4230 4231 4232 4233 /** 4234 * Return the first line number of the LDIF record. 4235 * 4236 * @return The first line number of the LDIF record. 4237 */ 4238 private long getFirstLineNumber() 4239 { 4240 return firstLineNumber; 4241 } 4242 4243 4244 4245 /** 4246 * Return {@code true} iff the end of the input was reached. 4247 * 4248 * @return {@code true} iff the end of the input was reached. 4249 */ 4250 private boolean isEOF() 4251 { 4252 return isEOF; 4253 } 4254 4255 4256 4257 /** 4258 * Returns the reason that reading the record lines failed. This normally 4259 * is only non-null if something bad happened to the input stream (like 4260 * a disk read error). 4261 * 4262 * @return The reason that reading the record lines failed. 
4263 */ 4264 private Exception getFailureCause() 4265 { 4266 return failureCause; 4267 } 4268 } 4269 4270 4271 /** 4272 * When processing in asynchronous mode, this thread is responsible for 4273 * reading the raw unparsed records from the input and submitting them for 4274 * processing. 4275 */ 4276 private final class LineReaderThread 4277 extends Thread 4278 { 4279 /** 4280 * Constructor. 4281 */ 4282 private LineReaderThread() 4283 { 4284 super("Asynchronous LDIF line reader"); 4285 setDaemon(true); 4286 } 4287 4288 4289 4290 /** 4291 * Reads raw, unparsed records from the input and submits them for 4292 * processing until the input is finished or closed. 4293 */ 4294 @Override() 4295 public void run() 4296 { 4297 try 4298 { 4299 boolean stopProcessing = false; 4300 while (!stopProcessing) 4301 { 4302 UnparsedLDIFRecord unparsedRecord = null; 4303 try 4304 { 4305 unparsedRecord = readUnparsedRecord(); 4306 } 4307 catch (final IOException e) 4308 { 4309 debugException(e); 4310 unparsedRecord = new UnparsedLDIFRecord(e); 4311 stopProcessing = true; 4312 } 4313 catch (final Exception e) 4314 { 4315 debugException(e); 4316 unparsedRecord = new UnparsedLDIFRecord(e); 4317 } 4318 4319 try 4320 { 4321 asyncParser.submit(unparsedRecord); 4322 } 4323 catch (final InterruptedException e) 4324 { 4325 debugException(e); 4326 // If this thread is interrupted, then someone wants us to stop 4327 // processing, so that's what we'll do. 4328 Thread.currentThread().interrupt(); 4329 stopProcessing = true; 4330 } 4331 4332 if ((unparsedRecord == null) || (unparsedRecord.isEOF())) 4333 { 4334 stopProcessing = true; 4335 } 4336 } 4337 } 4338 finally 4339 { 4340 try 4341 { 4342 asyncParser.shutdown(); 4343 } 4344 catch (final InterruptedException e) 4345 { 4346 debugException(e); 4347 Thread.currentThread().interrupt(); 4348 } 4349 finally 4350 { 4351 asyncParsingComplete.set(true); 4352 } 4353 } 4354 } 4355 } 4356 4357 4358 4359 /** 4360 * Used to parse Records asynchronously. 
4361 */ 4362 private final class RecordParser implements Processor<UnparsedLDIFRecord, 4363 LDIFRecord> 4364 { 4365 /** 4366 * {@inheritDoc} 4367 */ 4368 @Override() 4369 public LDIFRecord process(final UnparsedLDIFRecord input) 4370 throws LDIFException 4371 { 4372 LDIFRecord record = decodeRecord(input, relativeBasePath, schema); 4373 4374 if ((record instanceof Entry) && (entryTranslator != null)) 4375 { 4376 record = entryTranslator.translate((Entry) record, 4377 input.getFirstLineNumber()); 4378 4379 if (record == null) 4380 { 4381 record = SKIP_ENTRY; 4382 } 4383 } 4384 if ((record instanceof LDIFChangeRecord) && 4385 (changeRecordTranslator != null)) 4386 { 4387 record = changeRecordTranslator.translate((LDIFChangeRecord) record, 4388 input.getFirstLineNumber()); 4389 4390 if (record == null) 4391 { 4392 record = SKIP_ENTRY; 4393 } 4394 } 4395 return record; 4396 } 4397 } 4398}