001/* 002 * Copyright 2016-2019 Ping Identity Corporation 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright (C) 2016-2019 Ping Identity Corporation 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021package com.unboundid.ldap.sdk.unboundidds.tools; 022 023 024 025import java.io.File; 026import java.io.FileOutputStream; 027import java.io.InputStream; 028import java.io.IOException; 029import java.io.OutputStream; 030import java.util.ArrayList; 031import java.util.Collections; 032import java.util.LinkedHashMap; 033import java.util.LinkedHashSet; 034import java.util.List; 035import java.util.Map; 036import java.util.Set; 037import java.util.TreeMap; 038import java.util.concurrent.atomic.AtomicLong; 039import java.util.zip.GZIPOutputStream; 040 041import com.unboundid.ldap.sdk.Filter; 042import com.unboundid.ldap.sdk.LDAPException; 043import com.unboundid.ldap.sdk.ResultCode; 044import com.unboundid.ldap.sdk.Version; 045import com.unboundid.ldap.sdk.schema.Schema; 046import com.unboundid.ldif.LDIFException; 047import com.unboundid.ldif.LDIFReader; 048import com.unboundid.util.ByteStringBuffer; 049import com.unboundid.util.CommandLineTool; 050import com.unboundid.util.Debug; 051import com.unboundid.util.ObjectPair; 052import com.unboundid.util.PassphraseEncryptedOutputStream; 053import com.unboundid.util.StaticUtils; 
import com.unboundid.util.ThreadSafety;
import com.unboundid.util.ThreadSafetyLevel;
import com.unboundid.util.args.ArgumentException;
import com.unboundid.util.args.ArgumentParser;
import com.unboundid.util.args.BooleanArgument;
import com.unboundid.util.args.DNArgument;
import com.unboundid.util.args.FileArgument;
import com.unboundid.util.args.FilterArgument;
import com.unboundid.util.args.IntegerArgument;
import com.unboundid.util.args.SubCommand;
import com.unboundid.util.args.StringArgument;

import static com.unboundid.ldap.sdk.unboundidds.tools.ToolMessages.*;



/**
 * This class provides a command-line tool that can be used to split an LDIF
 * file below a specified base DN.  This can be used to help initialize an
 * entry-balancing deployment for use with the Directory Proxy Server.
 * <BR>
 * <BLOCKQUOTE>
 *   <B>NOTE:</B>  This class, and other classes within the
 *   {@code com.unboundid.ldap.sdk.unboundidds} package structure, are only
 *   supported for use against Ping Identity, UnboundID, and
 *   Nokia/Alcatel-Lucent 8661 server products.  These classes provide support
 *   for proprietary functionality or for external specifications that are not
 *   considered stable or mature enough to be guaranteed to work in an
 *   interoperable way with other types of LDAP servers.
 * </BLOCKQUOTE>
 * <BR>
 * It supports a number of algorithms for determining how to split the data,
 * including:
 * <UL>
 *   <LI>
 *     split-using-hash-on-rdn -- The tool will compute a digest of the DN
 *     component that is immediately below the split base DN, and will use a
 *     modulus to select a backend set for a given entry.  Since the split is
 *     based purely on computation involving the DN, there is no need for
 *     caching to ensure that children are placed in the same sets as their
 *     parent, which allows it to run effectively with a small memory
 *     footprint.
 *   </LI>
 *   <LI>
 *     split-using-hash-on-attribute -- The tool will compute a digest of the
 *     value(s) of a specified attribute, and will use a modulus to select a
 *     backend set for a given entry.  This hash will only be computed for
 *     entries immediately below the split base DN, and a cache will be used
 *     to ensure that entries more than one level below the split base DN are
 *     placed in the same backend set as their parent.
 *   </LI>
 *   <LI>
 *     split-using-fewest-entries -- When examining an entry immediately below
 *     the split base DN, the tool will place that entry in the set that has
 *     the fewest entries.  For flat DITs in which entries only exist one
 *     level below the split base DN, this will effectively ensure a
 *     round-robin distribution.  But for cases in which there are branches of
 *     varying sizes below the split base DN, this can help ensure that
 *     entries are more evenly distributed across backend sets.  A cache will
 *     be used to ensure that entries more than one level below the split base
 *     DN are placed in the same backend set as their parent.
 *   </LI>
 *   <LI>
 *     split-using-filter -- When examining an entry immediately below the
 *     split base DN, a series of filters will be evaluated against that
 *     entry, with each filter associated with a specific backend set.  If an
 *     entry doesn't match any of the provided filters, an RDN hash can be
 *     used to select the set.  A cache will be used to ensure that entries
 *     more than one level below the split base DN are placed in the same
 *     backend set as their parent.
 *   </LI>
 * </UL>
 */
@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
public final class SplitLDIF
       extends CommandLineTool
{
  /**
   * The maximum length of any message to write to standard output or standard
   * error.
   */
  private static final int MAX_OUTPUT_LINE_LENGTH =
       StaticUtils.TERMINAL_WIDTH_COLUMNS - 1;



  // The global arguments used by this tool.  All of these fields are created
  // and registered in addToolArguments.
  private BooleanArgument addEntriesOutsideSplitBaseDNToAllSets = null;
  private BooleanArgument addEntriesOutsideSplitBaseDNToDedicatedSet = null;
  private BooleanArgument compressTarget = null;
  private BooleanArgument encryptTarget = null;
  private BooleanArgument sourceCompressed = null;
  private DNArgument splitBaseDN = null;
  private FileArgument encryptionPassphraseFile = null;
  private FileArgument schemaPath = null;
  private FileArgument sourceLDIF = null;
  private FileArgument targetLDIFBasePath = null;
  private IntegerArgument numThreads = null;

  // The arguments used to split using a hash of the RDN.
  private IntegerArgument splitUsingHashOnRDNNumSets = null;
  private SubCommand splitUsingHashOnRDN = null;

  // The arguments used to split using a hash on a specified attribute.
  private BooleanArgument splitUsingHashOnAttributeAssumeFlatDIT = null;
  private BooleanArgument splitUsingHashOnAttributeUseAllValues = null;
  private IntegerArgument splitUsingHashOnAttributeNumSets = null;
  private StringArgument splitUsingHashOnAttributeAttributeName = null;
  private SubCommand splitUsingHashOnAttribute = null;

  // The arguments used to choose the set with the fewest entries.
  private BooleanArgument splitUsingFewestEntriesAssumeFlatDIT = null;
  private IntegerArgument splitUsingFewestEntriesNumSets = null;
  private SubCommand splitUsingFewestEntries = null;

  // The arguments used to choose the set using a provided set of filters.
  private BooleanArgument splitUsingFilterAssumeFlatDIT = null;
  private FilterArgument splitUsingFilterFilter = null;
  private SubCommand splitUsingFilter = null;



  /**
   * Runs the tool with the provided set of command-line arguments.
177 * 178 * @param args The command-line arguments provided to this tool. 179 */ 180 public static void main(final String... args) 181 { 182 final ResultCode resultCode = main(System.out, System.err, args); 183 if (resultCode != ResultCode.SUCCESS) 184 { 185 System.exit(resultCode.intValue()); 186 } 187 } 188 189 190 191 /** 192 * Runs the tool with the provided set of command-line arguments. 193 * 194 * @param out The output stream used for standard output. It may be 195 * {@code null} if standard output should be suppressed. 196 * @param err The output stream used for standard error. It may be 197 * {@code null} if standard error should be suppressed. 198 * @param args The command-line arguments provided to this tool. 199 * 200 * @return A result code with information about the processing performed. 201 * Any result code other than {@link ResultCode#SUCCESS} indicates 202 * that an error occurred. 203 */ 204 public static ResultCode main(final OutputStream out, final OutputStream err, 205 final String... args) 206 { 207 final SplitLDIF tool = new SplitLDIF(out, err); 208 return tool.runTool(args); 209 } 210 211 212 213 /** 214 * Creates a new instance of this tool with the provided information. 215 * 216 * @param out The output stream used for standard output. It may be 217 * {@code null} if standard output should be suppressed. 218 * @param err The output stream used for standard error. It may be 219 * {@code null} if standard error should be suppressed. 
   */
  public SplitLDIF(final OutputStream out, final OutputStream err)
  {
    super(out, err);
    // The argument fields declared above are populated later, when the
    // framework invokes addToolArguments.
  }



  /**
   * Retrieves the name of this tool, as it is invoked on the command line.
   *
   * @return  The name for this tool.
   */
  @Override()
  public String getToolName()
  {
    return "split-ldif";
  }



  /**
   * Retrieves a human-readable description for this tool.
   *
   * @return  A human-readable description for this tool.
   */
  @Override()
  public String getToolDescription()
  {
    return INFO_SPLIT_LDIF_TOOL_DESCRIPTION.get();
  }



  /**
   * Retrieves the version string for this tool.
   *
   * @return  The version string for this tool.
   */
  @Override()
  public String getToolVersion()
  {
    return Version.NUMERIC_VERSION_STRING;
  }



  /**
   * Indicates whether this tool supports interactive mode.
   *
   * @return  {@code true}, to indicate that interactive mode is supported.
   */
  @Override()
  public boolean supportsInteractiveMode()
  {
    return true;
  }



  /**
   * Indicates whether this tool defaults to interactive mode.
   *
   * @return  {@code true}, to indicate that the tool defaults to interactive
   *          mode.
   */
  @Override()
  public boolean defaultsToInteractiveMode()
  {
    return true;
  }



  /**
   * Indicates whether this tool supports reading argument values from a
   * properties file.
   *
   * @return  {@code true}, to indicate that properties files are supported.
   */
  @Override()
  public boolean supportsPropertiesFile()
  {
    return true;
  }



  /**
   * Adds the global arguments and the per-algorithm subcommands (each with
   * its own arguments) to the provided argument parser.
   *
   * @param  parser  The argument parser to which the arguments and
   *                 subcommands should be added.
   *
   * @throws  ArgumentException  If a problem occurs while registering any of
   *                             the arguments or subcommands.
   */
  @Override()
  public void addToolArguments(final ArgumentParser parser)
         throws ArgumentException
  {
    // Add the global arguments.
    sourceLDIF = new FileArgument('l', "sourceLDIF", true, 0, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_LDIF.get(), true, false, true,
         false);
    sourceLDIF.addLongIdentifier("inputLDIF", true);
    sourceLDIF.addLongIdentifier("source-ldif", true);
    sourceLDIF.addLongIdentifier("input-ldif", true);
    parser.addArgument(sourceLDIF);

    sourceCompressed = new BooleanArgument('C', "sourceCompressed",
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_COMPRESSED.get());
    sourceCompressed.addLongIdentifier("inputCompressed", true);
    sourceCompressed.addLongIdentifier("source-compressed", true);
    sourceCompressed.addLongIdentifier("input-compressed", true);
    parser.addArgument(sourceCompressed);

    targetLDIFBasePath = new FileArgument('o', "targetLDIFBasePath", false, 1,
         null, INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_TARGET_LDIF_BASE.get(), false,
         true, true, false);
    targetLDIFBasePath.addLongIdentifier("outputLDIFBasePath", true);
    targetLDIFBasePath.addLongIdentifier("target-ldif-base-path", true);
    targetLDIFBasePath.addLongIdentifier("output-ldif-base-path", true);
    parser.addArgument(targetLDIFBasePath);

    compressTarget = new BooleanArgument('c', "compressTarget",
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_COMPRESS_TARGET.get());
    compressTarget.addLongIdentifier("compressOutput", true);
    compressTarget.addLongIdentifier("compress", true);
    compressTarget.addLongIdentifier("compress-target", true);
    compressTarget.addLongIdentifier("compress-output", true);
    parser.addArgument(compressTarget);

    encryptTarget = new BooleanArgument(null, "encryptTarget",
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_TARGET.get());
    encryptTarget.addLongIdentifier("encryptOutput", true);
    encryptTarget.addLongIdentifier("encrypt", true);
    encryptTarget.addLongIdentifier("encrypt-target", true);
    encryptTarget.addLongIdentifier("encrypt-output", true);
    parser.addArgument(encryptTarget);

    encryptionPassphraseFile = new FileArgument(null,
         "encryptionPassphraseFile", false, 1, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_PW_FILE.get(), true, true,
         true, false);
    encryptionPassphraseFile.addLongIdentifier("encryptionPasswordFile", true);
    encryptionPassphraseFile.addLongIdentifier("encryption-passphrase-file",
         true);
    encryptionPassphraseFile.addLongIdentifier("encryption-password-file",
         true);
    parser.addArgument(encryptionPassphraseFile);

    splitBaseDN = new DNArgument('b', "splitBaseDN", true, 1, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SPLIT_BASE_DN.get());
    splitBaseDN.addLongIdentifier("baseDN", true);
    splitBaseDN.addLongIdentifier("split-base-dn", true);
    splitBaseDN.addLongIdentifier("base-dn", true);
    parser.addArgument(splitBaseDN);

    addEntriesOutsideSplitBaseDNToAllSets = new BooleanArgument(null,
         "addEntriesOutsideSplitBaseDNToAllSets", 1,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_ALL_SETS.get());
    addEntriesOutsideSplitBaseDNToAllSets.addLongIdentifier(
         "add-entries-outside-split-base-dn-to-all-sets", true);
    parser.addArgument(addEntriesOutsideSplitBaseDNToAllSets);

    addEntriesOutsideSplitBaseDNToDedicatedSet = new BooleanArgument(null,
         "addEntriesOutsideSplitBaseDNToDedicatedSet", 1,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_DEDICATED_SET.get());
    addEntriesOutsideSplitBaseDNToDedicatedSet.addLongIdentifier(
         "add-entries-outside-split-base-dn-to-dedicated-set", true);
    parser.addArgument(addEntriesOutsideSplitBaseDNToDedicatedSet);

    schemaPath = new FileArgument(null, "schemaPath", false, 0, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SCHEMA_PATH.get(), true, false, false,
         false);
    schemaPath.addLongIdentifier("schemaFile", true);
    schemaPath.addLongIdentifier("schemaDirectory", true);
    schemaPath.addLongIdentifier("schema-path", true);
    schemaPath.addLongIdentifier("schema-file", true);
    schemaPath.addLongIdentifier("schema-directory", true);
    parser.addArgument(schemaPath);

    numThreads = new IntegerArgument('t', "numThreads", false, 1, null,
         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_NUM_THREADS.get(), 1,
         Integer.MAX_VALUE, 1);
    numThreads.addLongIdentifier("num-threads", true);
    parser.addArgument(numThreads);


    // Add the subcommand used to split entries using a hash on the RDN.
    final ArgumentParser splitUsingHashOnRDNParser = new ArgumentParser(
         "split-using-hash-on-rdn", INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get());

    splitUsingHashOnRDNNumSets = new IntegerArgument(null, "numSets", true, 1,
         null, INFO_SPLIT_LDIF_SC_HASH_ON_RDN_ARG_DESC_NUM_SETS.get(), 2,
         Integer.MAX_VALUE);
    splitUsingHashOnRDNNumSets.addLongIdentifier("num-sets", true);
    splitUsingHashOnRDNParser.addArgument(splitUsingHashOnRDNNumSets);

    // Register an example invocation for this subcommand.
    final LinkedHashMap<String[],String> splitUsingHashOnRDNExamples =
         new LinkedHashMap<>(StaticUtils.computeMapCapacity(1));
    splitUsingHashOnRDNExamples.put(
         new String[]
         {
           "split-using-hash-on-rdn",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--numSets", "4",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_EXAMPLE.get());

    splitUsingHashOnRDN = new SubCommand("split-using-hash-on-rdn",
         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get(), splitUsingHashOnRDNParser,
         splitUsingHashOnRDNExamples);
    splitUsingHashOnRDN.addName("hash-on-rdn", true);

    parser.addSubCommand(splitUsingHashOnRDN);


    // Add the subcommand used to split entries using a hash on a specified
    // attribute.
    final ArgumentParser splitUsingHashOnAttributeParser = new ArgumentParser(
         "split-using-hash-on-attribute",
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get());

    splitUsingHashOnAttributeAttributeName = new StringArgument(null,
         "attributeName", true, 1, "{attr}",
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ATTR_NAME.get());
    splitUsingHashOnAttributeAttributeName.addLongIdentifier("attribute-name",
         true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeAttributeName);

    splitUsingHashOnAttributeNumSets = new IntegerArgument(null, "numSets",
         true, 1, null, INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_NUM_SETS.get(),
         2, Integer.MAX_VALUE);
    splitUsingHashOnAttributeNumSets.addLongIdentifier("num-sets", true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeNumSets);

    splitUsingHashOnAttributeUseAllValues = new BooleanArgument(null,
         "useAllValues", 1,
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ALL_VALUES.get());
    splitUsingHashOnAttributeUseAllValues.addLongIdentifier("use-all-values",
         true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeUseAllValues);

    splitUsingHashOnAttributeAssumeFlatDIT = new BooleanArgument(null,
         "assumeFlatDIT", 1,
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ASSUME_FLAT_DIT.get());
    splitUsingHashOnAttributeAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
         true);
    splitUsingHashOnAttributeParser.addArgument(
         splitUsingHashOnAttributeAssumeFlatDIT);

    // Register an example invocation for this subcommand.
    final LinkedHashMap<String[],String> splitUsingHashOnAttributeExamples =
         new LinkedHashMap<>(StaticUtils.computeMapCapacity(1));
    splitUsingHashOnAttributeExamples.put(
         new String[]
         {
           "split-using-hash-on-attribute",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--attributeName", "uid",
           "--numSets", "4",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_EXAMPLE.get());

    splitUsingHashOnAttribute = new SubCommand("split-using-hash-on-attribute",
         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get(),
         splitUsingHashOnAttributeParser, splitUsingHashOnAttributeExamples);
    splitUsingHashOnAttribute.addName("hash-on-attribute", true);

    parser.addSubCommand(splitUsingHashOnAttribute);


    // Add the subcommand used to split entries by selecting the set with the
    // fewest entries.
    final ArgumentParser splitUsingFewestEntriesParser = new ArgumentParser(
         "split-using-fewest-entries",
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get());

    splitUsingFewestEntriesNumSets = new IntegerArgument(null, "numSets",
         true, 1, null,
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_NUM_SETS.get(),
         2, Integer.MAX_VALUE);
    splitUsingFewestEntriesNumSets.addLongIdentifier("num-sets", true);
    splitUsingFewestEntriesParser.addArgument(splitUsingFewestEntriesNumSets);

    splitUsingFewestEntriesAssumeFlatDIT = new BooleanArgument(null,
         "assumeFlatDIT", 1,
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_ASSUME_FLAT_DIT.get());
    splitUsingFewestEntriesAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
         true);
    splitUsingFewestEntriesParser.addArgument(
         splitUsingFewestEntriesAssumeFlatDIT);

    // Register an example invocation for this subcommand.
    final LinkedHashMap<String[],String> splitUsingFewestEntriesExamples =
         new LinkedHashMap<>(StaticUtils.computeMapCapacity(1));
    splitUsingFewestEntriesExamples.put(
         new String[]
         {
           "split-using-fewest-entries",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--numSets", "4",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_EXAMPLE.get());

    splitUsingFewestEntries = new SubCommand("split-using-fewest-entries",
         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get(),
         splitUsingFewestEntriesParser, splitUsingFewestEntriesExamples);
    splitUsingFewestEntries.addName("fewest-entries", true);

    parser.addSubCommand(splitUsingFewestEntries);


    // Add the subcommand used to split entries by selecting the set based on a
    // filter.
    final ArgumentParser splitUsingFilterParser = new ArgumentParser(
         "split-using-filter", INFO_SPLIT_LDIF_SC_FILTER_DESC.get());

    splitUsingFilterFilter = new FilterArgument(null, "filter", true, 0, null,
         INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_FILTER.get());
    splitUsingFilterParser.addArgument(splitUsingFilterFilter);

    splitUsingFilterAssumeFlatDIT = new BooleanArgument(null, "assumeFlatDIT",
         1, INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_ASSUME_FLAT_DIT.get());
    splitUsingFilterAssumeFlatDIT.addLongIdentifier("assume-flat-dit", true);
    splitUsingFilterParser.addArgument(splitUsingFilterAssumeFlatDIT);

    // Register an example invocation for this subcommand.
    final LinkedHashMap<String[],String> splitUsingFilterExamples =
         new LinkedHashMap<>(StaticUtils.computeMapCapacity(1));
    splitUsingFilterExamples.put(
         new String[]
         {
           "split-using-filter",
           "--sourceLDIF", "whole.ldif",
           "--targetLDIFBasePath", "split.ldif",
           "--splitBaseDN", "ou=People,dc=example,dc=com",
           "--filter", "(timeZone=Eastern)",
           "--filter", "(timeZone=Central)",
           "--filter", "(timeZone=Mountain)",
           "--filter", "(timeZone=Pacific)",
           "--schemaPath", "config/schema",
           "--addEntriesOutsideSplitBaseDNToAllSets"
         },
         INFO_SPLIT_LDIF_SC_FILTER_EXAMPLE.get());

    splitUsingFilter = new SubCommand("split-using-filter",
         INFO_SPLIT_LDIF_SC_FILTER_DESC.get(),
         splitUsingFilterParser, splitUsingFilterExamples);
    splitUsingFilter.addName("filter", true);

    parser.addSubCommand(splitUsingFilter);
  }



  /**
   * Performs extended validation that can only be done after all argument
   * values have been parsed:  an explicit target base path is required when
   * multiple source LDIF files are given, and the split-using-filter
   * subcommand requires at least two logically distinct filters.
   *
   * @throws  ArgumentException  If any of the extended validation fails.
   */
  @Override()
  public void
doExtendedArgumentValidation()
         throws ArgumentException
  {
    // If multiple sourceLDIF values were provided, then a target LDIF base path
    // must have been given.
    final List<File> sourceLDIFValues = sourceLDIF.getValues();
    if (sourceLDIFValues.size() > 1)
    {
      if (! targetLDIFBasePath.isPresent())
      {
        throw new ArgumentException(ERR_SPLIT_LDIF_NO_TARGET_BASE_PATH.get(
             sourceLDIF.getIdentifierString(),
             targetLDIFBasePath.getIdentifierString()));
      }
    }


    // If the split-using-filter subcommand was provided, then at least two
    // filters must have been provided, and none of the filters can be logically
    // equivalent to any of the others.
    if (splitUsingFilter.isPresent())
    {
      final List<Filter> filterList = splitUsingFilterFilter.getValues();
      final Set<Filter> filterSet = new LinkedHashSet<>(
           StaticUtils.computeMapCapacity(filterList.size()));
      for (final Filter f : filterList)
      {
        // Filter.equals performs a logical comparison, so duplicates here are
        // logically-equivalent filters, not just textual duplicates.
        if (filterSet.contains(f))
        {
          throw new ArgumentException(ERR_SPLIT_LDIF_NON_UNIQUE_FILTER.get(
               splitUsingFilterFilter.getIdentifierString(), f.toString()));
        }
        else
        {
          filterSet.add(f);
        }
      }

      if (filterSet.size() < 2)
      {
        throw new ArgumentException(ERR_SPLIT_LDIF_NOT_ENOUGH_FILTERS.get(
             splitUsingFilter.getPrimaryName(),
             splitUsingFilterFilter.getIdentifierString()));
      }
    }
  }



  /**
   * Performs the core processing for this tool:  reads entries from the
   * source LDIF (or standard input), routes each entry to zero or more
   * backend sets using the translator for the selected subcommand, and
   * writes each set to its own output file.
   *
   * @return  A result code indicating whether the processing completed
   *          successfully.  Any result code other than
   *          {@link ResultCode#SUCCESS} indicates that an error occurred.
   */
  @Override()
  public ResultCode doToolProcessing()
  {
    // Get the schema to use during processing.
    final Schema schema;
    try
    {
      schema = getSchema();
    }
    catch (final LDAPException le)
    {
      wrapErr(0, MAX_OUTPUT_LINE_LENGTH, le.getMessage());
      return le.getResultCode();
    }


    // If an encryption passphrase file is provided, then get the passphrase
    // from it.
    String encryptionPassphrase = null;
    if (encryptionPassphraseFile.isPresent())
    {
      try
      {
        encryptionPassphrase = ToolUtils.readEncryptionPassphraseFromFile(
             encryptionPassphraseFile.getValue());
      }
      catch (final LDAPException e)
      {
        Debug.debugException(e);
        wrapErr(0, MAX_OUTPUT_LINE_LENGTH, e.getMessage());
        return e.getResultCode();
      }
    }


    // Figure out which subcommand was selected, and create the appropriate
    // translator to use to perform the processing.
    final SplitLDIFTranslator translator;
    if (splitUsingHashOnRDN.isPresent())
    {
      translator = new SplitLDIFRDNHashTranslator(splitBaseDN.getValue(),
           splitUsingHashOnRDNNumSets.getValue(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else if (splitUsingHashOnAttribute.isPresent())
    {
      translator = new SplitLDIFAttributeHashTranslator(splitBaseDN.getValue(),
           splitUsingHashOnAttributeNumSets.getValue(),
           splitUsingHashOnAttributeAttributeName.getValue(),
           splitUsingHashOnAttributeUseAllValues.isPresent(),
           splitUsingHashOnAttributeAssumeFlatDIT.isPresent(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else if (splitUsingFewestEntries.isPresent())
    {
      translator = new SplitLDIFFewestEntriesTranslator(splitBaseDN.getValue(),
           splitUsingFewestEntriesNumSets.getValue(),
           splitUsingFewestEntriesAssumeFlatDIT.isPresent(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else if (splitUsingFilter.isPresent())
    {
      final List<Filter> filterList = splitUsingFilterFilter.getValues();
      final LinkedHashSet<Filter> filterSet = new LinkedHashSet<>(
           StaticUtils.computeMapCapacity(filterList.size()));
      for (final Filter f : filterList)
      {
        filterSet.add(f);
      }

      translator = new SplitLDIFFilterTranslator(splitBaseDN.getValue(),
           schema, filterSet, splitUsingFilterAssumeFlatDIT.isPresent(),
           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
    }
    else
    {
      // This should never happen.
      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
           ERR_SPLIT_LDIF_CANNOT_DETERMINE_SPLIT_ALGORITHM.get(
                splitUsingHashOnRDN.getPrimaryName() + ", " +
                splitUsingHashOnAttribute.getPrimaryName() + ", " +
                splitUsingFewestEntries.getPrimaryName() + ", " +
                splitUsingFilter.getPrimaryName()));
      return ResultCode.PARAM_ERROR;
    }


    // Create the LDIF reader.  When no source LDIF files were given, entries
    // are read from standard input instead.
    final LDIFReader ldifReader;
    try
    {
      final InputStream inputStream;
      if (sourceLDIF.isPresent())
      {
        final ObjectPair<InputStream,String> p =
             ToolUtils.getInputStreamForLDIFFiles(sourceLDIF.getValues(),
                  encryptionPassphrase, getOut(), getErr());
        inputStream = p.getFirst();

        // If a passphrase was obtained while opening the source files, cache
        // it for reuse when encrypting the output files.
        if ((encryptionPassphrase == null) && (p.getSecond() != null))
        {
          encryptionPassphrase = p.getSecond();
        }
      }
      else
      {
        inputStream = System.in;
      }

      ldifReader = new LDIFReader(inputStream, numThreads.getValue(),
           translator);
      if (schema != null)
      {
        ldifReader.setSchema(schema);
      }
    }
    catch (final Exception e)
    {
      Debug.debugException(e);
      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
           ERR_SPLIT_LDIF_ERROR_CREATING_LDIF_READER.get(
                StaticUtils.getExceptionMessage(e)));
      return ResultCode.LOCAL_ERROR;
    }


    // Iterate through and process all of the entries.
    ResultCode resultCode = ResultCode.SUCCESS;
    final LinkedHashMap<String,OutputStream> outputStreams =
         new LinkedHashMap<>(StaticUtils.computeMapCapacity(10));
    try
    {
      final AtomicLong entriesRead = new AtomicLong(0L);
      final AtomicLong entriesExcluded = new AtomicLong(0L);
      final TreeMap<String,AtomicLong> fileCounts = new TreeMap<>();

      // The loop is labeled so that unrecoverable write failures in the
      // nested blocks below can exit the entire read loop.
readLoop:
      while (true)
      {
        final SplitLDIFEntry entry;
        try
        {
          entry = (SplitLDIFEntry) ldifReader.readEntry();
        }
        catch (final LDIFException le)
        {
          // A malformed LDIF record was encountered.  Record it in the
          // dedicated errors file, creating that file on first use with the
          // same open/encrypt/compress sequence used for the per-set files
          // below.
          Debug.debugException(le);
          resultCode = ResultCode.LOCAL_ERROR;

          final File f = getOutputFile(SplitLDIFEntry.SET_NAME_ERRORS);
          OutputStream s = outputStreams.get(SplitLDIFEntry.SET_NAME_ERRORS);
          if (s == null)
          {
            try
            {
              s = new FileOutputStream(f);

              if (encryptTarget.isPresent())
              {
                // Prompt for the passphrase only once; it is cached for all
                // subsequent output files.
                if (encryptionPassphrase == null)
                {
                  try
                  {
                    encryptionPassphrase =
                         ToolUtils.promptForEncryptionPassphrase(false, true,
                              getOut(), getErr());
                  }
                  catch (final LDAPException ex)
                  {
                    Debug.debugException(ex);
                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
                    return ex.getResultCode();
                  }
                }

                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
                     s);
              }

              if (compressTarget.isPresent())
              {
                s = new GZIPOutputStream(s);
              }

              outputStreams.put(SplitLDIFEntry.SET_NAME_ERRORS, s);
              fileCounts.put(SplitLDIFEntry.SET_NAME_ERRORS,
                   new AtomicLong(0L));
            }
            catch (final Exception e)
            {
              Debug.debugException(e);
              resultCode = ResultCode.LOCAL_ERROR;
              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
                        f.getAbsolutePath(),
                        StaticUtils.getExceptionMessage(e)));
              break readLoop;
            }
          }

          // Write the failure message and the offending LDIF lines (as LDIF
          // comments) to the errors file.
          final ByteStringBuffer buffer = new ByteStringBuffer();
          buffer.append("# ");
          buffer.append(le.getMessage());
          buffer.append(StaticUtils.EOL_BYTES);

          final List<String> dataLines = le.getDataLines();
          if (dataLines != null)
          {
            for (final String dataLine : dataLines)
            {
              buffer.append(dataLine);
              buffer.append(StaticUtils.EOL_BYTES);
            }
          }

          buffer.append(StaticUtils.EOL_BYTES);

          try
          {
            s.write(buffer.toByteArray());
          }
          catch (final Exception e)
          {
            Debug.debugException(e);
            resultCode = ResultCode.LOCAL_ERROR;
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_ERROR_WRITING_ERROR_TO_FILE.get(
                      le.getMessage(), f.getAbsolutePath(),
                      StaticUtils.getExceptionMessage(e)));
            break readLoop;
          }

          // Keep reading if the reader can recover from this record;
          // otherwise give up on the rest of the input.
          if (le.mayContinueReading())
          {
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_RECOVERABLE.get(
                      StaticUtils.getExceptionMessage(le)));
            continue;
          }
          else
          {
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_UNRECOVERABLE.get(
                      StaticUtils.getExceptionMessage(le)));
            break;
          }
        }
        catch (final IOException ioe)
        {
          Debug.debugException(ioe);
          resultCode = ResultCode.LOCAL_ERROR;
          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
               ERR_SPLIT_LDIF_IO_READ_ERROR.get(
                    StaticUtils.getExceptionMessage(ioe)));
          break;
        }
        catch (final Exception e)
        {
          Debug.debugException(e);
          resultCode = ResultCode.LOCAL_ERROR;
          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
               ERR_SPLIT_LDIF_UNEXPECTED_READ_ERROR.get(
                    StaticUtils.getExceptionMessage(e)));
          break;
        }

        // A null entry indicates the end of the input has been reached.
        if (entry == null)
        {
          break;
        }

        final long readCount = entriesRead.incrementAndGet();
        if ((readCount % 1000L) == 0)
        {
          // Even though we aren't done with this entry yet, we'll go ahead and
          // log a progress message now because it's easier to do that now than
          // to ensure that it's handled properly through all possible error
          // conditions that need to be handled below.
          wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
               INFO_SPLIT_LDIF_PROGRESS.get(readCount));
        }


        // Get the set(s) to which the entry should be written.  If this is
        // null (which could be the case as a result of a race condition when
        // using multiple threads where processing for a child completes before
        // processing for its parent, or as a result of a case in which a
        // child is included without or before its parent), then try to see if
        // we can get the sets by passing the entry through the translator.
        Set<String> sets = entry.getSets();
        byte[] ldifBytes = entry.getLDIFBytes();
        if (sets == null)
        {
          try
          {
            sets = translator.translate(entry, 0L).getSets();
          }
          catch (final Exception e)
          {
            Debug.debugException(e);
          }

          // If the sets still can't be determined, route the entry to the
          // errors set with an explanatory message.
          if (sets == null)
          {
            final SplitLDIFEntry errorEntry = translator.createEntry(entry,
                 ERR_SPLIT_LDIF_ENTRY_WITHOUT_PARENT.get(
                      entry.getDN(), splitBaseDN.getStringValue()),
                 Collections.singleton(SplitLDIFEntry.SET_NAME_ERRORS));
            ldifBytes = errorEntry.getLDIFBytes();
            sets = errorEntry.getSets();
          }
        }


        // If the entry shouldn't be written into any sets, then we don't need
        // to do anything else.
        if (sets.isEmpty())
        {
          entriesExcluded.incrementAndGet();
          continue;
        }


        // Write the entry into each of the target sets, creating the output
        // files if necessary.
        for (final String set : sets)
        {
          if (set.equals(SplitLDIFEntry.SET_NAME_ERRORS))
          {
            // This indicates that an error was encountered during processing,
            // so we'll update the result code to reflect that.
            resultCode = ResultCode.LOCAL_ERROR;
          }

          final File f = getOutputFile(set);
          OutputStream s = outputStreams.get(set);
          if (s == null)
          {
            // Lazily open the output file for this set, wrapping it for
            // encryption and/or compression as requested.
            try
            {
              s = new FileOutputStream(f);

              if (encryptTarget.isPresent())
              {
                if (encryptionPassphrase == null)
                {
                  try
                  {
                    encryptionPassphrase =
                         ToolUtils.promptForEncryptionPassphrase(false, true,
                              getOut(), getErr());
                  }
                  catch (final LDAPException ex)
                  {
                    Debug.debugException(ex);
                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
                    return ex.getResultCode();
                  }
                }

                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
                     s);
              }

              if (compressTarget.isPresent())
              {
                s = new GZIPOutputStream(s);
              }

              outputStreams.put(set, s);
              fileCounts.put(set, new AtomicLong(0L));
            }
            catch (final Exception e)
            {
              Debug.debugException(e);
              resultCode = ResultCode.LOCAL_ERROR;
              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
                        f.getAbsolutePath(),
                        StaticUtils.getExceptionMessage(e)));
              break readLoop;
            }
          }

          try
          {
            s.write(ldifBytes);
          }
          catch (final Exception e)
          {
            Debug.debugException(e);
            resultCode = ResultCode.LOCAL_ERROR;
            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
                 ERR_SPLIT_LDIF_ERROR_WRITING_TO_FILE.get(
                      entry.getDN(), f.getAbsolutePath(),
                      StaticUtils.getExceptionMessage(e)));
            break readLoop;
          }

          fileCounts.get(set).incrementAndGet();
        }
      }


      // Processing is complete.  Summarize the processing that was performed.
      final long finalReadCount = entriesRead.get();
      if (finalReadCount > 1000L)
      {
        // Progress messages were printed above, so separate the summary from
        // them with a blank line.
        out();
      }

      wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
           INFO_SPLIT_LDIF_PROCESSING_COMPLETE.get(finalReadCount));

      final long excludedCount = entriesExcluded.get();
      if (excludedCount > 0L)
      {
        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
             INFO_SPLIT_LDIF_EXCLUDED_COUNT.get(excludedCount));
      }

      for (final Map.Entry<String,AtomicLong> e : fileCounts.entrySet())
      {
        final File f = getOutputFile(e.getKey());
        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
             INFO_SPLIT_LDIF_COUNT_TO_FILE.get(e.getValue().get(),
                  f.getName()));
      }
    }
    finally
    {
      // Always close the reader and every output stream, even on early exit.
      try
      {
        ldifReader.close();
      }
      catch (final Exception e)
      {
        Debug.debugException(e);
      }

      for (final Map.Entry<String,OutputStream> e : outputStreams.entrySet())
      {
        try
        {
          e.getValue().close();
        }
        catch (final Exception ex)
        {
          Debug.debugException(ex);
          resultCode = ResultCode.LOCAL_ERROR;
          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
               ERR_SPLIT_LDIF_ERROR_CLOSING_FILE.get(
                    getOutputFile(e.getKey()),
                    StaticUtils.getExceptionMessage(ex)));
        }
      }
    }

    return resultCode;
  }



  /**
   * Retrieves the schema that should be used for processing.
   *
   * @return  The schema that was created.
   *
   * @throws  LDAPException  If a problem is encountered while retrieving the
   *                         schema.
   */
  private Schema getSchema()
          throws LDAPException
  {
    // If any schema paths were specified, then load the schema only from those
    // paths.
1105 if (schemaPath.isPresent()) 1106 { 1107 final ArrayList<File> schemaFiles = new ArrayList<>(10); 1108 for (final File path : schemaPath.getValues()) 1109 { 1110 if (path.isFile()) 1111 { 1112 schemaFiles.add(path); 1113 } 1114 else 1115 { 1116 final TreeMap<String,File> fileMap = new TreeMap<>(); 1117 for (final File schemaDirFile : path.listFiles()) 1118 { 1119 final String name = schemaDirFile.getName(); 1120 if (schemaDirFile.isFile() && name.toLowerCase().endsWith(".ldif")) 1121 { 1122 fileMap.put(name, schemaDirFile); 1123 } 1124 } 1125 schemaFiles.addAll(fileMap.values()); 1126 } 1127 } 1128 1129 if (schemaFiles.isEmpty()) 1130 { 1131 throw new LDAPException(ResultCode.PARAM_ERROR, 1132 ERR_SPLIT_LDIF_NO_SCHEMA_FILES.get( 1133 schemaPath.getIdentifierString())); 1134 } 1135 else 1136 { 1137 try 1138 { 1139 return Schema.getSchema(schemaFiles); 1140 } 1141 catch (final Exception e) 1142 { 1143 Debug.debugException(e); 1144 throw new LDAPException(ResultCode.LOCAL_ERROR, 1145 ERR_SPLIT_LDIF_ERROR_LOADING_SCHEMA.get( 1146 StaticUtils.getExceptionMessage(e))); 1147 } 1148 } 1149 } 1150 else 1151 { 1152 // If the INSTANCE_ROOT environment variable is set and it refers to a 1153 // directory that has a config/schema subdirectory that has one or more 1154 // schema files in it, then read the schema from that directory. 
1155 try 1156 { 1157 final String instanceRootStr = System.getenv("INSTANCE_ROOT"); 1158 if (instanceRootStr != null) 1159 { 1160 final File instanceRoot = new File(instanceRootStr); 1161 final File configDir = new File(instanceRoot, "config"); 1162 final File schemaDir = new File(configDir, "schema"); 1163 if (schemaDir.exists()) 1164 { 1165 final TreeMap<String,File> fileMap = new TreeMap<>(); 1166 for (final File schemaDirFile : schemaDir.listFiles()) 1167 { 1168 final String name = schemaDirFile.getName(); 1169 if (schemaDirFile.isFile() && 1170 name.toLowerCase().endsWith(".ldif")) 1171 { 1172 fileMap.put(name, schemaDirFile); 1173 } 1174 } 1175 1176 if (! fileMap.isEmpty()) 1177 { 1178 return Schema.getSchema(new ArrayList<>(fileMap.values())); 1179 } 1180 } 1181 } 1182 } 1183 catch (final Exception e) 1184 { 1185 Debug.debugException(e); 1186 } 1187 } 1188 1189 1190 // If we've gotten here, then just return null and the tool will try to use 1191 // the default standard schema. 1192 return null; 1193 } 1194 1195 1196 1197 /** 1198 * Retrieves a file object that refers to an output file with the provided 1199 * extension. 1200 * 1201 * @param extension The extension to use for the file. 1202 * 1203 * @return A file object that refers to an output file with the provided 1204 * extension. 
1205 */ 1206 private File getOutputFile(final String extension) 1207 { 1208 final File baseFile; 1209 if (targetLDIFBasePath.isPresent()) 1210 { 1211 baseFile = targetLDIFBasePath.getValue(); 1212 } 1213 else 1214 { 1215 baseFile = sourceLDIF.getValue(); 1216 } 1217 1218 return new File(baseFile.getAbsolutePath() + extension); 1219 } 1220 1221 1222 1223 /** 1224 * {@inheritDoc} 1225 */ 1226 @Override() 1227 public LinkedHashMap<String[],String> getExampleUsages() 1228 { 1229 final LinkedHashMap<String[],String> exampleMap = 1230 new LinkedHashMap<>(StaticUtils.computeMapCapacity(4)); 1231 1232 for (final Map.Entry<String[],String> e : 1233 splitUsingHashOnRDN.getExampleUsages().entrySet()) 1234 { 1235 exampleMap.put(e.getKey(), e.getValue()); 1236 } 1237 1238 for (final Map.Entry<String[],String> e : 1239 splitUsingHashOnAttribute.getExampleUsages().entrySet()) 1240 { 1241 exampleMap.put(e.getKey(), e.getValue()); 1242 } 1243 1244 for (final Map.Entry<String[],String> e : 1245 splitUsingFewestEntries.getExampleUsages().entrySet()) 1246 { 1247 exampleMap.put(e.getKey(), e.getValue()); 1248 } 1249 1250 for (final Map.Entry<String[],String> e : 1251 splitUsingFilter.getExampleUsages().entrySet()) 1252 { 1253 exampleMap.put(e.getKey(), e.getValue()); 1254 } 1255 1256 return exampleMap; 1257 } 1258}