Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,12 @@ private static void applyAssemblyLayoutToOutput(
final long[] componentEndBpExclusive = new long[contigRecords.size()];
final var scaffoldIds = new java.util.LinkedHashMap<String, Long>();
final var sourceChromLayout = readSourceChromLayout(src, selectedResolutions.get(0));
final boolean isJuiceboxAssemblyLayout = layoutPath.getFileName().toString().toLowerCase().endsWith(".assembly");
final boolean allContigNamesResolve = contigRecords.stream()
.allMatch(record -> canResolveSourceChromIndex(sourceChromLayout, record.getContigName()));
final boolean useSingleAssemblyChromosome = !allContigNamesResolve && sourceChromLayout.lengthsBp().length == 1;
if (!allContigNamesResolve && !useSingleAssemblyChromosome) {
final boolean allowUnmappedJuiceboxContigs = !allContigNamesResolve && !useSingleAssemblyChromosome && isJuiceboxAssemblyLayout;
if (!allContigNamesResolve && !useSingleAssemblyChromosome && !allowUnmappedJuiceboxContigs) {
final var missingContigName = contigRecords.stream()
.map(AGPProcessor.ContigAGPRecord::getContigName)
.filter(name -> !canResolveSourceChromIndex(sourceChromLayout, name))
Expand All @@ -199,6 +201,7 @@ private static void applyAssemblyLayoutToOutput(
final Map<String, SourceComponentRange> singleAssemblySourceRanges = useSingleAssemblyChromosome
? readSingleAssemblySourceRanges(layoutPath)
: Map.of();
int unmappedJuiceboxContigs = 0;

for (int i = 0; i < contigRecords.size(); i++) {
final var record = contigRecords.get(i);
Expand All @@ -220,22 +223,35 @@ final var record = contigRecords.get(i);
componentEndBpExclusive[i] = assemblyChromosomeOffsetBp + componentLengthBp;
}
assemblyChromosomeOffsetBp = componentEndBpExclusive[i];
} else if (allowUnmappedJuiceboxContigs && !canResolveSourceChromIndex(sourceChromLayout, record.getContigName())) {
sourceChromIds[i] = -1;
componentStartBp0[i] = 0L;
componentEndBpExclusive[i] = componentLengthBp;
unmappedJuiceboxContigs++;
} else {
sourceChromIds[i] = resolveSourceChromIndex(sourceChromLayout, record.getContigName(), layoutPath);
componentStartBp0[i] = record.getIntraContigStartBpIncl() - 1L;
componentEndBpExclusive[i] = record.getIntraContigEndBpIncl();
}
final long sourceChromLengthBp = sourceChromLayout.lengthsBp()[sourceChromIds[i]];
if (componentStartBp0[i] < 0L || componentEndBpExclusive[i] <= componentStartBp0[i] || componentEndBpExclusive[i] > sourceChromLengthBp) {
throw new IllegalArgumentException(
"Assembly layout component " + record.getContigName() + ":" + record.getIntraContigStartBpIncl() +
"-" + record.getIntraContigEndBpIncl() + " is outside source contig length " + sourceChromLengthBp +
" from " + layoutPath.getFileName()
);
if (sourceChromIds[i] >= 0) {
final long sourceChromLengthBp = sourceChromLayout.lengthsBp()[sourceChromIds[i]];
if (componentStartBp0[i] < 0L || componentEndBpExclusive[i] <= componentStartBp0[i] || componentEndBpExclusive[i] > sourceChromLengthBp) {
throw new IllegalArgumentException(
"Assembly layout component " + record.getContigName() + ":" + record.getIntraContigStartBpIncl() +
"-" + record.getIntraContigEndBpIncl() + " is outside source contig length " + sourceChromLengthBp +
" from " + layoutPath.getFileName()
);
}
}
contigLengthBp[i] = componentLengthBp;
contigScaffoldIds[i] = scaffoldIds.computeIfAbsent(record.getScaffoldName(), ignored -> (long) scaffoldIds.size());
}
if (unmappedJuiceboxContigs > 0) {
logConsumer.accept(
"Assembly layout contains " + unmappedJuiceboxContigs +
" contig(s) that are absent from source .mcool chromosome metadata; keeping them as hidden zero-bin contigs."
);
}

final long totalAssemblyLengthBp = Arrays.stream(contigLengthBp).sum();
if (totalAssemblyLengthBp <= 0L) {
Expand All @@ -259,6 +275,11 @@ final var record = contigRecords.get(i);
final long[] sourceBinStarts = src.int64().readArray("/resolutions/" + resolution + "/bins/start");
final long[] sourceBinEnds = src.int64().readArray("/resolutions/" + resolution + "/bins/end");
for (int i = 0; i < contigRecords.size(); i++) {
if (sourceChromIds[i] < 0) {
startBins[i] = 0L;
lengthBins[i] = 0L;
continue;
}
final var range = resolveSourceBinRange(
resolution,
sourceChromIds[i],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,53 @@ void reorderedJuiceboxAssemblyUsesHeaderOrderForSingleHictkSourceOffsets() throw
}
}

/**
 * Converts a synthetic .mcool together with a three-contig Juicebox {@code .assembly} layout
 * and verifies the converted file: all three contig names are present, the per-contig bin
 * counts at the 1000 bp resolution are {@code 3, 4, 0}, and the basis ATU matrix has two rows.
 *
 * <p>NOTE(review): the data written by {@code writeSyntheticMcool} is not visible from here;
 * judging by the test name and the expected zero bin count, the source presumably has no
 * {@code ctgC} chromosome — confirm against the helper.
 *
 * @throws Exception if writing the fixtures, converting, or reading the output fails
 */
@Test
void juiceboxAssemblyKeepsContigsMissingFromMultiChromSourceHidden() throws Exception {
    final long resolutionBp = 1_000L;
    final var sourcePath = tempDir.resolve("partial-source.mcool");
    final var layoutPath = tempDir.resolve("partial-layout.assembly");
    final var convertedPath = tempDir.resolve("partial-output.hict.hdf5");

    writeSyntheticMcool(sourcePath);

    // Three contig header lines followed by one scaffold line per contig,
    // each terminated by the platform line separator (including the last one).
    final String newline = System.lineSeparator();
    final var layoutLines = List.of(
            ">ctgA 1 2500",
            ">ctgB 2 3600",
            ">ctgC 3 1500",
            "1",
            "2",
            "3"
    );
    Files.writeString(layoutPath, String.join(newline, layoutLines) + newline);

    final var converter = new McoolToHictConverter();
    final var options = new ConversionOptions(
            sourcePath,
            convertedPath,
            List.of(resolutionBp),
            64,
            0,
            ConversionOptions.CompressionAlgorithm.DEFLATE,
            layoutPath.toString(),
            false,
            1
    );
    // Log output is intentionally discarded; only the written file is inspected.
    converter.convert(options, ignored -> {
    });

    try (final var reader = HDF5Factory.openForReading(convertedPath.toFile())) {
        final String[] contigNames = reader.string().readArray(getContigNameDatasetPath());
        assertArrayEquals(new String[]{"ctgA", "ctgB", "ctgC"}, contigNames);

        final long[] contigLengthBins = reader.int64().readArray(getContigLengthBinsDatasetPath(resolutionBp));
        assertArrayEquals(new long[]{3L, 4L, 0L}, contigLengthBins);

        // Only two basis ATU rows are expected even though three contigs are listed.
        final long[][] expectedBasisAtu = {
                {0L, 0L, 3L, 1L},
                {0L, 3L, 7L, 1L},
        };
        final long[][] actualBasisAtu = reader.int64().readMatrix(getBasisATUDatasetPath(resolutionBp));
        assertEquals(expectedBasisAtu.length, actualBasisAtu.length);
        for (int row = 0; row < expectedBasisAtu.length; row++) {
            assertArrayEquals(expectedBasisAtu[row], actualBasisAtu[row]);
        }
    }
}

private static void writeSyntheticMcool(final Path path) {
HDF5LibraryInitializer.initializeHDF5Library();
try (final var writer = HDF5Factory.open(path.toFile())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
import static org.junit.jupiter.api.Assertions.assertTrue;

class HicAssemblyConversionIntegrationTest {
private static final Path HIC = Path.of("/mnt/Models/HiCT/data/DNAZoo/AedisAegypti/AGWG.draft.hic");
private static final Path ASSEMBLY = Path.of("/mnt/Models/HiCT/data/DNAZoo/AedisAegypti/AGWG.draft.assembly");
private static final Path FASTA = Path.of("/mnt/Models/HiCT/data/DNAZoo/AedisAegypti/AGWG.draft.fasta.gz");
private static final Path HIC = Path.of("/mnt/Models/HiCT/data/DNAZoo/AedesAegypti/AaegL5.0.hic");
private static final Path ASSEMBLY = Path.of("/mnt/Models/HiCT/data/DNAZoo/AedesAegypti/AaegL5.0.assembly");
private static final Path FASTA = Path.of("/mnt/Models/HiCT/data/DNAZoo/AedesAegypti/AaegL5.0.fasta.gz");

@TempDir
Path tempDir;
Expand All @@ -44,7 +44,7 @@ void convertsAndOpensHicWithJuiceboxAssemblyAndFasta() throws Exception {
new ConversionOptions(
HIC,
output,
List.of(),
List.of(2_500_000L),
8_192,
6,
ConversionOptions.CompressionAlgorithm.DEFLATE,
Expand Down Expand Up @@ -80,7 +80,7 @@ void convertsAndOpensHicWithJuiceboxAssemblyAndFasta() throws Exception {
chunkedFile.getAssemblyInfo().contigs().size(),
chunkedFile.getContigTree().getOrderedContigList().size()
);
assertTrue(chunkedFile.getContigTree().getOrderedContigList().size() > 1);
assertTrue(chunkedFile.getContigTree().getOrderedContigList().size() > 3);
}

private static Path findLocalHictkBinary() throws IOException {
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.175-5ddcf3c-webui_a609177
1.0.176-533a1f0-webui_a609177
Loading