diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/programgroups/LongReadProgramGroup.java b/src/main/java/org/broadinstitute/hellbender/cmdline/programgroups/LongReadProgramGroup.java new file mode 100644 index 00000000000..88946feecfb --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/programgroups/LongReadProgramGroup.java @@ -0,0 +1,16 @@ +package org.broadinstitute.hellbender.cmdline.programgroups; + +import org.broadinstitute.barclay.argparser.CommandLineProgramGroup; +import org.broadinstitute.hellbender.utils.help.HelpConstants; + +/** + * Tools that are designed for use with long reads. + * @author Jonn Smith + */ +public final class LongReadProgramGroup implements CommandLineProgramGroup { + @Override + public String getName() { return HelpConstants.DOC_CAT_LONGREAD; } + + @Override + public String getDescription() { return HelpConstants.DOC_CAT_LONGREAD; } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ComputeLongReadMetrics.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ComputeLongReadMetrics.java new file mode 100644 index 00000000000..3255ccdd66d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ComputeLongReadMetrics.java @@ -0,0 +1,307 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import com.google.common.base.Joiner; +import htsjdk.samtools.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.LongReadProgramGroup; +import org.broadinstitute.hellbender.engine.FeatureContext; +import 
org.broadinstitute.hellbender.engine.ReadWalker; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.util.*; + +/** + * Compute simple metrics on long read data + *
+ * <h3>Input</h3>
+ * <ul>
+ *     <li>A single BAM file</li>
+ * </ul>
+ *
+ * <h3>Outputs</h3>
+ * <ul>
+ *     <li>A collection of text files containing read length, ZMW, and pass-count metrics</li>
+ * </ul>
+ *
+ * <h3>Usage Example</h3>
+ * <pre>
+ *   gatk ComputeLongReadMetrics \
+ *     -I my.bam \
+ *     -O metrics
+ * </pre>
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Compute simple metrics on long read data", + oneLineSummary = "Compute simple metrics on long read data", + programGroup = LongReadProgramGroup.class +) +public final class ComputeLongReadMetrics extends ReadWalker { + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc="Write output to this file prefix") + public String OUTPUT; + + @Argument(fullName = "roundLengthsToNearest", shortName="rn", doc="Round read lengths to nearest ") + public Integer ROUND_TO_NEAREST = 100; + + private long num_reads = 0; + private long yield = 0; + private long yieldTimesPasses = 0; + private final Map prlHist = new TreeMap<>(); + private final Map prlYieldHist = new TreeMap<>(); + private final Map rlHist = new TreeMap<>(); + private final Map rlYieldHist = new TreeMap<>(); + private final Map zmwNumHist = new TreeMap<>(); + private final Map npHist = new TreeMap<>(); + private final Map rangeGapHist = new TreeMap<>(); + + private final List polymeraseReadLengths = new ArrayList<>(); + private final List readLengths = new ArrayList<>(); + + private long lastZmw = -1; + private int prl = 0; + private int lastRangeStop = 0; + + @Override + public void apply( GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext ) { + SAMRecord sr = read.convertToSAMRecord(this.getHeaderForReads()); + + if (!sr.isSecondaryOrSupplementary() || sr.getReadUnmappedFlag()) { + int np = sr.hasAttribute("np") ? 
sr.getIntegerAttribute("np") : 1; + int readLength = sr.getReadLength(); + long zmw = lastZmw + 1; + + int range_start = 0; + int range_stop = 0; + + if (sr.getReadName().contains("/")) { + String[] pieces = sr.getReadName().split("/"); + if (pieces.length == 3) { + zmw = Long.parseLong(pieces[1]); + + if (pieces[2].contains("_")) { + String[] range = pieces[2].split("_"); + range_start = Integer.parseInt(range[0]); + range_stop = Integer.parseInt(range[1]); + } + } + } + + num_reads++; + yield += readLength; + yieldTimesPasses += (long) readLength * np; + + increment(rlHist, roundToNearest(readLength, ROUND_TO_NEAREST)); + increment(zmwNumHist, zmw); + increment(npHist, np); + + add(rlYieldHist, roundToNearest(readLength, ROUND_TO_NEAREST), (long) readLength); + + if ( lastRangeStop != 0 && lastZmw == zmw) { + increment(rangeGapHist, range_start - lastRangeStop); + } + + readLengths.add(readLength); + + if ( lastZmw != -1 && lastZmw != zmw) { + increment(prlHist, roundToNearest(prl, ROUND_TO_NEAREST)); + add(prlYieldHist, roundToNearest(prl, ROUND_TO_NEAREST), (long) prl); + + polymeraseReadLengths.add(prl); + + prl = 0; + } + + prl = (zmw == lastZmw) ? 
prl + readLength : readLength; + + lastZmw = zmw; + lastRangeStop = range_stop; + } + } + + private int roundToNearest(int num, int nearest) { + return ((num + (nearest - 1)) / nearest) * nearest; + } + + private void increment(Map h, Long k) { + if (!h.containsKey(k)) { + h.put(k, 1L); + } else { + h.put(k, h.get(k) + 1L); + } + } + + private void increment(Map h, Integer k) { + if (!h.containsKey(k)) { + h.put(k, 1L); + } else { + h.put(k, h.get(k) + 1L); + } + } + + private void add(Map h, Integer k, Long v) { + if (!h.containsKey(k)) { + h.put(k, v); + } else { + h.put(k, h.get(k) + v); + } + } + + private int NX(List l, int X) { + double sum = 0.0; + for (int i : l) { sum += i; } + + double h = (((double) X)/100.0)*sum; + + double cs = 0.0; + int last = l.get(0); + for (int i : l) { + cs += i; + if (cs > h) { + break; + } + + last = i; + } + + return last; + } + + @Override + public void closeTool() { + readLengths.sort(Collections.reverseOrder()); + polymeraseReadLengths.sort(Collections.reverseOrder()); + + final PrintStream polymeraseReadLengthCountsOut; + final PrintStream polymeraseReadLengthHistOut; + final PrintStream polymeraseReadLengthYieldHistOut; + final PrintStream polymeraseReadLengthNxOut; + + final PrintStream readLengthCountsOut; + final PrintStream readLengthHistOut; + final PrintStream readLengthYieldHistOut; + final PrintStream readLengthNxOut; + + final PrintStream zmwHistOut; + final PrintStream npHistOut; + final PrintStream rangeGapHistOut; + + // TODO: add data for number of passes vs. 
read length boxplot + + try { + polymeraseReadLengthCountsOut = new PrintStream(new FileOutputStream(OUTPUT + ".prl_counts.txt")); + polymeraseReadLengthHistOut = new PrintStream(new FileOutputStream(OUTPUT + ".prl_hist.txt")); + polymeraseReadLengthYieldHistOut = new PrintStream(new FileOutputStream(OUTPUT + ".prl_yield_hist.txt")); + polymeraseReadLengthNxOut = new PrintStream(new FileOutputStream(OUTPUT + ".prl_nx.txt")); + + readLengthCountsOut = new PrintStream(new FileOutputStream(OUTPUT + ".rl_counts.txt")); + readLengthHistOut = new PrintStream(new FileOutputStream(OUTPUT + ".rl_hist.txt")); + readLengthYieldHistOut = new PrintStream(new FileOutputStream(OUTPUT + ".rl_yield_hist.txt")); + readLengthNxOut = new PrintStream(new FileOutputStream(OUTPUT + ".rl_nx.txt")); + + zmwHistOut = new PrintStream(new FileOutputStream(OUTPUT + ".zmw_hist.txt")); + npHistOut = new PrintStream(new FileOutputStream(OUTPUT + ".np_hist.txt")); + rangeGapHistOut = new PrintStream(new FileOutputStream(OUTPUT + ".range_gap_hist.txt")); + } catch (FileNotFoundException e) { + throw new GATKException("Couldn't open output file."); + } + + final Map zmw_hist = new TreeMap<>(); + for (long zmw : zmwNumHist.keySet()) { + increment(zmw_hist, zmwNumHist.get(zmw)); + } + + final Double polymerase_read_length_mean = polymeraseReadLengths.stream().mapToInt(Integer::intValue).average().getAsDouble(); + + polymeraseReadLengthCountsOut.println(Joiner.on("\t").join( "num_polymerase_reads", "polymerase_read_yield_bp", "polymerase_read_yield_bp_times_passes", "polymerase_read_min_length_bp", "polymerase_read_max_length_bp", "polymerase_read_mean_length_bp", "polymerase_read_sd_length_bp" )); + writeReadStatsToFile(polymeraseReadLengthCountsOut, polymerase_read_length_mean, polymeraseReadLengths); + + polymeraseReadLengthHistOut.println("polymerase_read_length\tlength_bp"); + for (int i : prlHist.keySet()) { + polymeraseReadLengthHistOut.println(i + "\t" + prlHist.get(i)); + } + + 
polymeraseReadLengthYieldHistOut.println("polymerase_read_length\tyield_bp"); + for (int i : prlYieldHist.keySet()) { + polymeraseReadLengthYieldHistOut.println(i + "\t" + prlYieldHist.get(i)); + } + + polymeraseReadLengthNxOut.println("NX\tvalue"); + readLengthNxOut.println("NX\tvalue"); + for (int X = 1; X < 100; X++) { + polymeraseReadLengthNxOut.println(X + "\t" + NX(polymeraseReadLengths, X)); + readLengthNxOut.println(X + "\t" + NX(readLengths, X)); + } + + final Double read_length_mean = readLengths.stream().mapToInt(Integer::intValue).average().getAsDouble(); + + readLengthCountsOut.println(Joiner.on("\t").join( "num_reads", "read_yield_bp", "read_yield_bp_times_passes", "read_min_length_bp", "read_max_length_bp", "read_mean_length_bp", "read_sd_length_bp" )); + writeReadStatsToFile(readLengthCountsOut, read_length_mean, readLengths); + + readLengthHistOut.println("read_length_bp\tcount"); + for (int i : rlHist.keySet()) { + readLengthHistOut.println(i + "\t" + rlHist.get(i)); + } + + readLengthYieldHistOut.println("read_length\tyield_bp"); + for (int i : rlYieldHist.keySet()) { + readLengthYieldHistOut.println(i + "\t" + rlYieldHist.get(i)); + } + + zmwHistOut.println("zmw_count\tcount"); + for (long i : zmw_hist.keySet()) { + zmwHistOut.println(i + "\t" + zmw_hist.get(i)); + } + + npHistOut.println("np\tcount"); + for (int i : npHist.keySet()) { + npHistOut.println(i + "\t" + npHist.get(i)); + } + + rangeGapHistOut.println("range_gap\tcount"); + for (int i : rangeGapHist.keySet()) { + rangeGapHistOut.println(i + "\t" + rangeGapHist.get(i)); + } + + polymeraseReadLengthCountsOut.close(); + polymeraseReadLengthHistOut.close(); + polymeraseReadLengthYieldHistOut.close(); + polymeraseReadLengthNxOut.close(); + readLengthCountsOut.close(); + readLengthHistOut.close(); + readLengthYieldHistOut.close(); + readLengthNxOut.close(); + zmwHistOut.close(); + npHistOut.close(); + rangeGapHistOut.close(); + } + + /** + * Write the read length stats to a file + * @param 
outStream The output stream to which to write. + * @param readLengthMean The mean read length. + * @param readLengthList The list of read lengths. + */ + private void writeReadStatsToFile(final PrintStream outStream, final Double readLengthMean, final List readLengthList) { + outStream.println(Joiner.on("\t").join( + readLengthList.size(), + readLengthList.stream().mapToInt(Integer::intValue).sum(), + yieldTimesPasses, + readLengthList.stream().min(Integer::compareTo).get(), + readLengthList.stream().max(Integer::compareTo).get(), + readLengthMean, + Math.sqrt(readLengthList.stream() + .map((integer) -> Math.pow(integer - readLengthMean, 2)) + .mapToDouble(Double::doubleValue).sum() / (readLengthList.size() - 1)) + )); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/CountIndels.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/CountIndels.java new file mode 100644 index 00000000000..e02ee2a4f73 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/CountIndels.java @@ -0,0 +1,282 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import com.google.common.base.Joiner; + +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.LongReadProgramGroup; +import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import 
java.io.*; +import java.util.*; + +/** + * Quickly count errors + * + *
+ * <h3>Input</h3>
+ * <ul>
+ *     <li>A single BAM file</li>
+ * </ul>
+ *
+ * <h3>Outputs</h3>
+ * <ul>
+ *     <li>A per-read table of indel counts and a per-interval summary table</li>
+ * </ul>
+ *
+ * <h3>Usage Example</h3>
+ * <h4>Count indels</h4>
+ * <pre>
+ *   gatk CountIndels \
+ *     -I input.bam \
+ *     -L region.bed \
+ *     -O output.txt
+ * </pre>
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Count indels", + oneLineSummary = "Count indels", + programGroup = LongReadProgramGroup.class +) +public final class CountIndels extends IntervalWalker { + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc="Path to which per-read information should be written") + public String perReadPathName; + + @Argument(fullName = "outputIntervalInfo", + shortName = "OI", + doc="Path to which per-interval information should be written") + public String perIntervalPathName; + + private static class DataEntry { + private final String readName; + private final boolean isNegativeStrand; + private final CigarOperator type; + private final int refPos; + private final int alleleLength; + private final String allele; + + public DataEntry(String readName, boolean isNegativeStrand, CigarOperator type, int refPos, int alleleLength, String allele) { + this.readName = readName; + this.isNegativeStrand = isNegativeStrand; + this.type = type; + this.refPos = refPos; + this.alleleLength = alleleLength; + this.allele = allele; + } + + public String getReadName() { return readName; } + public boolean isNegativeStrand() { return isNegativeStrand; } + public CigarOperator getType() { return type; } + public int getRefPos() { return refPos; } + public int getAlleleLength() { return alleleLength; } + public String getAllele() { return allele; } + + @Override + public String toString() { + return "DataEntry{" + + "readName='" + readName + '\'' + + ", isNegativeStrand=" + isNegativeStrand + + ", type=" + type + + ", refPos=" + refPos + + ", alleleLength=" + alleleLength + + ", allele='" + allele + '\'' + + '}'; + } + } + + private SimpleInterval curInterval = null; + private Set d = null; + + BufferedWriter readStatsWriter, intervalStatsWriter; + + @Override + public void onTraversalStart() { + try { + readStatsWriter = new 
BufferedWriter(new FileWriter(perReadPathName)); + readStatsWriter.write(Joiner.on("\t").join( + "interval", + "readName", + "isNegativeStrand", + "lengthOnReference", + "numDeletedBases", + "numInsertedBases", + "delLengths", + "insLengths", + "insAlleles" + ) + "\n"); + + intervalStatsWriter = new BufferedWriter(new FileWriter(perIntervalPathName)); + intervalStatsWriter.write(Joiner.on("\t").join( + "interval", + "numReads", + "lengthOnReference", + "allNumDeletedBasesFwd", + "allNumDeletedBasesRev", + "allNumInsertedBasesFwd", + "allNumInsertedBasesRev", + "allDelLengthsFwd", + "allDelLengthsRev", + "allInsAllelesFwd", + "allInsAllelesRev", + "allInsAllelesFwd", + "allInsAllelesRev" + ) + "\n"); + } catch (IOException e) { + throw new GATKException(e.getMessage()); + } + } + + @Override + public void apply(SimpleInterval interval, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + if (curInterval == null || interval.getStart() != curInterval.getStart()) { + if (d != null) { + summarizeInterval(curInterval, d); + } + + curInterval = interval; + d = new HashSet<>(); + + logger.info("{}", interval); + } + + for (GATKRead read : readsContext) { + int curOffset = 1; + for (CigarElement ce : read.getCigar()) { + int opOffset = curOffset; + int refOffset = 0; + if (ce.getOperator().isIndel()) { + opOffset = curOffset - 1; + if (ce.getOperator().equals(CigarOperator.D)) { + refOffset = 1; + } + } + int refPos = read.convertToSAMRecord(getHeaderForReads()).getReferencePositionAtReadPosition(opOffset) + refOffset; + + int alleleLength = ce.getLength(); + String allele = ce.getOperator().equals(CigarOperator.D) ? "." 
: read.getBasesString().substring(curOffset - 1, curOffset - 1 + alleleLength); + + if (ce.getOperator().isIndel() && alleleLength > 3 && interval.overlapsWithMargin(new SimpleInterval(interval.getContig(), refPos, refPos + alleleLength), 10)) { + d.add(new DataEntry(read.getName(), read.isReverseStrand(), ce.getOperator(), refPos, alleleLength, allele)); + } + + curOffset += ce.getOperator().consumesReadBases() ? ce.getLength() : 0; + } + } + } + + private void summarizeInterval(SimpleInterval interval, Set d) { + Map readNames = new HashMap<>(); + if ( !d.isEmpty() ) { + for (DataEntry de : d) { + readNames.put(de.getReadName(), de); + } + + int allNumDeletedBasesFwd = 0; + int allNumDeletedBasesRev = 0; + int allNumInsertedBasesFwd = 0; + int allNumInsertedBasesRev = 0; + List allDelLengthsFwd = new ArrayList<>(); + List allDelLengthsRev = new ArrayList<>(); + List allInsLengthsFwd = new ArrayList<>(); + List allInsLengthsRev = new ArrayList<>(); + List allInsAllelesFwd = new ArrayList<>(); + List allInsAllelesRev = new ArrayList<>(); + + for (String readName : readNames.keySet()) { + int numDeletedBases = 0; + int numInsertedBases = 0; + boolean isNegativeStrand = false; + List delLengths = new ArrayList<>(); + List insLengths = new ArrayList<>(); + List insAlleles = new ArrayList<>(); + + DataEntry de = readNames.get(readName); + + numDeletedBases += de.getType().equals(CigarOperator.D) ? de.alleleLength : 0; + numInsertedBases += de.getType().equals(CigarOperator.I) ? 
de.alleleLength : 0; + isNegativeStrand = de.isNegativeStrand(); + + if (de.getType().equals(CigarOperator.D)) { + delLengths.add(de.getAlleleLength()); + } else { + insLengths.add(de.getAlleleLength()); + insAlleles.add(de.allele); + } + + if (isNegativeStrand) { + allNumDeletedBasesRev += numDeletedBases; + allNumInsertedBasesRev += numInsertedBases; + allDelLengthsRev.addAll(delLengths); + allInsLengthsRev.addAll(insLengths); + allInsAllelesRev.addAll(insAlleles); + } else { + allNumDeletedBasesFwd += numDeletedBases; + allNumInsertedBasesFwd += numInsertedBases; + allDelLengthsFwd.addAll(delLengths); + allInsLengthsFwd.addAll(insLengths); + allInsAllelesFwd.addAll(insAlleles); + } + + try { + readStatsWriter.write(Joiner.on("\t").join( + interval, + readName, + isNegativeStrand ? "-" : "+", + interval.getLengthOnReference(), + numDeletedBases, + numInsertedBases, + Joiner.on(",").join(delLengths), + Joiner.on(",").join(insLengths), + !insAlleles.isEmpty() ? Joiner.on(",").join(insAlleles) : "." + ) + "\n"); + } catch (IOException e) { + throw new GATKException(e.getMessage()); + } + } + + try { + intervalStatsWriter.write(Joiner.on("\t").join( + interval, + readNames.size(), + interval.getLengthOnReference(), + allNumDeletedBasesFwd, + allNumDeletedBasesRev, + allNumInsertedBasesFwd, + allNumInsertedBasesRev, + Joiner.on(",").join(allDelLengthsFwd), + Joiner.on(",").join(allDelLengthsRev), + Joiner.on(",").join(allInsAllelesFwd), + Joiner.on(",").join(allInsAllelesRev), + !allInsAllelesFwd.isEmpty() ? Joiner.on(",").join(allInsAllelesFwd) : ".", + !allInsAllelesRev.isEmpty() ? Joiner.on(",").join(allInsAllelesRev) : "." 
+ ) + "\n"); + } catch (IOException e) { + throw new GATKException(e.getMessage()); + } + } + } + + @Override + public void closeTool() { + summarizeInterval(curInterval, d); + + try { + readStatsWriter.close(); + intervalStatsWriter.close(); + } catch (IOException e) { + throw new GATKException(e.getMessage()); + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/QuicklyCountErrors.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/QuicklyCountErrors.java new file mode 100644 index 00000000000..f61f4545680 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/QuicklyCountErrors.java @@ -0,0 +1,93 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.LongReadProgramGroup; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadWalker; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import java.io.FileNotFoundException; +import java.io.PrintStream; + +/** + * Quickly count errors + * + *
+ * <h3>Input</h3>
+ * <ul>
+ *     <li>A single BAM file</li>
+ * </ul>
+ *
+ * <h3>Outputs</h3>
+ * <ul>
+ *     <li>A text file with counts of mismatches, insertions, deletions, and total bases</li>
+ * </ul>
+ *
+ * <h3>Usage Example</h3>
+ * <h4>Quickly count errors</h4>
+ * <pre>
+ *   gatk QuicklyCountErrors \
+ *     -I input.bam \
+ *     -O output.txt
+ * </pre>
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Quickly count errors", + oneLineSummary = "Quickly count errors", + programGroup = LongReadProgramGroup.class +) +public final class QuicklyCountErrors extends ReadWalker { + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc="Path to which variants should be written") + public String outPathName; + //final Path outPath = IOUtils.getPath(outPathName); + + private long numMismatches = 0; + private long numInsertions = 0; + private long numDeletions = 0; + private long numBases = 0; + + @Override + public void apply( GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext ) { + boolean xseen = false; + for (CigarElement ce : read.getCigarElements()) { + if (ce.getOperator().equals(CigarOperator.X)) { + xseen = true; + numMismatches++; + } else if (ce.getOperator().equals(CigarOperator.D)) { + numDeletions += ce.getLength(); + } else if (ce.getOperator().equals(CigarOperator.I)) { + numInsertions += ce.getLength(); + } + } + + if (!xseen && read.hasAttribute("NM")) { + numMismatches += read.getAttributeAsInteger("NM"); + } + + numBases += read.getLength(); + } + + @Override + public void closeTool() { + try ( final PrintStream out = new PrintStream(outPathName) ) { + out.println(String.join(" ", "numMismatches", "numDeletions", "numInsertions", "numBases")); + out.println(String.join(" ", Long.valueOf(numMismatches).toString(), Long.valueOf(numDeletions).toString(), Long.valueOf(numInsertions).toString(), Long.valueOf(numBases).toString())); + } catch (final FileNotFoundException e) { + throw new UserException("Cannot open given outfile: " + outPathName, e); + } + + logger.info("numMismatches: " + numMismatches + " numDeletions: " + numDeletions + " numInsertions: " + numInsertions + " numBases:" + numBases); + } +} diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/QuicklyCountMismatches.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/QuicklyCountMismatches.java new file mode 100644 index 00000000000..71d6f001838 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/QuicklyCountMismatches.java @@ -0,0 +1,138 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.utils.pileup.PileupElement; +import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; +import org.broadinstitute.hellbender.exceptions.UserException; + +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.*; + +/** + * Quickly count errors + * + *
+ * <h3>Input</h3>
+ * <ul>
+ *     <li>A single BAM file</li>
+ * </ul>
+ *
+ * <h3>Outputs</h3>
+ * <ul>
+ *     <li>A text file of mismatch/insertion/deletion/base counts keyed by number of passes</li>
+ *     <li>Insertion-length and deletion-length histogram text files</li>
+ * </ul>
+ *
+ * <h3>Usage Example</h3>
+ * <h4>Quickly count mismatches</h4>
+ * <pre>
+ *   gatk QuicklyCountMismatches \
+ *     -I input.bam \
+ *     -O output.txt
+ * </pre>
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Quickly count errors", + oneLineSummary = "Quickly count errors", + programGroup = ReadDataManipulationProgramGroup.class +) +public final class QuicklyCountMismatches extends LocusWalker { + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc="Path to which variants should be written") + public String outPathName; + + @Argument(fullName = "insHist", + shortName = "insHist", + doc="Path to which variants should be written") + public String insPathName; + + @Argument(fullName = "delHist", + shortName = "delHist", + doc="Path to which variants should be written") + public String delPathName; + + private final Map numMismatchesMap = new TreeMap<>(); + private final Map numInsertionsMap = new TreeMap<>(); + private final Map numDeletionsMap = new TreeMap<>(); + private final Map numBasesMap = new TreeMap<>(); + + private final Map insHist = new TreeMap<>(); + private final Map delHist = new TreeMap<>(); + + @Override + public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext) { + for (PileupElement pe : alignmentContext.getBasePileup()) { + int npr = (pe.getRead().hasAttribute("np")) ? 
pe.getRead().getAttributeAsInteger("np") : 0; + + for (int np : Arrays.asList(-1, npr)) { + if (!numMismatchesMap.containsKey(np)) { numMismatchesMap.put(np, 0L); } + if (!numInsertionsMap.containsKey(np)) { numInsertionsMap.put(np, 0L); } + if (!numDeletionsMap.containsKey(np)) { numDeletionsMap.put(np, 0L); } + if (!numBasesMap.containsKey(np)) { numBasesMap.put(np, 0L); } + + if (pe.isDeletion() && pe.atStartOfCurrentCigar()) { + int len = pe.getCurrentCigarElement().getLength(); + if (!delHist.containsKey(len)) { delHist.put(len, 0); } + delHist.put(len, delHist.get(len) + 1); + + if (len > 1) { + numDeletionsMap.put(np, numDeletionsMap.get(np) + 1L); + } + + //logger.info("{} {} {} {} {}", pe.getRead(), pe.getCurrentCigarElement(), pe.getCurrentCigarOffset(), pe.getOffsetInCurrentCigar(), pe); + } else if (pe.isBeforeInsertion() && pe.getBasesOfImmediatelyFollowingInsertion() != null) { + int len = pe.getBasesOfImmediatelyFollowingInsertion().length(); + + if (!insHist.containsKey(len)) { insHist.put(len, 0); } + insHist.put(len, insHist.get(len) + 1); + + if (len > 1) { + numInsertionsMap.put(np, numInsertionsMap.get(np) + 1L); + } + } else { + if (pe.getBase() != referenceContext.getBase()) { + numMismatchesMap.put(np, numMismatchesMap.get(np) + 1L); + } + } + + numBasesMap.put(np, numBasesMap.get(np) + 1L); + } + } + } + + @Override + public void closeTool() { + try (final PrintStream out = new PrintStream(outPathName)) { + + for ( final int np : numMismatchesMap.keySet() ) { + out.println(String.join(" ", numMismatchesMap.get(np).toString(), numInsertionsMap.get(np).toString(), numDeletionsMap.get(np).toString(), numBasesMap.get(np).toString())); + } + } catch (final FileNotFoundException e) { + throw new UserException("Cannot open given outfile: " + outPathName, e); + } + + try (PrintStream outIns = new PrintStream(insPathName)) { + for ( final int len : insHist.keySet() ) { + outIns.println(String.join(" ", Integer.valueOf(len).toString(), 
insHist.get(len).toString())); + } + } catch (final FileNotFoundException e) { + throw new UserException("Cannot open given outfile: " + insPathName, e); + } + + try (final PrintStream outDel = new PrintStream(delPathName)) { + for ( final int len : delHist.keySet() ) { + outDel.println(String.join(" ", Integer.valueOf(len).toString(), delHist.get(len).toString())); + } + } catch (final FileNotFoundException e) { + throw new UserException("Cannot open given outfile: " + delPathName, e); + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/RecoverUncorrectedReads.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/RecoverUncorrectedReads.java new file mode 100644 index 00000000000..fef857a9d4b --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/RecoverUncorrectedReads.java @@ -0,0 +1,116 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import htsjdk.samtools.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.engine.ReadWalker; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; +import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; + +import java.io.IOException; +import java.util.HashSet; 
+import java.util.Set; + +/** + * Recover uncorrected reads + * + *
+ * <h3>Input</h3>
+ * <ul>
+ *     <li>The uncorrected BAM file</li>
+ *     <li>The CCS-corrected BAM file</li>
+ * </ul>
+ *
+ * <h3>Outputs</h3>
+ * <ul>
+ *     <li>A BAM file of remaining subreads that were not corrected by the CCS step</li>
+ * </ul>
+ *
+ * <h3>Usage Example</h3>
+ * <h4>Recover uncorrected reads</h4>
+ * <pre>
+ *   gatk RecoverUncorrectedReads \
+ *     -I input.bam \
+ *     -C ccs.bam \
+ *     -O output.bam
+ * </pre>
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Recover uncorrected reads", + oneLineSummary = "Recover uncorrected reads", + programGroup = ReadDataManipulationProgramGroup.class +) +public final class RecoverUncorrectedReads extends ReadWalker { + @Argument(fullName = "corrected", + shortName = "C", + doc="CCS-corrected reads") + public String corrected; + + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc="Write output to this file") + public String output; + private SAMFileGATKReadWriter outputWriter; + + private final Set correctedReadNames = new HashSet<>(); + private int uncorrectedReads = 0; + + @Override + public void onTraversalStart() { + final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); + + try (final SamReader srs = srf.open(IOUtils.getPath(corrected))) { + for ( final SAMRecord sr : srs ) { + String[] name = sr.getReadName().split("/"); + String readZmwName = name[ 0 ] + "/" + name[ 1 ]; + correctedReadNames.add(readZmwName); + } + + } catch (final IOException e) { + throw new UserException("Unable to open path: " + corrected, e); + } + + outputWriter = createSAMWriter(new GATKPath(IOUtils.getPath(output).toUri().toString()), true); + } + + @Override + public void apply( GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext ) { + String[] name = read.getName().split("/"); + if (name.length >= 2) { + String readZmwName = name[0] + "/" + name[1]; + if (!correctedReadNames.contains(readZmwName)) { + outputWriter.addRead(read); + uncorrectedReads++; + + logger.debug("uncorrected: {}", readZmwName); + } else { + logger.debug("corrected: {}", readZmwName); + } + } else { + outputWriter.addRead(read); + uncorrectedReads++; + + logger.debug("uncorrected: {}", read.getName()); + } + } + + @Override + public void closeTool() { + 
logger.info("corrected: {} ; uncorrected: {}", correctedReadNames.size(), uncorrectedReads); + + if ( outputWriter != null ) { + outputWriter.close(); + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/RepairLongReadBam.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/RepairLongReadBam.java new file mode 100644 index 00000000000..c7052e140a8 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/RepairLongReadBam.java @@ -0,0 +1,131 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import htsjdk.samtools.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.utils.io.IOUtils; +import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadWalker; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.read.GATKRead; + +import java.io.File; +import java.util.List; + +/** + * Restore annotations from unaligned BAM files that are discarded during conversion by samtools fastq + * + *

Input

+ *
    + *
  • An unaligned BAM file
  • + *
  • An aligned BAM file
  • + *
+ * + *

Outputs

+ *
    + *
  • An aligned BAM file with annotations restored
  • + *
+ * + *

Usage Example

+ *

Restore annotations from unaligned BAM files that may be discarded during conversion by samtools fastq

+ *
+ *   gatk RepairLongReadBam \
+ *     -I unaligned.bam \
+ *     -A aligned.bam \
+ *     -O restored.bam
+ * 
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Restore annotations from unaligned BAM files that are discarded during conversion by samtools fastq", + oneLineSummary = "Restore annotations from unaligned BAM files that are discarded during conversion by samtools fastq", + programGroup = ReadDataManipulationProgramGroup.class +) +public final class RepairLongReadBam extends ReadWalker { + @Argument(fullName = "aligned", shortName = "A", doc="aligned reads") + public String aligned; + + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc="Write output to this file") + public String output; + + @Argument(fullName = "sort", shortName = "s", doc="Sort output", optional = true) + public boolean sort = false; + + private SAMFileWriter writer; + private SAMRecordIterator it; + private SAMRecord currentAlignedRead; + + @Override + public void onTraversalStart() { + if ( readArguments.getReadPathSpecifiers().size() != 1 ) { + throw new UserException("Specify a single unaligned BAM file to -I and a single aligned BAM file to -A"); + } + + SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); + SamReader srs = srf.open(IOUtils.getPath(aligned)); + + if (getHeaderForReads() != null && getHeaderForReads().getReadGroups() != null && getHeaderForReads().getReadGroups().size() != 1) { + throw new UserException("One (and only one) read group per aligned/unaligned BAM file required"); + } + + it = srs.iterator(); + currentAlignedRead = it.hasNext() ? 
it.next() : null; + + writer = createWriter(output, srs.getFileHeader(), srs.getFileHeader().getSequenceDictionary(), sort); + } + + @Override + public void apply( GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext ) { + SAMRecord unalignedRead = read.convertToSAMRecord(this.getHeaderForReads()); + + do { + if (currentAlignedRead != null) { + if (!unalignedRead.getReadName().equals(currentAlignedRead.getReadName())) { + throw new UserException( + "Expected aligned read to have name '" + + unalignedRead.getReadName() + + "', but saw '" + + currentAlignedRead.getReadName() + + "'. Are these unaligned and aligned versions of exactly the same data?" + ); + } else { + List attrs = unalignedRead.getAttributes(); + attrs.addAll(currentAlignedRead.getAttributes()); + + for (SAMRecord.SAMTagAndValue tv : attrs) { + currentAlignedRead.setAttribute(tv.tag, tv.value); + } + + writer.addAlignment(currentAlignedRead); + } + } + + currentAlignedRead = it.hasNext() ? it.next() : null; + } while (currentAlignedRead != null && unalignedRead.getReadName().equals(currentAlignedRead.getReadName())); + } + + @Override + public void closeTool() { + if ( writer != null ) { + writer.close(); + } + } + + /** + * Creates SAMFileWriter + * @return A SAMFileWriter. + */ + private SAMFileWriter createWriter(String out, SAMFileHeader sfh, SAMSequenceDictionary ssd, boolean sortOutput) { + sfh.setSortOrder(sortOutput ? 
SAMFileHeader.SortOrder.coordinate : SAMFileHeader.SortOrder.unsorted); + sfh.setSequenceDictionary(ssd); + + return new SAMFileWriterFactory().makeSAMOrBAMWriter(sfh, !sortOutput, new File(out)); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ReplaceQuals.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ReplaceQuals.java new file mode 100644 index 00000000000..67b27b2b212 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ReplaceQuals.java @@ -0,0 +1,72 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; +import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; + +import java.util.Arrays; + +/** + * Quickly count errors + * + *

Input

+ *
    + *
  • A single BAM file
  • + *
+ * + *

Outputs

+ *
    + *
  • A BAM file in which every base quality has been replaced with a fixed value
  • + *
+ * + *

Usage Example

+ *

Quickly replace read base qualities with a fixed value

+ *
+ *   gatk ReplaceQuals \
+ *     -I input.bam \
+ *     -O output.bam
+ * 
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Quickly replace read quals with fixed value (@).", + oneLineSummary = "Quickly replace read quals with fixed value (@).", + programGroup = ReadDataManipulationProgramGroup.class +) +public final class ReplaceQuals extends ReadWalker { + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc="Write output to this file") + public String output; + private SAMFileGATKReadWriter outputWriter; + + @Override + public void onTraversalStart() { + outputWriter = createSAMWriter(new GATKPath(IOUtils.getPath(output).toUri().toString()), true); + } + + @Override + public void apply(GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext ) { + byte[] quals = new byte[read.getBases().length]; + Arrays.fill(quals, (byte) 40); + read.setBaseQualities(quals); + + outputWriter.addRead(read); + } + + @Override + public void closeTool() { + if ( outputWriter != null ) { + outputWriter.close(); + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ShardLongReads.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ShardLongReads.java new file mode 100644 index 00000000000..7dd31ef9ebd --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/longreads/ShardLongReads.java @@ -0,0 +1,127 @@ +package org.broadinstitute.hellbender.tools.walkers.longreads; + +import htsjdk.samtools.util.IOUtil; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.BetaFeature; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import 
org.broadinstitute.hellbender.cmdline.programgroups.LongReadProgramGroup; +import org.broadinstitute.hellbender.engine.GATKPath; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadWalker; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; + +import java.io.File; + +/** + * Shards long read BAMs, keeping reads from the same ZMW in the same file if ZMW information is present", + * + *

Input

+ *
    + *
  • A single BAM file
  • + *
+ * + *

Outputs

+ *
    + *
  • A collection of BAM files where care has been taken to keep reads from the same ZMW in the same file
  • + *
+ * + *

Usage Example

+ *

Shard reads in a BAM file, keeping reads from the same ZMW in the same output shard

+ *
+ *   gatk ShardLongReads \
+ *     -I input.bam \
+ *     -O outputDirectory \
+ *     --num-reads-per-split 10000
+ * 
+ */ +@DocumentedFeature +@ExperimentalFeature +@CommandLineProgramProperties( + summary = "Shards long read BAMs and keep subreads from the same ZMW in the same file if ZMW information is present", + oneLineSummary = "Shards long read BAMs and keep subreads from the same ZMW in the same file if ZMW information is present", + programGroup = LongReadProgramGroup.class +) +public final class ShardLongReads extends ReadWalker { + public static final String READS_PER_SPLIT_LONG_NAME = "num-reads-per-split"; + public static final String READS_PER_SPLIT_SHORT_NAME = "nr"; + + @Argument( + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + doc = "The directory to output SAM/BAM/CRAM files." + ) + public File OUTPUT_DIRECTORY = new File(""); + + @Argument( + fullName = READS_PER_SPLIT_LONG_NAME, + shortName = READS_PER_SPLIT_SHORT_NAME, + doc = "Approximate number of reads per split" + ) + public int NUM_READS_PER_SPLIT = 10000; + + private SAMFileGATKReadWriter writer; + private int shardIndex = 0; + + private GATKRead lastRead = null; + private int numWrittenToShard = 0; + + @Override + public void onTraversalStart() { + IOUtil.assertDirectoryIsWritable(OUTPUT_DIRECTORY); + if ( readArguments.getReadPathSpecifiers().size() != 1 ) { + throw new UserException("This tool only accepts a single SAM/BAM/CRAM as input"); + } + writer = createWriter(shardIndex); + } + + @Override + public void apply( GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext ) { + if (numWrittenToShard < NUM_READS_PER_SPLIT) { + writer.addRead(read); + } else { + int lastZmw = lastRead.hasAttribute("zm") ? lastRead.getAttributeAsInteger("zm") : 0; + int thisZmw = read.hasAttribute("zm") ? 
read.getAttributeAsInteger("zm") : 1; + + if (lastZmw != thisZmw) { + writer.close(); + + shardIndex++; + numWrittenToShard = 0; + writer = createWriter(shardIndex); + } + + writer.addRead(read); + } + + lastRead = read; + numWrittenToShard++; + } + + @Override + public void closeTool() { + if ( writer != null ) { + writer.close(); + } + } + + /** + * Creates SAMFileWriter instances for the read shards + * @param shardIndex index of shard + * @return A SAMFileWriter. + */ + private SAMFileGATKReadWriter createWriter(final int shardIndex) { + final String base = readArguments.getReadPathSpecifiers().get(0).getBaseName().get(); + final String extension = readArguments.getReadPathSpecifiers().get(0).getExtension().get(); + final String key = String.format("%06d", shardIndex); + + final GATKPath outFile = new GATKPath(IOUtil.toPath(new File(OUTPUT_DIRECTORY, base + "." + key + "." + extension)).toUri().toString()); + + return createSAMWriter(outFile, true); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/help/HelpConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/help/HelpConstants.java index e4a82809eee..4ede9572a4c 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/help/HelpConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/help/HelpConstants.java @@ -43,6 +43,10 @@ public static String forumArticle(String article) { public final static String DOC_CAT_CNV = "Copy Number Variant Discovery"; public final static String DOC_CAT_CNV_SUMMARY = "Tools that analyze read coverage to detect copy number variants."; + public final static String DOC_CAT_LONGREAD = "Long Read Tools"; + public final static String DOC_CAT_LONGREAD_SUMMARY = "Tools that are designed for use with long reads."; + + public final static String DOC_CAT_EXAMPLE = "Example Tools"; public final static String DOC_CAT_EXAMPLE_SUMMARY = "Example tools that show developers how to implement new tools";