From 8f2d78216fcd52e93fd8ef5e713d8eadb6fd1587 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 13:37:53 +0200
Subject: [PATCH 01/11] New script for AVITI demultiplexStat

Ref: #102
---
Review notes (text between the three-dash line and the diff is not part of the commit):
* lines_removed now compares the row counts before and after filtering (it was always 0).
* the log reports the minimum count of the assigned samples, i.e. the value the threshold is derived from.
* loops use seq_len(nrow(...)); a lane without any assigned index stops with an explicit error.
* comments translated to English; runtime messages are unchanged.
* the script content changed, so the blob id on the "index" line below no longer matches it.

 bin/demuxStatsElement.R | 161 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100755 bin/demuxStatsElement.R

diff --git a/bin/demuxStatsElement.R b/bin/demuxStatsElement.R
new file mode 100755
index 0000000..7e25e17
--- /dev/null
+++ b/bin/demuxStatsElement.R
@@ -0,0 +1,161 @@
+#!/usr/bin/env Rscript
+
+## Extracts demultiplexing statistics from several files generated by bases2fastq and compiles them into a single CSV.
+
+## --------------------
+# PACKAGES
+## --------------------
+suppressWarnings(library('optparse'))
+suppressWarnings(library('jsonlite'))
+suppressWarnings(library('dplyr'))
+
+## --------------------
+# FUNCTIONS
+## --------------------
+
+## --------------------
+# PARAMETERS
+## --------------------
+option_list <- list(
+  # Every option has a default value, so none has to be passed explicitly
+  make_option(c("-a", "--assigned"), type = "character", default = './IndexAssignment.csv', metavar = "character",
+    help = "Path to the IndexAssignment.csv file."),
+  make_option(c("-u", "--unassigned"), type = "character", default = './UnassignedSequences.csv', metavar = "character",
+    help = "Path to the UnassignedSequences.csv file."),
+  make_option(c("-r", "--runManifest"), type = "character", default = './RunManifest.json', metavar = "character",
+    help = "Path to the RunManifest.json file. That Must be a JSON file !"),
+  make_option(c("-l", "--lane"), type = "character", default = '1', metavar = "character",
+    help = "Lane to work on, could be 1, 2 or 1+2"),
+  make_option(c("-t", "--threshold"), type = "numeric", default = 0.8, metavar = "numeric",
+    help = "Barcode count threshold")
+)
+
+opt_parser <- OptionParser(usage="Make demultiplexStats file from Element demultiplexing output.", option_list = option_list)
+opt <- parse_args(opt_parser)
+
+if (is.null(opt$assigned) || is.null(opt$unassigned) || is.null(opt$runManifest) || is.null(opt$lane)) {
+  stop("At least one argument is missing.\n", call. = FALSE)
+}
+
+## --------------------
+# LOG
+## --------------------
+cat("\nLancement du script demuxStatsElement.R avec les options suivantes :\n")
+cat(paste0("\tFichier des index assignés :\t\t", opt$assigned, "\n"))
+cat(paste0("\tFichier des index non assignés :\t", opt$unassigned, "\n"))
+cat(paste0("\tRunManifest :\t\t\t\t" , opt$runManifest, "\n"))
+cat(paste0("\tNumero de lane :\t\t\t" , opt$lane, "\n"))
+cat(paste0("\tBC count threshold :\t\t\t" , opt$threshold, "\n"))
+launchDir <- getwd()
+cat(paste0("\nRépertoire de travail :\t",launchDir , "\n\n"))
+
+
+## --------------------
+# MAIN
+## --------------------
+# Variable initialisation
+assigned <- opt$assigned
+unassigned <- opt$unassigned
+runManifestJson <- opt$runManifest
+lane <- opt$lane
+threshold <- opt$threshold
+demultiplexStat <- paste0(launchDir,"/demultiplexStat.csv") # output file, written in the launch directory
+demultiplex_stat <- data.frame(
+  Project = character(),
+  Sample = character(),
+  Barcode = character(),
+  bcCount = integer(),
+  percOfFrag = numeric(),
+  stringsAsFactors = FALSE
+)
+
+# Read the input data
+assigned_data <- read.csv(assigned, stringsAsFactors = FALSE)
+run_manifest <- fromJSON(runManifestJson)
+unassigned_data <- read.csv(unassigned, stringsAsFactors = FALSE)
+
+# Keep only the rows of the requested lane (the lane is read from the last column of each CSV)
+assigned_filtered <- assigned_data %>%
+  filter(assigned_data[, ncol(assigned_data)] == lane)
+if (nrow(assigned_filtered) == 0) stop("No assigned index found for lane ", lane, ".\n", call. = FALSE)
+
+unassigned_filtered <- unassigned_data %>%
+  filter(unassigned_data[, ncol(unassigned_data)] == lane)
+
+# Walk through the rows of the assigned file
+cat("\nExtraction des échantillons assignés\n")
+for (i in seq_len(nrow(assigned_filtered))) {
+  sample <- assigned_filtered[i, 2] # columns are read by position; presumably the bases2fastq IndexAssignment.csv layout, TODO confirm
+  bc1 <- assigned_filtered[i, 3]
+  bc2 <- assigned_filtered[i, 4]
+  bcCount <- assigned_filtered[i, 5]
+  perc <- assigned_filtered[i, 6]
+
+  project <- run_manifest$Samples %>% # project of the sample, looked up by SampleName in the run manifest
+    filter(SampleName == sample) %>%
+    .$Project
+
+  new_row <- data.frame(
+    Project = project,
+    Sample = sample,
+    Barcode = paste(bc1, bc2, sep = "-"),
+    bcCount = bcCount,
+    percOfFrag = perc,
+    stringsAsFactors = FALSE
+  )
+
+  cat("\tAjout de l'échantillon : ", project, "->" , sample, "\n")
+
+  demultiplex_stat <- rbind(demultiplex_stat, new_row)
+}
+cat("\n")
+min_assigned_bc_count <- min(demultiplex_stat$bcCount) # smallest count among the assigned samples (only those are in the table here)
+bcCount.threshold <- threshold * min_assigned_bc_count # cut-off applied by the filter below
+
+# Walk through the rows of the unassigned file (the loop is skipped when the lane has no such row)
+cat("\nExtraction des séquences non assignés\n")
+for (i in seq_len(nrow(unassigned_filtered))) {
+  sample <- "Undetermined"
+  bc1 <- unassigned_filtered[i, 1]
+  bc2 <- unassigned_filtered[i, 2]
+  bcCount <- unassigned_filtered[i, 4] # count is read from column 4 and percentage from column 3 in this file
+  perc <- unassigned_filtered[i, 3]
+
+  project <- "DefaultProject"
+
+  new_row <- data.frame(
+    Project = project,
+    Sample = sample,
+    Barcode = paste(bc1, bc2, sep = "-"),
+    bcCount = bcCount,
+    percOfFrag = perc,
+    stringsAsFactors = FALSE
+  )
+
+  cat("\tAjout de l'échantillon : ", project, "->" , sample, "\n")
+
+  demultiplex_stat <- rbind(demultiplex_stat, new_row)
+}
+cat("\n")
+
+# Filter the rows according to the bcCount threshold
+initial_nrow <- nrow(demultiplex_stat)
+demultiplex_stat <- demultiplex_stat %>%
+  filter(bcCount >= bcCount.threshold)
+final_nrow <- nrow(demultiplex_stat)
+lines_removed <- initial_nrow - final_nrow
+cat("L'échantillon le moins souvent retrouvé a", min_assigned_bc_count, "séquences\n")
+cat("Le seuil du nombre de séquence à ", threshold*100, "% est donc de", bcCount.threshold, "séquences\n")
+cat("Tous les index indéterminés ayant au moins ce nombre de séquences seront gardés\n")
+cat("\tNombre de lignes retirées du demultiplexStat :", lines_removed, "\n")
+
+# Sort by decreasing bcCount
+demultiplex_stat <- demultiplex_stat %>%
+  arrange(desc(bcCount))
+
+
+# Write the output file
+cat("\nEcriture du fichier de sortie :", demultiplexStat,"\n")
+write.csv(demultiplex_stat, demultiplexStat, row.names = FALSE, quote = FALSE)
+
+cat("Fin normale du script.\n")
\ No newline at end of file
-- 
GitLab

From 471ef532acdce6b48c67e9237c7706ddf0267536 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 13:52:22 +0200 Subject: [PATCH 02/11] fastqSreen config file has default localisation In assets/ Ref: #105 --- conf/base.config | 2 -- modules/local/module_core.nf | 8 +++++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index 2a88de8..5a965ba 100644 --- a/conf/base.config +++ b/conf/base.config @@ -113,8 +113,6 @@ process { time = { checkMax( 1.h * task.attempt, 'time' ) } module = toolsModuleHash['FASTQSCREEN'] - ext.args = "--conf ${params.inputdir}/fastq_screen.conf" - publishDir = [ path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy' diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index a874ffc..43e8128 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -83,8 +83,14 @@ process FASTQSCREEN { script: def args = task.ext.args ?: '' + def defaultConf = "${baseDir}/assets/fastq_screen.conf_example" + def inputConf = "${params.inputdir}/fastq_screen.conf" + def confFile = file(inputConf).exists() ? 
inputConf : defaultConf """ - fastq_screen $reads $args + fastq_screen \\ + $reads \\ + --conf ${confFile} \\ + $args """ } -- GitLab From 227313c72e7aff47df3db61de01a055e08839b06 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 13:59:09 +0200 Subject: [PATCH 03/11] Update paths in default fastqscreen config file Ref: #105 --- assets/fastq_screen.conf_example | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/fastq_screen.conf_example b/assets/fastq_screen.conf_example index dbbb7bc..084bedc 100644 --- a/assets/fastq_screen.conf_example +++ b/assets/fastq_screen.conf_example @@ -55,10 +55,10 @@ THREADS 8 ## have contaminated your sample during the library preparation step. ## Genome of E. coli -DATABASE E.coli /work/bank/bwadb/Escherichia_coli_FRIK2069 +DATABASE E.coli /home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069 Sequence of PhiX -DATABASE PhiX /work/bank/bwadb/phi.fa +DATABASE PhiX /home/sbsuser/plage/references/indexed/ng6_conta_ref/PhiX/BWA/phi.fa Genome of yeast -DATABASE Yeast /work/bank/bwadb/yeast.nt +DATABASE Yeast /home/sbsuser/plage/references/indexed/ng6_conta_ref/Saccharomyces_cerevisiae/genome/BWA/yeast.nt -- GitLab From 5c300cddaaccc57dbd12b40582f258093c84e727 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 14:29:19 +0200 Subject: [PATCH 04/11] Rename illumina_QC to Short_reads_qc Ref: #106 --- main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index be6b94b..639c4f5 100644 --- a/main.nf +++ b/main.nf @@ -32,10 +32,10 @@ params.summary.collect{k,v -> println "$k : $v"} NAMED WORKFLOW FOR PIPELINE ======================================================================================== */ -include { ILLUMINA_QC } from "$baseDir/workflow/illumina_qc.nf" +include { SHORT_READS_QC } from "$baseDir/workflow/illumina_qc.nf" -workflow 
QC_ANALYSIS { - ILLUMINA_QC() +workflow PLAGE { + SHORT_READS_QC() } /* @@ -45,5 +45,5 @@ workflow QC_ANALYSIS { */ workflow { - QC_ANALYSIS() + PLAGE() } -- GitLab From 3facd2ecaba028c4f3dd732d8048a46ac0b415ed Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 14:40:14 +0200 Subject: [PATCH 05/11] Move illumina's ngl-bi dependencies to illumina sub-worflows Ref: #107 --- modules/local/module_NGL-Bi.nf | 2 +- sub-workflows/local/core_illumina.nf | 13 ++++++++++++ workflow/illumina_qc.nf | 31 +++++++++++++++------------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf index c45a7b9..c32681f 100644 --- a/modules/local/module_NGL-Bi.nf +++ b/modules/local/module_NGL-Bi.nf @@ -20,7 +20,7 @@ process prepareReadSetCreation { """ } -process TREATMENT_DEMUXSTAT { +process TREATMENT_DEMUXSTAT_ILLUMINA { label 'ngl' input: diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf index a03cd6f..61f8885 100644 --- a/sub-workflows/local/core_illumina.nf +++ b/sub-workflows/local/core_illumina.nf @@ -15,6 +15,11 @@ include { ILLUMINA_FILTER; } from "$baseDir/modules/local/module_core.nf" +include { + TREATMENT_DEMUXSTAT_ILLUMINA as TREATMENT_DEMUX_RUN; + TREATMENT_DEMUXSTAT_ILLUMINA as TREATMENT_DEMUX_READSETS; +} from "$baseDir/modules/local/module_NGL-Bi.nf" + // ------------------------------------------------- // LOCAL PARAMS // ------------------------------------------------- @@ -30,6 +35,8 @@ workflow CORE_ILLUMINA { demuxStatXML demuxSummary fastq + nglBiRunCode + readsetsFile main: // ----------- DemultiplexStat @@ -45,6 +52,12 @@ workflow CORE_ILLUMINA { fastq_good = ILLUMINA_FILTER.out.reads } + if (params.insert_to_ngl){ + // Add demultiplexStat treatments + TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.demultiplexStatsTSV) + TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV) + } + emit: fastq = 
fastq_good demuxStat = DEMUX_STATS.out.demultiplexStatsTSV diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 9ba6b36..63d95c5 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -81,9 +81,7 @@ include { DNA_QC } from "$baseDir/sub-workflows/local/dna_qc.nf" include { RNA_QC } from "$baseDir/sub-workflows/local/rna_qc.nf" include { DIVERSITY_QC } from "$baseDir/sub-workflows/local/diversity_qc.nf" include { PARSE_REPORTS } from "$baseDir/modules/local/module_DTM.nf" -include { TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN; - TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS; - FILE_RENAME as RENAME_FASTQ; +include { FILE_RENAME as RENAME_FASTQ; FILE_RENAME as RENAME_INDEX; } from "$baseDir/modules/local/module_NGL-Bi.nf" include { MULTIQC } from "${params.shared_modules}/multiqc.nf" @@ -109,18 +107,23 @@ sendBeginMail(format.format(new Date())) // ------------------------------------------------- // WORKFLOW // ------------------------------------------------- -workflow ILLUMINA_QC { +workflow SHORT_READS_QC { ch_mqc = Channel.empty() WORKFLOW_SUMMARY() if (params.insert_to_ngl){ NGLBI(params.bi_run_code, params.sq_xp_code, '', params.sequencer) + nglBiRunCode = NGLBI.out.nglBiRunCode + readsets_created = NGLBI.out.readsets_created + ready = NGLBI.out.ready + } else { + nglBiRunCode = Channel.empty() + readsets_created = Channel.empty() + ready = Channel.empty() } - if ( params.skip_core_illumina ) { - fastq = ch_read - } else { - CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read) + if (! 
params.skip_core_illumina && params.sequencer =~ "NovaSeq|MiSeq" ) { + CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, nglBiRunCode, readsets_created) fastq = CORE_ILLUMINA.out.fastq if (params.insert_to_ngl){ @@ -187,15 +190,15 @@ workflow ILLUMINA_QC { if (params.insert_to_ngl){ if (params.single_cell) { - RENAME_INDEX(ch_index.map{it[1]}.collect(), NGLBI.out.readsets_created, params.sq_xp_code, 'fastq_index') + RENAME_INDEX(ch_index.map{it[1]}.collect(), readsets_created, params.sq_xp_code, 'fastq_index') MD5SUM_INDEX(RENAME_INDEX.out.fastq.collect(), params.run_name+'_fastq_index') - ADD_RS_INDEX_FILES(NGLBI.out.readsets_created, MD5SUM_INDEX.out, 'INDEX', NGLBI.out.ready) + ADD_RS_INDEX_FILES(readsets_created, MD5SUM_INDEX.out, 'INDEX', ready) } - RENAME_FASTQ(fastq.map{it[1]}.collect(), NGLBI.out.readsets_created, params.sq_xp_code, 'fastq_read') + RENAME_FASTQ(fastq.map{it[1]}.collect(), readsets_created, params.sq_xp_code, 'fastq_read') MD5SUM(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq_read') - ADD_RS_RAW_FILES(NGLBI.out.readsets_created, MD5SUM.out, 'RAW', NGLBI.out.ready) - UPDATE_STATE_FQC(NGLBI.out.readsets_created, 'F-QC', MULTIQC.out.html) - CREATE_ANALYSIS(NGLBI.out.nglBiRunCode, NGLBI.out.readsets_created, 1) + ADD_RS_RAW_FILES(readsets_created, MD5SUM.out, 'RAW', ready) + UPDATE_STATE_FQC(readsets_created, 'F-QC', MULTIQC.out.html) + CREATE_ANALYSIS(nglBiRunCode, readsets_created, 1) ADD_MULTIQC(CREATE_ANALYSIS.out.createdFile, MULTIQC.out.html, CREATE_ANALYSIS.out.ready) } -- GitLab From 23f25da07d150f784f9e3c763ca2ca368bdbcd9e Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 15:15:34 +0200 Subject: [PATCH 06/11] New ressources for AVITI data - Mangement of fastq, with lane number - Suppress merging lane step - Use lane number and max-depth to determine number of fastq in the lane - Use aviti module Ref: #96 --- modules/local/module_core_element.nf | 28 +++++++++++++++++ 
nextflow.config | 5 +-- sub-workflows/local/core_element.nf | 47 ++++++++++++++++++++++++++++ sub-workflows/local/core_pipeline.nf | 20 +----------- workflow/illumina_qc.nf | 42 ++++++++++++++++--------- 5 files changed, 107 insertions(+), 35 deletions(-) create mode 100644 modules/local/module_core_element.nf create mode 100644 sub-workflows/local/core_element.nf
diff --git a/modules/local/module_core_element.nf b/modules/local/module_core_element.nf
new file mode 100644
index 0000000..c1e26a2
--- /dev/null
+++ b/modules/local/module_core_element.nf
@@ -0,0 +1,28 @@
+/*
+ * Module for the core analyses of Element Biosciences data
+*/
+
+process DEMUX_STATS { // runs demuxStatsElement.R on the three input files and emits the CSV it writes
+    label 'demux'
+    label 'little_R'
+
+    input:
+        path runManifestJson
+        path assigned
+        path unassigned
+
+    output:
+        path "demultiplexStat.csv", emit: csv
+
+    script:
+    def threshold = task.ext.threshold ?: '' // NOTE(review): inserted verbatim below, while demuxStatsElement.R only takes the value through -t/--threshold; presumably ext.threshold must hold the flag itself (e.g. '--threshold 0.8'), confirm in the process config
+    def lane = params.lane ?: '1' // falls back to '1' when params.lane is not set
+    """
+    demuxStatsElement.R \\
+        --assigned $assigned \\
+        --unassigned $unassigned \\
+        --runManifest $runManifestJson \\
+        --lane $lane \\
+        $threshold
+    """
+}
diff --git a/nextflow.config b/nextflow.config index b54f809..8bff246 100644 --- a/nextflow.config +++ b/nextflow.config @@ -86,6 +86,7 @@ params { // skip parameters skip_core_illumina = false + skip_core_element = false help = false } @@ -95,9 +96,9 @@ params { //========================================= import java.nio.file.Files import java.nio.file.Paths -def n_read_files = Files.walk(Paths.get(params.inputdir.toString())) +def n_read_files = Files.walk(Paths.get(params.inputdir.toString()), 3) .filter(Files::isRegularFile) - .filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz")) + .filter(p -> p.getFileName().toString().matches(".*_L00${params.lane}_R[12](_.*)?\\.fastq\\.gz")) .count() // on retire les 2 fichiers undetermined diff --git a/sub-workflows/local/core_element.nf b/sub-workflows/local/core_element.nf new file mode 100644 index 0000000..6207685 --- /dev/null +++ 
b/sub-workflows/local/core_element.nf
@@ -0,0 +1,47 @@
+// -------------------------------------------------
+// CORE ELEMENT
+// -------------------------------------------------
+/*
+ * Demultiplexing statistics for Element Biosciences runs
+*/
+
+// -------------------------------------------------
+// MODULES
+// -------------------------------------------------
+include {
+    DEMUX_STATS
+} from "$baseDir/modules/local/module_core_element.nf"
+
+include {
+    TREATMENT_DEMUXSTAT_ELEMENT as TREATMENT_DEMUX_RUN;
+    TREATMENT_DEMUXSTAT_ELEMENT as TREATMENT_DEMUX_READSETS;
+} from "$baseDir/modules/local/module_NGL-Bi.nf" // NOTE(review): no patch visible in this series adds TREATMENT_DEMUXSTAT_ELEMENT to module_NGL-Bi.nf; confirm it is defined there
+
+// -------------------------------------------------
+// LOCAL PARAMS
+// -------------------------------------------------
+
+
+// -------------------------------------------------
+// WORKFLOW
+// -------------------------------------------------
+
+workflow CORE_ELEMENT {
+    take:
+        runManifestJson
+        assigned
+        unassigned
+        nglBiRunCode
+        readsetsFile
+
+    main:
+        // ----------- DemultiplexStat: build demultiplexStat.csv from the three inputs
+        DEMUX_STATS(runManifestJson, assigned, unassigned)
+
+        // ----------- NGL-Bi: called unconditionally here, whereas CORE_ILLUMINA wraps the same two calls in `if (params.insert_to_ngl)`; NOTE(review): confirm this difference is intended
+        TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.csv)
+        TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.csv)
+
+    emit:
+        demuxStat = DEMUX_STATS.out.csv
+}
\ No newline at end of file
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf index a14b560..20d45dc 100644 --- a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -15,7 +15,6 @@ include { FASTQC; FASTQSCREEN; DUPLICATED_READS; - MERGE_LANES; } from "$baseDir/modules/local/module_core.nf" include { GUNZIP } from "${params.shared_modules}/gzip.nf" include { SEQTK_SAMPLE } from "${params.shared_modules}/seqtk.nf" @@ -28,26 +27,9 @@ isResume=workflow.resume //------------------------------------------------- workflow CORE { take: - ch_fastq + ch_read main: - // ----------- Lane merging fastq - if (params.merge_lanes) { - MERGE_LANES(ch_fastq - .collect{it[1]} - .flatten() 
- .map { $it -> [ ($it.simpleName =~ /(.*)_S\d+_.*/)[0][1] , $it ] } - .groupTuple() - ) - - ch_read = MERGE_LANES.out.fastq - .collect{it[1]} - .flatten() - .map{$it -> [$it.simpleName, $it]} - } else { - ch_read = ch_fastq - } - // ----------- FASTQC FASTQC(ch_read) diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 63d95c5..1a8939e 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -27,22 +27,36 @@ System.out.println "\n" // ------------------------------------------------- // CHANNELS // ------------------------------------------------- -ch_ss = Channel.fromPath(params.samplesheet) -ch_DemuxSummary=Channel.fromPath(params.inputdir+"/Stats/DemuxSummaryF1L*.txt") -ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml') +// Illumina's channels +ss_path = file(params.samplesheet) +demuxSummary_path = file(params.inputdir+"/Stats/DemuxSummaryF1L${params.lane}.txt") +demuxStatXML_path = file(params.inputdir+'/Stats/DemultiplexingStats.xml') +ch_ss = ss_path.exists() ? Channel.fromPath(ss_path) : Channel.empty() +ch_DemuxSummary = demuxSummary_path.exists() ? Channel.fromPath(demuxSummary_path) : Channel.empty() +ch_DemuxStatXML = demuxStatXML_path.exists() ? Channel.fromPath(demuxStatXML_path) : Channel.empty() + +// Element's channels +runManifestJSON_path = file(params.inputdir+"/RunManifest.json") +indexAssigned_path = file(params.inputdir+"/IndexAssignment.csv") +indexUnassigned_path = file(params.inputdir+"/UnassignedSequences.csv") +ch_runManifestJSON = runManifestJSON_path.exists() ? Channel.fromPath(runManifestJSON_path) : Channel.empty() +ch_indexAssigned = indexAssigned_path.exists() ? Channel.fromPath(indexAssigned_path) : Channel.empty() +ch_indexUnassigned = indexUnassigned_path.exists() ? Channel.fromPath(indexUnassigned_path) : Channel.empty() + +def SamplesBaseDir = params.sequencer =~ 'AVITI' ? 
'Samples' : '' // Get samples globPatterns def sampleList = [] def indexFilesList = [] if (params.select_samples) { params.select_samples.tokenize(',').each { sample -> - sampleList.add("${params.inputdir}/${params.project}/**" + sample +'_*_R{1,2}{_*,*}.fastq.gz') - indexFilesList.add("${params.inputdir}/${params.project}/**" + sample +'_*_I{1,2}{_*,*}.fastq.gz') + sampleList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**" + sample +"_*_L00${params.lane}_R{1,2}{_*,*}.fastq.gz") + indexFilesList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**" + sample +"_*_L00${params.lane}_I{1,2}{_*,*}.fastq.gz") } } else { System.out.println "Aucun échantillon selectionné, on les sélectionne tous" - sampleList.add("${params.inputdir}/${params.project}/**_R{1,2}{_*,*}.fastq.gz") - indexFilesList.add("${params.inputdir}/${params.project}/**_I{1,2}{_*,*}.fastq.gz") + sampleList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**_L00${params.lane}_R{1,2}{_*,*}.fastq.gz") + indexFilesList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**_L00${params.lane}_I{1,2}{_*,*}.fastq.gz") } // Get 10X index Files @@ -76,6 +90,7 @@ createDir = file(params.outdir).mkdir() // INCLUDES // ------------------------------------------------- include { CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf" +include { CORE_ELEMENT } from "$baseDir/sub-workflows/local/core_element.nf" include { CORE } from "$baseDir/sub-workflows/local/core_pipeline.nf" include { DNA_QC } from "$baseDir/sub-workflows/local/dna_qc.nf" include { RNA_QC } from "$baseDir/sub-workflows/local/rna_qc.nf" @@ -125,12 +140,11 @@ workflow SHORT_READS_QC { if (! 
params.skip_core_illumina && params.sequencer =~ "NovaSeq|MiSeq" ) { CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, nglBiRunCode, readsets_created) fastq = CORE_ILLUMINA.out.fastq - - if (params.insert_to_ngl){ - // Add demultiplexStat treatments - TREATMENT_DEMUX_RUN(NGLBI.out.nglBiRunCode, CORE_ILLUMINA.out.demuxStat) - TREATMENT_DEMUX_READSETS(NGLBI.out.readsets_created, CORE_ILLUMINA.out.demuxStat) - } + } else { + fastq = ch_read + } + if (! params.skip_core_element && params.sequencer =~ "AVITI") { + CORE_ELEMENT(ch_runManifestJSON, ch_indexAssigned, ch_indexUnassigned, nglBiRunCode, readsets_created) } CORE(fastq) @@ -175,7 +189,7 @@ workflow SHORT_READS_QC { ) } else { - System.out.println "Le QC des données non ADN n'est pas prit en charge pour le moment." + System.out.println "Le QC des données ${params.data_nature} n'a pas de sub-workflow spécifique pour le moment." ch_mqc = ch_mqc.mix( Channel.empty() ) } -- GitLab From e20fd2d83937fd1940b5bda828a03e6e81c0ebc0 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 16:26:12 +0200 Subject: [PATCH 07/11] Fix bad workflow.profile variable --- conf/functions.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/functions.config b/conf/functions.config index 863ca2b..33c8190 100644 --- a/conf/functions.config +++ b/conf/functions.config @@ -153,7 +153,7 @@ def sendFinalMail(formatted_date, summary) { email_fields['runNGLBi'] = (params.bi_run_code ?: '') email_fields['xpNGLSq'] = (params.sq_xp_code ?: '') email_fields['success'] = workflow.success - email_fields['instance'] = params.profile + email_fields['instance'] = worflow.profile email_fields['dateComplete'] = formatted_date email_fields['duration'] = workflow.duration email_fields['exitStatus'] = workflow.exitStatus -- GitLab From 6cada584994e26a2cfccc6c3e00b9f0ccc0a7b4c Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 16:39:49 +0200 Subject: [PATCH 
08/11] Update docs/usage.md Ref: #96 --- docs/usage.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 0d966ed..6bf72b3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,7 +131,11 @@ To skip subsampling step in core pipeline. _Default_ : false - **`--skip_core_illumina`** [bool] -To skip core illumina sub-workflow in core pipeline. To be use to analyze data produced by other platform than Illumina. +To skip core illumina sub-workflow in core pipeline. Has an effect only if `sequencer` is NovaSeq or MiSeq. +_Default_ : false + +- **`--skip_core_element`** [bool] +To skip core element sub-workflow in core pipeline. Has an effect only if `sequencer` is AVITI. _Default_ : false ## Workflows related parameters -- GitLab From b6fcd0fd3eaf36ffc22e8ee58bf7e322b48d0367 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 16:45:13 +0200 Subject: [PATCH 09/11] MD5SUM is now named with file type --- workflow/illumina_qc.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 1a8939e..d2d87ca 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -106,7 +106,7 @@ include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC; CREATE_ANALYSIS; } from "${params.shared_modules}/ngl_bi.nf" include { READSET_FILE_FROM_FILE as ADD_RS_INDEX_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'INDEX') include { READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW') -include { md5sum as MD5SUM; +include { md5sum as MD5SUM_FASTQ; md5sum as MD5SUM_INDEX; } from "${params.shared_modules}/md5sum.nf" include { BEGIN_NGLBI as NGLBI } from "${params.shared_modules}/workflows/begin_nglbi.nf" @@ -209,8 +209,8 @@ workflow SHORT_READS_QC { ADD_RS_INDEX_FILES(readsets_created, MD5SUM_INDEX.out, 'INDEX', ready) } RENAME_FASTQ(fastq.map{it[1]}.collect(), 
readsets_created, params.sq_xp_code, 'fastq_read') - MD5SUM(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq_read') - ADD_RS_RAW_FILES(readsets_created, MD5SUM.out, 'RAW', ready) + MD5SUM_FASTQ(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq_read') + ADD_RS_RAW_FILES(readsets_created, MD5SUM_FASTQ.out, 'RAW', ready) UPDATE_STATE_FQC(readsets_created, 'F-QC', MULTIQC.out.html) CREATE_ANALYSIS(nglBiRunCode, readsets_created, 1) ADD_MULTIQC(CREATE_ANALYSIS.out.createdFile, MULTIQC.out.html, CREATE_ANALYSIS.out.ready) -- GitLab From f12c5904819da84673c13a75f3951ef8930df50a Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Mon, 5 Aug 2024 17:01:56 +0200 Subject: [PATCH 10/11] Fix bad workflow.profile variable --- conf/functions.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/functions.config b/conf/functions.config index 33c8190..e4ff017 100644 --- a/conf/functions.config +++ b/conf/functions.config @@ -153,7 +153,7 @@ def sendFinalMail(formatted_date, summary) { email_fields['runNGLBi'] = (params.bi_run_code ?: '') email_fields['xpNGLSq'] = (params.sq_xp_code ?: '') email_fields['success'] = workflow.success - email_fields['instance'] = worflow.profile + email_fields['instance'] = workflow.profile email_fields['dateComplete'] = formatted_date email_fields['duration'] = workflow.duration email_fields['exitStatus'] = workflow.exitStatus -- GitLab From 197b93e3a1fe0e8e134def1c59d4ce0cd06ed8c8 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Thu, 22 Aug 2024 16:59:34 +0200 Subject: [PATCH 11/11] Update base config for md5sum process --- conf/base.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index 5a965ba..465f361 100644 --- a/conf/base.config +++ b/conf/base.config @@ -324,7 +324,7 @@ process { module = toolsModuleHash['SEQTK'] } - withName: MULTIQC { + withName: MULTIQC { ext.args = [ "--config 
${baseDir}/assets/multiqc_config.yaml", params.project ? "--title '${params.project} - ${params.run_name}'" : '' @@ -355,7 +355,7 @@ process { ] } - withName: MD5SUM { + withName: 'MD5SUM_FASTQ|MD5SUM_INDEX' { time = { checkMax( 3.h * task.attempt * params.resource_factor, 'time' ) } publishDir = [ path: "${params.outdir}/fastq", -- GitLab