From 8f2d78216fcd52e93fd8ef5e713d8eadb6fd1587 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 13:37:53 +0200
Subject: [PATCH 01/11] New script for AVITI demultiplexStat

	Ref: #102
---
 bin/demuxStatsElement.R | 161 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100755 bin/demuxStatsElement.R

diff --git a/bin/demuxStatsElement.R b/bin/demuxStatsElement.R
new file mode 100755
index 0000000..7e25e17
--- /dev/null
+++ b/bin/demuxStatsElement.R
@@ -0,0 +1,161 @@
+#!/usr/bin/env Rscript
+
+## Extracts demultiplexing statistics from several files generated by bases2fastq and compiles them into a single CSV.
+
+## --------------------
+#       PACKAGES
+## --------------------
+suppressWarnings(library('optparse'))
+suppressWarnings(library('jsonlite'))
+suppressWarnings(library('dplyr'))
+
+## --------------------
+#       FUNCTIONS
+## --------------------
+
+
+## --------------------
+#       PARAMETERS
+## --------------------
+option_list <- list(
+		# Every option carries a default value, so none has to be given explicitly on the command line.
+		make_option(c("-a", "--assigned"), type = "character", default = './IndexAssignment.csv', metavar = "character",
+				help = "Path to the IndexAssignment.csv file."),
+		make_option(c("-u", "--unassigned"), type = "character", default = './UnassignedSequences.csv', metavar = "character",
+				help = "Path to the UnassignedSequences.csv file."),
+		make_option(c("-r", "--runManifest"), type = "character", default = './RunManifest.json', metavar = "character",
+				help = "Path to the RunManifest.json file. That Must be a JSON file !"),
+		make_option(c("-l", "--lane"), type = "character", default = '1', metavar = "character",
+				help = "Lane to work on, could be 1, 2 or 1+2"),
+		make_option(c("-t", "--threshold"), type = "numeric", default = 0.8, metavar = "numeric",
+				help = "Barcode count threshold")
+)
+
+opt_parser <- OptionParser(usage="Make demultiplexStats file from Element demultiplexing output.", option_list = option_list)
+opt <- parse_args(opt_parser)
+# Defensive check on the parsed options; scalar `||` is the short-circuit operator meant for `if` conditions.
+if (is.null(opt$assigned) || is.null(opt$unassigned) || is.null(opt$runManifest) || is.null(opt$lane)) {
+	stop("At least one argument is missing.\n", call. = FALSE)
+}
+
+## --------------------
+#          LOG
+## --------------------
+cat("\nLancement du script demuxStatsElement.R avec les options suivantes :\n")
+cat("\tFichier des index assignés :\t\t", opt$assigned, "\n", sep = "")
+cat("\tFichier des index non assignés :\t", opt$unassigned, "\n", sep = "")
+cat("\tRunManifest :\t\t\t\t", opt$runManifest, "\n", sep = "")
+cat("\tNumero de lane :\t\t\t", opt$lane, "\n", sep = "")
+cat("\tBC count threshold :\t\t\t", as.character(opt$threshold), "\n", sep = "")
+launchDir <- getwd()
+cat("\nRépertoire de travail :\t", launchDir, "\n\n", sep = "")
+
+
+## --------------------
+#          MAIN
+## --------------------
+# Unpack the parsed options into the plain variables used by the rest of the script.
+assigned <- opt[["assigned"]]
+unassigned <- opt[["unassigned"]]
+runManifestJson <- opt[["runManifest"]]
+lane <- opt[["lane"]]
+threshold <- opt[["threshold"]]
+# The output CSV is written into the directory the script was launched from.
+demultiplexStat <- paste0(launchDir, "/", "demultiplexStat.csv")
+# Empty accumulator whose columns define the layout of the output file.
+demultiplex_stat <- data.frame(
+    Project = character(0), Sample = character(0), Barcode = character(0),
+    bcCount = integer(0),
+    percOfFrag = numeric(0),
+    stringsAsFactors = FALSE
+)
+
+# Load the three input files given on the command line.
+assigned_data <- read.csv(assigned, stringsAsFactors = FALSE)
+run_manifest <- fromJSON(runManifestJson)
+unassigned_data <- read.csv(unassigned, stringsAsFactors = FALSE)
+
+# Keep only the rows whose last column equals the requested lane.
+# NOTE(review): the --lane help mentions "1+2"; this equality test only matches rows holding that literal value - confirm intent.
+lane_of_assigned <- assigned_data[, ncol(assigned_data)]
+assigned_filtered <- filter(assigned_data, lane_of_assigned == lane)
+lane_of_unassigned <- unassigned_data[, ncol(unassigned_data)]
+unassigned_filtered <- filter(unassigned_data, lane_of_unassigned == lane)
+
+# Build one output row per assigned sample of the selected lane.
+cat("\nExtraction des échantillons assignés\n")
+if (nrow(assigned_filtered) == 0) {
+    stop("No assigned index found for lane ", lane, ".\n", call. = FALSE)
+}
+for (i in seq_len(nrow(assigned_filtered))) {
+    # Fields are read by column position: 2 = sample, 3-4 = barcodes, 5 = count, 6 = percentage.
+    sample <- assigned_filtered[i, 2]
+    bc1 <- assigned_filtered[i, 3]
+    bc2 <- assigned_filtered[i, 4]
+    bcCount <- assigned_filtered[i, 5]
+    perc <- assigned_filtered[i, 6]
+    project <- run_manifest$Samples %>%
+        filter(SampleName == sample) %>%
+        .$Project
+    new_row <- data.frame(
+        Project = project,
+        Sample = sample,
+        Barcode = paste(bc1, bc2, sep = "-"),
+        bcCount = bcCount,
+        percOfFrag = perc,
+        stringsAsFactors = FALSE
+    )
+    cat("\tAjout de l'échantillon : ", project, "->" , sample, "\n")
+    demultiplex_stat <- rbind(demultiplex_stat, new_row)
+}
+cat("\n")
+# Threshold = fraction of the smallest assigned-sample count (only assigned rows exist at this point).
+bcCount.threshold <- threshold * min(demultiplex_stat$bcCount)
+
+# Append one "Undetermined" row per unassigned barcode pair of the selected lane.
+cat("\nExtraction des séquences non assignés\n")
+# seq_len() skips the loop entirely when the lane has no unassigned sequence.
+for (i in seq_len(nrow(unassigned_filtered))) {
+    sample <- "Undetermined"
+    # Fields are read by column position: 1-2 = barcodes, 3 = percentage, 4 = count.
+    bc1 <- unassigned_filtered[i, 1]
+    bc2 <- unassigned_filtered[i, 2]
+    bcCount <- unassigned_filtered[i, 4]
+    perc <- unassigned_filtered[i, 3]
+    project <- "DefaultProject"
+
+    new_row <- data.frame(
+        Project = project,
+        Sample = sample,
+        Barcode = paste(bc1, bc2, sep = "-"),
+        bcCount = bcCount,
+        percOfFrag = perc,
+        stringsAsFactors = FALSE
+    )
+
+    cat("\tAjout de l'échantillon : ", project, "->" , sample, "\n")
+    demultiplex_stat <- rbind(demultiplex_stat, new_row)
+}
+cat("\n")
+
+# Keep only the rows whose bcCount reaches the threshold, and report how many rows were dropped.
+initial_nrow <- nrow(demultiplex_stat)
+demultiplex_stat <- filter(demultiplex_stat, bcCount >= bcCount.threshold)
+lines_removed <- initial_nrow - nrow(demultiplex_stat)
+# The smallest count is taken over real samples only: surviving "Undetermined" rows may fall below it.
+min_sample_count <- min(demultiplex_stat$bcCount[demultiplex_stat$Sample != "Undetermined"])
+cat("L'échantillon le moins souvent retrouvé a", min_sample_count, "séquences\n")
+cat("Le seuil du nombre de séquence à", threshold*100, "% est donc de", bcCount.threshold, "séquences\n")
+cat("Tous les index indéterminés ayant au moins ce nombre de séquences seront gardés\n")
+cat("\tNombre de lignes retirées du demultiplexStat :", lines_removed, "\n")
+
+# Order the rows from the highest to the lowest bcCount.
+demultiplex_stat <- arrange(demultiplex_stat, desc(bcCount))
+
+# Write the table as an unquoted CSV without row names, then report completion.
+cat("\nEcriture du fichier de sortie :", demultiplexStat,"\n")
+write.csv(
+    x = demultiplex_stat, file = demultiplexStat,
+    quote = FALSE, row.names = FALSE
+)
+cat("Fin normale du script.\n")
\ No newline at end of file
-- 
GitLab


From 471ef532acdce6b48c67e9237c7706ddf0267536 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 13:52:22 +0200
Subject: [PATCH 02/11] fastqScreen config file has default location

	In assets/

	Ref: #105
---
 conf/base.config             | 2 --
 modules/local/module_core.nf | 8 +++++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 2a88de8..5a965ba 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -113,8 +113,6 @@ process {
 		time = { checkMax( 1.h * task.attempt, 'time' ) }
 		module = toolsModuleHash['FASTQSCREEN']
 
-		ext.args = "--conf ${params.inputdir}/fastq_screen.conf"
-
 		publishDir = [
 			path: "${params.outdir}/ContaminationSearch/FastQ-Screen",
 			mode: 'copy'
diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf
index a874ffc..43e8128 100644
--- a/modules/local/module_core.nf
+++ b/modules/local/module_core.nf
@@ -83,8 +83,14 @@ process FASTQSCREEN {
 	
 	script:
 	def args = task.ext.args ?: ''
+	def defaultConf = "${baseDir}/assets/fastq_screen.conf_example"
+	def inputConf = "${params.inputdir}/fastq_screen.conf"
+	def confFile = file(inputConf).exists() ? inputConf : defaultConf
 	"""
-		fastq_screen $reads $args
+		fastq_screen \\
+			$reads \\
+			--conf ${confFile} \\
+			$args
 	"""
 }
 
-- 
GitLab


From 227313c72e7aff47df3db61de01a055e08839b06 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 13:59:09 +0200
Subject: [PATCH 03/11] Update paths in default fastqscreen config file

	Ref: #105
---
 assets/fastq_screen.conf_example | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/assets/fastq_screen.conf_example b/assets/fastq_screen.conf_example
index dbbb7bc..084bedc 100644
--- a/assets/fastq_screen.conf_example
+++ b/assets/fastq_screen.conf_example
@@ -55,10 +55,10 @@ THREADS		8
 ## have contaminated your sample during the library preparation step.
 ##
 Genome of E. coli
-DATABASE	E.coli	/work/bank/bwadb/Escherichia_coli_FRIK2069
+DATABASE	E.coli	/home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069
 
 Sequence of PhiX
-DATABASE	PhiX	/work/bank/bwadb/phi.fa
+DATABASE	PhiX	/home/sbsuser/plage/references/indexed/ng6_conta_ref/PhiX/BWA/phi.fa
 
 Genome of yeast
-DATABASE	Yeast	/work/bank/bwadb/yeast.nt
+DATABASE	Yeast	/home/sbsuser/plage/references/indexed/ng6_conta_ref/Saccharomyces_cerevisiae/genome/BWA/yeast.nt
-- 
GitLab


From 5c300cddaaccc57dbd12b40582f258093c84e727 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 14:29:19 +0200
Subject: [PATCH 04/11] Rename illumina_QC to Short_reads_qc

	Ref: #106
---
 main.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/main.nf b/main.nf
index be6b94b..639c4f5 100644
--- a/main.nf
+++ b/main.nf
@@ -32,10 +32,10 @@ params.summary.collect{k,v -> println "$k : $v"}
     NAMED WORKFLOW FOR PIPELINE
 ========================================================================================
 */
-include { ILLUMINA_QC } from "$baseDir/workflow/illumina_qc.nf"
+include { SHORT_READS_QC } from "$baseDir/workflow/illumina_qc.nf"
 
-workflow QC_ANALYSIS {
-    ILLUMINA_QC()
+workflow PLAGE {
+    SHORT_READS_QC()
 }
 
 /*
@@ -45,5 +45,5 @@ workflow QC_ANALYSIS {
 */
 
 workflow {
-    QC_ANALYSIS()
+    PLAGE()
 }
-- 
GitLab


From 3facd2ecaba028c4f3dd732d8048a46ac0b415ed Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 14:40:14 +0200
Subject: [PATCH 05/11] Move illumina's ngl-bi dependencies to illumina
 sub-workflows

	Ref: #107
---
 modules/local/module_NGL-Bi.nf       |  2 +-
 sub-workflows/local/core_illumina.nf | 13 ++++++++++++
 workflow/illumina_qc.nf              | 31 +++++++++++++++-------------
 3 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index c45a7b9..c32681f 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -20,7 +20,7 @@ process prepareReadSetCreation {
 	"""
 }
 
-process TREATMENT_DEMUXSTAT {
+process TREATMENT_DEMUXSTAT_ILLUMINA {
 	label 'ngl'
 
 	input:
diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf
index a03cd6f..61f8885 100644
--- a/sub-workflows/local/core_illumina.nf
+++ b/sub-workflows/local/core_illumina.nf
@@ -15,6 +15,11 @@ include {
 	ILLUMINA_FILTER;
 } from "$baseDir/modules/local/module_core.nf"
 
+include {	
+	TREATMENT_DEMUXSTAT_ILLUMINA as TREATMENT_DEMUX_RUN;
+	TREATMENT_DEMUXSTAT_ILLUMINA as TREATMENT_DEMUX_READSETS;
+} from "$baseDir/modules/local/module_NGL-Bi.nf"
+
 // -------------------------------------------------
 // 					LOCAL PARAMS
 // -------------------------------------------------
@@ -30,6 +35,8 @@ workflow CORE_ILLUMINA {
 		demuxStatXML
 		demuxSummary
 		fastq
+		nglBiRunCode
+		readsetsFile
 		
 	main:		
 		// ----------- DemultiplexStat
@@ -45,6 +52,12 @@ workflow CORE_ILLUMINA {
 			fastq_good = ILLUMINA_FILTER.out.reads
 		}
 
+		if (params.insert_to_ngl){
+			// Add demultiplexStat treatments
+			TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.demultiplexStatsTSV)
+			TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV)
+		}
+
     emit:
         fastq = fastq_good
 		demuxStat = DEMUX_STATS.out.demultiplexStatsTSV
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 9ba6b36..63d95c5 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -81,9 +81,7 @@ include {	DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
 include {	RNA_QC		} from "$baseDir/sub-workflows/local/rna_qc.nf"
 include {	DIVERSITY_QC	} from "$baseDir/sub-workflows/local/diversity_qc.nf"
 include { 	PARSE_REPORTS } from "$baseDir/modules/local/module_DTM.nf"
-include {	TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN;
-			TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS;
-			FILE_RENAME as RENAME_FASTQ;
+include {	FILE_RENAME as RENAME_FASTQ;
 			FILE_RENAME as RENAME_INDEX;
 						} from "$baseDir/modules/local/module_NGL-Bi.nf"
 include {	MULTIQC		} from "${params.shared_modules}/multiqc.nf"
@@ -109,18 +107,23 @@ sendBeginMail(format.format(new Date()))
 // -------------------------------------------------
 // 					WORKFLOW
 // -------------------------------------------------
-workflow ILLUMINA_QC {
+workflow SHORT_READS_QC {
 	ch_mqc = Channel.empty()
 	WORKFLOW_SUMMARY()
 
 	if (params.insert_to_ngl){
 		NGLBI(params.bi_run_code, params.sq_xp_code, '', params.sequencer)
+		nglBiRunCode = NGLBI.out.nglBiRunCode
+		readsets_created = NGLBI.out.readsets_created
+		ready = NGLBI.out.ready
+	} else {
+		nglBiRunCode = Channel.empty()
+		readsets_created = Channel.empty()
+		ready = Channel.empty()
 	}
 
-	if ( params.skip_core_illumina ) {
-		fastq = ch_read
-	} else {
-		CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read)
+	if (! params.skip_core_illumina && params.sequencer =~ "NovaSeq|MiSeq" ) {
+		CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, nglBiRunCode, readsets_created)
 		fastq = CORE_ILLUMINA.out.fastq
 
 		if (params.insert_to_ngl){
@@ -187,15 +190,15 @@ workflow ILLUMINA_QC {
 
 	if (params.insert_to_ngl){
 		if (params.single_cell) {
-			RENAME_INDEX(ch_index.map{it[1]}.collect(), NGLBI.out.readsets_created, params.sq_xp_code, 'fastq_index')
+			RENAME_INDEX(ch_index.map{it[1]}.collect(), readsets_created, params.sq_xp_code, 'fastq_index')
 			MD5SUM_INDEX(RENAME_INDEX.out.fastq.collect(), params.run_name+'_fastq_index')
-			ADD_RS_INDEX_FILES(NGLBI.out.readsets_created, MD5SUM_INDEX.out, 'INDEX', NGLBI.out.ready)
+			ADD_RS_INDEX_FILES(readsets_created, MD5SUM_INDEX.out, 'INDEX', ready)
 		}
-		RENAME_FASTQ(fastq.map{it[1]}.collect(), NGLBI.out.readsets_created, params.sq_xp_code, 'fastq_read')
+		RENAME_FASTQ(fastq.map{it[1]}.collect(), readsets_created, params.sq_xp_code, 'fastq_read')
 		MD5SUM(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq_read')
-		ADD_RS_RAW_FILES(NGLBI.out.readsets_created, MD5SUM.out, 'RAW', NGLBI.out.ready)
-		UPDATE_STATE_FQC(NGLBI.out.readsets_created, 'F-QC', MULTIQC.out.html)
-		CREATE_ANALYSIS(NGLBI.out.nglBiRunCode, NGLBI.out.readsets_created, 1)
+		ADD_RS_RAW_FILES(readsets_created, MD5SUM.out, 'RAW', ready)
+		UPDATE_STATE_FQC(readsets_created, 'F-QC', MULTIQC.out.html)
+		CREATE_ANALYSIS(nglBiRunCode, readsets_created, 1)
 		ADD_MULTIQC(CREATE_ANALYSIS.out.createdFile, MULTIQC.out.html, CREATE_ANALYSIS.out.ready)
 	}
 
-- 
GitLab


From 23f25da07d150f784f9e3c763ca2ca368bdbcd9e Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 15:15:34 +0200
Subject: [PATCH 06/11] New resources for AVITI data

	- Management of fastq, with lane number
	- Suppress merging lane step
	- Use lane number and max-depth to determine number of fastq in the lane
	- Use aviti module

	Ref: #96
---
 modules/local/module_core_element.nf | 28 +++++++++++++++++
 nextflow.config                      |  5 +--
 sub-workflows/local/core_element.nf  | 47 ++++++++++++++++++++++++++++
 sub-workflows/local/core_pipeline.nf | 20 +-----------
 workflow/illumina_qc.nf              | 42 ++++++++++++++++---------
 5 files changed, 107 insertions(+), 35 deletions(-)
 create mode 100644 modules/local/module_core_element.nf
 create mode 100644 sub-workflows/local/core_element.nf

diff --git a/modules/local/module_core_element.nf b/modules/local/module_core_element.nf
new file mode 100644
index 0000000..c1e26a2
--- /dev/null
+++ b/modules/local/module_core_element.nf
@@ -0,0 +1,28 @@
+/*
+ *	Module pour les analyses de base des données Element Biosciences
+*/
+
+process DEMUX_STATS {
+	label 'demux'
+    label 'little_R'
+	// Runs demuxStatsElement.R on the run manifest and the two index CSV files, and emits demultiplexStat.csv.
+	input:
+		path runManifestJson
+        path assigned
+        path unassigned
+	
+	output:
+		path "demultiplexStat.csv", emit: csv
+	
+	script:
+    def threshold = task.ext.threshold ?: ''	// NOTE(review): inserted verbatim below, so ext.threshold presumably has to carry the flag itself (e.g. "--threshold 0.8") - confirm
+    def lane = params.lane ?: '1'	// falls back to lane 1 when params.lane is unset or empty
+	"""
+    demuxStatsElement.R \\
+        --assigned $assigned \\
+        --unassigned $unassigned \\
+        --runManifest $runManifestJson \\
+        --lane $lane \\
+        $threshold
+	"""
+}
diff --git a/nextflow.config b/nextflow.config
index b54f809..8bff246 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -86,6 +86,7 @@ params {
 
 	// skip parameters
 	skip_core_illumina = false
+	skip_core_element = false
 
 	help = false
 }
@@ -95,9 +96,9 @@ params {
 //=========================================
 import java.nio.file.Files
 import java.nio.file.Paths
-def n_read_files = Files.walk(Paths.get(params.inputdir.toString()))
+def n_read_files = Files.walk(Paths.get(params.inputdir.toString()), 3)
 	.filter(Files::isRegularFile)
-	.filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz"))
+	.filter(p -> p.getFileName().toString().matches(".*_L00${params.lane}_R[12](_.*)?\\.fastq\\.gz"))
 	.count()
 
 // on retire les 2 fichiers undetermined
diff --git a/sub-workflows/local/core_element.nf b/sub-workflows/local/core_element.nf
new file mode 100644
index 0000000..6207685
--- /dev/null
+++ b/sub-workflows/local/core_element.nf
@@ -0,0 +1,47 @@
+// -------------------------------------------------
+// 					CORE ELEMENT
+// -------------------------------------------------
+/*
+ * Statistiques de démultiplexage
+*/
+
+// -------------------------------------------------
+// 					MODULES
+// -------------------------------------------------
+include {
+	DEMUX_STATS
+} from "$baseDir/modules/local/module_core_element.nf"
+
+include {	
+	TREATMENT_DEMUXSTAT_ELEMENT as TREATMENT_DEMUX_RUN;
+	TREATMENT_DEMUXSTAT_ELEMENT as TREATMENT_DEMUX_READSETS;
+} from "$baseDir/modules/local/module_NGL-Bi.nf"
+
+// -------------------------------------------------
+// 					LOCAL PARAMS
+// -------------------------------------------------
+
+
+// -------------------------------------------------
+// 					WORKFLOW
+// -------------------------------------------------
+
+workflow CORE_ELEMENT {
+   take:
+        runManifestJson
+        assigned
+        unassigned
+        nglBiRunCode
+		readsetsFile
+		// The first three inputs feed DEMUX_STATS; the last two feed the NGL-Bi treatments.
+	main:		
+		// ----------- DemultiplexStat: build the demultiplexing statistics CSV from the three input files
+		DEMUX_STATS(runManifestJson, assigned, unassigned)
+		// NOTE(review): unlike CORE_ILLUMINA, the calls below are not wrapped in "if (params.insert_to_ngl)" - confirm this is intended
+		// ----------- NGL-Bi: pass the statistics CSV to the run-level and readset-level treatments
+		TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.csv)
+		TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.csv)
+
+    emit:
+		demuxStat = DEMUX_STATS.out.csv
+}
\ No newline at end of file
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index a14b560..20d45dc 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -15,7 +15,6 @@ include {
 	FASTQC;
 	FASTQSCREEN;
 	DUPLICATED_READS;
-	MERGE_LANES;
 } from "$baseDir/modules/local/module_core.nf"
 include { GUNZIP			} from "${params.shared_modules}/gzip.nf"
 include { SEQTK_SAMPLE 		} from "${params.shared_modules}/seqtk.nf"
@@ -28,26 +27,9 @@ isResume=workflow.resume
 //-------------------------------------------------
 workflow CORE {
 	take:
-		ch_fastq
+		ch_read
 		
 	main:
-		// ----------- Lane merging fastq
-		if (params.merge_lanes) {
-			MERGE_LANES(ch_fastq
-				.collect{it[1]}
-				.flatten()
-				.map { $it -> [ ($it.simpleName =~ /(.*)_S\d+_.*/)[0][1] , $it ] }
-				.groupTuple()
-			)
-
-			ch_read = MERGE_LANES.out.fastq
-				.collect{it[1]}
-				.flatten()
-				.map{$it -> [$it.simpleName, $it]}
-		} else {
-			ch_read = ch_fastq
-		}
-		
 		// ----------- FASTQC
 		FASTQC(ch_read)
 		
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 63d95c5..1a8939e 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -27,22 +27,36 @@ System.out.println "\n"
 // -------------------------------------------------
 // 					CHANNELS
 // -------------------------------------------------
-ch_ss = Channel.fromPath(params.samplesheet)
-ch_DemuxSummary=Channel.fromPath(params.inputdir+"/Stats/DemuxSummaryF1L*.txt")
-ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml')
+// Illumina's channels
+ss_path = file(params.samplesheet)
+demuxSummary_path = file(params.inputdir+"/Stats/DemuxSummaryF1L${params.lane}.txt")
+demuxStatXML_path = file(params.inputdir+'/Stats/DemultiplexingStats.xml')
+ch_ss = ss_path.exists() ? Channel.fromPath(ss_path) : Channel.empty()
+ch_DemuxSummary = demuxSummary_path.exists() ? Channel.fromPath(demuxSummary_path) : Channel.empty()
+ch_DemuxStatXML = demuxStatXML_path.exists() ? Channel.fromPath(demuxStatXML_path) : Channel.empty()
+
+// Element's channels
+runManifestJSON_path = file(params.inputdir+"/RunManifest.json")
+indexAssigned_path = file(params.inputdir+"/IndexAssignment.csv")
+indexUnassigned_path = file(params.inputdir+"/UnassignedSequences.csv")
+ch_runManifestJSON = runManifestJSON_path.exists() ? Channel.fromPath(runManifestJSON_path) : Channel.empty()
+ch_indexAssigned = indexAssigned_path.exists() ? Channel.fromPath(indexAssigned_path) : Channel.empty()
+ch_indexUnassigned = indexUnassigned_path.exists() ? Channel.fromPath(indexUnassigned_path) : Channel.empty()
+
+def SamplesBaseDir = params.sequencer =~ 'AVITI' ? 'Samples' : ''
 
 // Get samples globPatterns
 def sampleList = []
 def indexFilesList = []
 if (params.select_samples) {
 	params.select_samples.tokenize(',').each { sample ->
-		sampleList.add("${params.inputdir}/${params.project}/**" + sample +'_*_R{1,2}{_*,*}.fastq.gz')
-		indexFilesList.add("${params.inputdir}/${params.project}/**" + sample +'_*_I{1,2}{_*,*}.fastq.gz')
+		sampleList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**" + sample +"_*_L00${params.lane}_R{1,2}{_*,*}.fastq.gz")
+		indexFilesList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**" + sample +"_*_L00${params.lane}_I{1,2}{_*,*}.fastq.gz")
 	}
 } else {
 	System.out.println "Aucun échantillon selectionné, on les sélectionne tous"
-	sampleList.add("${params.inputdir}/${params.project}/**_R{1,2}{_*,*}.fastq.gz")
-	indexFilesList.add("${params.inputdir}/${params.project}/**_I{1,2}{_*,*}.fastq.gz")
+	sampleList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**_L00${params.lane}_R{1,2}{_*,*}.fastq.gz")
+	indexFilesList.add("${params.inputdir}/${SamplesBaseDir}/${params.project}/**_L00${params.lane}_I{1,2}{_*,*}.fastq.gz")
 }
 
 // Get 10X index Files
@@ -76,6 +90,7 @@ createDir = file(params.outdir).mkdir()
 // 					INCLUDES
 // -------------------------------------------------
 include {	CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf"
+include {	CORE_ELEMENT } from "$baseDir/sub-workflows/local/core_element.nf"
 include {	CORE		} from "$baseDir/sub-workflows/local/core_pipeline.nf"
 include {	DNA_QC		} from "$baseDir/sub-workflows/local/dna_qc.nf"
 include {	RNA_QC		} from "$baseDir/sub-workflows/local/rna_qc.nf"
@@ -125,12 +140,11 @@ workflow SHORT_READS_QC {
 	if (! params.skip_core_illumina && params.sequencer =~ "NovaSeq|MiSeq" ) {
 		CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, nglBiRunCode, readsets_created)
 		fastq = CORE_ILLUMINA.out.fastq
-
-		if (params.insert_to_ngl){
-			// Add demultiplexStat treatments
-			TREATMENT_DEMUX_RUN(NGLBI.out.nglBiRunCode, CORE_ILLUMINA.out.demuxStat)
-			TREATMENT_DEMUX_READSETS(NGLBI.out.readsets_created, CORE_ILLUMINA.out.demuxStat)
-		}
+	} else {
+		fastq = ch_read
+	}
+	if (! params.skip_core_element && params.sequencer =~ "AVITI") {
+		CORE_ELEMENT(ch_runManifestJSON, ch_indexAssigned, ch_indexUnassigned, nglBiRunCode, readsets_created)		
 	}
 
 	CORE(fastq)
@@ -175,7 +189,7 @@ workflow SHORT_READS_QC {
 		)
 
 	} else {
-		System.out.println "Le QC des données non ADN n'est pas prit en charge pour le moment."
+		System.out.println "Le QC des données ${params.data_nature} n'a pas de sub-workflow spécifique pour le moment."
 		ch_mqc = ch_mqc.mix( Channel.empty() )
 	}
 	
-- 
GitLab


From e20fd2d83937fd1940b5bda828a03e6e81c0ebc0 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 16:26:12 +0200
Subject: [PATCH 07/11] Fix bad workflow.profile variable

---
 conf/functions.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/functions.config b/conf/functions.config
index 863ca2b..33c8190 100644
--- a/conf/functions.config
+++ b/conf/functions.config
@@ -153,7 +153,7 @@ def sendFinalMail(formatted_date, summary) {
     email_fields['runNGLBi'] = (params.bi_run_code ?: '')
     email_fields['xpNGLSq'] = (params.sq_xp_code ?: '')
 	email_fields['success'] = workflow.success
-	email_fields['instance'] = params.profile
+	email_fields['instance'] = worflow.profile
 	email_fields['dateComplete'] = formatted_date
 	email_fields['duration'] = workflow.duration
 	email_fields['exitStatus'] = workflow.exitStatus
-- 
GitLab


From 6cada584994e26a2cfccc6c3e00b9f0ccc0a7b4c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 16:39:49 +0200
Subject: [PATCH 08/11] Update docs/usage.md

	Ref: #96
---
 docs/usage.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/usage.md b/docs/usage.md
index 0d966ed..6bf72b3 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -131,7 +131,11 @@ To skip subsampling step in core pipeline.
 _Default_ : false
 
 - **`--skip_core_illumina`** [bool]  
-To skip core illumina sub-workflow in core pipeline. To be use to analyze data produced by other platform than Illumina.    
+To skip core illumina sub-workflow in core pipeline. Has effect only if `sequencer` is NovaSeq or MiSeq.    
+_Default_ : false
+
+- **`--skip_core_element`** [bool]  
+To skip core element sub-workflow in core pipeline. Has effect only if `sequencer` is AVITI.    
 _Default_ : false
 
 ## Workflows related parameters
-- 
GitLab


From b6fcd0fd3eaf36ffc22e8ee58bf7e322b48d0367 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 16:45:13 +0200
Subject: [PATCH 09/11] MD5SUM is now named with file type

---
 workflow/illumina_qc.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 1a8939e..d2d87ca 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -106,7 +106,7 @@ include {	UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC;
 			CREATE_ANALYSIS;	} from "${params.shared_modules}/ngl_bi.nf"
 include {	READSET_FILE_FROM_FILE as ADD_RS_INDEX_FILES	} from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'INDEX')
 include {	READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES		} from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW')
-include { 	md5sum as MD5SUM;
+include { 	md5sum as MD5SUM_FASTQ;
 			md5sum as MD5SUM_INDEX;
 						} from "${params.shared_modules}/md5sum.nf"
 include {	BEGIN_NGLBI as NGLBI						} from "${params.shared_modules}/workflows/begin_nglbi.nf"
@@ -209,8 +209,8 @@ workflow SHORT_READS_QC {
 			ADD_RS_INDEX_FILES(readsets_created, MD5SUM_INDEX.out, 'INDEX', ready)
 		}
 		RENAME_FASTQ(fastq.map{it[1]}.collect(), readsets_created, params.sq_xp_code, 'fastq_read')
-		MD5SUM(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq_read')
-		ADD_RS_RAW_FILES(readsets_created, MD5SUM.out, 'RAW', ready)
+		MD5SUM_FASTQ(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq_read')
+		ADD_RS_RAW_FILES(readsets_created, MD5SUM_FASTQ.out, 'RAW', ready)
 		UPDATE_STATE_FQC(readsets_created, 'F-QC', MULTIQC.out.html)
 		CREATE_ANALYSIS(nglBiRunCode, readsets_created, 1)
 		ADD_MULTIQC(CREATE_ANALYSIS.out.createdFile, MULTIQC.out.html, CREATE_ANALYSIS.out.ready)
-- 
GitLab


From f12c5904819da84673c13a75f3951ef8930df50a Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 5 Aug 2024 17:01:56 +0200
Subject: [PATCH 10/11] Fix bad workflow.profile variable

---
 conf/functions.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/functions.config b/conf/functions.config
index 33c8190..e4ff017 100644
--- a/conf/functions.config
+++ b/conf/functions.config
@@ -153,7 +153,7 @@ def sendFinalMail(formatted_date, summary) {
     email_fields['runNGLBi'] = (params.bi_run_code ?: '')
     email_fields['xpNGLSq'] = (params.sq_xp_code ?: '')
 	email_fields['success'] = workflow.success
-	email_fields['instance'] = worflow.profile
+	email_fields['instance'] = workflow.profile
 	email_fields['dateComplete'] = formatted_date
 	email_fields['duration'] = workflow.duration
 	email_fields['exitStatus'] = workflow.exitStatus
-- 
GitLab


From 197b93e3a1fe0e8e134def1c59d4ce0cd06ed8c8 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 22 Aug 2024 16:59:34 +0200
Subject: [PATCH 11/11] Update base config for md5sum process

---
 conf/base.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 5a965ba..465f361 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -324,7 +324,7 @@ process {
 		module = toolsModuleHash['SEQTK']
 	}
 
-	withName: MULTIQC {
+	withName: MULTIQC {		
         ext.args = [
             "--config ${baseDir}/assets/multiqc_config.yaml",
             params.project ? "--title '${params.project} - ${params.run_name}'" : ''
@@ -355,7 +355,7 @@ process {
 	    ]
     }
 
-	withName: MD5SUM {
+	withName: 'MD5SUM_FASTQ|MD5SUM_INDEX' {
 		time = { checkMax( 3.h * task.attempt * params.resource_factor, 'time' ) }
 		publishDir = [
             path: "${params.outdir}/fastq",
-- 
GitLab