12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273 |
# Summarise fastp JSON reports into a single table, one row per report.
#
# Command-line arguments:
#   --input   one or more fastp JSON report files
#   --output  folder for the results (default: "report"); created if absent
#
# Files written into the output folder:
#   fastp_summary.tsv    tab-separated table, sorted by sample
#   fastp_summary.RData  the same data frame, stored as `fastp_summary`

suppressPackageStartupMessages({
  library(jsonlite)
  library(dplyr)
  library(tidyr)
  library(purrr)
  library(argparse)
})

# Command-line interface ----

parser <- ArgumentParser()
parser$add_argument(
  "--input",
  type = "character",
  nargs = "+",
  help = "input fastp JSON reports"
)
parser$add_argument(
  "--output",
  type = "character",
  default = "report",
  help = "output folder to store fastp summary TSV and RData files"
)
args <- parser$parse_args()

json_files <- args$input
output_folder <- args$output

# `args$input` has length 0 when --input was not supplied.
if (length(json_files) == 0) {
  stop("No input files provided.")
}

dir.create(output_folder, showWarnings = FALSE, recursive = TRUE)
output_file <- file.path(output_folder, "fastp_summary.tsv")

# Metrics copied from both the `before_filtering` and `after_filtering`
# entries of each report's `summary`.
fields <- c(
  "total_reads",
  "total_bases",
  "q30_rate",
  "gc_content"
)

# Helpers ----

# Pull `fields` out of one summary section as a named list.
#
# section: list holding the metrics of one section (may be NULL).
# fields:  character vector of metric names to extract.
# prefix:  string prepended to each metric name to form the column name.
#
# Returns a list with one element per field, named paste0(prefix, field).
# A metric that is absent from `section` becomes NA so that every report
# yields the same columns in the same order.
pick_fields <- function(section, fields, prefix) {
  values <- lapply(fields, function(field) {
    value <- section[[field]]
    if (is.null(value)) NA_real_ else value
  })
  names(values) <- paste0(prefix, fields)
  values
}

# Build the one-row summary for a single report.
#
# json_file: path of the report, passed to jsonlite::fromJSON().
# fields:    character vector of metric names to extract.
#
# Returns a data frame with columns `sample`, `before_<field>...`,
# `after_<field>...`. Stops with an error naming the file when the report
# cannot be read or parsed.
read_fastp_row <- function(json_file, fields) {
  report <- tryCatch(
    fromJSON(json_file),
    error = function(e) {
      stop(
        "Failed to read ", json_file, ": ", conditionMessage(e),
        call. = FALSE
      )
    }
  )

  # The sample name is the file name without "_fastp.json". The text is
  # matched literally (fixed = TRUE) so "." cannot match other characters.
  sample_name <- gsub("_fastp.json", "", basename(json_file), fixed = TRUE)

  columns <- c(
    list(sample = sample_name),
    pick_fields(report$summary$before_filtering, fields, "before_"),
    pick_fields(report$summary$after_filtering, fields, "after_")
  )
  as.data.frame(columns, stringsAsFactors = FALSE)
}

# Build and write the summary ----

# Collect all rows first and bind once instead of growing a data frame
# inside a loop.
summary_rows <- lapply(json_files, read_fastp_row, fields = fields)
fastp_summary <- bind_rows(summary_rows) %>%
  arrange(sample)

write.table(
  fastp_summary,
  output_file,
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)
save(fastp_summary, file = file.path(output_folder, "fastp_summary.RData"))
|