From 7b8e6df5746750b987304b4f6be5db25eda62ae9 Mon Sep 17 00:00:00 2001 From: Jonathan Kitt <jonathan.kitt@inrae.fr> Date: Thu, 30 Jan 2025 16:57:53 +0100 Subject: [PATCH 1/2] fix bug in detect_cpts() --- R/detect_cpts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/detect_cpts.R b/R/detect_cpts.R index 1beea41..12c86ad 100644 --- a/R/detect_cpts.R +++ b/R/detect_cpts.R @@ -32,7 +32,7 @@ detect_cpts <- function(lr, file_name = NULL, chromosome = NULL, # Set NULL variables - chromosome <- file_name <- position <- NULL + chromosome <- file_name <- position <- lr_sub <- NULL # Extract list of samples -- GitLab From 7f7f6228444aafcbf3c6810b9332d28b16004eb2 Mon Sep 17 00:00:00 2001 From: Jonathan Kitt <jonathan.kitt@inrae.fr> Date: Mon, 3 Feb 2025 11:18:17 +0100 Subject: [PATCH 2/2] update read_axiom() function: read Axiom files, add SNPs, remove unmapped SNPs & arrange data --- R/read_axiom.R | 109 ++++++++++++++++++++++++++++++++++++++++++---- man/read_axiom.Rd | 6 ++- 2 files changed, 106 insertions(+), 9 deletions(-) diff --git a/R/read_axiom.R b/R/read_axiom.R index ba0b22f..cbf45fd 100644 --- a/R/read_axiom.R +++ b/R/read_axiom.R @@ -2,8 +2,11 @@ #' #' @param path path to folder containing the files #' @param read_conf whether or not to read the AxiomGT1.confidences.txt file (defaults to FALSE) +#' @param snp_file file containing the list of SNPs (must have 3 columns, 'probeset_id', 'chromosome', 'position') +#' @param rm_unmapped whether or not to remove unmapped SNPs (defaults to FALSE) #' #' @import dplyr +#' @import tidyr #' @import readr #' @import stringr #' @@ -15,10 +18,10 @@ #' read_axiom(path, read_conf = FALSE) #' } -read_axiom <- function(path, read_conf = FALSE) { +read_axiom <- function(path, read_conf = FALSE, snp_file = NULL, rm_unmapped = FALSE) { # Set NULL variables - "." <- NULL + # "." <- NULL # Check if all Axiom files exist in the folder @@ -43,8 +46,10 @@ read_axiom <- function(path, read_conf = FALSE) { message("Reading AxiomGT1.calls.final.txt") calls <- readr::read_tsv(paste0(path, "AxiomGT1.calls.final.txt"), - show_col_types = FALSE) - assign(x = "calls", value = calls, pos = ".GlobalEnv") + show_col_types = FALSE) |> + tidyr::pivot_longer(cols = -probeset_id, + names_to = "file_name", + values_to = "genotyping_call") # Read AxiomGT1.confidences.txt -> 'confidences' @@ -64,9 +69,10 @@ read_axiom <- function(path, read_conf = FALSE) { comment = "#", col_names = conf_colnames, show_col_types = FALSE) |> - dplyr::filter(probeset_id != "probeset_id") - - assign(x = "confidences", value = confidences, pos = ".GlobalEnv") + dplyr::filter(probeset_id != "probeset_id") |> + tidyr::pivot_longer(cols = -probeset_id, + names_to = "file_name", + values_to = "confidence_score") } @@ -88,6 +94,93 @@ read_axiom <- function(path, read_conf = FALSE) { show_col_types = FALSE) |> dplyr::filter(probeset_id != "probeset_id") - assign(x = "summary", value = summary, pos = ".GlobalEnv") + # Extract A signal from summary file + + signal_a <- summary |> + dplyr::filter(!grepl(pattern = "NP", x = probeset_id)) |> + dplyr::filter(grepl("AX-[0-9]*-A", probeset_id)) |> + dplyr::mutate(probeset_id = stringr::str_remove(string = probeset_id, pattern = "-A")) |> + tidyr::pivot_longer(cols = -probeset_id, + names_to = "file_name", + values_to = "signal_a") + + # Extract B signal from summary file + + signal_b <- summary |> + dplyr::filter(!grepl(pattern = "NP", x = probeset_id)) |> + dplyr::filter(grepl("AX-[0-9]*-B", probeset_id)) |> + dplyr::mutate(probeset_id = stringr::str_remove(string = probeset_id, pattern = "-B")) |> + tidyr::pivot_longer(cols = -probeset_id, + names_to = "file_name", + values_to = "signal_b") + + # Regroup all Axiom data + + message("Regrouping Axiom data") + + axiom_data <- signal_a |> + dplyr::left_join(signal_b, by = c("probeset_id", "file_name")) |> + dplyr::left_join(calls, by = c("probeset_id", "file_name")) + + if (read_conf == TRUE & !is.null(snp_file)) { + + message("Adding SNPs") + + snps <- readr::read_delim(snp_file, show_col_types = FALSE) |> + dplyr::select(probeset_id, chromosome, position) + + axiom_data <- axiom_data |> + dplyr::left_join(confidences, by = c("probeset_id", "file_name")) |> + dplyr::left_join(snps, by = "probeset_id") |> + dplyr::select(file_name, probeset_id, chromosome, position, confidence_score, + signal_a, signal_b, genotyping_call) |> + dplyr::arrange(file_name, chromosome, position) + + } + + if (read_conf == TRUE & is.null(snp_file)) { + + axiom_data <- axiom_data |> + dplyr::left_join(confidences, by = c("probeset_id", "file_name")) |> + dplyr::select(file_name, probeset_id, confidence_score, + signal_a, signal_b, genotyping_call) |> + dplyr::arrange(file_name, probeset_id) + + } + + if (read_conf == FALSE & !is.null(snp_file)) { + + message("Adding SNPs") + + snps <- readr::read_delim(snp_file, show_col_types = FALSE) |> + dplyr::select(probeset_id, chromosome, position) + + axiom_data <- axiom_data |> + dplyr::left_join(snps, by = "probeset_id") |> + dplyr::select(file_name, probeset_id, chromosome, position, + signal_a, signal_b, genotyping_call) |> + dplyr::arrange(file_name, chromosome, position) + + } + + if (read_conf == FALSE & is.null(snp_file)) { + + axiom_data <- axiom_data |> + dplyr::select(file_name, probeset_id, + signal_a, signal_b, genotyping_call) |> + dplyr::arrange(file_name, probeset_id) + + } + + if (!is.null(snp_file) & rm_unmapped == TRUE) { + + message("Removing unmapped SNPs") + + axiom_data <- axiom_data |> + tidyr::drop_na(chromosome, position) + + } + + return(axiom_data) } diff --git a/man/read_axiom.Rd b/man/read_axiom.Rd index 791003d..2e56f71 100644 --- a/man/read_axiom.Rd +++ b/man/read_axiom.Rd @@ -4,12 +4,16 @@ \alias{read_axiom} \title{Read Axiom genotyping output files} \usage{ -read_axiom(path, read_conf = FALSE) +read_axiom(path, read_conf = FALSE, snp_file = NULL, rm_unmapped = FALSE) } \arguments{ \item{path}{path to folder containing the files} \item{read_conf}{whether or not to read the AxiomGT1.confidences.txt file (defaults to FALSE)} + +\item{snp_file}{file containing the list of SNPs (must have 3 columns, 'probeset_id', 'chromosome', 'position')} + +\item{rm_unmapped}{whether or not to remove unmapped SNPs (defaults to FALSE)} } \value{ A \code{\link[=tibble]{tibble()}}. -- GitLab