Search for one or more amino acid or junction CDR3 sequences in a study tibble.
Usage
searchSeq(
study_table,
sequence,
seq_type = "junction",
edit_distance = 0,
match = "global"
)
findSeq(sequence, query_list, edit_distance, seq_type, match)
Arguments
- study_table
A tibble generated by the LymphoSeq2 functions readImmunoSeq or productiveSeq. "junction_aa" or "junction", "duplicate_frequency", and "duplicate_count" are required columns.
- sequence
A character vector of one or more amino acid or junction CDR3 sequences to search.
- seq_type
A character vector specifying the type of sequence(s) to be searched. Available options are "junction_aa" or "junction".
- edit_distance
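An integer giving the maximum edit distance allowed between the searched sequence and a target sequence for the target to be reported as a match; the edit distance must be less than or equal to this value. See details below.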
- match
A string indicating the type of sequence matching to perform. Acceptable values are "global" and "partial". See details below.
Value
Returns a tibble containing the rows of the study tibble for every instance where the searched sequence(s) appeared.
Details
An exact partial match means the searched sequence is contained within the target sequence. An exact global match means the searched sequence is identical to the target sequence.
Edit distance is a way of quantifying how dissimilar two sequences are to one another by counting the minimum number of operations required to transform one sequence into the other. For example, an edit distance of 0 means the sequences are identical, and an edit distance of 1 indicates that the sequences differ by a single amino acid (for "junction_aa") or a single nucleotide (for "junction").
Examples
file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeqTest")
stable <- readImmunoSeq(path = file_path)
#> Rows: 1 Columns: 144
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (69): sequence_id, sequence, sequence_aa, locus, v_call, d_call, d2_call...
#> dbl (70): v_score, v_identity, v_support, d_score, d_identity, d_support, d2...
#> lgl (5): rev_comp, productive, vj_in_frame, stop_codon, complete_vdj
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (33): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (4): vFamilyTies, jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 414 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (35): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (2): jFamilyTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 920 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
aa1 <- "CASSPVSNEQFF"
aa2 <- "CASSQEVPPYQAFF"
searchSeq(study_table = stable,
sequence = aa1,
seq_type = "junction_aa",
edit_distance = 0)
#> # A tibble: 2 × 146
#> sequence_id sequence sequence_aa rev_comp productive vj_in_frame stop_codon
#> <int> <chr> <chr> <lgl> <lgl> <lgl> <lgl>
#> 1 417 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> 2 2 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> # … with 139 more variables: complete_vdj <lgl>, locus <chr>, v_call <chr>,
#> # d_call <chr>, d2_call <chr>, j_call <chr>, c_call <chr>,
#> # sequence_alignment <chr>, sequence_alignment_aa <chr>,
#> # germline_alignment <chr>, germline_alignment_aa <chr>, junction <chr>,
#> # junction_aa <chr>, np1 <chr>, np1_aa <chr>, np2 <chr>, np2_aa <chr>,
#> # np3 <chr>, np3_aa <chr>, cdr1 <chr>, cdr1_aa <chr>, cdr2 <chr>,
#> # cdr2_aa <chr>, cdr3 <chr>, cdr3_aa <chr>, fwr1 <chr>, fwr1_aa <chr>, …
searchSeq(study_table = stable,
sequence = c(aa1, aa2),
seq_type = "junction_aa",
edit_distance = 0)
#> # A tibble: 3 × 146
#> sequence_id sequence sequence_aa rev_comp productive vj_in_frame stop_codon
#> <int> <chr> <chr> <lgl> <lgl> <lgl> <lgl>
#> 1 417 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> 2 2 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> 3 3 ATCAATTCCC… CASSQEVPPY… FALSE TRUE NA FALSE
#> # … with 139 more variables: complete_vdj <lgl>, locus <chr>, v_call <chr>,
#> # d_call <chr>, d2_call <chr>, j_call <chr>, c_call <chr>,
#> # sequence_alignment <chr>, sequence_alignment_aa <chr>,
#> # germline_alignment <chr>, germline_alignment_aa <chr>, junction <chr>,
#> # junction_aa <chr>, np1 <chr>, np1_aa <chr>, np2 <chr>, np2_aa <chr>,
#> # np3 <chr>, np3_aa <chr>, cdr1 <chr>, cdr1_aa <chr>, cdr2 <chr>,
#> # cdr2_aa <chr>, cdr3 <chr>, cdr3_aa <chr>, fwr1 <chr>, fwr1_aa <chr>, …
searchSeq(study_table = stable,
sequence = aa1,
seq_type = "junction_aa",
edit_distance = 1)
#> # A tibble: 2 × 146
#> sequence_id sequence sequence_aa rev_comp productive vj_in_frame stop_codon
#> <int> <chr> <chr> <lgl> <lgl> <lgl> <lgl>
#> 1 417 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> 2 2 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> # … with 139 more variables: complete_vdj <lgl>, locus <chr>, v_call <chr>,
#> # d_call <chr>, d2_call <chr>, j_call <chr>, c_call <chr>,
#> # sequence_alignment <chr>, sequence_alignment_aa <chr>,
#> # germline_alignment <chr>, germline_alignment_aa <chr>, junction <chr>,
#> # junction_aa <chr>, np1 <chr>, np1_aa <chr>, np2 <chr>, np2_aa <chr>,
#> # np3 <chr>, np3_aa <chr>, cdr1 <chr>, cdr1_aa <chr>, cdr2 <chr>,
#> # cdr2_aa <chr>, cdr3 <chr>, cdr3_aa <chr>, fwr1 <chr>, fwr1_aa <chr>, …
nt <- "CTGATTCTGGAGTCCGCCAGCACCAACCAGACATCTATGTACCTCTGTGCCAGCAGTCCGGTAAGCAATGAGCAGTTCTTCGGGCCA"
searchSeq(study_table = stable,
sequence = nt,
seq_type = "junction",
edit_distance = 3)
#> # A tibble: 2 × 146
#> sequence_id sequence sequence_aa rev_comp productive vj_in_frame stop_codon
#> <int> <chr> <chr> <lgl> <lgl> <lgl> <lgl>
#> 1 417 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> 2 2 CTGATTCTGG… CASSPVSNEQ… FALSE TRUE NA FALSE
#> # … with 139 more variables: complete_vdj <lgl>, locus <chr>, v_call <chr>,
#> # d_call <chr>, d2_call <chr>, j_call <chr>, c_call <chr>,
#> # sequence_alignment <chr>, sequence_alignment_aa <chr>,
#> # germline_alignment <chr>, germline_alignment_aa <chr>, junction <chr>,
#> # junction_aa <chr>, np1 <chr>, np1_aa <chr>, np2 <chr>, np2_aa <chr>,
#> # np3 <chr>, np3_aa <chr>, cdr1 <chr>, cdr1_aa <chr>, cdr2 <chr>,
#> # cdr2_aa <chr>, cdr3 <chr>, cdr3_aa <chr>, fwr1 <chr>, fwr1_aa <chr>, …
searchSeq(study_table = stable,
sequence = "CASSPVS",
seq_type = "junction_aa",
edit_distance = 0)
#> # A tibble: 0 × 146
#> # … with 146 variables: sequence_id <int>, sequence <chr>, sequence_aa <chr>,
#> # rev_comp <lgl>, productive <lgl>, vj_in_frame <lgl>, stop_codon <lgl>,
#> # complete_vdj <lgl>, locus <chr>, v_call <chr>, d_call <chr>, d2_call <chr>,
#> # j_call <chr>, c_call <chr>, sequence_alignment <chr>,
#> # sequence_alignment_aa <chr>, germline_alignment <chr>,
#> # germline_alignment_aa <chr>, junction <chr>, junction_aa <chr>, np1 <chr>,
#> # np1_aa <chr>, np2 <chr>, np2_aa <chr>, np3 <chr>, np3_aa <chr>, …
searchSeq(study_table = study_table,
sequence = nt,
seq_type = "junction",
edit_distance = 0)
#> Error in dplyr::filter(., !is.na(!!base::as.symbol(seq_type))): object 'study_table' not found