Skip to contents

Creates a line plot tracking amino acid frequencies across multiple samples

Usage

cloneTrack(
  study_table,
  sample_list = NULL,
  sequence_track = NULL,
  unassigned = TRUE
)

Arguments

study_table

A tibble of productive amino acid sequences generated by the LymphoSeq function productiveSeq with the aggregate parameter set to "junction_aa".

sample_list

A character vector of one or more repertoire_ids to track. If set to NULL (default), all repertoire_ids in study_table will be tracked.

sequence_track

An optional character vector of one or more amino acid sequences to track. If set to NULL (default), all junction_aa sequences in study_table will be tracked.

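unassigned

A logical value (default TRUE). The description of this argument is missing from the source documentation; judging from the examples below, setting it to FALSE appears to ignore sequences that are not assigned to the tracked samples or sequences (TODO: confirm against the implementation).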

Value

Returns a line plot showing the amino acid frequencies across multiple samples in the sequence matrix where each line represents one unique sequence.

Details

The plot is made using the package ggplot2 and can be reformatted using ggplot2 functions. See examples below.

See also

An excellent resource for examples on how to reformat a ggplot can be found in the R Graphics Cookbook online (http://www.cookbook-r.com/Graphs/).

Examples

file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeqTest")

stable <- readImmunoSeq(path = file_path)
#> Rows: 1 Columns: 144
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (69): sequence_id, sequence, sequence_aa, locus, v_call, d_call, d2_call...
#> dbl (70): v_score, v_identity, v_support, d_score, d_identity, d_support, d2...
#> lgl  (5): rev_comp, productive, vj_in_frame, stop_codon, complete_vdj
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (33): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (4): vFamilyTies, jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 414 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (35): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (2): jFamilyTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 920 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")

atable <- productiveSeq(study_table = stable, aggregate = "junction_aa")

top_freq <- topFreq(atable, frequency = 0.001)

# Track clones without mapping or tracking specific sequences
cloneTrack(atable)
#> # A tibble: 7,533 × 12
#>    repertoire_id junction_aa     v_call d_call j_call v_family d_family j_family
#>    <chr>         <chr>           <chr>  <chr>  <chr>  <chr>    <chr>    <chr>   
#>  1 TRB_CD4_949   CAIKPGQGASNSPL… TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#>  2 TRB_CD4_949   CAIRAGTSTDTQYF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ02 
#>  3 TRB_CD4_949   CAISDETPGELFF   TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ02 
#>  4 TRB_CD4_949   CAISDLGRGDTEAFF TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#>  5 TRB_CD4_949   CAISDLKEQPQHF   TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ01 
#>  6 TRB_CD4_949   CAISDQGGDQPQHF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ01 
#>  7 TRB_CD4_949   CAISEREQGAFF    TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#>  8 TRB_CD4_949   CAISEWSGSSYNEQ… TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ02 
#>  9 TRB_CD4_949   CAISGQGSTEAFF   TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#> 10 TRB_CD4_949   CAISLNSGGAYEQYF TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ02 
#> # … with 7,523 more rows, and 4 more variables: reading_frame <chr>,
#> #   duplicate_count <dbl>, duplicate_frequency <dbl>, seen <int>

# Track top 20 clones mapping to the CD4 and CD8 samples
cloneTrack(atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
   sequence_track = top_freq$junction_aa[1:20], unassigned = TRUE) 
#> # A tibble: 20 × 12
#>    repertoire_id junction_aa     v_call d_call j_call v_family d_family j_family
#>    <chr>         <chr>           <chr>  <chr>  <chr>  <chr>    <chr>    <chr>   
#>  1 TRB_CD4_949   CASSWEQGTNYGYTF TCRBV… TCRBD… TCRBJ… TCRBV28  TCRBD01  TCRBJ01 
#>  2 TRB_CD8_949   CAIKMETPNGEQYF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ02 
#>  3 TRB_CD8_949   CASRDGQGSGNTIYF TCRBV… TCRBD… TCRBJ… TCRBV06  TCRBD01  TCRBJ01 
#>  4 TRB_CD8_949   CASSEALPGMVPLHF TCRBV… TCRBD… TCRBJ… TCRBV02  TCRBD01  TCRBJ01 
#>  5 TRB_CD8_949   CASSGGVAAFSSYN… TCRBV… TCRBD… TCRBJ… TCRBV05  TCRBD01  TCRBJ01 
#>  6 TRB_CD8_949   CASSLAGDSQETQYF TCRBV… TCRBD… TCRBJ… TCRBV28  TCRBD01  TCRBJ02 
#>  7 TRB_CD8_949   CASSLQGREKLFF   TCRBV… TCRBD… TCRBJ… TCRBV27  TCRBD01  TCRBJ01 
#>  8 TRB_CD8_949   CASSLSPYEQYF    TCRBV… TCRBD… TCRBJ… TCRBV06  TCRBD02  TCRBJ02 
#>  9 TRB_CD8_949   CASSPFDRGPDTEA… TCRBV… TCRBD… TCRBJ… TCRBV28  TCRBD01  TCRBJ01 
#> 10 TRB_CD8_949   CASSPGTGTYGYTF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#> 11 TRB_CD8_949   CASSPLDGLTNEQFF TCRBV… TCRBD… TCRBJ… TCRBV28  TCRBD01  TCRBJ02 
#> 12 TRB_CD8_949   CASSPPTGERDTQYF TCRBV… TCRBD… TCRBJ… TCRBV07  TCRBD02  TCRBJ02 
#> 13 TRB_CD8_949   CASSPSRNTEAFF   TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD02  TCRBJ01 
#> 14 TRB_CD8_949   CASSQDLGQAFSPL… TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ01 
#> 15 TRB_CD8_949   CASSQDLMTVDSLF… TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ02 
#> 16 TRB_CD8_949   CASSQDRTGQYGYTF TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ01 
#> 17 TRB_CD8_949   CASSQDSSDTEAFF  TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ01 
#> 18 TRB_CD8_949   CASSQDWERLGEQFF TCRBV… TCRBD… TCRBJ… TCRBV14  TCRBD02  TCRBJ02 
#> 19 TRB_CD8_949   CASSREGDQPQHF   TCRBV… TCRBD… TCRBJ… TCRBV05  TCRBD01  TCRBJ01 
#> 20 TRB_CD8_949   CASSSWDNEQFF    TCRBV… TCRBD… TCRBJ… TCRBV05  TCRBD01  TCRBJ02 
#> # … with 4 more variables: reading_frame <chr>, duplicate_count <dbl>,
#> #   duplicate_frequency <dbl>, seen <int>

# Track the top 10 clones from top_freq
cloneTrack(study_table = atable, sequence_track = top_freq$junction_aa[1:10], 
           unassigned = FALSE) 
#> # A tibble: 75 × 12
#>    repertoire_id junction_aa     v_call d_call j_call v_family d_family j_family
#>    <chr>         <chr>           <chr>  <chr>  <chr>  <chr>    <chr>    <chr>   
#>  1 TRB_CD8_949   CAIKMETPNGEQYF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ02 
#>  2 TRB_CD8_949   CASRDGQGSGNTIYF TCRBV… TCRBD… TCRBJ… TCRBV06  TCRBD01  TCRBJ01 
#>  3 TRB_CD8_949   CASSLQGREKLFF   TCRBV… TCRBD… TCRBJ… TCRBV27  TCRBD01  TCRBJ01 
#>  4 TRB_CD8_949   CASSPFDRGPDTEA… TCRBV… TCRBD… TCRBJ… TCRBV28  TCRBD01  TCRBJ01 
#>  5 TRB_CD8_949   CASSPGTGTYGYTF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#>  6 TRB_CD8_949   CASSQDLGQAFSPL… TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ01 
#>  7 TRB_CD8_949   CASSQDLMTVDSLF… TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ02 
#>  8 TRB_CD8_949   CASSQDRTGQYGYTF TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ01 
#>  9 TRB_CD8_949   CASSQDSSDTEAFF  TCRBV… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ01 
#> 10 TRB_CD8_949   CASSREGDQPQHF   TCRBV… TCRBD… TCRBJ… TCRBV05  TCRBD01  TCRBJ01 
#> # … with 65 more rows, and 4 more variables: reading_frame <chr>,
#> #   duplicate_count <dbl>, duplicate_frequency <dbl>, seen <int>

# Track clones mapping to the CD4 and CD8 samples while ignoring all others
cloneTrack(study_table = atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
   unassigned = FALSE) 
#> # A tibble: 1,607 × 12
#>    repertoire_id junction_aa     v_call d_call j_call v_family d_family j_family
#>    <chr>         <chr>           <chr>  <chr>  <chr>  <chr>    <chr>    <chr>   
#>  1 TRB_CD4_949   CAIKPGQGASNSPL… TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#>  2 TRB_CD4_949   CAIRAGTSTDTQYF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ02 
#>  3 TRB_CD4_949   CAISDETPGELFF   TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ02 
#>  4 TRB_CD4_949   CAISDLGRGDTEAFF TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#>  5 TRB_CD4_949   CAISDLKEQPQHF   TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ01 
#>  6 TRB_CD4_949   CAISDQGGDQPQHF  TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ01 
#>  7 TRB_CD4_949   CAISEREQGAFF    TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#>  8 TRB_CD4_949   CAISEWSGSSYNEQ… TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ02 
#>  9 TRB_CD4_949   CAISGQGSTEAFF   TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD01  TCRBJ01 
#> 10 TRB_CD4_949   CAISLNSGGAYEQYF TCRBV… TCRBD… TCRBJ… TCRBV10  TCRBD02  TCRBJ02 
#> # … with 1,597 more rows, and 4 more variables: reading_frame <chr>,
#> #   duplicate_count <dbl>, duplicate_frequency <dbl>, seen <int>

# Track clones mapping to the CD4 and CD8 samples and track 2 specific sequences
cloneTrack(study_table = atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
   sequence_track = c("CASSPPTGERDTQYF", "CASSQDRTGQYGYTF"), unassigned = FALSE)
#> # A tibble: 2 × 12
#>   repertoire_id junction_aa     v_call  d_call j_call v_family d_family j_family
#>   <chr>         <chr>           <chr>   <chr>  <chr>  <chr>    <chr>    <chr>   
#> 1 TRB_CD8_949   CASSPPTGERDTQYF TCRBV0… TCRBD… TCRBJ… TCRBV07  TCRBD02  TCRBJ02 
#> 2 TRB_CD8_949   CASSQDRTGQYGYTF TCRBV0… TCRBD… TCRBJ… TCRBV04  TCRBD01  TCRBJ01 
#> # … with 4 more variables: reading_frame <chr>, duplicate_count <dbl>,
#> #   duplicate_frequency <dbl>, seen <int>