Creates a line plot tracking amino acid frequencies across multiple samples
Arguments
- study_table
A tibble of productive amino acid sequences generated by the LymphoSeq function productiveSeq where the aggregate parameter was set to "junction_aa".
- sample_list
A character vector of one or more repertoire_ids to track. If set to NULL (default), all repertoire_ids in the sequence matrix will be tracked.
- sequence_track
An optional character vector of one or more amino acid sequences to track. If set to NULL (default), will pull all junction_aa sequences from the sequence matrix.
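- unassigned
A logical value (TRUE or FALSE, as passed in the examples below). NOTE: the description of this argument is missing here; presumably it controls whether sequences that are not assigned to any sample in sample_list are kept. Confirm against the function source.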
Value
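Returns a line plot showing the amino acid frequencies across multiple samples in the sequence matrix where each line represents one unique sequence. NOTE: in the examples below, each cloneTrack call prints a tibble of the tracked sequences (12 columns, including duplicate_frequency and seen) rather than a plot; confirm which return value is current.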
Details
The plot is made using the package ggplot2 and can be reformatted using ggplot2 functions. See examples below.
See also
An excellent resource for examples on how to reformat a ggplot can be found in the R Graphics Cookbook online (http://www.cookbook-r.com/Graphs/).
Examples
file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeqTest")
stable <- readImmunoSeq(path = file_path)
#> Rows: 1 Columns: 144
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (69): sequence_id, sequence, sequence_aa, locus, v_call, d_call, d2_call...
#> dbl (70): v_score, v_identity, v_support, d_score, d_identity, d_support, d2...
#> lgl (5): rev_comp, productive, vj_in_frame, stop_codon, complete_vdj
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (33): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (4): vFamilyTies, jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 414 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (35): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (2): jFamilyTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 920 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
atable <- productiveSeq(study_table = stable, aggregate = "junction_aa")
top_freq <- topFreq(atable, frequency = 0.001)
# Track clones without mapping or tracking specific sequences
cloneTrack(atable)
#> # A tibble: 7,533 × 12
#> repertoire_id junction_aa v_call d_call j_call v_family d_family j_family
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 TRB_CD4_949 CAIKPGQGASNSPL… TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 2 TRB_CD4_949 CAIRAGTSTDTQYF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ02
#> 3 TRB_CD4_949 CAISDETPGELFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ02
#> 4 TRB_CD4_949 CAISDLGRGDTEAFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 5 TRB_CD4_949 CAISDLKEQPQHF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ01
#> 6 TRB_CD4_949 CAISDQGGDQPQHF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ01
#> 7 TRB_CD4_949 CAISEREQGAFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 8 TRB_CD4_949 CAISEWSGSSYNEQ… TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ02
#> 9 TRB_CD4_949 CAISGQGSTEAFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 10 TRB_CD4_949 CAISLNSGGAYEQYF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ02
#> # … with 7,523 more rows, and 4 more variables: reading_frame <chr>,
#> # duplicate_count <dbl>, duplicate_frequency <dbl>, seen <int>
# Track top 20 clones mapping to the CD4 and CD8 samples
cloneTrack(atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
sequence_track = top_freq$junction_aa[1:20], unassigned = TRUE)
#> # A tibble: 20 × 12
#> repertoire_id junction_aa v_call d_call j_call v_family d_family j_family
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 TRB_CD4_949 CASSWEQGTNYGYTF TCRBV… TCRBD… TCRBJ… TCRBV28 TCRBD01 TCRBJ01
#> 2 TRB_CD8_949 CAIKMETPNGEQYF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ02
#> 3 TRB_CD8_949 CASRDGQGSGNTIYF TCRBV… TCRBD… TCRBJ… TCRBV06 TCRBD01 TCRBJ01
#> 4 TRB_CD8_949 CASSEALPGMVPLHF TCRBV… TCRBD… TCRBJ… TCRBV02 TCRBD01 TCRBJ01
#> 5 TRB_CD8_949 CASSGGVAAFSSYN… TCRBV… TCRBD… TCRBJ… TCRBV05 TCRBD01 TCRBJ01
#> 6 TRB_CD8_949 CASSLAGDSQETQYF TCRBV… TCRBD… TCRBJ… TCRBV28 TCRBD01 TCRBJ02
#> 7 TRB_CD8_949 CASSLQGREKLFF TCRBV… TCRBD… TCRBJ… TCRBV27 TCRBD01 TCRBJ01
#> 8 TRB_CD8_949 CASSLSPYEQYF TCRBV… TCRBD… TCRBJ… TCRBV06 TCRBD02 TCRBJ02
#> 9 TRB_CD8_949 CASSPFDRGPDTEA… TCRBV… TCRBD… TCRBJ… TCRBV28 TCRBD01 TCRBJ01
#> 10 TRB_CD8_949 CASSPGTGTYGYTF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 11 TRB_CD8_949 CASSPLDGLTNEQFF TCRBV… TCRBD… TCRBJ… TCRBV28 TCRBD01 TCRBJ02
#> 12 TRB_CD8_949 CASSPPTGERDTQYF TCRBV… TCRBD… TCRBJ… TCRBV07 TCRBD02 TCRBJ02
#> 13 TRB_CD8_949 CASSPSRNTEAFF TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD02 TCRBJ01
#> 14 TRB_CD8_949 CASSQDLGQAFSPL… TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ01
#> 15 TRB_CD8_949 CASSQDLMTVDSLF… TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ02
#> 16 TRB_CD8_949 CASSQDRTGQYGYTF TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ01
#> 17 TRB_CD8_949 CASSQDSSDTEAFF TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ01
#> 18 TRB_CD8_949 CASSQDWERLGEQFF TCRBV… TCRBD… TCRBJ… TCRBV14 TCRBD02 TCRBJ02
#> 19 TRB_CD8_949 CASSREGDQPQHF TCRBV… TCRBD… TCRBJ… TCRBV05 TCRBD01 TCRBJ01
#> 20 TRB_CD8_949 CASSSWDNEQFF TCRBV… TCRBD… TCRBJ… TCRBV05 TCRBD01 TCRBJ02
#> # … with 4 more variables: reading_frame <chr>, duplicate_count <dbl>,
#> # duplicate_frequency <dbl>, seen <int>
# Track the top 10 clones from top_freq
cloneTrack(study_table = atable, sequence_track = top_freq$junction_aa[1:10],
unassigned = FALSE)
#> # A tibble: 75 × 12
#> repertoire_id junction_aa v_call d_call j_call v_family d_family j_family
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 TRB_CD8_949 CAIKMETPNGEQYF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ02
#> 2 TRB_CD8_949 CASRDGQGSGNTIYF TCRBV… TCRBD… TCRBJ… TCRBV06 TCRBD01 TCRBJ01
#> 3 TRB_CD8_949 CASSLQGREKLFF TCRBV… TCRBD… TCRBJ… TCRBV27 TCRBD01 TCRBJ01
#> 4 TRB_CD8_949 CASSPFDRGPDTEA… TCRBV… TCRBD… TCRBJ… TCRBV28 TCRBD01 TCRBJ01
#> 5 TRB_CD8_949 CASSPGTGTYGYTF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 6 TRB_CD8_949 CASSQDLGQAFSPL… TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ01
#> 7 TRB_CD8_949 CASSQDLMTVDSLF… TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ02
#> 8 TRB_CD8_949 CASSQDRTGQYGYTF TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ01
#> 9 TRB_CD8_949 CASSQDSSDTEAFF TCRBV… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ01
#> 10 TRB_CD8_949 CASSREGDQPQHF TCRBV… TCRBD… TCRBJ… TCRBV05 TCRBD01 TCRBJ01
#> # … with 65 more rows, and 4 more variables: reading_frame <chr>,
#> # duplicate_count <dbl>, duplicate_frequency <dbl>, seen <int>
# Track clones mapping to the CD4 and CD8 samples while ignoring all others
cloneTrack(study_table = atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
unassigned = FALSE)
#> # A tibble: 1,607 × 12
#> repertoire_id junction_aa v_call d_call j_call v_family d_family j_family
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 TRB_CD4_949 CAIKPGQGASNSPL… TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 2 TRB_CD4_949 CAIRAGTSTDTQYF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ02
#> 3 TRB_CD4_949 CAISDETPGELFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ02
#> 4 TRB_CD4_949 CAISDLGRGDTEAFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 5 TRB_CD4_949 CAISDLKEQPQHF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ01
#> 6 TRB_CD4_949 CAISDQGGDQPQHF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ01
#> 7 TRB_CD4_949 CAISEREQGAFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 8 TRB_CD4_949 CAISEWSGSSYNEQ… TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ02
#> 9 TRB_CD4_949 CAISGQGSTEAFF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD01 TCRBJ01
#> 10 TRB_CD4_949 CAISLNSGGAYEQYF TCRBV… TCRBD… TCRBJ… TCRBV10 TCRBD02 TCRBJ02
#> # … with 1,597 more rows, and 4 more variables: reading_frame <chr>,
#> # duplicate_count <dbl>, duplicate_frequency <dbl>, seen <int>
# Track clones mapping to the CD4 and CD8 samples and track 2 specific sequences
cloneTrack(study_table = atable, sample_list = c("TRB_CD4_949", "TRB_CD8_949"),
sequence_track = c("CASSPPTGERDTQYF", "CASSQDRTGQYGYTF"), unassigned = FALSE)
#> # A tibble: 2 × 12
#> repertoire_id junction_aa v_call d_call j_call v_family d_family j_family
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 TRB_CD8_949 CASSPPTGERDTQYF TCRBV0… TCRBD… TCRBJ… TCRBV07 TCRBD02 TCRBJ02
#> 2 TRB_CD8_949 CASSQDRTGQYGYTF TCRBV0… TCRBD… TCRBJ… TCRBV04 TCRBD01 TCRBJ01
#> # … with 4 more variables: reading_frame <chr>, duplicate_count <dbl>,
#> # duplicate_frequency <dbl>, seen <int>