Skip to contents

Plots a Lorenz curve derived from the frequency of the amino acid sequences.

Usage

lorenzCurve(repertoire_ids, study_table)

Arguments

repertoire_ids

A character vector of repertoire_id names in list.

study_table

A tibble generated using the LymphoSeq function readImmunoSeq or productiveSeq. "duplicate_frequency" is a required column.

Value

Returns a Lorenz curve.

Details

The Gini coefficient is an alternative metric used to calculate repertoire diversity and is derived from the Lorenz curve. The Lorenz curve is drawn such that x-axis represents the cumulative percentage of unique sequences and the y-axis represents the cumulative percentage of reads. A line passing through the origin with a slope of 1 reflects equal frequencies of all sequences. The Gini coefficient is the ratio of the area between the line of equality and the observed Lorenz curve over the total area under the line of equality.

The plot is made using the package ggplot2 and can be reformatted using ggplot2 functions. See examples below.

See also

An excellent resource for examples on how to reformat a ggplot can be found in the R Graphics Cookbook online (http://www.cookbook-r.com/Graphs/).

Examples

file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeqTest")

stable <- readImmunoSeq(path = file_path)
#> Rows: 1 Columns: 144
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (69): sequence_id, sequence, sequence_aa, locus, v_call, d_call, d2_call...
#> dbl (70): v_score, v_identity, v_support, d_score, d_identity, d_support, d2...
#> lgl  (5): rev_comp, productive, vj_in_frame, stop_codon, complete_vdj
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (33): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (4): vFamilyTies, jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 414 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (35): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (2): jFamilyTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 920 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (29): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vFami...
#> dbl (14): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (9): vGeneAllele, vGeneAlleleTies, dGeneAllele, dFamilyTies, dGeneAllel...
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")
#> Rows: 1000 Columns: 52
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (34): nucleotide, aminoAcid, vMaxResolved, vFamilyName, vGeneName, vGene...
#> dbl (15): count (templates/reads), frequencyCount (%), cdr3Length, vDeletion...
#> lgl  (3): jFamilyTies, jGeneNameTies, jGeneAlleleTies
#> 
#>  Use `spec()` to retrieve the full column specification for this data.
#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Joining, by = c("sequence", "sequence_aa", "v_call", "d_call", "d2_call",
#> "j_call", "junction", "junction_aa", "duplicate_count", "clone_id",
#> "repertoire_id")

repertoire_ids <- stable %>% dplyr::pull(repertoire_id) %>% unique()
#> Error in stable %>% dplyr::pull(repertoire_id) %>% unique(): could not find function "%>%"

lorenzCurve(repertoire_ids = repertoire_ids, study_table = stable)
#> Error in loadNamespace(x): there is no package called ‘LymphoSeq2’

atable <- productiveSeq(study_table = stable, aggregate = "junction_aa")

repertoire_ids <- atable %>% dplyr::pull(repertoire_id) %>% unique()
#> Error in atable %>% dplyr::pull(repertoire_id) %>% unique(): could not find function "%>%"

lorenzCurve(repertoire_ids = repertoire_ids, study_table = atable)
#> Error in loadNamespace(x): there is no package called ‘LymphoSeq2’

# Change the legend labels, line colors, and add a title
repertoire_ids <- c("TRB_Unsorted_0", "TRB_Unsorted_32", 
   "TRB_Unsorted_83", "TRB_Unsorted_949", "TRB_Unsorted_1320")

lorenz_curve <- lorenzCurve(repertoire_ids = repertoire_ids, study_table = atable)
#> Error in loadNamespace(x): there is no package called ‘LymphoSeq2’

labels <- c("Day 0", "Day 32", "Day 83", "Day 949", "Day 1320")

colors <- c("navyblue", "red", "darkgreen", "orange", "purple",
             "yellow", "pink", "lightgreen", "cyan", "maroon")

lorenz_curve + 
ggplot2::scale_color_manual(name = "repertoire_ids", 
                            breaks = repertoire_ids, 
                            labels = labels, values = colors) + 
ggplot2::ggtitle("Figure Title")
#> Error in eval(expr, envir, enclos): object 'lorenz_curve' not found