Get an annotation table from a gdb object.

getAnno(
  object,
  table,
  fields = "*",
  left = c(),
  inner = c(),
  VAR_id = NULL,
  ranges = NULL,
  padding = 250,
  where = c()
)

Arguments

object

an object of class gdb

table

base table to query

fields

columns to retain

left

left join operations to perform

inner

inner join operations to perform

VAR_id

retain only variants with matching ID

ranges

Extract variants within specified ranges. Ranges can be specified as a data.frame, including at least 'CHROM', 'start', and 'end' columns, or can be a GenomicRanges::GRanges object.

padding

Number of basepairs to extend the search region beyond the specified genomic ranges to capture variants where the reference allele (REF) overlaps the input ranges, but the POS of the variant falls outside the ranges. This accounts for variants where the REF allele spans multiple base pairs.

where

An SQL-compliant where clause to filter output; e.g.: "CHROM=2 AND POS between 5000 AND 50000 AND AF<0.01 AND (cadd.caddPhred>15 OR snpEff.SIFT='D')".

Examples


library(rvatData)
gdb <- create_example_gdb()

# retrieve full anno table
varinfo <- getAnno(gdb, table = "varInfo")
head(varinfo)
#>   VAR_id CHROM      POS          ID REF ALT    QUAL FILTER AC    AN
#> 1      1  chr1 11013912        <NA>   A   G  722.13   <NA>  1 45484
#> 2      2  chr1 11013928 rs755357622   C   T 2598.23   <NA>  1 49194
#> 3      3  chr1 11013936        <NA>   A   C 4135.36   <NA>  1 50000
#> 4      4  chr1 11013936        <NA>   A   G 4135.36   <NA>  1 42636
#> 5      5  chr1 11013952        <NA>   C   G  517.13   <NA>  1 49574
#> 6      6  chr1 11016874  rs80356715   C   T 64910.1   <NA> 32 49766
#>             AF gene_name HighImpact ModerateImpact Synonymous CADDphred
#> 1  2.19858e-05    TARDBP          0              1          0      24.8
#> 2  2.03277e-05    TARDBP          0              0          1         .
#> 3        2e-05    TARDBP          0              1          0      24.1
#> 4  2.34544e-05    TARDBP          0              1          0      22.6
#> 5  2.01719e-05    TARDBP          0              0          1         .
#> 6 0.0006430093    TARDBP          0              1          0      22.2
#>   PolyPhen SIFT
#> 1        P    D
#> 2        .    .
#> 3        B    T
#> 4        B    T
#> 5        .    .
#> 6        B    T

# extract a genomic range
varinfo <- getAnno(gdb, 
                   table = "varInfo", 
                   ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874))
head(varinfo)
#>   VAR_id CHROM      POS          ID REF ALT    QUAL FILTER AC    AN
#> 1      1  chr1 11013912        <NA>   A   G  722.13   <NA>  1 45484
#> 2      2  chr1 11013928 rs755357622   C   T 2598.23   <NA>  1 49194
#> 3      3  chr1 11013936        <NA>   A   C 4135.36   <NA>  1 50000
#> 4      4  chr1 11013936        <NA>   A   G 4135.36   <NA>  1 42636
#> 5      5  chr1 11013952        <NA>   C   G  517.13   <NA>  1 49574
#> 6      6  chr1 11016874  rs80356715   C   T 64910.1   <NA> 32 49766
#>             AF gene_name HighImpact ModerateImpact Synonymous CADDphred
#> 1  2.19858e-05    TARDBP          0              1          0      24.8
#> 2  2.03277e-05    TARDBP          0              0          1         .
#> 3        2e-05    TARDBP          0              1          0      24.1
#> 4  2.34544e-05    TARDBP          0              1          0      22.6
#> 5  2.01719e-05    TARDBP          0              0          1         .
#> 6 0.0006430093    TARDBP          0              1          0      22.2
#>   PolyPhen SIFT
#> 1        P    D
#> 2        .    .
#> 3        B    T
#> 4        B    T
#> 5        .    .
#> 6        B    T

# keep only specified fields
varinfo <- getAnno(gdb, 
                   table = "varInfo", 
                   fields = c("VAR_id", "CHROM", "POS", "REF", "ALT", "ModerateImpact"),
                   ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874))
head(varinfo)
#>   VAR_id CHROM      POS REF ALT ModerateImpact
#> 1      1  chr1 11013912   A   G              1
#> 2      2  chr1 11013928   C   T              0
#> 3      3  chr1 11013936   A   C              1
#> 4      4  chr1 11013936   A   G              1
#> 5      5  chr1 11013952   C   G              0
#> 6      6  chr1 11016874   C   T              1

# the `where` parameter can be used to pass an SQL-compliant where clause to filter the output
varinfo <- getAnno(gdb, 
                   table = "varInfo", 
                   where = "gene_name = 'SOD1' and ModerateImpact = 1")
head(varinfo)
#>   VAR_id CHROM      POS          ID  REF ALT    QUAL FILTER AC    AN
#> 1   1268 chr21 31659783 rs121912442    C   T 15317.2   <NA>  6 47612
#> 2   1270 chr21 31659799        <NA>    G   C  732.18   <NA>  1 49466
#> 3   1271 chr21 31659819        <NA> GCAT   G  241.15   <NA>  1 44404
#> 4   1273 chr21 31659828 rs768029813    A   G 15989.6   <NA>  5 49638
#> 5   1275 chr21 31659837        <NA>    A   T 1968.71   <NA>  1 50000
#> 6   1276 chr21 31659838        <NA>    G   C 1702.79   <NA>  1 48628
#>             AF gene_name HighImpact ModerateImpact Synonymous CADDphred
#> 1 0.0001260187      SOD1          0              1          0        31
#> 2  2.02159e-05      SOD1          0              1          0      23.4
#> 3  2.25205e-05      SOD1          0              1          0         .
#> 4 0.0001007293      SOD1          0              1          0     13.44
#> 5        2e-05      SOD1          0              1          0      23.9
#> 6  2.05643e-05      SOD1          0              1          0      24.9
#>   PolyPhen SIFT
#> 1        D    D
#> 2        P    T
#> 3        .    .
#> 4        B    T
#> 5        B    D
#> 6        D    D


# the `inner` and `left` parameters can be used to perform inner and left join operations, respectively
# for example, we can use the `inner` parameter to keep only variants that are present
# in a table containing QC-passing variants:
uploadAnno(gdb, name = "QCpass", value = data.frame(VAR_id = 1:100), skipRemap = TRUE, verbose = FALSE)
#> [1] 1
varinfo <- getAnno(gdb, 
                   inner = "QCpass",
                   table = "varInfo")