getAnno.Rd
Get an annotation table from a gdb
object.
an object of class gdb
base table to query
columns to retain
left join operations to perform
inner join operations to perform
retain only variants with matching ID
Extract variants within specified ranges.
Ranges can be specified either as a data.frame that includes at least 'CHROM', 'start', and 'end' columns, or
as a GenomicRanges::GRanges
object.
Number of basepairs to extend the search region beyond the specified genomic ranges to capture variants where the reference allele (REF) overlaps the input ranges, but the POS of the variant falls outside the ranges. This accounts for variants where the REF allele spans multiple base pairs.
An SQL-compliant where clause to filter output; e.g.: "CHROM=2 AND POS between 5000 AND 50000 AND AF<0.01 AND (cadd.caddPhred>15 OR snpEff.SIFT='D')".
library(rvatData)
gdb <- create_example_gdb()
# retrieve full anno table
varinfo <- getAnno(gdb, table = "varInfo")
head(varinfo)
#> VAR_id CHROM POS ID REF ALT QUAL FILTER AC AN
#> 1 1 chr1 11013912 <NA> A G 722.13 <NA> 1 45484
#> 2 2 chr1 11013928 rs755357622 C T 2598.23 <NA> 1 49194
#> 3 3 chr1 11013936 <NA> A C 4135.36 <NA> 1 50000
#> 4 4 chr1 11013936 <NA> A G 4135.36 <NA> 1 42636
#> 5 5 chr1 11013952 <NA> C G 517.13 <NA> 1 49574
#> 6 6 chr1 11016874 rs80356715 C T 64910.1 <NA> 32 49766
#> AF gene_name HighImpact ModerateImpact Synonymous CADDphred
#> 1 2.19858e-05 TARDBP 0 1 0 24.8
#> 2 2.03277e-05 TARDBP 0 0 1 .
#> 3 2e-05 TARDBP 0 1 0 24.1
#> 4 2.34544e-05 TARDBP 0 1 0 22.6
#> 5 2.01719e-05 TARDBP 0 0 1 .
#> 6 0.0006430093 TARDBP 0 1 0 22.2
#> PolyPhen SIFT
#> 1 P D
#> 2 . .
#> 3 B T
#> 4 B T
#> 5 . .
#> 6 B T
# extract a genomic range
varinfo <- getAnno(gdb,
table = "varInfo",
ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874))
head(varinfo)
#> VAR_id CHROM POS ID REF ALT QUAL FILTER AC AN
#> 1 1 chr1 11013912 <NA> A G 722.13 <NA> 1 45484
#> 2 2 chr1 11013928 rs755357622 C T 2598.23 <NA> 1 49194
#> 3 3 chr1 11013936 <NA> A C 4135.36 <NA> 1 50000
#> 4 4 chr1 11013936 <NA> A G 4135.36 <NA> 1 42636
#> 5 5 chr1 11013952 <NA> C G 517.13 <NA> 1 49574
#> 6 6 chr1 11016874 rs80356715 C T 64910.1 <NA> 32 49766
#> AF gene_name HighImpact ModerateImpact Synonymous CADDphred
#> 1 2.19858e-05 TARDBP 0 1 0 24.8
#> 2 2.03277e-05 TARDBP 0 0 1 .
#> 3 2e-05 TARDBP 0 1 0 24.1
#> 4 2.34544e-05 TARDBP 0 1 0 22.6
#> 5 2.01719e-05 TARDBP 0 0 1 .
#> 6 0.0006430093 TARDBP 0 1 0 22.2
#> PolyPhen SIFT
#> 1 P D
#> 2 . .
#> 3 B T
#> 4 B T
#> 5 . .
#> 6 B T
# keep only specified fields
varinfo <- getAnno(gdb,
table = "varInfo",
fields = c("VAR_id", "CHROM", "POS", "REF", "ALT", "ModerateImpact"),
ranges = data.frame(CHROM = "chr1", start = 11013847, end = 11016874))
head(varinfo)
#> VAR_id CHROM POS REF ALT ModerateImpact
#> 1 1 chr1 11013912 A G 1
#> 2 2 chr1 11013928 C T 0
#> 3 3 chr1 11013936 A C 1
#> 4 4 chr1 11013936 A G 1
#> 5 5 chr1 11013952 C G 0
#> 6 6 chr1 11016874 C T 1
# the `where` parameter can be used to pass an SQL-compliant where clause to filter the output
varinfo <- getAnno(gdb,
table = "varInfo",
where = "gene_name = 'SOD1' and ModerateImpact = 1")
head(varinfo)
#> VAR_id CHROM POS ID REF ALT QUAL FILTER AC AN
#> 1 1268 chr21 31659783 rs121912442 C T 15317.2 <NA> 6 47612
#> 2 1270 chr21 31659799 <NA> G C 732.18 <NA> 1 49466
#> 3 1271 chr21 31659819 <NA> GCAT G 241.15 <NA> 1 44404
#> 4 1273 chr21 31659828 rs768029813 A G 15989.6 <NA> 5 49638
#> 5 1275 chr21 31659837 <NA> A T 1968.71 <NA> 1 50000
#> 6 1276 chr21 31659838 <NA> G C 1702.79 <NA> 1 48628
#> AF gene_name HighImpact ModerateImpact Synonymous CADDphred
#> 1 0.0001260187 SOD1 0 1 0 31
#> 2 2.02159e-05 SOD1 0 1 0 23.4
#> 3 2.25205e-05 SOD1 0 1 0 .
#> 4 0.0001007293 SOD1 0 1 0 13.44
#> 5 2e-05 SOD1 0 1 0 23.9
#> 6 2.05643e-05 SOD1 0 1 0 24.9
#> PolyPhen SIFT
#> 1 D D
#> 2 P T
#> 3 . .
#> 4 B T
#> 5 B D
#> 6 D D
# the `inner` and `left` parameters can be used to perform inner and left join operations, respectively
# the `inner` parameter can, for instance, be used to filter based on a table containing QC-passing variants
# for example:
uploadAnno(gdb, name = "QCpass", value = data.frame(VAR_id = 1:100), skipRemap = TRUE, verbose = FALSE)
#> [1] 1
varinfo <- getAnno(gdb,
inner = "QCpass",
table = "varInfo")