cmdline.RmdSome output will be written during the tutorials, a temporary directory will be used for that:
outdir <- tempdir()
Sys.setenv(rvat = system.file('exec/rvat.R', package = 'rvat'),
vcfpath = rvatData::rvat_example('rvatData.vcf.gz'),
phenopath = rvatData::rvat_example('rvatData.pheno'),
varinfopath = rvatData::rvat_example('rvatData.varinfo'),
outdir = outdir
)Identify RVAT executable path:
Main help page:
## Rare variant analysis toolkit (rvat)
##
## Error: Please provide a valid function call.
##
## Provide a valid function call for detailed help.
## --buildGdb
## --concatGdb
## --uploadAnno
## --mapVariants
## --uploadCohort
## --listAnno
## --listCohort
## --dropTable
## --subsetGdb
## --buildVarSet
## --spatialClust
## --summariseGeno
## --aggregate
## --mergeAggDbs
## --collapseAggDbs
## --vcfInfo2Table
## --writeVcf
## --assocTest
## --buildResamplingFile
## --buildGeneSet
## --buildCorMatrix
## --geneSetAssoc
## Error:
## Execution halted
Function specific help page:
##
## buildGdb
##
## Creates a new gdb file.
## The gdb can be structured and populated using a provided vcf file.
##
## Usage:
## Rscript rvat.R --buildGdb --vcf={vcf} --output={output} [options]
##
## Arguments:
## vcf Input vcf file used to structure and populate gdb.
## Warning this function makes the following assumptions:
## 1) strict adherence to vcf format (GT subfield first element in genotype fields),
## 2) desired genotype QC has already been applied (DP,GQ filters),
## 3) GT values conform to the set {0/0,0/1,1/0,1/1,./.,0|0,0|1,1|0,1|1,.|.}.
## Multiallelic parsing and genotype QC can be performed using vcftools and/or
## accompanying parser scripts included in the rvat repository.
## output Path for output gdb file.
## skipIndexes Flag to skip generation of indexes for var and dosage table (VAR_id;CHROM,POS,REF,ALT).
## Typically only required if you plan to use --concatGdb to concatenate a series of separately generated gdb files.
## skipVarRanges Flag to skip generation of ranged var table.
## Typically only useful (i.e., faster) if you plan to use --concatGdb to concatenate a series of separately generated gdb files.
## overWrite Flag to indicate that output gdb file should be overwritten if it already exists.
## genomeBuild Optional genome build to include in the gdb metadata.
## If specified, it will be used to set ploidies (diploid, XnonPAR, YnonPAR) if the genome build is implemented in RVAT (currently: GRCh37, hg19, GRCh38, hg38).
## memlimit Maximum number of vcf records to parse at a time, defaults to 1000.
## quiet Flag to suppress verbose messages.
##
## Error:
## Execution halted
build varSet containing moderate variants:
Rscript $rvat --buildVarSet \
--gdb $outdir/rvat_tutorials_cmdline.gdb \
--varSetName Moderate \
--unitTable varInfo \
--unitName gene_name \
--where "ModerateImpact = 1" \
--output ${outdir}/moderate_cmdline.txt.gzbuild multiple varSets
tables=(HighImpact ModerateImpact CADDphred PolyPhen)
weights=(1 1 CADDphred 1)
wheres=("HighImpact = 1" "ModerateImpact = 1" "CADDphred is not 'NA'" "PolyPhen is 'D'")
n=${#tables[@]}
for ((i=0;i<${n};i++))
do
Rscript $rvat --buildVarSet \
--gdb $outdir/rvat_tutorials_cmdline.gdb \
--unitTable varInfo \
--unitName gene_name \
--varSetName ${tables[$i]} \
--where "${wheres[$i]}" \
--weightName ${weights[$i]} \
--output $outdir/${tables[$i]}_cmdline.txt.gz
done &> $outdir/varsets.logMerge variant sets for coordinated set based association testing of multiple filters
varSets=(HighImpact ModerateImpact CADDphred PolyPhen)
gunzip -c $outdir/${varSets[1]}_cmdline.txt.gz | grep '^#' > $outdir/mergedVarsets_cmdline.txt
(for varSet in ${varSets[@]}
do
gunzip -c $outdir/${varSet}_cmdline.txt.gz | grep -v '^#'
done) | sort -t '|' -s -k1,1 >> $outdir/mergedVarsets_cmdline.txt
gzip -c $outdir/mergedVarsets_cmdline.txt > $outdir/mergedVarsets_cmdline.txt.gzRscript $rvat \
--assocTest \
--gdb $outdir/rvat_tutorials_cmdline.gdb \
--varSet $outdir/mergedVarsets_cmdline.txt.gz \
--name maxMAF001 \
--cohort pheno \
--pheno pheno \
--test firth,skat_robust,acatv \
--output $outdir/burden_tests_merged_cmdline.txt.gz &> $outdir/burden_tests_merged_cmdline.log