1 Quick start

This section gives a quick tour of how each function is used. Click a section title to view more details.

1.1 Search gene metadata

# Identifiers to look up, passed to genInfo() with org = "hs"
id <- c(
  "MCM10", "CDC20", "S100A9", "MMP1",
  "BCC7", "FAKEID", "HBD", "NUDT10"
)
info <- genInfo(id, org = "hs")
# Same query with unique = TRUE to keep unique records
info2 <- genInfo(id, org = "hs", unique = TRUE)

1.2 Search PubMed records

# Print the installed genekitr version to see which call form below applies
packageVersion("genekitr")

# Call form for versions below 0.8.6: search terms plus extra keywords
term <- c("Tp53", "Brca1", "Tet2")
keys <- c("stem cell", "mouse")
l <- getPubmed(term, keys)

# Call form for versions above 0.8.6: extra terms plus a record limit (`num`)
term <- c("Tp53", "Brca1", "Tet2")
add_term <- c("stem cell", "mouse")
l <- getPubmed(term, add_term, num = 30)

# Raise `num` to a very large value to get all records
all <- getPubmed(term, add_term, num = 666666)

1.3 Gene ID conversion

# Gene identifiers to convert, using org = "mouse"
id <- c("Cyp2c23", "Fhit", "Gal3st2b", "Trp53", "Tp53")
sym2ens <- transId(id, transTo = "ensembl", org = "mouse")
# Ask for two target ID types in one call and keep unique records
sym2ensent <- transId(
  id,
  transTo = c("ensembl", "entrez"),
  org = "mouse",
  unique = TRUE
)

1.4 Protein ID conversion

# Protein identifiers to convert to "sym", using org = "rat"
id <- c("P27364", "P14046", "P15650", "P16638", "P24483")
protein2sym <- transId(id, transTo = "sym", org = "rat")

1.5 Probe ID conversion

# Probe identifiers to convert to "sym", using org = "hs"
id <- c("205647_at", "204857_at", "201394_s_at", "210377_at")
probe2sym <- transProbe(id, transTo = "sym", org = "hs")

To facilitate gene enrichment analysis, I developed the geneset package.

1.6 Get gene sets

# Install the package first if needed:
# install.packages("geneset")
library(geneset)

# Each call below fetches one kind of gene set; `gs` is overwritten every time,
# so only the result of the last call remains in `gs`.
gs <- getGO(org = "human", ont = "mf")
gs <- getKEGG(org = "hsa", category = "pathway")
gs <- getMesh(org = "human", method = "gendoo", category = "A")
gs <- getMsigdb(org = "human", category = "H")
gs <- getWiki(org = "human")
gs <- getReactome(org = "human")
gs <- getEnrichrdb(
  org = "human",
  library = "COVID-19_Related_Gene_Sets"
)
gs <- getHgDisease(source = "do")

1.7 ORA analysis (over-representation analysis)

# Step 1: input gene IDs
id <- c("TP53", "BRCA1", "PD1", "PDL1")
# Step 2: gene sets to test against
gs <- geneset::getGO(org = "human", ont = "mf")
# Step 3: run the over-representation analysis with both cutoffs at 0.05
ego <- genORA(id, geneset = gs, p_cutoff = 0.05, q_cutoff = 0.05)

1.8 FCS GSEA analysis

FCS stands for “functional-class-scoring”.

geneList is a pre-ranked gene list sorted in decreasing order. Input IDs can be Entrez, Ensembl, symbol or UniProt IDs.

# Step 1: load the pre-ranked gene list shipped with genekitr
data(geneList, package = "genekitr")
# Step 2: gene sets to test against
gs <- geneset::getGO(org = "human", ont = "mf")
# Step 3: run GSEA with both cutoffs at 0.05
gse <- genGSEA(
  genelist = geneList,
  geneset = gs,
  p_cutoff = 0.05,
  q_cutoff = 0.05
)

1.9 Import external data

1.9.1 Import PANTHER results from the Gene Ontology website

# The PANTHER result is expected to have been saved as "analysis.txt"
panther_file <- "analysis.txt"
panther_res <- importPanther(panther_file)

1.9.2 Import clusterProfiler result

library(clusterProfiler)
library(org.Hs.eg.db)
# Load the `geneList` dataset from the DOSE package (this replaces any
# `geneList` object already in the workspace)
data(geneList, package = "DOSE")
# Names of the entries whose absolute value exceeds 2
gene <- names(geneList[abs(geneList) > 2])

# GO enrichment computed with clusterProfiler
go <- enrichGO(
  gene = gene,
  OrgDb = org.Hs.eg.db,
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05
)
# KEGG GSEA computed with clusterProfiler
kk <- gseKEGG(
  geneList = geneList,
  organism = "hsa",
  minGSSize = 120,
  pvalueCutoff = 0.05,
  verbose = FALSE
)
# Any other ORA result from clusterProfiler
# (e.g. KEGG/DOSE/WikiPathways...); Disease Ontology is used here
do <- enrichDO(
  gene = gene,
  ont = "DO",
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  universe = names(geneList),
  minGSSize = 5,
  maxGSSize = 500,
  qvalueCutoff = 0.05
)

# Import each clusterProfiler result, passing the `type` that matches it
go_easy <- importCP(go, type = "go")
kk_easy <- importCP(kk, type = "gsea")
do_easy <- importCP(do, type = "other")

1.10 Plot ORA

Supported types include: bar, wego, bubble, dot, lollipop, geneheat, genechord, network, gomap, goheat, gotangram, wordcloud, upset and two-group barplot

# `keg` is not created in any earlier step of this guide, so the bar plot
# below would fail with "object 'keg' not found". Build a KEGG ORA result
# here with the same calls the guide already uses: the gene IDs of the ORA
# example and the KEGG pathway gene sets from the gene-set example.
kegg_ids <- c("TP53", "BRCA1", "PD1", "PDL1")
keg <- genORA(kegg_ids,
  geneset = geneset::getKEGG(org = "hsa", category = "pathway"),
  p_cutoff = 0.05,
  q_cutoff = 0.05
)

# `ego` is the GO ORA result computed in the ORA analysis step
plotEnrich(ego, plot_type = "dot")
plotEnrich(keg, plot_type = "bar")
plotEnrich(ego, plot_type = "geneheat")
plotEnrich(ego, plot_type = "network", scale_ratio = 0.5)

1.11 Plot GSEA

Supported types include: volcano, classic, fgsea, ridge and bar

# `gse` is the GSEA result computed in the GSEA analysis step.
# A number passed as show_pathway is used here for the volcano plot.
plotGSEA(gse, plot_type = "volcano", show_pathway = 3)

# `pathways` is not defined in any earlier step of this guide, so the classic
# plot below would fail with "object 'pathways' not found". Take the first
# three pathways of the GSEA result so the call is runnable.
# NOTE(review): assumes genGSEA() returns a list holding a `gsea_df` data
# frame whose `ID` column is what plotGSEA() matches pathway names against;
# confirm against the genekitr reference, or replace with explicit names.
pathways <- head(gse$gsea_df$ID, 3)
genes <- c("MET", "TP53", "PMM2")
plotGSEA(gse, plot_type = "classic", show_pathway = pathways, show_gene = genes)

plotGSEA(gse, plot_type = "fgsea", show_pathway = 3)

1.12 Plot Venn

# Three random gene sets: each holds 100 distinct labels of the form
# "gene<k>", with k drawn without replacement from 1..1000.
# paste0() recycles the "gene" prefix across all sampled numbers.
set1 <- paste0("gene", sample(seq_len(1000), 100))
set2 <- paste0("gene", sample(seq_len(1000), 100))
set3 <- paste0("gene", sample(seq_len(1000), 100))

# Named list of the three gene sets, shared by both plots
venn_sets <- list(gset1 = set1, gset2 = set2, gset3 = set3)

# use_venn = TRUE: classic Venn diagram
plotVenn(venn_sets, use_venn = TRUE)
# use_venn = FALSE: UpSet plot
plotVenn(venn_sets, use_venn = FALSE)

1.13 Plot DEG Volcano

# Load the example `deg` dataset shipped with genekitr
data(deg, package = "genekitr")
# Volcano plot using "p.adjust", with the legend removed and four genes
# highlighted. TRUE is spelled out instead of T: T is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved constant.
plotVolcano(deg, "p.adjust",
  remove_legend = TRUE,
  show_gene = c("CD36", "DUSP6", "IER3", "CDH7")
)