# Open the help page for a function.
# (functionName is a placeholder: substitute the function you want to read about.)
help(functionName)
?functionName

# Search the installed documentation for a term.
# help.search() needs a character string; the ?? shortcut takes the term unquoted.
help.search("string")
??string

# List the objects whose names contain the given text;
# mode = "function" keeps only functions.
apropos("str", mode = "function")
apropos("plot", mode = "function")
## [1] "assocplot" "barplot" "barplot.default" ## [4] "biplot" "boxplot" "boxplot.default" ## [7] "boxplot.matrix" "boxplot.stats" "cdplot" ## [10] "coplot" "fourfoldplot" "interaction.plot" ## [13] "lag.plot" "matplot" "monthplot" ## [16] "mosaicplot" "plot" "plot.default" ## [19] "plot.design" "plot.ecdf" "plot.function" ## [22] "plot.new" "plot.spec.coherency" "plot.spec.phase" ## [25] "plot.stepfun" "plot.ts" "plot.window" ## [28] "plot.xy" "preplot" "qqplot" ## [31] "recordPlot" "replayPlot" "savePlot" ## [34] "screeplot" "spineplot" "sunflowerplot" ## [37] "termplot" "ts.plot"
# Four ways to bind a value to a name.

# 1. Leftward arrow
a <- "hello"
print(a)
## [1] "hello"

# 2. Rightward arrow: value on the left, name on the right
"hi" -> b
print(b)
## [1] "hi"

# 3. Equals sign; a semicolon separates two statements written on one line
c = 10; d = 11
print(c+d)
## [1] 21

# 4. assign(): the variable name is passed as a character string
assign("aString",1)
aString
## [1] 1
# assign() lets the variable name be built at run time:
# this loop creates v1, v2, ..., v5 holding 1, 2, ..., 5.
for (i in 1:5) {
  varName = paste0("v", i)
  assign(varName, i)
}
ls() ## list all the variables in the workspace
## [1] "a"       "aString" "b"       "c"       "d"       "i"       "v1"
## [8] "v2"      "v3"      "v4"      "v5"      "varName"
# Remove one variable (variableName is a placeholder for the name to delete)
rm(variableName)
# Remove every variable returned by ls()
rm(list=ls()) # clear the environment; It's a good habit to put it at top of your script
# Multi-line "comment" trick: the branch is never taken, so the string inside
# is parsed but never evaluated. FALSE is spelled out because F is an ordinary
# variable that can be reassigned.
if (FALSE) {
  " ... ... "
}
# Download and install the ISLR package
install.packages("ISLR")
# Attach the installed package so that its contents can be used by name
library("ISLR")
# require() also attaches the package; it returns FALSE instead of raising an
# error when the package cannot be loaded
require("ISLR")
# a vector of numeric values
a = 1:9
length(a)
## [1] 9
a
## [1] 1 2 3 4 5 6 7 8 9
b = c(1,3,4,10,11)
b
## [1]  1  3  4 10 11

# character strings
chVec = c("hello","hi","a","abc")
chVec
## [1] "hello" "hi"    "a"     "abc"
is.vector(chVec)
## [1] TRUE

# logical values
logicVec = c(TRUE,TRUE,FALSE,TRUE,FALSE)
logicVec
## [1]  TRUE  TRUE FALSE  TRUE FALSE
All elements should be of the same data type.
Syntax
matrix(vector,nrow,ncol,byrow,dimnames)
- Examples
# Build a 4 x 3 matrix from 1..12; the values are filled in column by column
m = matrix(1:12,4,3)
m
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
is.matrix(m)
## [1] TRUE
m[3,2] # element at 3rd row, 2nd column
## [1] 7
m[2,] # the 2nd row
## [1]  2  6 10
m[,2:3] # the 2nd,3rd columns
##      [,1] [,2]
## [1,]    5    9
## [2,]    6   10
## [3,]    7   11
## [4,]    8   12
m[2:4,c(1,3)] # rows 2 to 4 of columns 1 and 3
##      [,1] [,2]
## [1,]    2   10
## [2,]    3   11
## [3,]    4   12
rbind(m,m) # combine by rows
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## [5,]    1    5    9
## [6,]    2    6   10
## [7,]    3    7   11
## [8,]    4    8   12
cbind(m,m) # combine by columns
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    5    9    1    5    9
## [2,]    2    6   10    2    6   10
## [3,]    3    7   11    3    7   11
## [4,]    4    8   12    4    8   12
m = matrix(c(3,2,-2,2,5,2,2,8,4),3,3) # create a square matrix
m
##      [,1] [,2] [,3]
## [1,]    3    2    2
## [2,]    2    5    8
## [3,]   -2    2    4
t(m) # transpose
##      [,1] [,2] [,3]
## [1,]    3    2   -2
## [2,]    2    5    2
## [3,]    2    8    4
solve(m) # inverse
##       [,1]  [,2]   [,3]
## [1,] -0.50  0.50 -0.750
## [2,]  3.00 -2.00  2.500
## [3,] -1.75  1.25 -1.375
det(m) # determinant
## [1] -8
## * and %*% are different
m * m # element-by-element product
##      [,1] [,2] [,3]
## [1,]    9    4    4
## [2,]    4   25   64
## [3,]    4    4   16
m %*% m # this is the matrix multiplication !!
##      [,1] [,2] [,3]
## [1,]    9   20   30
## [2,]    0   45   76
## [3,]  -10   14   28
array(vector, dimensions, dimnames)
array(1:6) # one dimension only: prints like a vector
## [1] 1 2 3 4 5 6
array(1:6,dim=c(2,3)) # a matrix
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
array(1:24, dim=c(2,3,4)) # 3 dimensions
## , , 1
##
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
##
## , , 2
##
##      [,1] [,2] [,3]
## [1,]    7    9   11
## [2,]    8   10   12
##
## , , 3
##
##      [,1] [,2] [,3]
## [1,]   13   15   17
## [2,]   14   16   18
##
## , , 4
##
##      [,1] [,2] [,3]
## [1,]   19   21   23
## [2,]   20   22   24
array(1:24, dim=c(2,3,4))[1,,] # fix the first index at 1: the result is a 3 x 4 matrix
##      [,1] [,2] [,3] [,4]
## [1,]    1    7   13   19
## [2,]    3    9   15   21
## [3,]    5   11   17   23
# create a data.frame
testDF = data.frame(col1 = c(1:5),
                    col2 = letters[1:5],
                    col3 = c(TRUE,TRUE,FALSE,FALSE,TRUE))
testDF
##   col1 col2  col3
## 1    1    a  TRUE
## 2    2    b  TRUE
## 3    3    c FALSE
## 4    4    d FALSE
## 5    5    e  TRUE

# change the colnames and rownames
colnames(testDF) = c("number","character","logic")
rownames(testDF) = paste0("row",1:5)
testDF
##      number character logic
## row1      1         a  TRUE
## row2      2         b  TRUE
## row3      3         c FALSE
## row4      4         d FALSE
## row5      5         e  TRUE

# one column, selected by name
testDF$number
## [1] 1 2 3 4 5
# selected rows and columns, both given by name
testDF[c("row1","row3"),c("number","character")]
##      number character
## row1      1         a
## row3      3         c
# Summary statistics for each column of the cars data set
summary(cars)
## speed dist ## Min. : 4.0 Min. : 2.00 ## 1st Qu.:12.0 1st Qu.: 26.00 ## Median :15.0 Median : 36.00 ## Mean :15.4 Mean : 42.98 ## 3rd Qu.:19.0 3rd Qu.: 56.00 ## Max. :25.0 Max. :120.00
# A factor stores a vector as a set of categories (levels)
a = c(0,1,0,2,1,0,1,2,1)
factor(a)
## [1] 0 1 0 2 1 0 1 2 1
## Levels: 0 1 2
# str() shows the internal form: integer codes that index into the levels
str(factor(a))
##  Factor w/ 3 levels "0","1","2": 1 2 1 3 2 1 2 3 2
list(name1 = object1, name2 = object2,...)
# A list can hold elements of different types and lengths, including another list
testList = list(n = c(2, 3, 5),
                char = c("aa", "bb", "cc", "dd", "ee"),
                bool = c(TRUE, FALSE, TRUE, FALSE, FALSE),
                m = matrix(1:9,3,3),
                alist = list(name=c("a","b"),gender=c("male","female")))
testList
## $n
## [1] 2 3 5
##
## $char
## [1] "aa" "bb" "cc" "dd" "ee"
##
## $bool
## [1]  TRUE FALSE  TRUE FALSE FALSE
##
## $m
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
##
## $alist
## $alist$name
## [1] "a" "b"
##
## $alist$gender
## [1] "male"   "female"

# extract one element by position
testList[[5]]
## $name
## [1] "a" "b"
##
## $gender
## [1] "male"   "female"
# extract one element by name
testList[["m"]]
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
# $ is shorthand for extracting by name
testList$char
## [1] "aa" "bb" "cc" "dd" "ee"
Avoid using full file paths.
Get the working directory:
# Show the current working directory
getwd()
# Change the working directory to the home directory, then confirm the change
setwd("~")
getwd()
http://www-bcf.usc.edu/~gareth/ISL/data.html
# Read the comma-separated file Auto.csv from the working directory.
# header = TRUE: the first line holds the column names.
# quote = "": quote characters in the file are not given any special meaning.
autoData = read.table("Auto.csv",header=TRUE,sep=",",quote="")
# Show the first six rows
head(autoData)
## mpg cylinders displacement horsepower weight acceleration year origin ## 1 18 8 307 130 3504 12.0 70 1 ## 2 15 8 350 165 3693 11.5 70 1 ## 3 18 8 318 150 3436 11.0 70 1 ## 4 16 8 304 150 3433 12.0 70 1 ## 5 17 8 302 140 3449 10.5 70 1 ## 6 15 8 429 198 4341 10.0 70 1 ## name ## 1 chevrolet chevelle malibu ## 2 buick skylark 320 ## 3 plymouth satellite ## 4 amc rebel sst ## 5 ford torino ## 6 ford galaxie 500
# Write the data frame to a text file: " ; " between fields,
# no quoting of strings, no row names
write.table(autoData,file="Auto2.txt",quote=FALSE,sep=" ; ",row.names=FALSE)

# Create an empty data frame with typed columns, then open it for manual editing
students = data.frame(name=character(),age=numeric(),grade=numeric(),stringsAsFactors = FALSE)
fix(students)
if (condition) { statement1 } else { statement2 }
# Classify x by sign; the branches are tested in order and the first match runs
x <- 0
if (x < 0) {
  print("Negative number")
} else if (x > 0) {
  print("Positive number")
} else {
  print("Zero")
}
## [1] "Zero"
- Example
# A year is a leap year when it is divisible by 4 but not by 100,
# or when it is divisible by 400.
# && and || are the scalar, short-circuit operators meant for if() conditions.
year = 2017
if ((year %% 4 == 0 && year %% 100 != 0) || year %% 400 == 0) {
  print(paste(year,"is a leap year"))
} else {
  print("no")
}
## [1] "no"
- Syntax
for (var in range) { statement }
# Print every leap year from 2000 to 2020.
# i is a single number on each pass, so the scalar operators && and || are used.
for (i in 2000:2020) {
  if ((i %% 4 == 0 && i %% 100 != 0) || i %% 400 == 0) {
    print(paste(i,"is a leap year"))
  }
}
## [1] "2000 is a leap year"
## [1] "2004 is a leap year"
## [1] "2008 is a leap year"
## [1] "2012 is a leap year"
## [1] "2016 is a leap year"
## [1] "2020 is a leap year"
while (condition){ statement }
# while: the condition is checked before every pass
x = 5
while (x <= 20) {
  print(x)
  x = x+5
}
## [1] 5
## [1] 10
## [1] 15
## [1] 20

# repeat: no condition of its own, so the loop runs until break is reached
x = 5
repeat {
  print(x)
  x = x+5
  if (x > 20) break
}
## [1] 5
## [1] 10
## [1] 15
## [1] 20
# break leaves the loop entirely: nothing is printed from 5 onwards
for (i in 1:6) {
  if (i==5) {
    break
  }
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4

# next skips only the current pass: 5 is omitted and the loop carries on with 6
for (i in 1:6) {
  if (i==5) {
    next
  }
  print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 6
- Syntax
testFunc = function(arg1,arg2,...){ statements return(something) }
- Example
# Sum of the squares of two numbers.
#   x, y   : numeric values (vectors are handled element by element)
#   returns: x^2 + y^2
sumSquare = function(x,y){
  val = x^2+y^2
  return(val)
}
sumSquare(3,4)
## [1] 25
Arithmetic Operators
Mathematical Functions
testVect = c(1,3,5,2,9,10,7,8,6)
min(testVect) # minimum
## [1] 1
max(testVect) # maximum
## [1] 10
mean(testVect) # mean
## [1] 5.666667
median(testVect) # median
## [1] 6
quantile(testVect) # quantile
##   0%  25%  50%  75% 100%
##    1    3    6    8   10
var(testVect) #variance
## [1] 10
sd(testVect) # standard deviation
## [1] 3.162278
# Two numeric columns of the cars data set
vect1 = cars$speed
vect2 = cars$dist
cov(vect1,vect2) # covariance
## [1] 109.9469
cor(vect1,vect2) # correlation coefficient
## [1] 0.8068949
# binomial
# d*: probability of exactly 2 successes in 10 trials with success probability 0.2
dbinom(2, size=10, prob=0.2)
## [1] 0.3019899
# probability of at most 2 successes, added up term by term ...
dbinom(0, size=10, prob=0.2) + dbinom(1, size=10, prob=0.2) + dbinom(2, size=10, prob=0.2)
## [1] 0.6777995
# ... and p*: the cumulative probability gives the same number in one call
pbinom(2,size=10,prob=0.2)
## [1] 0.6777995
# r*: six random draws from the uniform distribution between 1 and 2
runif(6,min=1,max=2)
## [1] 1.609645 1.247878 1.108959 1.249730 1.623926 1.522166
# q*: quantiles of the t distribution with 4 degrees of freedom
qt(c(.025, .975), df=4)
## [1] -2.776445  2.776445
# q*: 95% quantile of the F distribution with 3 and 4 degrees of freedom
qf(.95, df1=3, df2=4)
## [1] 6.591382
# Draw 1000 values from a normal distribution (mean 5, sd 3) and plot their histogram
normSamples = rnorm(1000,mean=5,sd = 3)
hist(normSamples)

# Fixing the seed makes the random sequence repeatable
set.seed(100)
rnorm(5)
## [1] -0.50219235  0.13153117 -0.07891709  0.88678481  0.11697127
rnorm(5)
## [1]  0.3186301 -0.5817907  0.7145327 -0.8252594 -0.3598621
set.seed(100) # reproduce the results
rnorm(5)
## [1] -0.50219235  0.13153117 -0.07891709  0.88678481  0.11697127
rnorm(5)
## [1]  0.3186301 -0.5817907  0.7145327 -0.8252594 -0.3598621
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library(ISLR)
# Query the Auto data frame with SQL: cars with 8 cylinders and more than
# 200 horsepower, sorted by increasing horsepower
sqlCars = sqldf("
  select name, origin, year, cylinders, horsepower
  from Auto
  where cylinders = 8 and horsepower > 200
  order by horsepower
")
## Loading required package: tcltk
sqlCars
## name origin year cylinders horsepower ## 1 mercury marquis 1 72 8 208 ## 2 dodge d200 1 70 8 210 ## 3 plymouth fury iii 1 70 8 215 ## 4 ford f250 1 70 8 215 ## 5 chrysler new yorker brougham 1 73 8 215 ## 6 chevrolet impala 1 70 8 220 ## 7 pontiac catalina 1 70 8 225 ## 8 buick estate wagon (sw) 1 70 8 225 ## 9 buick electra 225 custom 1 73 8 225 ## 10 pontiac grand prix 1 73 8 230
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## ## Attaching package: 'zoo'
## The following objects are masked from 'package:base': ## ## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Download AAPL price data from Yahoo, starting 500 days before today.
# The result is stored in a variable named AAPL rather than returned.
getSymbols("AAPL",src="yahoo",from = Sys.Date()-500)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and ## auto.assign=TRUE by default. ## ## This behavior will be phased out in 0.5-0 when the call will ## default to use auto.assign=FALSE. getOption("getSymbols.env") and ## getOptions("getSymbols.auto.assign") are now checked for alternate defaults ## ## This message is shown once per session and may be disabled by setting ## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "AAPL"
# Show the last rows of the downloaded series
tail(AAPL)
## AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume ## 2017-01-24 119.55 120.10 119.50 119.97 23150200 ## 2017-01-25 120.42 122.10 120.28 121.88 32377600 ## 2017-01-26 121.67 122.44 121.60 121.94 26282000 ## 2017-01-27 122.14 122.35 121.60 121.95 20437400 ## 2017-01-30 120.93 121.63 120.66 121.63 30278800 ## 2017-01-31 121.15 121.39 120.62 121.35 41610600 ## AAPL.Adjusted ## 2017-01-24 119.97 ## 2017-01-25 121.88 ## 2017-01-26 121.94 ## 2017-01-27 121.95 ## 2017-01-30 121.63 ## 2017-01-31 121.35
# Plot the downloaded series as a bar chart
barChart(AAPL)
# Fetch the latest quote for AAPL from Yahoo
getQuote("AAPL",src="yahoo")
## Trade Time Last Change % Change Open High Low ## AAPL 2017-02-01 04:00:00 128.7908 7.4408 +6.1317% 127.1 130.49 127.01 ## Volume ## AAPL 11590137