R version 2.10.1 (2009-12-14)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

[R.app GUI 1.31 (5537) x86_64-apple-darwin9.8.0]

> x=c(10,20)
> y=c(30,40)
> m=cbind(x,y)
> m
      x  y
[1,] 10 30
[2,] 20 40
> class(m)
[1] "matrix"
> ?class
starting httpd help server ... done
> length(m)
[1] 4
> dim(m)
[1] 2 2
> m[1,]
 x  y 
10 30 
> m[,2]
[1] 30 40
> m[2,2]
 y 
40 
> m[,1]*m[,2]
[1] 300 800
> m$x
Error in m$x : $ operator is invalid for atomic vectors
> m
      x  y
[1,] 10 30
[2,] 20 40

> rownames(m)=c('a','b')
> m
   x  y
a 10 30
b 20 40

> m[,'y']
 a  b 
30 40 
> m$y
Error in m$y : $ operator is invalid for atomic vectors

> m
   x  y
a 10 30
b 20 40
> z=c('a','b')
> m=cbind(x,y,z)
> m
     x    y    z  
[1,] "10" "30" "a"
[2,] "20" "40" "b"
> m[,1]+m[,2]
Error in m[, 1] + m[, 2] : non-numeric argument to binary operator
> class(m)
[1] "matrix"
> class(m[,1])
[1] "character"
> class(c(10,20))
[1] "numeric"
> length(m)
[1] 6
> dim(m)
[1] 2 3
> m=cbind(x,y)
> m
      x  y
[1,] 10 30
[2,] 20 40
> df=data.frame(x,y)
> df
   x  y
1 10 30
2 20 40
> class(df)
[1] "data.frame"
> length(df)
[1] 2
> dim(df)
[1] 2 2
> df$x
[1] 10 20
> df[,'x']
[1] 10 20
> rownames(df)=c('a','b')
> df[1,]
   x  y
a 10 30
> df['a',]
   x  y
a 10 30
> df[,1]
[1] 10 20
> df[1,1]
[1] 10
> df['a',1]
[1] 10
> df[,1] + df[,2]
[1] 40 60
> df
   x  y
a 10 30
b 20 40
> df=data.frame(x,y,z)
> df
   x  y z
1 10 30 a
2 20 40 b
> m
      x  y
[1,] 10 30
[2,] 20 40
> m=cbind(x,y,z)
> m
     x    y    z  
[1,] "10" "30" "a"
[2,] "20" "40" "b"
> df
   x  y z
1 10 30 a
2 20 40 b

> df[,1] + df[,2]
[1] 40 60

> df[,1]+df[,3]
[1] NA NA
Warning message:
In Ops.factor(df[, 1], df[, 3]) : + not meaningful for factors

> df[c(1,3)]
   x z
1 10 a
2 20 b
> df[,c(1,3)]
   x z
1 10 a
2 20 b
> df[c(1,3),]
    x  y    z
1  10 30    a
NA NA NA <NA>

> height=1:10
> height>7
 [1] FALSE FALSE FALSE FALSE FALSE FALSE
 [7] FALSE  TRUE  TRUE  TRUE
> which(height>7)
[1]  8  9 10
> height[which(height>7)]
[1]  8  9 10
> height[height>7]
[1]  8  9 10
> df[c(1,3),]
    x  y    z
1  10 30    a
NA NA NA <NA>
> df[,c(1,3)]
   x z
1 10 a
2 20 b
> df[,c(T,F,T)]
   x z
1 10 a
2 20 b

> y=seq(30,50,by=10)
> x
[1] 10 20
> y
[1] 30 40 50
> m=cbind(x,y)
Warning message:
In cbind(x, y) :
  number of rows of result is not a multiple of vector length (arg 1)
> m
      x  y
[1,] 10 30
[2,] 20 40
[3,] 10 50
> m=rbind(x,y)
Warning message:
In rbind(x, y) :
  number of columns of result is not a multiple of vector length (arg 1)
> m
  [,1] [,2] [,3]
x   10   20   10
y   30   40   50
> x=c(x,NA)
> rbind(x,y)
  [,1] [,2] [,3]
x   10   20   NA
y   30   40   50
> x=c(10,20)
> x
[1] 10 20
> y
[1] 30 40 50

> rm(df)
> df=data.frame(x,y)
Error in data.frame(x, y) : 
  arguments imply differing number of rows: 2, 3
> df
function (x, df1, df2, ncp, log = FALSE) 
{
    if (missing(ncp)) 
        .Internal(df(x, df1, df2, log))
    else .Internal(dnf(x, df1, df2, ncp, log))
}
<environment: namespace:stats>

> y=c(30,40)
> x
[1] 10 20
> y
[1] 30 40
> l=list(x,y)
> l
[[1]]
[1] 10 20

[[2]]
[1] 30 40

> l=list(xx=x,yy=y)
> l
$xx
[1] 10 20

$yy
[1] 30 40

> class(l)
[1] "list"
> length(l)
[1] 2
> dim(l)
NULL
> l[1]
$xx
[1] 10 20

> l['xx']
$xx
[1] 10 20

> l$xx
[1] 10 20
> sum(l[1])
Error in sum(l[1]) : invalid 'type' (list) of argument
> l[[1]]
[1] 10 20
> l[1]
$xx
[1] 10 20

> l$xx
[1] 10 20
> sum(l$xx)
[1] 30
> sum(l[[1]])
[1] 30
> sum(l[['xx']])
[1] 30
> l=list(xx=x,yy=y)
> l
$xx
[1] 10 20

$yy
[1] 30 40

> names(l)
[1] "xx" "yy"
> l=list(x,y)
> l
[[1]]
[1] 10 20

[[2]]
[1] 30 40

> names(l)=c('a','b')
> l
$a
[1] 10 20

$b
[1] 30 40

> m
  [,1] [,2] [,3]
x   10   20   10
y   30   40   50
> dimnames(m)
[[1]]
[1] "x" "y"

[[2]]
NULL

> dimnames(l)
NULL
> l
$a
[1] 10 20

$b
[1] 30 40

> names(l)=c('a','hello word')
> l
$a
[1] 10 20

$`hello word`
[1] 30 40

> l$hello word
Error: unexpected symbol in "l$hello word"
Error: unexpected symbol in "l$''hello"
> l$'hello word'
[1] 30 40

> l=list(xx=x,yy=y,zz=z)
> l
$xx
[1] 10 20

$yy
[1] 30 40

$zz
[1] "a" "b"

> l$xx+l$yy
[1] 40 60
> class(l$xx)
[1] "numeric"
> class(l$zz)
[1] "character"
> y=seq(30,50,by=10)
> y
[1] 30 40 50
> l=list(xx=x,yy=y,zz=z)
> l
$xx
[1] 10 20

$yy
[1] 30 40 50

$zz
[1] "a" "b"

> df=data.frame(x,y)
Error in data.frame(x, y) : 
  arguments imply differing number of rows: 2, 3
> y=c(30,40)
> df=data.frame(x,y)
> df
   x  y
1 10 30
2 20 40
> x
[1] 10 20
> rm(x)
> df=data.frame(x=c(10,20),y)
> df
   x  y
1 10 30
2 20 40

> attach(df)

	The following object(s) are masked _by_ .GlobalEnv :

	 y 

> detach(df)
> y
[1] 30 40
> df=data.frame(x=c(10,20),y=c(50,60))
> attach(df)

	The following object(s) are masked _by_ .GlobalEnv :

	 y 

> y
[1] 30 40
> rm(y)
> attach(df)

	The following object(s) are masked from df ( position 3 ) :

	 x y 

> detach(df)
> detach(df)
> df
   x  y
1 10 50
2 20 60
> x
Error: object 'x' not found
> y
Error: object 'y' not found
> attach(df)
> x
[1] 10 20
> y
[1] 50 60
> x=7
> x
[1] 7
> df
   x  y
1 10 50
2 20 60
> df$x=7
> df
  x  y
1 7 50
2 7 60
> detach(df)

> df
   x  y z
1 10 50 a
2 20 60 b
> df[1,2:3]
   y z
1 50 a

> df=data.frame(x=c(10,20,30),y=c(50,60,70),z=c('a','b','c'))
> df
   x  y z
1 10 50 a
2 20 60 b
3 30 70 c
> df[2:3,2:3]
   y z
2 60 b
3 70 c

> subset(df,subset=(z=='a' | z=='c'))
   x  y z
1 10 50 a
3 30 70 c
> subset(df,subset=(z=='a' | z=='c'))[,2:3]
   y z
1 50 a
3 70 c
> subset(df,subset=(z=='a' | z=='c'),select=2:3)
   y z
1 50 a
3 70 c
> df
   x  y z
1 10 50 a
2 20 60 b
3 30 70 c
> df$y=rev(df$y)
> df
   x  y z
1 10 70 a
2 20 60 b
3 30 50 c

> sort(df)
Error in `[.data.frame`(x, order(x, na.last = na.last, decreasing = decreasing)) : 
  undefined columns selected

> order(df$y)
[1] 3 2 1
> df$y
[1] 70 60 50
> order(df$y,decreasing=T)
[1] 1 2 3
> df[order(df$y),]
   x  y z
3 30 50 c
2 20 60 b
1 10 70 a

> order(df$y)
[1] 3 2 1

> df[c(3,2,1),]
   x  y z
3 30 50 c
2 20 60 b
1 10 70 a

> df[c(2,3,1),]
   x  y z
2 20 60 b
3 30 50 c
1 10 70 a
> df[c(2,3),]
   x  y z
2 20 60 b
3 30 50 c
> df[c(3,2),]
   x  y z
3 30 50 c
2 20 60 b

> library(UsingR)
> ewr
   Year Month   AA   CO   DL   HP   NW
1  2000   Nov  8.6  8.3  8.6 10.4  8.1
2  2000   Oct  8.5  8.0  8.4 11.2  8.2
3  2000   Sep  8.1  8.5  8.4 10.2  8.3
4  2000   Aug  8.9  9.1  9.2 14.5  9.0
5  2000   Jul  8.3  8.9  8.2 11.5  8.8
6  2000   Jun  8.8  9.0  8.8 14.9  8.4
7  2000   May  8.1  8.8  7.9  9.8  7.8
8  2000   Apr  8.7  8.6  8.1  9.8  8.6
9  2000   Mar  8.8  8.4  7.8  8.0  8.7
10 2000   Feb  8.7  8.9  7.6  9.3  8.5
11 2000   Jan  9.4  9.4  8.5  9.9  9.4
12 1999   Dec  8.6  8.7  7.8  9.3  8.1
13 1999   Nov  8.3  9.3  8.5 10.4  8.9
14 1999   Oct  7.9  9.4  8.1 10.8  9.3
15 1999   Sep  8.4  9.3  8.1 10.3  9.0
...

> ?ewr
> df=ewr[,3:10]

> mean(df)
      AA       CO       DL       HP 
17.83478 20.01957 16.63043 19.60435 
      NW       TW       UA       US 
15.79783 16.28043 17.69130 15.49348 

> median(df)
[1] NA NA
Warning messages:
1: In mean.default(X[[1L]], ...) :
  argument is not numeric or logical: returning NA
2: In mean.default(X[[2L]], ...) :
  argument is not numeric or logical: returning NA

> mean(df)
      AA       CO       DL       HP 
17.83478 20.01957 16.63043 19.60435 
      NW       TW       UA       US 
15.79783 16.28043 17.69130 15.49348 

> apply(df,2,median)
   AA    CO    DL    HP    NW    TW 
16.05 18.15 15.50 18.95 14.55 15.65 
   UA    US 
16.45 14.45 

> apply(df,2,mean)
      AA       CO       DL       HP 
17.83478 20.01957 16.63043 19.60435 
      NW       TW       UA       US 
15.79783 16.28043 17.69130 15.49348 

> ?apply
> ?sapply

> l
$xx
[1] 10 20

$yy
[1] 30 40 50

$zz
[1] "a" "b"

> x
[1] 7

> l=list(xx=c(10,20),yy=c(30,40))
> l
$xx
[1] 10 20

$yy
[1] 30 40

> lapply(l,mean)
$xx
[1] 15

$yy
[1] 35

> lapply(l,median)
$xx
[1] 15

$yy
[1] 35

> sapply(l,median)
xx yy 
15 35 
> class(sapply(l,median))
[1] "numeric"
> sapply(l,range)
     xx yy
[1,] 10 30
[2,] 20 40
> class(sapply(l,range))
[1] "matrix"
> sapply(l,sum)
xx yy 
30 70 
> l=list(xx=c(10,20),yy=c(30,40,50))
> l
$xx
[1] 10 20

$yy
[1] 30 40 50

> class(sapply(l,median))
[1] "numeric"
> sapply(l,median)
xx yy 
15 40 
> sapply(l,range)
     xx yy
[1,] 10 30
[2,] 20 50
> sapply(l,sum)
 xx  yy 
 30 120 

> boxplot(ewr[3:10])
> plot(ewr[3:10])

> attach(student.expenses)
> table(cell.phone,car)
          car
cell.phone N Y
         N 1 2
         Y 3 4
> table(cell.phone,car,cable.modem)
, , cable.modem = N

          car
cell.phone N Y
         N 1 2
         Y 2 3

, , cable.modem = Y

          car
cell.phone N Y
         N 0 0
         Y 1 1

> ftable(cell.phone,car,cable.modem)
               cable.modem N Y
cell.phone car                
N          N               1 0
           Y               2 0
Y          N               2 1
           Y               3 1

> ftable(table(cell.phone,car,cable.modem),col.vars=c('cable.modem','car'))
           cable.modem N   Y  
           car         N Y N Y
cell.phone                    
N                      1 2 0 0
Y                      2 3 1 1

#=============================================================================
# Chapter 4, Example 4.3, pp. 109-110

> colnames(babies)
 [1] "id"        "pluralty"  "outcome"   "date"      "gestation" "sex"      
 [7] "wt"        "parity"    "race"      "age"       "ed"        "ht"       
[13] "wt1"       "drace"     "dage"      "ded"       "dht"       "dwt"      
[19] "marital"   "inc"       "smoke"     "time"      "number"   
> ?babies
> attach(babies)

# scatter-plot of all values
> plot(gestation,wt)

# replace 999 (code for NA) with actual NA (to be ignored in plot), & replot
> gestation[gestation==999]=NA
> plot(gestation,wt)

# looking at "smoke", we see each entry in the df has one of 5 possible 
# smoke values (i.e., falls into one of 5 categories of smoking behavior).
> table(smoke)
smoke
  0   1   2   3   9 
544 484  95 103  10 

# we can specify that the dataset should be partitioned into a different subset
# for each category (by setting the plotting character pch to the smoke value)
#
> plot(gestation,wt,pch=smoke)

# doing ?babies, we see "smoke" categories defined as:
#
#     smoke does mother smoke? 0=never, 1= smokes now, 2=until current
#          pregnancy, 3=once did, not now, 9=unknown
#
# let's make a legend, mapping each description to its corresponding value,
# manually placing legend location with "locator(1)"
#
legend(locator(1), legend=c('never','yes','until pregnant','long ago','unknown'), pch=unique(smoke))

# let's plot with different colors instead of different characters
# rainbow(length(unique(smoke))) will give us one color per category
# to index into it, we'll map 0:3,9 --> 0:4 --> 1:5
> smoke[smoke==9]=4
> plot(gestation,wt,col=rainbow(length(unique(smoke)))[smoke+1])

# how would we change the legend command to use colors instead of characters?

#=============================================================================
# Chapter 4, section 4.3.1, p. 126

# boxplot of gestation by income level, including all values
> boxplot(gestation~inc,data=babies)

# exclude the coded NA values
> boxplot(gestation~inc,data=babies,subset=(gestation!=999 & inc!=98))

# vary width of boxplot in proportion to sample size (for each income level) 
> boxplot(gestation~inc,data=babies,subset=(gestation!=999 & inc!=98),varwidth=T)

# another way to select the same subset
> boxplot(gestation~inc,data=babies[gestation!=999 & inc!=98,],varwidth=T)

# add axis labels 
> boxplot(gestation~inc,data=babies[gestation!=999 & inc!=98,],varwidth=T,xlab="income level",ylab="gestation (days)")

#=============================================================================
> mandms
                  blue   brown   green  orange     red  yellow
milk chocolate 10.0000 30.0000 10.0000 10.0000 20.0000 20.0000
Peanut         20.0000 20.0000 10.0000 10.0000 20.0000 20.0000
Peanut Butter  20.0000 20.0000 20.0000  0.0000 20.0000 20.0000
Almond         16.6667 16.6667 16.6667 16.6667 16.6667 16.6667
kid minis      16.6667 16.6667 16.6667 16.6667 16.6667 16.6667

# by default, which() returns a 1-dimensional (linear) index
> which(mandms==0)
[1] 18

# with optional argument, returns matrix indexing when appropriate
> which(mandms==0,arr.ind=TRUE)
              row col
Peanut Butter   3   4

# now an ugly way to directly convert the 1-dimensional index 
# to the matrix (row,col) index
> index=which(mandms==0)
> row=((index-1)%%nrow(mandms))+1
> row
[1] 3
> col=ceiling(index/nrow(mandms))
> col
[1] 4

> mandms[row,col]
[1] 0

# which package is missing which color
> rownames(mandms)[row]
[1] "Peanut Butter"
> colnames(mandms)[col]
[1] "orange"

# the other direction: convert (row,col) to a 1-dimensional index
> gen_index=(col-1)*nrow(mandms)+row
> gen_index
[1] 18