\name{model.importance.plot}

\alias{model.importance.plot}

\title{Compares the variable importance of two models with a back to back barchart}

\description{Takes two models and produces a back to back bar chart to compare the importance of the predictor variables. Models can be any combination of Random Forest or Stochastic Gradient Boosting, as long as both models have the same predictor variables. }

\usage{
model.importance.plot(model.obj.1 = NULL, model.obj.2 = NULL, 
model.name.1 = "Model 1", model.name.2 = "Model 2", imp.type.1 = NULL, 
imp.type.2 = NULL, type.label=TRUE, class.1 = NULL, class.2 = NULL, 
scale.by = "sum", sort.by = "model.obj.1", predList = NULL, 
folder = NULL, PLOTfn = NULL, device.type = NULL, jpeg.res = 72, 
device.width = 7, device.height = 7,cex=par()$cex,...)
}

\arguments{

  \item{model.obj.1}{\code{R} model object. The model object to use for left side of barchart.  The model object must be of type RF or SGB and have the same predictors as \code{model.obj.2}.}

  \item{model.obj.2}{\code{R} model object. The model object to use for right side of barchart.  The model object must be of type RF or SGB and have the same predictors as \code{model.obj.1}.}

  \item{model.name.1}{String. Label for left side of barchart.}

  \item{model.name.2}{String. Label for right side of barchart.}
 
  \item{imp.type.1}{Number. Type of importance to use for model 1. Importance type 1 is permutation based, as described in Breiman (2001).  Importance type 2 is model based. For RF models, it is the decrease in node impurities attributable to each predictor variable. For SGB models, it is the reduction attributable to each variable in predicting the gradient on each iteration. Default for random forest models is \code{imp.type.1 = 1}. Default for SGB models is \code{imp.type.1 = 2}.}

  \item{imp.type.2}{Number. Type of importance to use for model 2. Importance type 1 is permutation based, as described in Breiman (2001).  Importance type 2 is model based. For RF models, it is the decrease in node impurities attributable to each predictor variable. For SGB models, it is the reduction attributable to each variable in predicting the gradient on each iteration. Default for random forest models is \code{imp.type.2 = 1}. Default for SGB models is \code{imp.type.2 = 2}.}

  \item{type.label}{Logical. Should axis labels include importance type for each side of plot.}

  \item{class.1}{String. For binary and categorical random forest models. If the name of a class is specified, the class-specific relative influence is used for the plot. If \code{class.1 = NULL}, the overall relative influence is used for the plot.}

  \item{class.2}{String. For binary and categorical random forest models. If the name of a class is specified, the class-specific relative influence is used for the plot. If \code{class.2 = NULL}, the overall relative influence is used for the plot.}

  \item{scale.by}{String. Scale by: \code{"max"} or \code{"sum"}. When \code{scale.by="max"}, the importances are scaled for each model so that the maximum importance for each model fills the graph. When \code{scale.by="sum"}, the importances for each model are scaled to sum to 100.}

  \item{sort.by}{String. Sort by: \code{"model.obj.1"}, \code{"model.obj.2"}, or \code{"predList"}. Gives the order to draw the bars for the predictor variables. When \code{sort.by="model.obj.1"} the predictors are sorted largest to smallest based on importance from model 1. When \code{sort.by="model.obj.2"} the predictors are sorted largest to smallest based on importance from model 2. When \code{sort.by="predList"} the predictors are sorted to match the order given in \code{predList}.}

  \item{predList}{String.  A character vector of the predictor short names used to build the models. If \code{sort.by="predList"}, then \code{predList} is used to specify the order to draw the predictors in the barchart.}

  \item{folder}{ String.  The folder used for all output.  Do not add ending slash to path string.  If \code{folder = NULL} (default), a GUI interface prompts user to browse to a folder.  To use the working directory, specify \code{folder = getwd()}.}

  \item{PLOTfn}{ String.  The file name to use to save the generated graphical plots. If \code{PLOTfn = NULL} a default name is generated by pasting \code{model.name.1_model.name.2}. The filename can be the full path, or it can be the simple basename, in which case the output will be to the folder specified by \code{folder}.}

 \item{device.type}{ String or vector of strings.  One or more device types for graphical output of the importance plot. 

Current choices:

\tabular{lllll}{
	  \tab \tab \tab \code{"default"} \tab default graphics device\cr
	  \tab \tab \tab \code{"jpeg"} \tab *.jpg files\cr
	  \tab \tab \tab \code{"none"} \tab no graphics device generated\cr	
	  \tab \tab \tab \code{"pdf"} \tab *.pdf files\cr
	  \tab \tab \tab \code{"postscript"} \tab *.ps files\cr
	  \tab \tab \tab \code{"win.metafile"} \tab *.emf files }
 }

  \item{jpeg.res}{ Integer.  Pixels per inch for jpeg plots.  The default is 72dpi, good for on screen viewing. For printing, suggested setting is 300dpi. }

  \item{device.width}{ Integer.  The device width for diagnostic plots in inches. }

  \item{device.height}{ Integer.  The device height for diagnostic plots in inches. }

  \item{cex}{ Number.  The character expansion factor (\code{cex}) for plots.  Defaults to the current value of \code{par()$cex}. }



  \item{\dots}{Arguments to be passed to methods, such as graphical parameters (see \code{\link{par}}).}

}
\details{
The importance measures used in this plot depend on the model type (RF versus SGB) and the response type (continuous, categorical, or binary). 

Importance type 1 is permutation based, as described in Breiman (2001). Importance is calculated by randomly permuting each predictor variable and computing the associated reduction in predictive performance, using Out Of Bag error for RF models and training error for SGB models. Note that for SGB models, permutation-based importance measures are still considered experimental.  Importance type 2 is model based. For RF models, importance type 2 is calculated by the decrease in node impurities attributable to each predictor variable. For SGB models, importance type 2 is the reduction attributable to each variable in predicting the gradient on each iteration, as described in Friedman (2001). 

For SGB models:
\tabular{lllclll}{
	  \tab \tab  \code{response type} \tab \code{type}	\tab				\tab \tab Importance Measure \cr
	  \tab \tab  \code{"continuous"}  \tab \code{1}		\tab permutation 		\tab \tab reduction in predictive performance \cr
	  \tab \tab  \code{"binary"}      \tab \code{1}		\tab permutation		\tab \tab reduction in predictive performance \cr
	  \tab \tab  \code{"continuous"}  \tab \code{2}		\tab gradient of loss function	\tab \tab reduction of squared error \cr
	  \tab \tab  \code{"binary"}      \tab \code{2}		\tab gradient of loss function	\tab \tab reduction in sum of squared error }

For RF models:
\tabular{lllclll}{
	  \tab \tab  \code{response type} \tab \code{type}	\tab			\tab \tab Importance Measure \cr	
	  \tab \tab  \code{"continuous"}  \tab \code{1}   	\tab permutation 	\tab \tab \%IncMSE \cr
	  \tab \tab  \code{"binary"}      \tab \code{1}    	\tab permutation 	\tab \tab Mean Decrease Accuracy \cr
	  \tab \tab  \code{"categorical"} \tab \code{1}    	\tab permutation 	\tab \tab Mean Decrease Accuracy \cr
	  \tab \tab  \code{"continuous"}  \tab \code{2}    	\tab node impurity	\tab \tab Residual sum of squares \cr
	  \tab \tab  \code{"binary"}      \tab \code{2}    	\tab node impurity	\tab \tab Mean Decrease Gini \cr
	  \tab \tab  \code{"categorical"} \tab \code{2}    	\tab node impurity	\tab \tab Mean Decrease Gini   }
  
For Random Forest models, if the importance type (\code{imp.type.1} or \code{imp.type.2}) is not specified, it defaults to \code{1} - permutation importance. For SGB models, permutation importance is considered experimental, so the importance type defaults to \code{2} - reduction of gradient of the loss function.

Also, for binary and categorical Random Forest models, class specific importance plots can be generated by the use of the \code{class.1} and \code{class.2} arguments. Note that class specific importance is only available for Random Forest models with importance type 1.

}


\references{

Breiman, L. (2001) Random Forests. Machine Learning, 45:5-32.

Friedman, J.H. (2001). Greedy function approximation: a gradient boosting machine. Ann. Stat., 29(5):1189-1232.

}

\author{Elizabeth Freeman}


\seealso{\code{\link{model.build}}}

\examples{

###########################################################################
############################# Run this set up code: #######################
###########################################################################

# Seed passed to the model.build() calls below:
seed <- 38

# Training data file: example CSV installed with the ModelMap package
# (only a training file is defined in this example):
qdata.trainfn <- system.file("extdata", "helpexamples", "DATATRAIN.csv",
                             package = "ModelMap")

# Folder for all output (here, the current working directory):
folder <- getwd()

# Identifier for individual training and test data points:
unique.rowname <- "ID"

##################################################################
########## Continuous Response, Continuous Predictors ############
##################################################################

# Model file names, one per model type:
MODELfn.RF  <- "RF_Bio_TC"
MODELfn.SGB <- "SGB_Bio_TC"

# Predictor variables:
predList <- c("TCB", "TCG", "TCW")

# Which predictors are categorical (FALSE here):
predFactor <- FALSE

# Response name and type:
response.name <- "BIO"
response.type <- "continuous"

########## Build Models #################################

# Random Forest (RF) model:
model.obj.RF <- model.build(
    model.type     = "RF",
    qdata.trainfn  = qdata.trainfn,
    folder         = folder,
    unique.rowname = unique.rowname,
    MODELfn        = MODELfn.RF,
    predList       = predList,
    predFactor     = predFactor,
    response.name  = response.name,
    response.type  = response.type,
    seed           = seed
)

# Stochastic Gradient Boosting (SGB) model, built on the same data and
# predictors but with a different seed (seed + 1):
model.obj.SGB <- model.build(
    model.type     = "SGB",
    qdata.trainfn  = qdata.trainfn,
    folder         = folder,
    unique.rowname = unique.rowname,
    MODELfn        = MODELfn.SGB,
    predList       = predList,
    predFactor     = predFactor,
    response.name  = response.name,
    response.type  = response.type,
    seed           = seed + 1
)

############## Make Importance Plot - RF vs. SGB ###################

# RF model on the left side of the barchart, SGB model on the right.
# No imp.type.1 / imp.type.2 is given, so each model uses the default
# importance type for its model type. Bars are drawn in predList order.
model.importance.plot(
    model.obj.1  = model.obj.RF,
    model.obj.2  = model.obj.SGB,
    model.name.1 = "RF Model",
    model.name.2 = "SGB Model",
    scale.by     = "sum",
    sort.by      = "predList",
    predList     = predList,
    main         = "RF versus SGB",
    device.type  = "default"
)

########## Make Importance Plot - RF Importance type 1 vs 2 #######

# Same RF model on both sides of the barchart: importance type 1 on the
# left, importance type 2 on the right.
model.importance.plot(
    model.obj.1  = model.obj.RF,
    model.obj.2  = model.obj.RF,
    model.name.1 = "PercentIncMSE",
    model.name.2 = "IncNodePurity",
    imp.type.1   = 1,
    imp.type.2   = 2,
    scale.by     = "sum",
    sort.by      = "predList",
    predList     = predList,
    main         = "Imp type 1 vs Imp type 2",
    device.type  = "default"
)


##################################################################
########## Categorical Response, Continuous Predictors ###########
##################################################################

# Model file name:
MODELfn <- "RF_NLCD_TC"

# Predictor variables:
predList <- c("TCB", "TCG", "TCW")

# Which predictors are categorical (FALSE here):
predFactor <- FALSE

# Response name and type:
response.name <- "NLCD"
response.type <- "categorical"

########## Build Model #################################

# Random Forest model for the categorical response:
model.obj.NLCD <- model.build(
    model.type     = "RF",
    qdata.trainfn  = qdata.trainfn,
    folder         = folder,
    unique.rowname = unique.rowname,
    MODELfn        = MODELfn,
    predList       = predList,
    predFactor     = predFactor,
    response.name  = response.name,
    response.type  = response.type,
    seed           = seed
)

############## Make Importance Plot ###################

# Same categorical RF model on both sides of the barchart, comparing
# class-specific importance: class "41" on the left, class "42" on the right.
model.importance.plot(
    model.obj.1  = model.obj.NLCD,
    model.obj.2  = model.obj.NLCD,
    model.name.1 = "NLCD=41",
    model.name.2 = "NLCD=42",
    class.1      = "41",
    class.2      = "42",
    scale.by     = "sum",
    sort.by      = "predList",
    predList     = predList,
    main         = "Class 41 vs. Class 42",
    device.type  = "default"
)


}

\keyword{ models }

