% file MASS/Aids2.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Aids2}
\alias{Aids2}
\title{
Australian AIDS Survival Data
}
\description{
Data on patients diagnosed with AIDS in Australia before 1 July 1991.
}
\usage{
data(Aids2)
}
\format{
This data frame contains 2843 rows and the following columns:
\describe{
\item{\code{state}}{
Grouped state of origin: NSW includes ACT and \code{"other"} is WA, SA, NT
and TAS.
}
\item{\code{sex}}{
Sex of patient
}
\item{\code{diag}}{
(Julian) date of diagnosis
}
\item{\code{death}}{
(Julian) date of death or end of observation
}
\item{\code{status}}{
\code{"A"} (alive) or \code{"D"} (dead) at end of observation
}
\item{\code{T.categ}}{
Reported transmission category
}
\item{\code{age}}{
Age (years) at diagnosis
}}}
\note{
This data set has been slightly jittered as a
condition of its release, to ensure patient confidentiality.
}
\source{
Dr P. J. Solomon and the Australian National Centre in HIV Epidemiology
and Clinical Research.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/animals.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Animals}
\alias{Animals}
\title{
Brain and Body Weights for 28 Species
}
\description{
Average brain and body weights for 28 species of land animals.
}
\usage{
data(Animals)
}
\format{
\describe{
\item{\code{body}}{
body weight in kg
}
\item{\code{brain}}{
brain weight in g
}}}
\note{
  The name \code{Animals} avoids conflicts with a system dataset
  \code{animals} in S-PLUS 4.5 and later.
}
\source{
P. J. Rousseeuw  and A. M. Leroy (1987)
\emph{Robust Regression and Outlier Detection.}
Wiley, p. 57.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/Boston.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Boston}
\alias{Boston}
\title{
Housing Values in Suburbs of Boston
}
\description{
The \code{Boston} data frame has 506 rows and 14 columns.
}
\usage{
data(Boston)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{crim}}{
per capita crime rate by town
}
\item{\code{zn}}{
proportion of residential land zoned for lots over 25,000 sq.ft.
}
\item{\code{indus}}{
proportion of non-retail business acres per town
}
\item{\code{chas}}{
Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
}
\item{\code{nox}}{
nitrogen oxides concentration (parts per 10 million)
}
\item{\code{rm}}{
average number of rooms per dwelling
}
\item{\code{age}}{
proportion of owner-occupied units built prior to 1940
}
\item{\code{dis}}{
weighted mean of distances to five Boston employment centres
}
\item{\code{rad}}{
index of accessibility to radial highways
}
\item{\code{tax}}{
full-value property-tax rate per \$10,000
}
\item{\code{ptratio}}{
pupil-teacher ratio by town
}
\item{\code{black}}{
\eqn{1000(Bk - 0.63)^2} where Bk is the proportion of blacks by town
}
\item{\code{lstat}}{
lower status of the population (percent)
}
\item{\code{medv}}{
median value of owner-occupied homes in \$1000
}}}
\source{
Harrison, D. and Rubinfeld, D.L. (1978)
Hedonic prices and the demand for clean air.
\emph{J. Environ. Economics and Management}
\bold{5}, 81--102.


Belsley D.A., Kuh, E.  and Welsch, R.E. (1980)
\emph{Regression Diagnostics. Identifying Influential Data and Sources of Collinearity.}
New York: Wiley.
}
\keyword{datasets}

\eof
% file MASS/Cars93.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Cars93}
\alias{Cars93}
\title{
Data from 93 Cars on Sale in the USA in 1993
}
\description{
The \code{Cars93} data frame has 93 rows and 27 columns.
}
\usage{
data(Cars93)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Manufacturer}}{
Manufacturer
}
\item{\code{Model}}{
Model
}
\item{\code{Type}}{
Type: Small, Sporty, Compact, Midsize, Large, Van
}
\item{\code{Min.Price}}{
Minimum Price (in \$1,000) - price for a basic version
}
\item{\code{Price}}{
Midrange Price (in \$1,000) - average of \code{Min.Price} and \code{Max.Price}
}
\item{\code{Max.Price}}{
Maximum Price (in \$1,000) - price for \dQuote{a premium version}
}
\item{\code{MPG.city}}{
City MPG (miles per US gallon by EPA rating)
}
\item{\code{MPG.highway}}{
Highway MPG
}
\item{\code{AirBags}}{
Air Bags standard. Factor: none, driver only, or driver & passenger
}
\item{\code{DriveTrain}}{
Drive train type: rear wheel, front wheel or 4WD; (factor).
}
\item{\code{Cylinders}}{
Number of cylinders (missing for Mazda RX-7, which has a rotary engine).
}
\item{\code{EngineSize}}{
Engine size (litres)
}
\item{\code{Horsepower}}{
Horsepower (maximum)
}
\item{\code{RPM}}{
RPM (revs per minute at maximum horsepower)
}
\item{\code{Rev.per.mile}}{
Engine revolutions per mile (in highest gear)
}
\item{\code{Man.trans.avail}}{
Is a manual transmission version available? (yes or no, Factor).
}
\item{\code{Fuel.tank.capacity}}{
Fuel tank capacity (US gallons)
}
\item{\code{Passengers}}{
Passenger capacity (persons)
}
\item{\code{Length}}{
Length  (inches)
}
\item{\code{Wheelbase}}{
Wheelbase (inches)
}
\item{\code{Width}}{
Width (inches)
}
\item{\code{Turn.circle}}{
U-turn space (feet)
}
\item{\code{Rear.seat.room}}{
Rear seat room (inches) (missing for 2-seater vehicles)
}
\item{\code{Luggage.room}}{
Luggage capacity (cubic feet) (missing for vans)
}
\item{\code{Weight}}{
Weight (pounds)
}
\item{\code{Origin}}{
Of non-USA or USA company origins? (factor)
}
\item{\code{Make}}{
Combination of Manufacturer and Model (character)
}}}
\details{
Cars were selected at random from among 1993 passenger car models that
were listed in both the \emph{Consumer Reports} issue and the
\emph{PACE Buying Guide}.  Pickup trucks and Sport/Utility vehicles were
eliminated due to incomplete information in the \emph{Consumer Reports}
source.  Duplicate models (e.g., Dodge Shadow and Plymouth Sundance)
were listed at most once.


Further description can be found in Lock (1993). Use the URL
\url{http://www.amstat.org/publications/jse/v1n1/datasets.lock.html}
}
\source{
Lock, R. H. (1993)
1993 New Car Data.
\emph{Journal of Statistics Education}
\bold{1}(1)
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/Cushings.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Cushings}
\alias{Cushings}
\title{
Diagnostic Tests on Patients with Cushing's Syndrome
}
\description{
Cushing's syndrome is a hypertensive disorder associated with
over-secretion of cortisol by the adrenal gland. The observations
are urinary excretion rates of two steroid metabolites.
}
\usage{
data(Cushings)
}
\format{
The \code{Cushings} data frame has 27 rows and 3 columns:
\describe{
\item{\code{Tetrahydrocortisone}}{
urinary excretion rate (mg/24hr) of Tetrahydrocortisone.
}
\item{\code{Pregnanetriol}}{
urinary excretion rate (mg/24hr) of  Pregnanetriol.
}
\item{\code{Type}}{
underlying type of syndrome, coded \code{a} (adenoma), \code{b}
(bilateral hyperplasia), \code{c} (carcinoma) or \code{u} for unknown.
}}}
\source{
J. Aitchison and I. R. Dunsmore (1975)
\emph{Statistical Prediction Analysis.}
Cambridge University Press, Tables 11.1--3.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/DDT.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{DDT}
\alias{DDT}
\title{
DDT in Kale
}
\description{
A numeric vector of 15 measurements by different laboratories of
the pesticide DDT in kale,
in ppm (parts per million) using the multiple pesticide residue
measurement.
}
\usage{
data(DDT)
}
\source{
C. E. Finsterwalder (1976)
Collaborative study of an extension of the Mills
\emph{et al}
method for the determination of pesticide residues in food.
\emph{J. Off. Anal. Chem.} \bold{59}, 169--171

R. G. Staudte and S. J. Sheather (1990)
\emph{Robust Estimation and Testing.}
Wiley
}
\keyword{datasets}

\eof
% file MASS/GAGurine.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{GAGurine}
\alias{GAGurine}
\title{
Level of GAG in Urine of Children
}
\description{
Data were collected on the concentration of a chemical GAG in the
urine of 314 children aged from zero to seventeen years.  The aim of
the study was to produce a chart to help a paediatrician to assess if a
child's GAG concentration is \dQuote{normal}.
}
\usage{
data(GAGurine)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Age}}{
age of child in years
}
\item{\code{GAG}}{
concentration of GAG (the units have been lost)
}}}
\source{
Mrs Susan Prosser, Paediatrics Department, University of Oxford,
via Department of Statistics Consulting Service.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/Insurance.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Insurance}
\alias{Insurance}
\title{
Numbers of Car Insurance claims
}
\description{
The data given in data frame \code{Insurance} consist of the
numbers of policyholders of an insurance company who were
exposed to risk, and the numbers of car insurance claims made by
those policyholders in the third quarter of 1973.
}
\usage{
data(Insurance)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{District}}{
district of policyholder (1 to 4): 4 is major cities.
}
\item{\code{Group}}{
group of car (1 to 4), <1 litre, 1--1.5 litre, 1.5--2 litre, >2 litre.
}
\item{\code{Age}}{
of driver in 4 ordered groups, <25, 25--29, 30--35, >35.
}
\item{\code{Holders}}{
numbers of policyholders
}
\item{\code{Claims}}{
numbers of claims
}}}
\source{
L. A. Baxter, S. M. Coutts and G. A. F. Ross (1980) Applications of
linear models in motor insurance.
\emph{Proceedings of the 21st International Congress of Actuaries, Zurich}
pp. 11--29

M. Aitkin, D. Anderson, B. Francis and J. Hinde (1989)
\emph{Statistical Modelling in GLIM.}
Oxford University Press.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\examples{
## main-effects fit as Poisson GLM with offset
## log(Holders) enters as an offset, so the counts of claims are
## modelled relative to the number of policyholders in each cell
glm(Claims ~ District + Group + Age + offset(log(Holders)),
    data = Insurance, family = poisson)

# same via loglm
# (same formula and data, passed to loglm instead of glm)
loglm(Claims ~ District + Group + Age + offset(log(Holders)),
      data = Insurance)
}
\keyword{datasets}

\eof
\name{MASS-internal}
\alias{enlist}
\alias{fbeta}
\alias{frequency.polygon}
\alias{nclass.freq}
\alias{neg.bin}
\alias{negexp.SSival}
%\alias{pairs.profile}
%\alias{plot.profile}
%\alias{print.Anova}
%\alias{print.abbrev}
\alias{MASS.data.load}
\title{Internal MASS functions}
\description{
 Internal MASS functions
}
\usage{
enlist(vec)
fbeta(x, alpha, beta)
frequency.polygon(x, nclass = nclass.freq(x), xlab="", ylab="", \dots)
nclass.freq(x)
neg.bin(theta = stop("theta must be given"))
negexp.SSival(mCall, data, LHS)
%pairs.profile(x, colours = 2:3, \dots)
%plot.profile(x, nseg, \dots)
%print.Anova(x, \dots)
MASS.data.load(i)
}
\details{
  These are not to be called by the user.  Some are for compatibility
  with earlier versions of MASS (the book).
}
\keyword{internal}

\eof
% file MASS/Melanoma.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Melanoma}
\alias{Melanoma}
\title{
Survival from Malignant Melanoma
}
\description{
The \code{Melanoma} data frame has data on 205 patients in Denmark
with malignant melanoma.
}
\usage{
data(Melanoma)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{time}}{
survival time in days, possibly censored
}
\item{\code{status}}{
\code{1} died from melanoma, \code{2} alive, \code{3} dead from other causes
}
\item{\code{sex}}{
\code{1} = male, \code{2} = female
}
\item{\code{age}}{
age in years
}
\item{\code{year}}{
of operation
}
\item{\code{thickness}}{
tumour thickness in mm
}
\item{\code{ulcer}}{
\code{1} = presence, \code{0} = absence
}}}
\source{
P. K. Andersen, O. Borgan, R. D. Gill, and N. Keiding (1993)
\emph{Statistical Models based on Counting Processes.}
Springer.
}
\keyword{datasets}

\eof
% file MASS/Null.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Null}
\alias{Null}
\title{
Null Spaces of Matrices
}
\description{
Given a matrix, \code{M}, find a matrix \code{N} giving a basis for the
null space.  That is \code{t(N) \%*\% M}
is the zero matrix and \code{N} has the maximum number of linearly
independent columns.
}
\usage{
Null(M)
}
\arguments{
\item{M}{
Input matrix.  A vector is coerced to a 1-column matrix.
}}
\value{
The matrix \code{N} with the basis for the null space, or an empty
vector if the matrix \code{M} is square and of maximal rank.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{qr}}, \code{\link{qr.Q}}
}
\examples{
# The function is currently defined as
# (shown for reference only: evaluating this creates an anonymous
#  function and discards it)
function(M)
{
	# QR decomposition of M; tmp$rank is the rank it reports
	tmp <- qr(M)
	# columns of the complete Q to keep: every column when the rank is
	# zero, otherwise all but the first tmp$rank columns
	set <- if(tmp$rank == 0) 1:ncol(M) else  - (1:tmp$rank)
	# drop = FALSE keeps the result a matrix even if one column remains
	qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]
}
}
\keyword{algebra}

\eof
% file MASS/OME.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{OME}
\alias{OME}
\title{
Tests of Auditory Perception in Children with OME
}
\description{
Experiments were performed on children on their ability to
differentiate a signal in broad-band noise. The noise was played from
a pair of speakers and a signal was added to just one channel; the
subject had to turn his/her head to the channel with the added signal.
The signal was either coherent (the amplitude of the noise was
increased for a period) or incoherent (independent noise was added for
the same period to form the same increase in power).


The threshold used in the original analysis was the stimulus loudness
needed to get 75\% correct responses. Some of the children had
suffered from otitis media with effusion (OME).
}
\usage{
data(OME)
}
\format{
The \code{OME} data frame has 1129 rows and 7 columns:
\describe{
\item{\code{ID}}{
Subject ID (1 to 99, with some IDs missing). A few subjects were
measured at different ages.
}
\item{\code{OME}}{
\code{"low"} or \code{"high"} or \code{"N/A"} (at ages other than 30 and 60 months).
}
\item{\code{Age}}{
Age of the subject (months).
}
\item{\code{Loud}}{
Loudness of stimulus, in decibels.
}
\item{\code{Noise}}{
Whether the signal in the stimulus was \code{"coherent"} or \code{"incoherent"}.
}
\item{\code{Correct}}{
Number of correct responses from \code{Trials} trials.
}
\item{\code{Trials}}{
Number of trials performed.
}}}
\source{
Sarah Hogan, Dept of Physiology, University of Oxford, via
Dept of Statistics Consulting Service
}
\section{Background}{
The experiment was to study otitis media with effusion (OME), a very
common childhood condition where the middle ear space, which is
normally air-filled, becomes congested by a fluid.  There is a
concomitant fluctuating, conductive hearing loss which can result in
various language, cognitive and social deficits. The term \dQuote{binaural
hearing} is used to describe the listening conditions in which the
brain is processing information from both ears at the same time.  The
brain computes differences in the intensity and/or timing of signals
arriving at each ear which contributes to sound localisation and also
to our ability to hear in background noise.


Some years ago, it was found that children of 7-8 years with a history
of significant OME had significantly worse binaural hearing than
children without such a history, despite having equivalent
sensitivity. The question remained as to whether it was the timing,
the duration, or the degree of severity of the otitis media episodes
during critical periods, which affected later binaural hearing.  In an
attempt to begin to answer this question, 95 children were monitored for
the presence of effusion every month since birth.  On the basis of OME
experience in their first two years, the test population was split
into one group of high OME prevalence and one of low prevalence.
}
\examples{
# Fit logistic curve from p = 0.5 to p = 1.0
# The curve equals 0.75 at x = L75, and scal sets how quickly it rises.
# deriv() builds a function of (x, L75, scal) that also returns the
# gradient with respect to L75 and scal.
fp1 <- deriv(~ 0.5 + 0.5/(1 + exp(-(x-L75)/scal)),
             c("L75", "scal"),
             function(x,L75,scal)NULL)
# all observations pooled
nls(Correct/Trials ~ fp1(Loud, L75, scal), data = OME,
    start = c(L75=45, scal=3))
# rows with a coherent signal only
nls(Correct/Trials ~ fp1(Loud, L75, scal),
    data = OME[OME$Noise == "coherent",],
    start=c(L75=45, scal=3))
# rows with an incoherent signal only
nls(Correct/Trials ~ fp1(Loud, L75, scal),
    data = OME[OME$Noise == "incoherent",],
    start = c(L75=45, scal=3))

# individual fits for each experiment

# Build an identifier UID for each combination of subject ID and age:
# combine ID with the integer code of the age level, then recode the
# distinct values as consecutive integers.
aa <- factor(OME$Age)
ab <- 10*OME$ID + unclass(aa)
ac <- unclass(factor(ab))
OME$UID <- as.vector(ac)
# UIDn additionally separates the two noise types: incoherent rows get
# UID + 0.1, coherent rows keep UID
OME$UIDn <- OME$UID + 0.1*(OME$Noise == "incoherent")
rm(aa, ab, ac)
OMEi <- OME

library(nlme)
# same curve as before but with the scale fixed at 2, so that L75 is
# the only parameter fitted within each group
fp2 <- deriv(~ 0.5 + 0.5/(1 + exp(-(x-L75)/2)),
            "L75", function(x,L75) NULL)
# error messages are switched off while the per-group fits run and
# switched back on afterwards
options(show.error.messages = FALSE)
OMEi.nls <- nlsList(Correct/Trials ~ fp2(Loud, L75) | UIDn,
   data = OMEi, start = list(L75=45), control = list(maxiter=100))
options(show.error.messages = TRUE)
# one L75 estimate per group; groups whose component is NULL give NA
tmp <- sapply(OMEi.nls, function(X)
              {if(is.null(X)) NA else as.vector(coef(X))})
# round() removes the 0.1 added for incoherent rows, recovering UID
# NOTE(review): rep(..., 110) hard-codes 110 coherent/incoherent pairs
# and presumably relies on the groups alternating in that order; confirm
# against the data
OMEif <- data.frame(UID = round(as.numeric((names(tmp)))),
         Noise = rep(c("coherent", "incoherent"), 110),
         L75 = as.vector(tmp))
# look up Age and OME status from the first matching row of OME
OMEif$Age <- OME$Age[match(OMEif$UID, OME$UID)]
OMEif$OME <- OME$OME[match(OMEif$UID, OME$UID)]
# keep only the fits whose estimated L75 exceeds 30
OMEif <- OMEif[OMEif$L75 > 30,]
summary(lm(L75 ~ Noise/Age, data = OMEif, na.action = na.omit))
# restricted to ages from 30 to 60 months inclusive
summary(lm(L75 ~ Noise/(Age + OME), data = OMEif,
           subset = (Age >= 30 & Age <= 60),
           na.action = na.omit), cor = FALSE)

# Or fit by weighted least squares
# fpl75 returns the residual (observed proportion r/n minus the fitted
# curve) multiplied by sqrt(n); fitting it with response 0 therefore
# minimises a sum of squares weighted by the number of trials n.
fpl75 <- deriv(~ sqrt(n)*(r/n - 0.5 - 0.5/(1 + exp(-(x-L75)/scal))),
               c("L75", "scal"),
               function(r,n,x,L75,scal) NULL)
# coherent rows
nls(0 ~ fpl75(Correct, Trials, Loud, L75, scal),
    data = OME[OME$Noise == "coherent",],
    start = c(L75=45, scal=3))
# incoherent rows
nls(0 ~ fpl75(Correct, Trials, Loud, L75, scal),
    data = OME[OME$Noise == "incoherent",],
    start = c(L75=45, scal=3))

# Test to see if the curves shift with age
# as fpl75, but the 0.75 point is L75 + slope*age
fpl75age <- deriv(~sqrt(n)*(r/n -  0.5 - 0.5/(1 +
                  exp(-(x-L75-slope*age)/scal))),
                  c("L75", "slope", "scal"),
                  function(r,n,x,age,L75,slope,scal) NULL)
OME.nls1 <-
nls(0 ~ fpl75age(Correct, Trials, Loud, Age, L75, slope, scal),
    data = OME[OME$Noise == "coherent",],
    start = c(L75=45, slope=0, scal=2))
# square roots of the diagonal of the estimated covariance matrix,
# i.e. standard errors of L75, slope and scal
sqrt(diag(vcov(OME.nls1)))

OME.nls2 <-
nls(0 ~ fpl75age(Correct, Trials, Loud, Age, L75, slope, scal),
    data = OME[OME$Noise == "incoherent",],
    start = c(L75=45, slope=0, scal=2))
# standard errors for the incoherent fit, computed the same way
sqrt(diag(vcov(OME.nls2)))

# Now allow random effects by using NLME
# Expand to one row per trial: each row of OME is repeated Trials times
OMEf <- OME[rep(1:nrow(OME), OME$Trials),]
# attach() makes Correct and Trials visible by name below; it is undone
# by the detach() call at the end
attach(OME)
# binary response: for each original row, Correct ones followed by
# Trials - Correct zeros, matching the row order of OMEf
OMEf$Resp <- rep(rep(c(1,0), length(Trials)),
                 t(cbind(Correct, Trials-Correct)))
# the count columns are dropped from the expanded data frame
OMEf <- OMEf[, -match(c("Correct", "Trials"), names(OMEf))]
detach("OME")

\dontrun{## this fails in R on some platforms
# fp2 is redefined here with the scale written as exp(lsc), which keeps
# the scale positive for any value of lsc
fp2 <- deriv(~ 0.5 + 0.5/(1 + exp(-(x-L75)/exp(lsc))),
             c("L75", "lsc"),
             function(x, L75, lsc) NULL)
# coherent rows: L75 depends linearly on Age, lsc is constant, and both
# have random effects grouped by UID; fitted by maximum likelihood
G1.nlme <- nlme(Resp ~ fp2(Loud, L75, lsc),
     fixed = list(L75 ~ Age, lsc ~ 1),
     random = L75 + lsc ~ 1 | UID,
     data = OMEf[OMEf$Noise == "coherent",], method = "ML",
     start = list(fixed=c(L75=c(48.7, -0.03), lsc=0.24)), verbose = TRUE)
summary(G1.nlme)

# incoherent rows: same model with different starting values
G2.nlme <- nlme(Resp ~ fp2(Loud, L75, lsc),
     fixed = list(L75 ~ Age, lsc ~ 1),
     random = L75 + lsc ~ 1 | UID,
     data = OMEf[OMEf$Noise == "incoherent",], method="ML",
     start = list(fixed=c(L75=c(41.5, -0.1), lsc=0)), verbose = TRUE)
summary(G2.nlme)
}}
\keyword{datasets}

\eof
% file MASS/Pima.tr.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Pima.tr}
\alias{Pima.tr}
\alias{Pima.tr2}
\alias{Pima.te}
\title{
Diabetes in Pima Indian Women
}
\description{
A population of women who were at least 21 years old, of Pima Indian heritage
and living near Phoenix, Arizona,  was tested for diabetes
according to World Health Organization criteria.  The data
were collected by the US National Institute of Diabetes and Digestive and
Kidney Diseases. We used the 532 complete records after dropping the
(mainly missing) data on serum insulin.
}
\usage{
data(Pima.tr)
data(Pima.tr2)
data(Pima.te)
}
\format{
These data frames contain the following columns:
\describe{
\item{\code{npreg}}{
number of pregnancies
}
\item{\code{glu}}{
plasma glucose concentration in an oral glucose tolerance test
}
\item{\code{bp}}{
diastolic blood pressure (mm Hg)
}
\item{\code{skin}}{
triceps skin fold thickness (mm)
}
\item{\code{bmi}}{
body mass index (weight in kg/(height in m)\eqn{^2}{\^2})
}
\item{\code{ped}}{
diabetes pedigree function
}
\item{\code{age}}{
age in years
}
\item{\code{type}}{
\code{Yes} or \code{No}, for diabetic according to WHO criteria
}}}
\details{
The training set \code{Pima.tr} contains a randomly selected set of 200
subjects, and \code{Pima.te} contains the remaining 332 subjects.
\code{Pima.tr2} contains \code{Pima.tr} plus 100 subjects with
missing values in the explanatory variables.
}
\source{
Smith, J. W., Everhart, J. E., Dickson, W. C., Knowler, W. C.
and Johannes, R. S. (1988)
Using the ADAP learning algorithm to forecast the onset of
\emph{diabetes mellitus}.
In
\emph{Proceedings of the Symposium on Computer Applications in Medical Care (Washington, 1988),}
ed. R. A. Greenes, pp. 261--265.
Los Alamitos, CA: IEEE Computer Society Press.


Ripley, B.D. (1996)
\emph{Pattern Recognition and Neural Networks.}
Cambridge: Cambridge University Press.
}
\keyword{datasets}

\eof
% file MASS/Rabbit.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Rabbit}
\alias{Rabbit}
\title{
Blood Pressure in Rabbits
}
\description{
Five rabbits were studied on two occasions, after treatment with
saline (control) and after treatment with the \eqn{5-HT_3} antagonist MDL
72222.  After each treatment ascending doses of phenylbiguanide were
injected intravenously at 10 minute intervals and the responses of
mean blood pressure measured.  The goal was to test whether the
cardiogenic chemoreflex elicited by phenylbiguanide depends on the
activation of \eqn{5-HT_3} receptors.
}
\usage{
data(Rabbit)
}
\format{
This data frame contains 60 rows and the following variables:
\describe{
\item{\code{BPchange}}{
change in blood pressure relative to the start of the experiment
}
\item{\code{Dose}}{
dose of Phenylbiguanide in micrograms
}
\item{\code{Run}}{
label of run (\code{"C1"} to \code{"C5"}, then \code{"M1"} to \code{"M5"})
}
\item{\code{Treatment}}{
placebo or the \eqn{5-HT_3} antagonist MDL 72222
}
\item{\code{Animal}}{
label of animal used (\code{"R1"} to \code{"R5"})
}}}
\source{
J. Ludbrook (1994)
Repeated measurements and multiple comparisons in cardiovascular research.
\emph{Cardiovascular Research}
\bold{28}, 303--311.\cr
[The numerical data are not in the paper but were supplied by
Professor Ludbrook]
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/Rubber.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Rubber}
\alias{Rubber}
\title{
Accelerated Testing of Tyre Rubber
}
\description{
Data frame from accelerated testing of tyre rubber.
}
\usage{
data(Rubber)
}
\format{
\describe{
\item{\code{loss}}{
the abrasion loss in gm/hr.
}
\item{\code{hard}}{
the hardness in Shore units.
}
\item{\code{tens}}{
tensile strength in kg/sq m.
}}}
\source{
O.L. Davies (1947)
\emph{Statistical Methods in Research and Production.}
Oliver and Boyd, Table 6.1 p. 119.


O.L. Davies and P.L. Goldsmith (1972)
\emph{Statistical Methods in Research and Production.}
4th edition, Longmans, Table 8.1 p. 239.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
\name{SP500}
\alias{SP500}
\title{
Returns of the Standard and Poors 500
}
\description{
Returns of the Standard and Poors 500 Index in the 1990's
}
\usage{
data(SP500)
}
\format{
A vector of returns of the Standard and Poors 500 index for all
the trading days in 1990, 1991, \dots, 1999.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/Sitka.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Sitka}
\alias{Sitka}
\title{
Growth Curves for Sitka Spruce Trees in 1988
}
\description{
The \code{Sitka} data frame has 395 rows and 4 columns.  It gives repeated
measurements on the log-size of 79 Sitka spruce trees, 54 of which
were grown in ozone-enriched chambers and 25 were controls.  The size
was measured five times in 1988, at roughly monthly intervals.
}
\usage{
data(Sitka)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{size}}{
measured size (height times diameter squared) of tree, on log scale
}
\item{\code{Time}}{
time of measurement in  days since 1 January 1988
}
\item{\code{tree}}{
number of tree
}
\item{\code{treat}}{
either \code{"ozone"} for an ozone-enriched chamber or \code{"control"}
}}}
\seealso{
\code{\link{Sitka89}}
}
\source{
P. J. Diggle, K.-Y. Liang and S. L. Zeger (1994)
\emph{Analysis of Longitudinal Data.}
Clarendon Press, Oxford
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/Sitka89.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Sitka89}
\alias{Sitka89}
\title{
Growth Curves for Sitka Spruce Trees in 1989
}
\description{
The \code{Sitka89} data frame has 632 rows and 4 columns.  It gives repeated
measurements on the log-size of 79 Sitka spruce trees, 54 of which
were grown in ozone-enriched chambers and 25 were controls.  The size
was measured eight times in 1989, at roughly monthly intervals.
}
\usage{
data(Sitka89)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{size}}{
measured size (height times diameter squared) of tree, on log scale
}
\item{\code{Time}}{
time of measurement in  days since 1 January 1988
}
\item{\code{tree}}{
number of tree
}
\item{\code{treat}}{
either \code{"ozone"} for an ozone-enriched chamber or \code{"control"}
}}}
\seealso{
\code{\link{Sitka}}
}
\source{
P. J. Diggle, K.-Y. Liang and S. L. Zeger (1994)
\emph{Analysis of Longitudinal Data.}
Clarendon Press, Oxford
}
\keyword{datasets}

\eof
% file MASS/Skye.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Skye}
\alias{Skye}
\title{
AFM Compositions of Aphyric Skye Lavas
}
\description{
The \code{Skye} data frame has 23 rows and 3 columns.
}
\usage{
data(Skye)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{A}}{
Percentage of sodium and potassium oxides
}
\item{\code{F}}{
Percentage of iron oxide
}
\item{\code{M}}{
Percentage of magnesium oxide
}}}
\source{
R. N. Thompson, J. Esson and A. C. Duncan (1972)
Major element chemical variation in the Eocene lavas of the Isle of
Skye. \emph{J. Petrology}, \bold{13}, 219--253.
}
\references{
J. Aitchison (1986)
\emph{The Statistical Analysis of Compositional Data.}
Chapman and Hall, p.360.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
# ternary() is from the on-line answers.
# Draw a ternary (triangular) diagram of three-part compositions.
#   X     matrix or data frame with one composition per row; entries
#         must be non-negative and every row must have a positive sum.
#         Rows that do not sum to 1 are rescaled, with a warning.
#   pch   plotting symbol, passed on to points()
#   lcex  character expansion used for the corner labels
#   add   if TRUE, only add points to an existing diagram
#   ord   order in which the columns of X are assigned to the top,
#         right and left corners
#   remaining arguments are passed on to points()
ternary <- function(X, pch = par("pch"), lcex = 1,
                    add = FALSE, ord = 1:3, \dots)
{
  X <- as.matrix(X)
  # test the entries themselves: any(X) < 0 compares a logical with 0
  # and so can never be TRUE
  if(any(X < 0)) stop("X must be non-negative")
  s <- drop(X \%*\% rep(1, ncol(X)))
  if(any(s<=0)) stop("each row of X must have a positive sum")
  if(max(abs(s-1)) > 1e-6) {
    warning("row(s) of X will be rescaled")
    # s has one entry per row, so this divides each row by its own sum
    X <- X / s
  }
  # drop = FALSE keeps a single composition as a one-row matrix
  X <- X[, ord, drop = FALSE]
  s3 <- sqrt(1/3)
  if(!add)
  {
    # square plotting region, restored when the function exits
    oldpty <- par("pty")
    on.exit(par(pty=oldpty))
    par(pty="s")
    plot(c(-s3, s3), c(0.5-s3, 0.5+s3), type="n", axes=FALSE,
         xlab="", ylab="")
    # triangle with apex at (0, 1) and base from (-s3, 0) to (s3, 0)
    polygon(c(0, -s3, s3), c(1, 0, 0), density=0)
    # corner labels: column names if present, otherwise 1, 2, 3
    lab <- NULL
    if(!is.null(dn <- dimnames(X))) lab <- dn[[2]]
    if(length(lab) < 3) lab <- as.character(1:3)
    eps <- 0.05 * lcex
    text(c(0, s3+eps*0.7, -s3-eps*0.7),
         c(1+eps, -0.1*eps, -0.1*eps), lab, cex=lcex)
  }
  # first component gives the height, and the difference of the other
  # two gives the horizontal position
  points((X[,2] - X[,3])*s3, X[,1], pch = pch, \dots)
}

ternary(Skye/100, ord=c(1,3,2))
}
\keyword{datasets}

\eof
% file MASS/Traffic.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Traffic}
\alias{Traffic}
\title{
Effect of Swedish Speed Limits on Accidents
}
\description{
An experiment was performed in Sweden in 1961-2 to assess the
effect of a speed limit on the motorway accident rate.  The
experiment was conducted on 92 days in each year, matched so that
day \code{j} in 1962 was comparable to day \code{j} in 1961.  On some days
the speed limit was in effect and enforced, while on other days
there was no speed limit and cars tended to be driven faster.
The speed limit days tended to be in contiguous blocks.
}
\usage{
data(Traffic)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{year}}{
1961 or 1962
}
\item{\code{day}}{
of year
}
\item{\code{limit}}{
was there a speed limit?
}
\item{\code{y}}{
traffic accident count for that day
}}}
\source{
Svensson, A. (1981)
On the goodness-of-fit test for the multiplicative Poisson model.
\emph{Annals of Statistics,}
\bold{9}, 697--704.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/UScereal.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{UScereal}
\alias{UScereal}
\title{
Nutritional and Marketing Information on US Cereals
}
\description{
The \code{UScereal} data frame has 65 rows and 11 columns.
The data come from the 1993 ASA Statistical Graphics Exposition,
and are taken from the mandatory  F&DA food label. The data have been
normalized here to a portion of one American cup.
}
\usage{
data(UScereal)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{mfr}}{
Manufacturer, represented by its first initial: G=General Mills,
K=Kelloggs, N=Nabisco, P=Post, Q=Quaker Oats, R=Ralston Purina.
}
\item{\code{calories}}{
number of calories in one portion
}
\item{\code{protein}}{
grams of protein in one portion
}
\item{\code{fat}}{
grams of fat in one portion
}
\item{\code{sodium}}{
milligrams of sodium in one portion
}
\item{\code{fibre}}{
grams of dietary fibre in one portion
}
\item{\code{carbo}}{
grams of complex carbohydrates in one portion
}
\item{\code{sugars}}{
grams of sugars in one portion
}
\item{\code{shelf}}{
display shelf (1, 2, or 3, counting from the floor)
}
\item{\code{potassium}}{
grams of potassium
}
\item{\code{vitamins}}{
vitamins and minerals (none, enriched, or 100\%)
}}}
\source{
The original data are available at
\url{http://lib.stat.cmu.edu/datasets/1993.expo/}.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/UScrime.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{UScrime}
\alias{UScrime}
\title{
The Effect of Punishment Regimes on Crime Rates
}
\description{
Criminologists are interested in the effect of punishment regimes on
crime rates.  This has been studied using aggregate data on 47 states
of the USA for 1960 given in this data frame. The variables seem to
have been re-scaled to convenient numbers.
}
\usage{
data(UScrime)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{M}}{
percentage of males aged 14-24
}
\item{\code{So}}{
indicator variable for a southern state
}
\item{\code{Ed}}{
mean years of schooling
}
\item{\code{Po1}}{
police expenditure in 1960
}
\item{\code{Po2}}{
police expenditure in 1959
}
\item{\code{LF}}{
labour force participation rate
}
\item{\code{M.F}}{
number of males per 1000 females
}
\item{\code{Pop}}{
state population
}
\item{\code{NW}}{
number of nonwhites per 1000 people
}
\item{\code{U1}}{
unemployment rate of urban males 14-24
}
\item{\code{U2}}{
unemployment rate of urban males 35-39
}
\item{\code{GDP}}{
gross domestic product per head
}
\item{\code{Ineq}}{
income inequality
}
\item{\code{Prob}}{
probability of imprisonment
}
\item{\code{Time}}{
average time served in state prisons
}
\item{\code{y}}{
rate of crimes in a particular category per head of population
}}}
\source{
Ehrlich, I. (1973) Participation in illegitimate activities: a
theoretical and empirical investigation.
\emph{Journal of Political Economy}, \bold{81}, 521--565.

Vandaele, W. (1978) Participation in illegitimate activities: Ehrlich
revisited.  In \emph{Deterrence and Incapacitation},
eds A. Blumstein, J. Cohen and D. Nagin, pp. 270--335.
US National Academy of Sciences.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
\name{VA}
\alias{VA}
\title{
  Veteran's Administration Lung Cancer Trial
}
\description{
  Veteran's Administration lung cancer trial from Kalbfleisch & Prentice.
}
\usage{
data(VA)
}
\format{
A data frame with columns:
\describe{
\item{\code{stime}}{
  survival or follow-up time in days.
}
\item{\code{status}}{
  dead or censored.
}
\item{\code{treat}}{
  standard or test
}
\item{\code{age}}{
patient's age in years
}
\item{\code{Karn}}{
  Karnofsky score of patient's performance on a scale of 0 to 100.
}
\item{\code{diag.time}}{
  time since diagnosis in months at entry to trial.
}
\item{\code{cell}}{
  one of four cell types.
}
\item{\code{prior}}{
  prior therapy?
}}
}
\source{
Kalbfleisch, J.D. and Prentice, R.L. (1980)
\emph{The Statistical Analysis of Failure Time Data.}
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/abbey.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{abbey}
\alias{abbey}
\title{
Determinations of Nickel Content
}
\description{
A numeric vector of 31 determinations of nickel content (ppm) in
a Canadian syenite rock.
}
\usage{
data(abbey)
}
\source{
  S. Abbey (1988) \emph{Geostandards Newsletter} \bold{12}, 241.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/accdeaths.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{accdeaths}
\alias{accdeaths}
\title{
Accidental Deaths in the US 1973-1978
}
\description{
A regular time series giving the monthly totals of accidental
deaths in the USA. The values for the first six months of 1979 (p. 326) are
\code{7798 7406 8363 8460 9217 9316}.
}
\usage{
data(accdeaths)
}
\source{
P. J. Brockwell and R. A. Davis (1991)
\emph{Time Series: Theory and Methods.}
Springer, New York.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/addterm.d
% copyright (C) 1998-9 W. N. Venables and B. D. Ripley
%
\name{addterm}
\alias{addterm}
\alias{addterm.default}
\alias{addterm.glm}
\alias{addterm.lm}
%\alias{addterm.mlm}
%\alias{addterm.negbin}
%\alias{addterm.survreg}
\title{
Try All One-Term Additions to a Model
}
\description{
Try fitting all models that differ from the current model by adding a
single term from those supplied, maintaining marginality.


This function is generic; there exist methods for classes \code{lm} and
\code{glm} and the default method will work for many other classes.
}
\usage{
addterm(object, \dots)

\method{addterm}{default}(object, scope, scale = 0, test = c("none", "Chisq", "F"),
        k = 2, sorted = FALSE, trace = FALSE, \dots)
\method{addterm}{lm}(object, scope, scale = 0, test = c("none", "Chisq", "F"),
        k = 2, sorted = FALSE, \dots)
\method{addterm}{glm}(object, scope, scale = 0, test = c("none", "Chisq", "F"),
        k = 2, sorted = FALSE, trace = FALSE, \dots)
}
\arguments{
\item{object}{
An object fitted by some model-fitting function.
}
\item{scope}{
a formula specifying a maximal model which should include the current
one. All additional terms in the maximal model with all marginal terms
in the original model are tried.
}
\item{scale}{
used in the definition of the AIC statistic for selecting the models,
currently only for \code{lm}, \code{aov} and \code{glm} models. Specifying \code{scale}
asserts that the residual standard error or dispersion is known.
}
\item{test}{
should the results include a test statistic relative to the original
model?  The F test is only appropriate for \code{lm} and \code{aov} models,
and perhaps for some over-dispersed \code{glm} models. The
Chisq test can be an exact test (\code{lm} models with known scale) or a
likelihood-ratio test depending on the method.
}
\item{k}{
the multiple of the number of degrees of freedom used for the penalty.
Only \code{k=2} gives the genuine AIC: \code{k = log(n)} is sometimes referred
to as BIC or SBC.
}
\item{sorted}{
should the results be sorted on the value of AIC?
}
\item{trace}{
if \code{TRUE} additional information may be given on the fits as they are tried.
}
\item{\dots}{
arguments passed to or from other methods.
}}
\value{
A table of class \code{"anova"} containing at least columns for the change
in degrees of freedom and AIC (or Cp) for the models. Some methods
will give further information, for example sums of squares, deviances,
log-likelihoods and test statistics.
}
\details{
The definition of AIC is only up to an additive constant: when
appropriate (\code{lm} models with specified scale) the constant is taken
to be that used in Mallows' Cp statistic and the results are labelled
accordingly.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{dropterm}}, \code{\link{stepAIC}}
}
\examples{
quine.hi <- aov(log(Days + 2.5) ~ .^4, quine)
quine.lo <- aov(log(Days+2.5) ~ 1, quine)
addterm(quine.lo, quine.hi, test="F")

house.glm0 <- glm(Freq ~ Infl*Type*Cont + Sat, family=poisson,
                   data=housing)
addterm(house.glm0, ~. + Sat:(Infl+Type+Cont), test="Chisq")
house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl+Type+Cont))
addterm(house.glm1, ~. + Sat:(Infl+Type+Cont)^2, test = "Chisq")
}
\keyword{models}

\eof
% file MASS/anorexia.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{anorexia}
\alias{anorexia}
\title{
Anorexia Data on Weight Change
}
\description{
The \code{anorexia} data frame has 72 rows and 3 columns.
Weight change data for young female anorexia patients.
}
\usage{
data(anorexia)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Treat}}{
Factor of three levels: Cont (Control), CBT (Cognitive Behavioural
Treatment) and  FT (Family treatment).
}
\item{\code{Prewt}}{
Weight of patient before study period, in lbs.
}
\item{\code{Postwt}}{
Weight of patient after study period, in lbs.
}}}
\source{
Hand, D. J., Daly, F., McConway, K., Lunn, D. and Ostrowski, E. eds (1993)
\emph{A Handbook of Small Data Sets.}
Chapman & Hall, Data set 285 (p. 229)

(Note that the original source mistakenly says that weights are in kg.)
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/anova.negbin.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{anova.negbin}
\alias{anova.negbin}
\title{
Likelihood Ratio Tests for Negative Binomial GLMs
}
\description{
Method function to perform sequential likelihood ratio tests for Negative
Binomial generalized linear models.
}
\usage{
\method{anova}{negbin}(object, \dots, test = "Chisq")
}
\arguments{
\item{object}{
  Fitted model object of class \code{"negbin"}, inheriting from
  classes \code{"glm"} and \code{"lm"}, specifying a Negative Binomial
  fitted GLM.  Typically the output of \code{\link{glm.nb}()}.
}
\item{\dots}{
Zero or more additional fitted model objects of class \code{"negbin"}.  They
should form a nested sequence of models, but need not be specified in any
particular order.
}
\item{test}{
  Argument to match the \code{test} argument of \code{\link{anova.glm}}.
  Ignored (with a warning if changed) if a sequence of two or more
  Negative Binomial fitted model objects is specified, but possibly
  used if only one object is specified.
}}
\note{
If only one fitted model object is specified, a sequential analysis of
deviance table is given for the fitted model.  The \code{theta} parameter is kept
fixed.  If more than one fitted model object is specified they must all be
of class \code{"negbin"} and likelihood ratio tests are done of each model within
the next.  In this case \code{theta} is assumed to have been re-estimated for each
model.
}
\details{
This function is a method for the generic function
\code{anova()} for class \code{"negbin"}.
It can be invoked by calling \code{anova(x)} for an
object \code{x} of the appropriate class, or directly by
calling \code{anova.negbin(x)} regardless of the
class of the object.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{glm.nb}}, \code{\link{negative.binomial}}, \code{\link{summary.negbin}}
}
\examples{
m1 <- glm.nb(Days ~ Eth*Age*Lrn*Sex, quine, link = log)
m2 <- update(m1, . ~ . - Eth:Age:Lrn:Sex)
anova(m2, m1)
anova(m2)
}
\keyword{regression}

\eof
% file MASS/area.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{area}
\alias{area}
\title{
Adaptive Numerical Integration
}
\description{
Integrate a function of one variable over a finite range using a
recursive adaptive method.  This function is mainly for
demonstration purposes.
}
\usage{
area(f, a, b, \dots, fa = f(a, \dots), fb = f(b, \dots),
     limit = 10, eps = 1e-05)
}
\arguments{
\item{f}{
The integrand as an \code{S} function object.  The variable of integration must be
the first argument.
}
\item{a}{
Lower limit of integration.
}
\item{b}{
Upper limit of integration.
}
\item{\dots}{
Additional arguments needed by the integrand.
}
\item{fa}{
Function value at the lower limit.
}
\item{fb}{
Function value at the upper limit.
}
\item{limit}{
Limit on the depth to which recursion is allowed to go.
}
\item{eps}{
Error tolerance to control the process.
}}
\value{
The integral from \code{a} to \code{b} of \code{f(x)}.
}
\details{
The method divides the interval in two and compares the values given by
Simpson's rule and the trapezium rule.  If these are within \code{eps} of each
other the Simpson's rule result is given, otherwise the process is applied
separately to each half of the interval and the results added together.
}
\references{
  Venables, W. N. and Ripley, B. D. (1994)
  \emph{Modern Applied Statistics with S-Plus.} Springer.
  pp. 105--110.
}
\examples{
area(sin, 0, pi)  # integrate the sin function from 0 to pi.
}
\keyword{nonlinear}

\eof
% file MASS/austres.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{austres-MASS}
\alias{austres}
\title{
Quarterly Time Series of the Number of Australian Residents
}
\description{
Numbers (in thousands) of Australian residents measured quarterly from
March 1971 to March 1994. The object is of class \code{"ts"}.
}
\usage{
data(austres)
}
\source{
P. J. Brockwell and R. A. Davis (1996)
\emph{Introduction to Time Series and Forecasting.}
Springer
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
\name{bacteria}
\alias{bacteria}
\non_function{}
\title{
Presence of Bacteria after Drug Treatments
}
\description{
Tests of the presence of the bacteria
\emph{H. influenzae}
in children with otitis media in the Northern Territory of Australia.
}
\usage{
data(bacteria)
}
\format{
This data frame has 220 rows and the following columns:
  \describe{
    \item{y}{presence or absence: a factor with levels
      \code{n} and \code{y}.}
    \item{ap}{active/placebo: a factor with levels \code{a} and \code{p}.}
    \item{hilo}{hi/low compliance: a factor with levels \code{hi} and
      \code{lo}.}
    \item{week}{numeric: week of test.}
    \item{ID}{subject ID: a factor.}
    \item{trt}{a factor with levels \code{placebo}, \code{drug} and
      \code{drug+}, a re-coding of \code{ap} and \code{hilo}.}
  }
}
\details{
Dr A. Leach tested the effects of a drug on 50 children with a history of
otitis media in the Northern Territory of Australia.  The children
were randomized to the drug or a placebo, and also to receive
active encouragement to comply with taking the drug.

The presence of
\emph{H. influenzae}
was checked at weeks 0, 2, 4, 6
and 11: 30 of the checks were missing and are not included in this
data frame.
}
\source{
Menzies School of Health Research 1999--2000 Annual Report pp. 18--21
(\url{http://www.menzies.edu.au/publications/anreps/MSHR00.pdf}).
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
contrasts(bacteria$trt) <- structure(contr.sdif(3),
     dimnames = list(NULL, c("drug", "encourage")))
## fixed effects analyses
summary(glm(y ~ trt * week, binomial, data = bacteria))
summary(glm(y ~ trt + week, binomial, data = bacteria))
summary(glm(y ~ trt + I(week > 2), binomial, data = bacteria))

# conditional random-effects analysis
library(survival)
bacteria$Time <- rep(1, nrow(bacteria))
coxph(Surv(Time, unclass(y)) ~ week + strata(ID),
      data = bacteria, method = "exact")
coxph(Surv(Time, unclass(y)) ~ factor(week) + strata(ID),
      data = bacteria, method = "exact")
coxph(Surv(Time, unclass(y)) ~ I(week > 2) + strata(ID),
      data = bacteria, method = "exact")

# PQL glmm analysis
library(nlme)
summary(glmmPQL(y ~ trt + I(week > 2), random = ~ 1 | ID,
                family = binomial, data = bacteria))
}
\keyword{datasets}

\eof
\name{bandwidth.nrd}
\alias{bandwidth.nrd}
\title{
Bandwidth for density() via Normal Reference Distribution
}
\description{
A well-supported rule-of-thumb for choosing the bandwidth of a Gaussian
kernel density estimator.
}
\usage{
bandwidth.nrd(x)
}
\arguments{
\item{x}{
A data vector.
}}
\value{
  A bandwidth on a scale suitable for the \code{width} argument of
  \code{density}.
}
\references{
Venables, W. N. and Ripley, B. D. (2002)
\emph{Modern Applied Statistics with S.}
Springer, equation (5.5) on page 130.
}
\examples{
# The function is currently defined as
function(x)
{
	r <- quantile(x, c(0.25, 0.75))
	h <- (r[2] - r[1])/1.34
	4 * 1.06 * min(sqrt(var(x)), h) * length(x)^(-1/5)
}
}
\keyword{dplot}

\eof
% file MASS/bcv.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{bcv}
\alias{bcv}
\title{
Biased Cross-Validation for Bandwidth Selection
}
\description{
Uses biased cross-validation to select the bandwidth of a  Gaussian
kernel density estimator.
}
\usage{
bcv(x, nb = 1000, lower, upper)
}
\arguments{
\item{x}{
a numeric vector
}
\item{nb}{
number of bins to use.
}
\item{lower, upper}{
Range over which to minimize.  The default is almost always satisfactory.
}}
\value{
a bandwidth
}
\references{
Scott, D. W. (1992)
\emph{Multivariate Density Estimation: Theory, Practice, and Visualization.}
Wiley.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{ucv}}, \code{\link{width.SJ}}, \code{\link{density}}
}
\examples{
bcv(geyser$duration)
}
\keyword{dplot}

\eof
% file MASS/beav1.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{beav1}
\alias{beav1}
\title{
Body Temperature Series of Beaver 1
}
\description{
Reynolds (1994) describes a
small part of a study of the long-term temperature dynamics of beaver
\emph{Castor canadensis}
in north-central Wisconsin.  Body temperature was measured by
telemetry every 10 minutes for four females, but data from one
period of less than a day for each of two animals is used there.
}
\usage{
data(beav1)
}
\format{
The \code{beav1} data frame has 114 rows and 4 columns.
This data frame contains the following columns:
\describe{
\item{\code{day}}{
Day of observation (in days since the beginning of 1990),
December 12-13.
}
\item{\code{time}}{
Time of observation, in the form \code{0330} for 3.30am
}
\item{\code{temp}}{
Measured body temperature in degrees Celsius
}
\item{\code{activ}}{
Indicator of activity outside the retreat
}}}
\note{
The observation at 22:20 is missing.
}
\source{
P. S. Reynolds (1994) Time-series analyses of beaver body temperatures.
Chapter 11 of
Lange, N., Ryan, L., Billard, L., Brillinger, D., Conquest, L.
and Greenhouse, J. eds (1994)
\emph{Case Studies in Biometry.}
New York: John Wiley and Sons.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{beav2}}
}
\examples{
attach(beav1)
beav1$hours <- 24*(day-346) + trunc(time/100) + (time\%\%100)/60
plot(beav1$hours, beav1$temp, type="l", xlab="time",
   ylab="temperature", main="Beaver 1")
usr <- par("usr"); usr[3:4] <- c(-0.2, 8); par(usr=usr)
lines(beav1$hours, beav1$activ, type="s", lty=2)
temp <- ts(c(beav1$temp[1:82], NA, beav1$temp[83:114]), start=9.5, frequency=6)
activ <- ts(c(beav1$activ[1:82], NA, beav1$activ[83:114]), start=9.5, frequency=6)


acf(temp[1:53])
acf(temp[1:53], type = "partial")
ar(temp[1:53])
act <- c(rep(0, 10), activ)
X <- cbind(1, act = act[11:125], act1 = act[10:124],
          act2 = act[9:123], act3 = act[8:122])
alpha <- 0.80
stemp <- as.vector(temp - alpha*lag(temp, -1))
sX <- X[-1, ] - alpha * X[-115,]
beav1.ls <- lm(stemp ~ -1 + sX, na.action = na.omit)
summary(beav1.ls, cor = FALSE)
detach("beav1"); rm(temp, activ)
}
\keyword{datasets}

\eof
% file MASS/beav2.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{beav2}
\alias{beav2}
\title{
Body Temperature Series of Beaver 2
}
\description{
Reynolds (1994) describes a
small part of a study of the long-term temperature dynamics of beaver
\emph{Castor canadensis}
in north-central Wisconsin.  Body temperature was measured by
telemetry every 10 minutes for four females, but data from one
period of less than a day for each of two animals is used there.
}
\usage{
data(beav2)
}
\format{
The \code{beav2} data frame has 100 rows and 4 columns.
This data frame contains the following columns:
\describe{
\item{\code{day}}{
Day of observation (in days since the beginning of 1990),
November 3-4.
}
\item{\code{time}}{
Time of observation, in the form \code{0330} for 3.30am
}
\item{\code{temp}}{
Measured body temperature in degrees Celsius
}
\item{\code{activ}}{
Indicator of activity outside the retreat
}}}
\source{
P. S. Reynolds (1994) Time-series analyses of beaver body temperatures.
Chapter 11 of
Lange, N., Ryan, L., Billard, L., Brillinger, D., Conquest, L.
and Greenhouse, J. eds (1994)
\emph{Case Studies in Biometry.}
New York: John Wiley and Sons.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{beav1}}
}
\examples{
attach(beav2)
beav2$hours <- 24*(day-307) + trunc(time/100) + (time\%\%100)/60
plot(beav2$hours, beav2$temp, type = "l", xlab = "time",
   ylab = "temperature", main = "Beaver 2")
usr <- par("usr"); usr[3:4] <- c(-0.2, 8); par(usr = usr)
lines(beav2$hours, beav2$activ, type = "s", lty = 2)

temp <- ts(temp, start = 8+2/3, frequency = 6)
activ <- ts(activ, start = 8+2/3, frequency = 6)
acf(temp[activ == 0]); acf(temp[activ == 1]) # also look at PACFs
ar(temp[activ == 0]); ar(temp[activ == 1])

arima(temp, order = c(1,0,0), xreg = activ)
dreg <- cbind(sin = sin(2*pi*beav2$hours/24), cos = cos(2*pi*beav2$hours/24))
arima(temp, order = c(1,0,0), xreg = cbind(active=activ, dreg))

library(nlme)
beav2.gls <- gls(temp ~ activ, data = beav2, corr = corAR1(0.8),
                 method = "ML")
summary(beav2.gls)
summary(update(beav2.gls, subset = 6:100))
detach("beav2"); rm(temp, activ)
}
\keyword{datasets}

\eof
% file MASS/biopsy.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{biopsy}
\alias{biopsy}
\title{
Biopsy Data on Breast Cancer Patients
}
\description{
This breast cancer database was obtained from the University of Wisconsin
Hospitals, Madison from Dr. William H. Wolberg. He assessed biopsies
of breast tumours for 699 patients up to 15 July 1992; each of nine
attributes has been scored on a scale of 1 to 10, and the outcome is
also known. There are 699 rows and 11 columns.
}
\usage{
data(biopsy)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{ID}}{
Sample code number (not unique)
}
\item{\code{V1}}{
Clump thickness
}
\item{\code{V2}}{
Uniformity of cell size
}
\item{\code{V3}}{
Uniformity of cell shape
}
\item{\code{V4}}{
Marginal adhesion
}
\item{\code{V5}}{
Single epithelial cell size
}
\item{\code{V6}}{
Bare nuclei (16 values are missing)
}
\item{\code{V7}}{
Bland chromatin
}
\item{\code{V8}}{
Normal nucleoli
}
\item{\code{V9}}{
Mitoses
}
\item{\code{class}}{
\code{"benign"} or \code{"malignant"}
}}}
\source{
P. M. Murphy and D. W. Aha  (1992). UCI Repository of machine
learning databases. [Machine-readable data repository]. Irvine, CA:
University of California, Department of Information and Computer Science.

O. L. Mangasarian and W. H. Wolberg (1990)
Cancer diagnosis via linear programming.
\emph{SIAM News} \bold{23}, pp 1 & 18.

William H. Wolberg and O.L. Mangasarian (1990)
Multisurface method of pattern separation for medical diagnosis
applied to breast cytology.
\emph{Proceedings of the National Academy of Sciences, U.S.A.}
\bold{87}, pp. 9193--9196.

O. L. Mangasarian, R. Setiono and W.H. Wolberg (1990)
Pattern recognition via linear programming: Theory and application
to medical diagnosis. In
\emph{Large-scale Numerical Optimization}
eds Thomas F. Coleman and Yuying Li, SIAM Publications, Philadelphia,
pp 22--30.

K. P. Bennett and O. L. Mangasarian (1992)
Robust linear programming discrimination of two linearly inseparable sets.
\emph{Optimization Methods and Software}
\bold{1}, pp. 23--34 (Gordon & Breach Science Publishers).
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/birthwt.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{birthwt}
\alias{birthwt}
\title{
Risk Factors Associated with Low Infant Birth Weight
}
\description{
The \code{birthwt} data frame has 189 rows and 10 columns.
The data were collected at Baystate Medical Center, Springfield, Mass
during 1986.
}
\usage{
data(birthwt)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{low}}{
indicator of birth weight less than 2.5kg
}
\item{\code{age}}{
mother's age in years
}
\item{\code{lwt}}{
mother's weight in pounds at last menstrual period
}
\item{\code{race}}{
mother's race (\code{1} = white, \code{2} = black, \code{3} = other)
}
\item{\code{smoke}}{
smoking status during pregnancy
}
\item{\code{ptl}}{
number of previous premature labours
}
\item{\code{ht}}{
history of hypertension
}
\item{\code{ui}}{
presence of uterine irritability
}
\item{\code{ftv}}{
number of physician visits during the first trimester
}
\item{\code{bwt}}{
birth weight in grams
}}}
\source{
Hosmer, D.W. and Lemeshow, S. (1989)
\emph{Applied Logistic Regression.}
New York: Wiley
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
attach(birthwt)
race <- factor(race, labels = c("white", "black", "other"))
ptd <- factor(ptl > 0)
ftv <- factor(ftv)
levels(ftv)[-(1:2)] <- "2+"
bwt <- data.frame(low = factor(low), age, lwt, race,
    smoke = (smoke > 0), ptd, ht = (ht > 0), ui = (ui > 0), ftv)
detach("birthwt")
options(contrasts = c("contr.treatment", "contr.poly"))
glm(low ~ ., binomial, bwt)
}
\keyword{datasets}

\eof
% file MASS/boxcox.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{boxcox}
\alias{boxcox}
\alias{boxcox.default}
\alias{boxcox.formula}
\alias{boxcox.lm}
\title{
Box-Cox Transformations for Linear Models
}
\description{
Computes and optionally plots profile log-likelihoods for the
parameter of the Box-Cox power transformation.
}
\usage{
boxcox(object, \dots)

\method{boxcox}{default}(object, lambda = seq(-2, 2, 1/10), plotit = TRUE,
       interp, eps = 1/50, xlab = expression(lambda),
       ylab = "log-Likelihood", \dots)

\method{boxcox}{formula}(object, lambda = seq(-2, 2, 1/10), plotit = TRUE,
       interp, eps = 1/50, xlab = expression(lambda),
       ylab = "log-Likelihood", \dots)

\method{boxcox}{lm}(object, lambda = seq(-2, 2, 1/10), plotit = TRUE,
       interp, eps = 1/50, xlab = expression(lambda),
       ylab = "log-Likelihood", \dots)
}
\arguments{
\item{object}{
  a formula or fitted model object.  Currently only \code{lm} and
  \code{aov} objects are handled.
}
\item{lambda}{
vector of values of \code{lambda} -- default \eqn{(-2, 2)} in steps of 0.1.
}
\item{plotit}{
logical which controls whether the result should be plotted.
}
\item{interp}{
logical which controls whether spline interpolation is used.
Defaults to \code{TRUE} if plotting with \code{lambda} of length less than 100.
}
\item{eps}{
Tolerance for \code{lambda = 0}; defaults to 0.02.
}
\item{xlab}{
defaults to \code{"lambda"}.
}
\item{ylab}{
defaults to \code{"log-Likelihood"}.
}
\item{\dots}{
additional parameters to be used in the model fitting.
}}
\value{
A list of the \code{lambda} vector and the computed profile
log-likelihood vector, invisibly if the result is plotted.
}
\section{Side Effects}{
If \code{plotit = TRUE} plots loglik \emph{vs} \code{lambda} and indicates a 95\%
confidence interval about the maximum observed value of \code{lambda}. If
\code{interp = TRUE}, spline interpolation is used to give a smoother plot.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
data(trees)
boxcox(Volume ~ log(Height) + log(Girth), data = trees,
       lambda = seq(-0.25, 0.25, length = 10))

boxcox(Days+1 ~ Eth*Sex*Age*Lrn, data = quine,
       lambda = seq(-0.05, 0.45, len = 20))
}
\keyword{regression}
\keyword{models}
\keyword{hplot}

\eof
% file MASS/cabbages.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{cabbages}
\alias{cabbages}
\title{
Data from a cabbage field trial
}
\description{
The \code{cabbages} data set has 60 observations and 4 variables.
}
\usage{
data(cabbages)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Cult}}{
Factor giving the cultivar of the cabbage, two levels: \code{c39} and \code{c52}.
}
\item{\code{Date}}{
Factor specifying one of three planting dates: \code{d16}, \code{d20} or \code{d21}.
}
\item{\code{HeadWt}}{
Weight of the cabbage head, presumably in kg.
}
\item{\code{VitC}}{
Ascorbic acid content, in undefined units.
}}}
\source{
Rawlings, J. O. (1988)
\emph{Applied Regression Analysis: A Research Tool.}
Wadsworth and Brooks/Cole.  Example 8.4, page 219.
(Rawlings cites the original source as the files of the late
Dr Gertrude M Cox.)
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/caith.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{caith}
\alias{caith}
\title{
Colours of Eyes and Hair of People in Caithness
}
\description{
Data on the cross-classification of people in Caithness, Scotland, by
eye and hair colour. The region of the UK is particularly interesting
as there is a mixture of people of Nordic, Celtic and Anglo-Saxon origin.
}
\usage{
data(caith)
}
\format{
A 4 by 5 table with rows the eye colours (blue, light, medium, dark) and
columns the hair colours (fair, red, medium, dark, black).
}
\source{
Fisher, R.A. (1940) The precision of discriminant functions.
\emph{Annals of Eugenics (London)}
\bold{10}, 422--429.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
corresp(caith)
dimnames(caith)[[2]] <- c("F", "R", "M", "D", "B")
par(mfcol=c(1,3))
plot(corresp(caith, nf=2)); title("symmetric")
plot(corresp(caith, nf=2), type="rows"); title("rows")
plot(corresp(caith, nf=2), type="col"); title("columns")
par(mfrow=c(1,1))
}
\keyword{datasets}

\eof
% file MASS/cats.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{cats}
\alias{cats}
\title{
Anatomical Data from Domestic Cats
}
\description{
The heart and body weights of samples of male and female cats used for
digitalis experiments.  The cats were all adult, over 2 kg body weight.
}
\usage{
data(cats)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Sex}}{
Sex factor. Levels \code{"F"} and \code{"M"}.
}
\item{\code{Bwt}}{
Body weight in kg.
}
\item{\code{Hwt}}{
Heart weight in g.
}}}
\source{
R. A. Fisher (1947) The analysis of covariance method for the relation
between a part and the whole,
\emph{Biometrics}
\bold{3}, 65--68.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/cement.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{cement}
\alias{cement}
\title{
Heat Evolved by Setting Cements
}
\description{
Experiment on the heat evolved in the setting of each of 13
cements.
}
\usage{
data(cement)
}
\details{
13 samples of Portland cement were set. For each sample, the percentages of the
four main chemical ingredients were accurately measured.  While the cement was
setting the amount of heat evolved was also measured.
}
\format{
\describe{
\item{\code{x1, x2, x3, x4}}{
Proportions (\%) of active ingredients
}
\item{\code{y}}{
heat evolved in cals/gm
}}}
\source{
Woods, H., Steinour, H.H. and Starke, H.R. (1932) Effect of composition of 
Portland cement on heat evolved during hardening. 
\emph{Industrial Engineering and Chemistry}, \bold{24}, 1207--1214.
}
\references{
Hald, A. (1957)
\emph{Statistical Theory with Engineering Applications.}
Wiley, New York.
}
\examples{
lm(y ~ x1 + x2 + x3 + x4, cement)
}
\keyword{datasets}

\eof
% file MASS/chem.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{chem}
\alias{chem}
\title{
Copper in Wholemeal Flour
}
\description{
A numeric vector of 24 determinations of copper in wholemeal
flour, in parts per million.
}
\usage{
data(chem)
}
\source{
Analytical Methods Committee (1989) Robust statistics -- how not to
reject outliers.
\emph{The Analyst}
\bold{114}, 1693--1702, 1989
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
\name{con2tr}
\alias{con2tr}
\title{
Convert Lists to Data Frames for use by Trellis
}
\description{
Convert lists to data frames for use by Trellis.
}
\usage{
con2tr(obj)
}
\arguments{
\item{obj}{
A list of components \code{x}, \code{y} and \code{z} as passed to \code{contour}
}
}
\value{
A data frame suitable for passing to Trellis functions.
}
\details{
\code{con2tr} repeats the \code{x} and \code{y} components suitably to match the
vector \code{z}.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{misc}

\eof
% file MASS/confint.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{confint-MASS}
\alias{confint.glm}
\alias{confint.nls}
\alias{confint.profile.glm}
\alias{confint.profile.nls}
\alias{profile.glm}
\title{
Confidence Intervals for Model Parameters
}
\description{
Computes confidence intervals for one or more parameters in a fitted
model.  Package \pkg{MASS} adds methods for \code{glm} and \code{nls} fits.
}
\usage{
\method{confint}{glm}(object, parm, level = 0.95, trace = FALSE, \dots)

\method{confint}{nls}(object, parm, level = 0.95, \dots)
}
\arguments{
\item{object}{
a fitted model object. Methods currently exist for the classes
\code{"glm"}, \code{"nls"} and for profile objects from these classes.
}
\item{parm}{
a specification of which parameters are to be given confidence
intervals, either a vector of numbers or a vector of names. If
missing, all parameters are considered.
}
\item{level}{
the confidence level required.
}
\item{trace}{
  logical.  Should profiling be traced?
}
\item{\dots}{
  additional argument(s) for methods.
}}
\value{
A matrix (or vector) with columns giving lower and upper confidence
limits for each parameter. These will be labelled as (1-level)/2 and
1 - (1-level)/2 in \% (by default 2.5\% and 97.5\%).
}
\details{
  \code{\link[base]{confint}} is a generic function in package \code{base}.
  
  These \code{confint} methods call
  the appropriate profile method, then finds the
  confidence intervals by interpolation in the profile traces.
  If the profile object is already available it should be used as the
  main argument rather than the fitted model object itself.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{profile}}
}
\examples{
expn1 <- deriv(y ~ b0 + b1 * 2^(-x/th), c("b0", "b1", "th"),
               function(b0, b1, th, x) {})

wtloss.gr <- nls(Weight ~ expn1(b0, b1, th, Days),
   data = wtloss, start = c(b0=90, b1=95, th=120))

expn2 <- deriv(~b0 + b1*((w0 - b0)/b1)^(x/d0),
         c("b0","b1","d0"), function(b0, b1, d0, x, w0) {})

wtloss.init <- function(obj, w0) {
  p <- coef(obj)
  d0 <-  - log((w0 - p["b0"])/p["b1"])/log(2) * p["th"]
  c(p[c("b0", "b1")], d0 = as.vector(d0))
}

out <- NULL
w0s <- c(110, 100, 90)
for(w0 in w0s) {
    fm <- nls(Weight ~ expn2(b0, b1, d0, Days, w0),
              wtloss, start = wtloss.init(wtloss.gr, w0))
    out <- rbind(out, c(coef(fm)["d0"], confint(fm, "d0")))
  }
dimnames(out) <- list(paste(w0s, "kg:"),  c("d0", "low", "high"))
out

ldose <- rep(0:5, 2)
numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16)
sex <- factor(rep(c("M", "F"), c(6, 6)))
SF <- cbind(numdead, numalive = 20 - numdead)
budworm.lg0 <- glm(SF ~ sex + ldose - 1, family = binomial)
confint(budworm.lg0)
confint(budworm.lg0, "ldose")
}
\keyword{models}

\eof
\name{contr.sdif}
\alias{contr.sdif}
\title{
Successive Differences contrast coding
}
\description{
A coding for unordered factors based on successive differences.
}
\usage{
contr.sdif(n, contrasts = TRUE)
}
\arguments{
\item{n}{
The number of levels required.
}
\item{contrasts}{
Should there be \code{n - 1} columns orthogonal to the mean (the default)
or \code{n} columns spanning the space.
}}
\value{
If \code{contrasts} is \code{TRUE}, a matrix with \code{n} rows and \code{n - 1} columns,
and the \code{n} by \code{n} identity matrix if \code{contrasts} is \code{FALSE}.
}
\details{
The contrast coefficients are chosen so that the coded coefficients
in a one-way layout are the differences between the means of the
second and first levels, the third and second levels, and so on.
}
\references{
Venables, W. N. and Ripley, B. D. (2002)
\emph{Modern Applied Statistics with S.}
Fourth Edition, Springer.
}
\seealso{
\code{\link{contr.treatment}}, \code{\link{contr.sum}}, \code{\link{contr.helmert}}
}
\examples{
contr.sdif(6)
}
\keyword{models}

\eof
% file MASS/coop.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{coop}
\alias{coop}
\title{
Co-operative Trial in Analytical Chemistry
}
\description{
Seven specimens were sent to 6 laboratories in 3 separate batches and
each analysed for Analyte.  Each analysis was duplicated.
}
\usage{
data(coop)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Lab}}{
Laboratory, \code{L1}, \code{L2}, \dots, \code{L6}.
}
\item{\code{Spc}}{
Specimen, \code{S1}, \code{S2}, \dots, \code{S7}.
}
\item{\code{Bat}}{
Batch, \code{B1}, \code{B2}, \code{B3} (nested within \code{Spc/Lab}).
}
\item{\code{Conc}}{
Concentration of Analyte in g/kg.
}}}
\source{
Analytical Methods Committee (1987)
Recommendations for the conduct and
interpretation of co-operative trials,
\emph{The Analyst}
\bold{112}, 679--686.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{chem}}, \code{\link{abbey}}.
}
\keyword{datasets}

\eof
% file MASS/corresp.d
% copyright (C) 1994-2003 W. N. Venables and B. D. Ripley
%
\name{corresp}
\alias{corresp}
\alias{corresp.xtabs}
\alias{corresp.data.frame}
\alias{corresp.default}
\alias{corresp.factor}
\alias{corresp.formula}
\alias{corresp.matrix}
% \alias{biplot.correspondence}
% \alias{plot.correspondence}
% \alias{print.correspondence}
\title{
Simple Correspondence Analysis
}
\description{
Find the principal canonical correlation and corresponding row- and
column-scores from a correspondence analysis of a two-way contingency
table.
}
\usage{
corresp(x, \dots)

\method{corresp}{matrix}(x, nf = 1, \dots)

\method{corresp}{factor}(x, y, \dots)

\method{corresp}{data.frame}(x, \dots)

\method{corresp}{xtabs}(x, \dots)

\method{corresp}{formula}(formula, data, \dots)
}
\arguments{
\item{x, formula}{
The function is generic, accepting various forms of the principal
argument for specifying a two-way frequency table.  Currently accepted
forms are matrices, data frames (coerced to frequency tables), objects
of class \code{"\link{xtabs}"} and formulae of the form \code{~ F1 + F2},
where \code{F1} and \code{F2} are factors.
}
\item{nf}{
The number of factors to be computed. Note that although 1 is the most
usual, one school of thought takes the first two singular vectors for
a sort of biplot.
}
\item{y}{a second factor for a cross-classification}
\item{data}{a data frame against which to preferentially resolve
  variables in the formula.}
\item{\dots}{
If the principal argument is a formula, a data frame may be specified
as well from which variables in the formula are preferentially
satisfied.
}}
\value{
A list object of class \code{"correspondence"} for which \code{print}, \code{plot} and
\code{biplot} methods are supplied.  The main components are the canonical
correlation(s) and the row and column scores.
}
\details{
  See Venables \& Ripley (2002).  The \code{plot} method produces a graphical
  representation of the table if \code{nf=1}, with the \emph{areas} of circles
  representing the numbers of points.  If \code{nf} is two or more the
  \code{biplot} method is called, which plots the second and third columns of
  the matrices \code{A = Dr^(-1/2) U L} and \code{B = Dc^(-1/2) V L} where the
  singular value decomposition is \code{U L V}.  Thus the x-axis is the
  canonical correlation times the row and column scores. Although this
  is called a biplot, it does \emph{not} have any useful inner product
  relationship between the row and column scores.  Think of this as an
  equally-scaled plot with two unrelated sets of labels.  The origin is
  marked on the plot with a cross.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

  Gower, J. C. and Hand, D. J. (1996)
  \emph{Biplots.}  Chapman \& Hall.
}
\seealso{
\code{\link{svd}}, \code{\link{princomp}}
}
\examples{
(ct <- corresp(~ Age + Eth, data = quine))
\dontrun{plot(ct)}

corresp(caith)
biplot(corresp(caith, nf = 2))
}
\keyword{category}
\keyword{multivariate}

\eof
% file lqs/man/cov.rob.Rd
% copyright (C) 1998-9 B. D. Ripley
%
\name{cov.rob}
\alias{cov.rob}
\alias{cov.mve}
\alias{cov.mcd}
\title{
  Resistant Estimation of Multivariate Location and Scatter
}
\description{
  Compute a multivariate location and scale estimate with a high
  breakdown point -- this can be thought of as estimating the mean and
  covariance of the \code{good} part of the data. \code{cov.mve} and
  \code{cov.mcd} are compatibility wrappers.
}
\usage{
cov.rob(x, cor = FALSE, quantile.used = floor((n + p + 1)/2),
        method = c("mve", "mcd", "classical"), nsamp = "best", seed)

cov.mve(\dots)
cov.mcd(\dots)
}
\arguments{
  \item{x}{
    a matrix or data frame.
  }
  \item{cor}{
    should the returned result include a correlation matrix?
  }
  \item{quantile.used}{
    the minimum number of the data points regarded as \code{good} points.
  }
  \item{method}{
    the method to be used -- minimum volume ellipsoid, minimum
    covariance determinant or classical product-moment. Using
    \code{cov.mve} or \code{cov.mcd} forces \code{mve} or \code{mcd}
    respectively.
  }
  \item{nsamp}{
    the number of samples or \code{"best"} or \code{"exact"} or
    \code{"sample"}.
    If \code{"sample"} the number chosen is \code{min(5*p, 3000)}, taken
    from Rousseeuw and Hubert (1997). If \code{"best"} exhaustive
    enumeration is done up to 5000 samples: if \code{"exact"}
    exhaustive enumeration will be attempted however many samples are needed.
  }
  \item{seed}{
    the seed to be used for random sampling: see \code{\link{RNGkind}}. The
    current value of \code{.Random.seed} will be preserved if it is set.
  }
  \item{\dots}{arguments to \code{cov.rob} other than \code{method}.}
}
\value{
  A list with components

  \item{center}{
    the final estimate of location.
  }
  \item{cov}{
    the final estimate of scatter.
  }
  \item{cor}{
    (only if \code{cor = TRUE}) the estimate of the correlation
    matrix.
  }
  \item{sing}{
    message giving number of singular samples out of total
  }
  \item{crit}{
    the value of the criterion on log scale. For MCD this is
    the determinant, and for MVE it is proportional to the volume.
  }
  \item{best}{
    the subset used. For MVE the best sample, for MCD the best
    set of size \code{quantile.used}.
  }
  \item{n.obs}{
    total number of observations.
}}
\details{
  For method \code{"mve"}, an approximate search is made of a subset of
  size \code{quantile.used} with an enclosing ellipsoid of smallest volume; in
  method \code{"mcd"} it is the volume of the Gaussian confidence
  ellipsoid, equivalently the determinant of the classical covariance
  matrix, that is minimized. The mean of the subset provides a first
  estimate of the location, and the rescaled covariance matrix a first
  estimate of scatter. The Mahalanobis distances of all the points from
  the location estimate for this covariance matrix are calculated, and
  those points within the 97.5\% point under Gaussian assumptions are
  declared to be \code{good}. The final estimates are the mean and rescaled
  covariance of the \code{good} points.

  The rescaling is by the appropriate percentile under Gaussian data; in
  addition the first covariance matrix has an \emph{ad hoc} finite-sample
  correction given by Marazzi.

  For method \code{"mve"} the search is made over ellipsoids determined
  by the covariance matrix of \code{p} of the data points. For method
  \code{"mcd"} an additional improvement step suggested by Rousseeuw and
  van Driessen (1999) is used, in which once a subset of size
  \code{quantile.used} is selected, an ellipsoid based on its covariance
  is tested (as this will have no larger a determinant, and may be smaller).
}
\author{B.D. Ripley}
\references{
  P. J. Rousseeuw and A. M. Leroy (1987) 
  \emph{Robust Regression and Outlier Detection.}
  Wiley.

  A. Marazzi (1993) 
  \emph{Algorithms, Routines and S Functions for Robust Statistics.}
  Wadsworth and Brooks/Cole. 

  P. J. Rousseeuw and B. C. van Zomeren (1990) Unmasking
  multivariate outliers and leverage points, 
  \emph{Journal of the American Statistical Association}, \bold{85}, 633--639.

  P. J. Rousseeuw and K. van Driessen (1999) A fast algorithm for the
  minimum covariance determinant estimator. \emph{Technometrics}
  \bold{41}, 212--223.

  P. Rousseeuw and M. Hubert (1997) Recent developments in PROGRESS. In
  \emph{L1-Statistical Procedures and Related Topics }
  ed Y. Dodge, IMS Lecture Notes volume \bold{31}, pp. 201--214.
}
\seealso{
\code{\link{lqs}}
}
\examples{
data(stackloss)
set.seed(123)
cov.rob(stackloss)
cov.rob(stack.x, method = "mcd", nsamp = "exact")
}
\keyword{robust}
\keyword{multivariate}

\eof
% file MASS/cov.trob.d
% copyright (C) 1997-9 W. N. Venables and B. D. Ripley
%
\name{cov.trob}
\alias{cov.trob}
\title{
Covariance Estimation for Multivariate t Distribution
}
\description{
Estimates a covariance or correlation matrix assuming the data came
from a multivariate t distribution: this provides some degree of
robustness to outliers without giving a high breakdown point.
}
\usage{
cov.trob(x, wt = rep(1, n), cor = FALSE, center = TRUE, nu = 5,
         maxit = 25, tol = 0.01)
}
\arguments{
\item{x}{
data  matrix. Missing values (NAs) are not allowed.
}
\item{wt}{
A vector of weights for each case: these are treated as if the case \code{i}
actually occurred \code{wt[i]} times.
}
\item{cor}{
Flag to choose between returning the correlation (\code{cor = TRUE}) or
covariance (\code{cor = FALSE}) matrix.
}
\item{center}{
a logical value or a numeric vector providing the location about which
the covariance is to be taken. If \code{center = FALSE}, no centering
is done; if \code{center = TRUE} the MLE of the location vector is used.
}
\item{nu}{
\dQuote{degrees of freedom} for the multivariate t distribution. Must exceed
2 (so that the covariance matrix is finite).
}
\item{maxit}{
Maximum number of iterations in fitting.
}
\item{tol}{
Convergence tolerance for fitting.
}}
\value{
A list with the following components

\item{cov}{
the fitted covariance matrix.
}
\item{center}{
the estimated or specified location vector.
}
\item{wt}{
the specified weights: only returned if the \code{wt} argument was given.
}
\item{n.obs}{
the number of cases used in the fitting.
}
\item{cor}{
the fitted correlation matrix: only returned if \code{cor = TRUE}.
}
\item{call}{
The matched call.
}
\item{iter}{
The number of iterations used.
}}
\references{
J. T. Kent, D. E. Tyler and Y. Vardi (1994)
A curious likelihood identity for the multivariate t-distribution.
\emph{Communications in Statistics---Simulation and Computation}
\bold{23}, 441--453.

  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\seealso{
\code{\link{cov}}, \code{\link{cov.wt}}, \code{\link{cov.mve}}
}
\examples{
data(stackloss)
cov.trob(stackloss)
}
\keyword{multivariate}

\eof
% file MASS/cpus.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{cpus}
\alias{cpus}
\title{
Performance of Computer CPUs
}
\description{
A relative performance measure and characteristics of 209 CPUs.
}
\usage{
data(cpus)
}
\format{
The components are:


\describe{
\item{\code{name}}{
Manufacturer and model
}
\item{\code{syct}}{
cycle time in nanoseconds
}
\item{\code{mmin}}{
minimum main memory in kilobytes
}
\item{\code{mmax}}{
maximum main memory in kilobytes
}
\item{\code{cach}}{
cache size in kilobytes
}
\item{\code{chmin}}{
minimum number of channels
}
\item{\code{chmax}}{
maximum number of channels
}
\item{\code{perf}}{
published performance on a benchmark mix relative to an IBM 370/158-3
}
\item{\code{estperf}}{
estimated performance (by Ein-Dor & Feldmesser)
}}}
\source{
P. Ein-Dor & J. Feldmesser (1987)
Attributes of the performance of central processing units: a relative
performance prediction model.
\emph{Comm. ACM.}
\bold{30}, 308--317.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/crabs.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{crabs}
\alias{crabs}
\title{
Morphological Measurements on Leptograpsus Crabs
}
\description{
The \code{crabs} data frame has 200 rows and 8 columns, describing 5 morphological
measurements on 50 crabs each of two colour forms and both sexes, of the
species
\emph{Leptograpsus variegatus}
collected at Fremantle, W. Australia.
}
\usage{
data(crabs)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{sp}}{
\code{species} - \code{"B"} or \code{"O"} for blue or orange
}
\item{\code{sex}}{
as it says
}
\item{\code{index}}{
index 1:50 within each of the four groups
}
\item{\code{FL}}{
frontal lobe size (mm)
}
\item{\code{RW}}{
rear width (mm)
}
\item{\code{CL}}{
carapace length (mm)
}
\item{\code{CW}}{
carapace width (mm)
}
\item{\code{BD}}{
body depth (mm)
}}}
\source{
Campbell, N.A. and Mahon, R.J. (1974) A multivariate
study of variation in two species of rock crab of genus
\emph{Leptograpsus.}
\emph{Australian Journal of  Zoology}
\bold{22}, 417--425.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/deaths.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{deaths}
\alias{deaths}
\alias{fdeaths}
\alias{mdeaths}
\title{
Monthly Deaths from Lung Diseases in the UK
}
\description{
  A time series giving the monthly deaths from bronchitis,
  emphysema and asthma in the UK, 1974-1979, both sexes (\code{deaths}),
  males (\code{mdeaths}) and females (\code{fdeaths}).
}
\usage{
data(deaths)
data(mdeaths)
data(fdeaths)
}
\source{
P. J. Diggle (1990)
\emph{Time Series: A Biostatistical Introduction.}
Oxford, table A.3
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{fdeaths}}, \code{\link{mdeaths}}
}
\keyword{datasets}

\eof
% file MASS/denumerate.d
% copyright (C) 2000 W. N. Venables and B. D. Ripley
%
\name{denumerate}
\alias{denumerate}
\alias{denumerate.formula}
\title{
Transform an Allowable Formula for 'loglm' into one for 'terms'
}
\description{
\code{\link{loglm}} allows dimension numbers to be used in place of names in
the formula.  \code{denumerate} modifies such a formula into one that
\code{\link{terms}} can process.
}
\usage{
denumerate(x)
}
\arguments{
\item{x}{
A formula conforming to the conventions of \code{\link{loglm}}, that is, it
may allow dimension numbers to stand in for names when specifying
a log-linear model.
}}
\value{
A linear model formula like that presented, except that where
dimension numbers, say \code{n}, have been used to specify fixed
margins these are replaced by names of the form \code{.vn} which may
be processed by \code{terms}.
}
\details{
The model fitting function \code{\link{loglm}} fits log-linear models to
frequency data using iterative proportional scaling.  To specify
the model the user must nominate the margins in the data that
remain fixed under the log-linear model.  It is convenient to
allow the user to use dimension numbers, 1, 2, 3, \dots for the
first, second, third, \dots, margins in a similar way to variable
names.  As the model formula has to be parsed by \code{\link{terms}}, which
treats \code{1} in a special way and requires parsable variable names,
these formulae have to be modified by giving genuine names for
these margin, or dimension, numbers.  \code{denumerate} replaces these
numbers with names of a special form, namely \code{n} is replaced by
\code{.vn}.  This allows \code{terms} to parse the formula in the usual way.
}
\seealso{
\code{\link{renumerate}}
}
\examples{
denumerate(~(1+2+3)^3 + a/b)
\dontrun{~ (.v1 + .v2 + .v3)^3 + a/b}
}
\keyword{models}

\eof
\name{dose.p}
\alias{dose.p}
\alias{print.glm.dose}
\title{
Predict Doses for Binomial Assay model
}
\description{
Calibrate binomial assays, generalizing the calculation of LD50.
}
\usage{
dose.p(obj, cf = 1:2, p = 0.5)
}
\arguments{
\item{obj}{
A fitted model object of class inheriting from \code{"glm"}.
}
\item{cf}{
The terms in the coefficient vector giving the intercept and
coefficient of (log-)dose
}
\item{p}{
Probabilities at which to predict the dose needed.
}}
\value{
  An object of class \code{"glm.dose"} giving the prediction (attribute
  \code{"p"}) and standard error (attribute \code{"SE"}) at each response
  probability.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.
  Springer.
}
\examples{
ldose <- rep(0:5, 2)
numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16)
sex <- factor(rep(c("M", "F"), c(6, 6)))
SF <- cbind(numdead, numalive = 20 - numdead)
budworm.lg0 <- glm(SF ~ sex + ldose - 1, family = binomial)

dose.p(budworm.lg0, cf = c(1,3), p = 1:3/4)
dose.p(update(budworm.lg0, family = binomial(link=probit)),
       cf = c(1,3), p = 1:3/4)
}
\keyword{regression}
\keyword{models}

\eof
% file MASS/drivers.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{drivers}
\alias{drivers}
\title{
Deaths of Car Drivers in Great Britain 1969-84
}
\description{
A regular time series giving the monthly totals of car drivers in
Great Britain killed or seriously
injured Jan 1969 to Dec 1984. Compulsory wearing of seat belts was
introduced on 31 Jan 1983.
}
\usage{
data(drivers)
}
\source{
Harvey, A.C. (1989)
\emph{Forecasting, Structural Time Series Models and the Kalman Filter.}
Cambridge University Press, pp. 519--523.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/dropterm.d
% copyright (C) 1998-9 W. N. Venables and B. D. Ripley
%
\name{dropterm}
\alias{dropterm}
\alias{dropterm.default}
\alias{dropterm.glm}
\alias{dropterm.lm}
%\alias{dropterm.mlm}
%\alias{dropterm.negbin}
%\alias{dropterm.survreg}
\title{
Try All One-Term Deletions from a Model
}
\description{
Try fitting all models that differ from the current model by dropping a
single term, maintaining marginality.


This function is generic; there exist methods for classes \code{lm} and
\code{glm} and the default method will work for many other classes.
}
\usage{
dropterm (object, \dots)

\method{dropterm}{default}(object, scope, scale = 0, test = c("none", "Chisq", "F"),
         k = 2, sorted = FALSE, trace = FALSE, \dots)

\method{dropterm}{lm}(object, scope, scale = 0, test = c("none", "Chisq", "F"),
         k = 2, sorted = FALSE, \dots)

\method{dropterm}{glm}(object, scope, scale = 0, test = c("none", "Chisq", "F"),
         k = 2, sorted = FALSE, trace = FALSE, \dots)
}
\arguments{
\item{object}{
An object fitted by some model-fitting function.
}
\item{scope}{
a formula giving terms which might be dropped. By default, the
model formula. Only terms that can be dropped and maintain marginality
are actually tried.
}
\item{scale}{
used in the definition of the AIC statistic for selecting the models,
currently only for \code{lm}, \code{aov} and \code{glm} models. Specifying \code{scale}
asserts that the residual standard error or dispersion is known.
}
\item{test}{
should the results include a test statistic relative to the original
model?  The F test is only appropriate for \code{lm} and \code{aov} models,
and perhaps for some over-dispersed \code{glm} models. The
Chisq test can be an exact test (\code{lm} models with known scale) or a
likelihood-ratio test depending on the method.
}
\item{k}{
the multiple of the number of degrees of freedom used for the penalty.
Only \code{k = 2} gives the genuine AIC: \code{k = log(n)} is sometimes
referred to as BIC or SBC.
}
\item{sorted}{
should the results be sorted on the value of AIC?
}
\item{trace}{
if \code{TRUE} additional information may be given on the fits as they are tried.
}
\item{\dots}{
arguments passed to or from other methods.
}}
\value{
A table of class \code{"anova"} containing at least columns for the change
in degrees of freedom and AIC (or Cp) for the models. Some methods
will give further information, for example sums of squares, deviances,
log-likelihoods and test statistics.
}
\details{
The definition of AIC is only up to an additive constant: when
appropriate (\code{lm} models with specified scale) the constant is taken
to be that used in Mallows' Cp statistic and the results are labelled
accordingly.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{addterm}}, \code{\link{stepAIC}}
}
\examples{
quine.hi <- aov(log(Days + 2.5) ~ .^4, quine)
quine.nxt <- update(quine.hi, . ~ . - Eth:Sex:Age:Lrn)
dropterm(quine.nxt, test=  "F")
quine.stp <- stepAIC(quine.nxt,
    scope = list(upper = ~Eth*Sex*Age*Lrn, lower = ~1),
    trace = FALSE)
dropterm(quine.stp, test = "F")
quine.3 <- update(quine.stp, . ~ . - Eth:Age:Lrn)
dropterm(quine.3, test = "F")
quine.4 <- update(quine.3, . ~ . - Eth:Age)
dropterm(quine.4, test = "F")
quine.5 <- update(quine.4, . ~ . - Age:Lrn)
dropterm(quine.5, test = "F")

house.glm0 <- glm(Freq ~ Infl*Type*Cont + Sat, family=poisson,
                   data = housing)
house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl+Type+Cont))
dropterm(house.glm1, test = "Chisq")
}
\keyword{models}

\eof
% file MASS/eagles.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{eagles}
\alias{eagles}
\title{
Foraging Ecology of Bald Eagles
}
\description{
During a field study on the foraging behaviour of wintering Bald
Eagles in Washington State, USA, Knight and Skagen collected data
concerning 160 attempts by one (pirating) Bald Eagle to steal a chum
salmon from another (feeding) Bald Eagle.
}
\usage{
data(eagles)
}
\format{
The \code{eagles} data frame has 8 rows and 5 columns.
\describe{
\item{\code{y}}{
Number of successful attempts.
}
\item{\code{n}}{
Total number of attempts.
}
\item{\code{P}}{
Size of pirating eagle (\code{L} = large, \code{S} = small).
}
\item{\code{A}}{
Age of pirating eagle (\code{I} = immature, \code{A} = adult).
}
\item{\code{V}}{
Size of victim eagle (\code{L} = large, \code{S} = small).
}}}
\source{
Knight, R. L. and Skagen, S. K. (1988)
Agonistic asymmetries and the foraging ecology of Bald Eagles.
\emph{Ecology}
\bold{69}, 1188--1194.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\examples{
eagles.glm <- glm(cbind(y, n - y) ~ P*A + V, data = eagles,
                  family = binomial)
dropterm(eagles.glm)
prof <- profile(eagles.glm)
plot(prof)
pairs(prof)
}
\keyword{datasets}

\eof
\name{epil}
\alias{epil}
\title{
Seizure Counts for Epileptics
}
\description{
Thall and Vail (1990) give a data set on two-week seizure counts for
59 epileptics.  The number of seizures was recorded for a baseline
period of 8 weeks, and then patients were randomly assigned to a
treatment group or a control group.  Counts were then recorded for
four successive two-week periods. The subject's age is the only
covariate.
}
\usage{
data(epil)
}
\format{
This data frame has 236 rows and the following 9 columns:
\describe{
\item{\code{y}}{
The count for the 2-week period.
}
\item{\code{trt}}{
The treatment, \code{"placebo"} or \code{"progabide"}.
}
\item{\code{base}}{
The counts in the baseline 8-week period.
}
\item{\code{age}}{
The subject's age, in years.
}
\item{\code{V4}}{
\code{0/1} indicator variable of period 4.
}
\item{\code{subject}}{
The subject number, 1 to 59.
}
\item{\code{period}}{
The period, 1 to 4.
}
\item{\code{lbase}}{
The log-counts for the baseline period, centred to have zero mean.
}
\item{\code{lage}}{
The log-ages, centred to have zero mean.
}}}
\source{
Thall, P. F. and Vail, S. C. (1990)
Some covariance models for longitudinal count data with over-dispersion.
\emph{Biometrics}
\bold{46}, 657--671.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth Edition. Springer.
}
\examples{
summary(glm(y ~ lbase*trt + lage + V4, family = poisson,
            data = epil), cor = FALSE)
epil2 <- epil[epil$period == 1, ]
epil2["period"] <- rep(0, 59); epil2["y"] <- epil2["base"]
epil["time"] <- 1; epil2["time"] <- 4
epil2 <- rbind(epil, epil2)
epil2$pred <- unclass(epil2$trt) * (epil2$period > 0)
epil2$subject <- factor(epil2$subject)
epil3 <- aggregate(epil2, list(epil2$subject, epil2$period > 0),
   function(x) if(is.numeric(x)) sum(x) else x[1])
epil3$pred <- factor(epil3$pred,
   labels = c("base", "placebo", "drug"))

contrasts(epil3$pred) <- structure(contr.sdif(3),
    dimnames = list(NULL, c("placebo-base", "drug-placebo")))
summary(glm(y ~ pred + factor(subject) + offset(log(time)),
            family = poisson, data = epil3), cor = FALSE)

summary(glmmPQL(y ~ lbase*trt + lage + V4,
                random = ~ 1 | subject,
                family = poisson, data = epil))
summary(glmmPQL(y ~ pred, random = ~1 | subject,
                family = poisson, data = epil3))
}
\keyword{datasets}

\eof
% file MASS/eqscplot.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{eqscplot}
\alias{eqscplot}
\title{
Plots with Geometrically Equal Scales
}
\description{
Version of a scatterplot with scales chosen to be equal on both axes, that
is, 1cm represents the same units on each.
}
\usage{
eqscplot(x, y, ratio = 1, tol = 0.04, uin, \dots)
}
\synopsis{
eqscplot(x, y, ratio = 1, tol = 0.04, uin, xlim = range(x[is.finite(x)]),
         ylim = range(y[is.finite(y)]), xlab, ylab, ...)
}
\arguments{
\item{x}{
vector of x values, or a 2-column matrix, or a list with components
\code{x} and \code{y}
}
\item{y}{
vector of y values
}
\item{ratio}{
desired ratio of units on the axes. Units on the y axis are drawn at
\code{ratio} times the size of units on the x axis. Ignored if \code{uin} is
specified and of length 2.
}
\item{tol}{
proportion of white space at the margins of plot
}
\item{uin}{
desired values for the units per inch parameter. If of length 1, the
desired units per inch on the x axis.
}
\item{\dots}{
further arguments for \code{plot}
}}
\value{
invisibly, the values of \code{uin} used for the plot.
}
\section{Side Effects}{
performs the plot.
}
\details{
Limits for the x and y axes are chosen so that they include the
data. One of the sets of limits is then stretched from the midpoint to
make the units in the ratio given by \code{ratio}. Finally both are
stretched by \code{1 + tol} to move points away from the axes, and the
points plotted.
}
\note{
Arguments \code{ratio} and \code{uin} were suggested by Bill Dunlap.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{plot}}, \code{\link{par}}
}
\keyword{hplot}

\eof
% file MASS/farms.d
% copyright (C) 1998-9 W. N. Venables and B. D. Ripley
%
\name{farms}
\alias{farms}
\title{
Ecological Factors in Farm Management
}
\description{
The \code{farms} data frame has 20 rows and 4 columns. The rows are farms
on the Dutch island of Terschelling and the columns are factors
describing the management of grassland.
}
\usage{
data(farms)
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{\code{Mois}}{
      Five levels of soil moisture -- level 3 does not occur at these 20 farms.
    }
    \item{\code{Manag}}{
      Grassland management type (\code{SF} = standard,
      \code{BF} = biological, \code{HF} = hobby farming,
      \code{NM} = nature conservation).
    }
    \item{\code{Use}}{
      Grassland use (\code{U1} = hay production, \code{U2} =
      intermediate, \code{U3} = grazing)
    }
    \item{\code{Manure}}{
      Manure usage -- classes \code{C0} to \code{C4}.
    }
  }
}
\source{
  J.C. Gower and D.J. Hand (1996) \emph{Biplots}. Chapman & Hall, Table 4.6.

  Quoted as from:\cr
  R.H.G. Jongman, C.J.F. ter Braak and O.F.R. van Tongeren (1987)
  \emph{Data Analysis in Community and Landscape Ecology.}
  PUDOC, Wageningen
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
farms.mca <- mca(farms, abbrev = TRUE)  # Use levels as names
eqscplot(farms.mca$cs, type = "n")
text(farms.mca$rs, cex = 0.7)
text(farms.mca$cs, labels = dimnames(farms.mca$cs)[[1]], cex = 0.7)
}
\keyword{datasets}

\eof
% file MASS/fgl.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{fgl}
\alias{fgl}
\title{
Measurements of Forensic Glass Fragments
}
\description{
The \code{fgl} data frame has 214 rows and 10 columns.
It was collected by B. German on fragments of glass
collected in forensic work.
}
\usage{
data(fgl)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{RI}}{
refractive index; more precisely the refractive index is 1.518xxxx.


The remaining 8 measurements are percentages by weight of oxides.
}
\item{\code{Na}}{
sodium
}
\item{\code{Mg}}{
magnesium
}
\item{\code{Al}}{
aluminium
}
\item{\code{Si}}{
silicon
}
\item{\code{K}}{
potassium
}
\item{\code{Ca}}{
calcium
}
\item{\code{Ba}}{
barium
}
\item{\code{Fe}}{
iron
}
\item{\code{type}}{
The fragments were originally classed into seven types, one of which
was absent in this dataset.  The categories which occur are
window float glass (\code{WinF}: 70),
window non-float glass (\code{WinNF}: 76),
vehicle window glass (\code{Veh}: 17),
containers (\code{Con}: 13),
tableware (\code{Tabl}: 9) and
vehicle headlamps (\code{Head}: 29).
}}}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
\name{fitdistr}
\alias{fitdistr}
\alias{print.fitdistr}
\alias{coef.fitdistr}
\title{
Maximum-likelihood Fitting of Univariate Distributions
}
\description{
Maximum-likelihood fitting of univariate distributions, allowing
parameters to be held fixed if desired.
}
\usage{
fitdistr(x, densfun, start, \dots)
}
\arguments{
\item{x}{
A numeric vector.
}
\item{densfun}{
  Either a character string or a function returning a density evaluated
  at its first argument.

  Distributions \code{"beta"}, \code{"cauchy"}, \code{"chi-squared"},
  \code{"exponential"}, \code{"f"}, \code{"gamma"}, \code{"log-normal"},
  \code{"lognormal"}, \code{"logistic"}, \code{"negative binomial"},
  \code{"normal"}, \code{"t"}, \code{"uniform"} and \code{"weibull"} are
  recognised, case being ignored.
}
\item{start}{
  A named list giving the parameters to be optimized with initial
  values.  This can be omitted for some of the named distributions
  (see Details).
}
\item{\dots}{
  Additional parameters, either for \code{densfun} or for \code{optim}.
  In particular, it can be used to specify bounds via \code{lower} or
  \code{upper} or both.  If arguments of \code{densfun} (or the density
  function corresponding to a character-string specification) are included
  they will be held fixed.
}}
\value{
  An object of class \code{"fitdistr"}, a list with two components,

  \item{estimate}{the parameter estimates, and}
  \item{sd}{the estimated standard errors.}
}
\details{
  For \code{densfun = "normal"} the closed-form MLEs (and standard
  errors) are used, and \code{start} should not be supplied.

  For all other distributions, direct optimization of the log-likelihood
  is performed, with numerical derivatives.  The estimated standard
  errors are taken from the observed information matrix, calculated by a
  numerical approximation.

  For the following named distributions, reasonable starting values will
  be computed if \code{start} is omitted or only partially specified:
  \code{cauchy}, \code{gamma}, \code{logistic}, \code{negative binomial}
  (parametrized by \code{mu} and \code{size}), \code{t}, \code{uniform},
  \code{weibull}.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
set.seed(123)
x <- rgamma(100, shape = 5, rate = 0.1)
fitdistr(x, "gamma")
## now do this directly with more control.
fitdistr(x, dgamma, list(shape = 1, rate = 0.1), lower = 0.01)

set.seed(123)
x2 <- rt(250, df = 9)
fitdistr(x2, "t", df = 9)
## allow df to vary: not a very good idea!
fitdistr(x2, "t")
## now do this directly with more control.
mydt <- function(x, m, s, df) dt((x-m)/s, df)/s
fitdistr(x2, mydt, list(m = 0, s = 1), df = 9, lower = c(-Inf, 0))

set.seed(123)
x3 <- rweibull(100, shape = 4, scale = 100)
fitdistr(x3, "weibull")

set.seed(123)
x4 <- rnegbin(500, mu = 5, theta = 4)
fitdistr(x4, "Negative Binomial") # R only
}
\keyword{distribution}
\keyword{htest}

\eof
% file MASS/forbes.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{forbes}
\alias{forbes}
\title{
Forbes' Data on Boiling Points in the Alps
}
\description{
A data frame with 17 observations on boiling point (degrees F)
and barometric pressure in inches of mercury.
}
\usage{
data(forbes)
}
\format{
\describe{
\item{\code{bp}}{
boiling point (degrees F)
}
\item{\code{pres}}{
barometric pressure in inches of mercury
}}}
\source{
A. C. Atkinson (1985)
\emph{Plots, Transformations and Regression.}
Oxford.


S. Weisberg (1980)
\emph{Applied Linear Regression.}
Wiley.
}
\keyword{datasets}

\eof
% file MASS/fractions.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{fractions}
\alias{fractions}
\alias{Math.fractions}
\alias{Ops.fractions}
\alias{Summary.fractions}
\alias{[.fractions}
\alias{[<-.fractions}
\alias{as.character.fractions}
\alias{as.fractions}
\alias{is.fractions}
\alias{print.fractions}
\alias{t.fractions}
\title{
Rational Approximation
}
\description{
Find rational approximations to the components of a real numeric
object using a standard continued fraction method.
}
\usage{
fractions(x, cycles = 10, max.denominator = 2000, \dots)
}
\synopsis{
fractions(x, ...)
}
\arguments{
\item{x}{
Any object of mode numeric. Missing values are now allowed.
}
\item{cycles}{
The maximum number of steps to be used in the continued fraction
approximation process.
}
\item{max.denominator}{
An early termination criterion.  If any partial denominator
exceeds \code{max.denominator} the continued fraction stops at that point.
}
\item{\dots}{
arguments passed to or from other methods.
}}
\value{
An object of class \code{"fractions"}.  A structure with \code{.Data} component
the same as the input numeric \code{x}, but with the rational
approximations held as a character vector attribute, \code{"fracs"}.
Arithmetic operations on \code{"fractions"} objects are possible.
}
\details{
Each component is first expanded in a continued fraction of the
form

\code{x = floor(x) + 1/(p1 + 1/(p2 + \dots))}

where \code{p1}, \code{p2}, \dots are positive integers, terminating either
at \code{cycles} terms or when a \code{pj > max.denominator}.  The
continued fraction is then re-arranged to retrieve the numerator
and denominator as integers.

The numerators and denominators are then combined into a
character vector that becomes the \code{"fracs"} attribute and used in
printed representations.


Arithmetic operations on \code{"fractions"} objects have full floating
point accuracy, but the character representation printed out may
not.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth Edition. Springer.
}
\seealso{
\code{\link{rational}}
}
\examples{
X <- matrix(runif(25), 5, 5)
solve(X, X/5)
##              [,1]        [,2]       [,3]        [,4]        [,5]
##  [1,]  2.0000e-01  3.7199e-17 1.2214e-16  5.7887e-17 -8.7841e-17
##  [2,] -1.1473e-16  2.0000e-01 7.0955e-17  2.0300e-17 -1.0566e-16
##  [3,]  2.7975e-16  1.3653e-17 2.0000e-01 -1.3397e-16  1.5577e-16
##  [4,] -2.9196e-16  2.0412e-17 1.5618e-16  2.0000e-01 -2.1921e-16
##  [5,] -3.6476e-17 -3.6430e-17 3.6432e-17  4.7690e-17  2.0000e-01

fractions(solve(X, X/5))
##      [,1] [,2] [,3] [,4] [,5]
## [1,] 1/5    0    0    0    0
## [2,]   0  1/5    0    0    0
## [3,]   0    0  1/5    0    0
## [4,]   0    0    0  1/5    0
## [5,]   0    0    0    0  1/5

fractions(solve(X, X/5)) + 1
##      [,1] [,2] [,3] [,4] [,5]
## [1,] 6/5    1    1    1    1
## [2,]   1  6/5    1    1    1
## [3,]   1    1  6/5    1    1
## [4,]   1    1    1  6/5    1
## [5,]   1    1    1    1  6/5
}
\keyword{math}

\eof
% file MASS/galaxies.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{galaxies}
\alias{galaxies}
\title{
Velocities for 82 Galaxies
}
\description{
A numeric vector of velocities in km/sec of 82 galaxies from 6
well-separated conic sections of an \code{unfilled} survey of the Corona
Borealis region.  Multimodality in such surveys is evidence for voids
and superclusters in the far universe.
}
\usage{
data(galaxies)
}
\source{
Roeder, K. (1990) Density estimation with confidence sets exemplified
by superclusters and voids in galaxies.
\emph{Journal of the American Statistical Association}
\bold{85}, 617--624.

Postman, M., Huchra, J. P. and Geller, M. J. (1986)
Probes of large-scale structures in the Corona Borealis region.
\emph{Astronomical Journal}
\bold{92}, 1238--1247.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\note{
There is an 83rd measurement of 5607 km/sec in the Postman
\emph{et al.} paper which is omitted in Roeder (1990) and from the
dataset here.
}
\examples{
data(galaxies)
gal <- galaxies/1000
c(width.SJ(gal, method = "dpi"), width.SJ(gal))
plot(x = c(0, 40), y = c(0, 0.3), type = "n", bty = "l",
     xlab = "velocity of galaxy (1000km/s)", ylab = "density")
rug(gal)
lines(density(gal, width = 3.25, n = 200), lty = 1)
lines(density(gal, width = 2.56, n = 200), lty = 3)
}
\keyword{datasets}

\eof
% file MASS/gamma.dispersion.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{gamma.dispersion}
\alias{gamma.dispersion}
\title{
Calculate the MLE of the Gamma Dispersion Parameter in a GLM Fit
}
\description{
A front end to \code{gamma.shape} for convenience.  Finds the
reciprocal of the estimate of the shape parameter only.
}
\usage{
gamma.dispersion(object, \dots)
}
\arguments{
\item{object}{
Fitted model object giving the gamma fit.
}
\item{\dots}{
Additional arguments passed on to \code{gamma.shape}.
}}
\value{
The MLE of the dispersion parameter of the gamma distribution.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{gamma.shape.glm}}, including the example on its help page.
}
\keyword{models}

\eof
% file MASS/gamma.shape.glm.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{gamma.shape}
\alias{gamma.shape}
\alias{gamma.shape.glm}
\alias{print.gamma.shape}
\title{
Estimate the Shape Parameter of the Gamma Distribution in a GLM Fit
}
\description{
Find the maximum likelihood estimate of the shape parameter of
the gamma distribution after fitting a \code{Gamma} generalized
linear model.
}
\usage{
\method{gamma.shape}{glm}(object, it.lim = 10,
            eps.max = .Machine$double.eps^0.25, verbose = FALSE, \dots)
}
\arguments{
\item{object}{
Fitted model object from a \code{Gamma} family or \code{quasi} family with
\code{variance = mu^2}.
}
\item{it.lim}{
Upper limit on the number of iterations.
}
\item{eps.max}{
Maximum discrepancy between approximations for the iteration
process to continue.
}
\item{verbose}{
If \code{TRUE}, causes successive iterations to be printed out.  The
initial estimate is taken from the deviance.
}
\item{\dots}{
further arguments passed to or from other methods.
}}
\value{
List of two components

\item{alpha}{
the maximum likelihood estimate
}
\item{SE}{
the approximate standard error, the square-root of the reciprocal of
the observed information.
}}
\details{
A glm fit for a Gamma family correctly calculates the maximum
likelihood estimate of the mean parameters but provides only a
crude estimate of the dispersion parameter.  This function takes
the results of the glm fit and solves the maximum likelihood
equation for the reciprocal of the dispersion parameter, which is
usually called the shape (or exponent) parameter.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{gamma.dispersion}}
}
\examples{
clotting <- data.frame(
    u = c(5,10,15,20,30,40,60,80,100),
    lot1 = c(118,58,42,35,27,25,21,19,18),
    lot2 = c(69,35,26,21,18,16,13,12,12))
clot1 <- glm(lot1 ~ log(u), data = clotting, family = Gamma)
gamma.shape(clot1)
\dontrun{
Alpha: 538.13
   SE: 253.60
}
gm <- glm(Days + 0.1 ~ Age*Eth*Sex*Lrn,
		quasi(link=log, variance=mu^2), quine, start = rep(0,32))
gamma.shape(gm, verbose = TRUE)
\dontrun{
Initial estimate: 1.0603
Iter.  1  Alpha: 1.23840774338543
Iter.  2  Alpha: 1.27699745778205
Iter.  3  Alpha: 1.27834332265501
Iter.  4  Alpha: 1.27834485787226

Alpha: 1.27834
   SE: 0.13452
}
summary(gm, dispersion = gamma.dispersion(gm))  # better summary
}
\keyword{models}

\eof
% file MASS/gehan.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{gehan}
\alias{gehan}
\title{
Remission Times of Leukaemia Patients
}
\description{
A data frame from a trial of 42 leukaemia patients. Some were
treated with the drug
\emph{6-mercaptopurine}
and the rest are controls.  The trial was designed as matched pairs,
both withdrawn from the trial when either came out of remission.
}
\usage{
data(gehan)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{pair}}{
label for pair
}
\item{\code{time}}{
remission time in weeks
}
\item{\code{cens}}{
censoring, 0/1
}
\item{\code{treat}}{
treatment, control or 6-MP
}}}
\source{
Cox, D. R. and Oakes, D. (1984)
\emph{Analysis of Survival Data.}
Chapman & Hall, p. 7. Taken from

Gehan, E.A. (1965) A generalized Wilcoxon test for comparing
arbitrarily single-censored samples.
\emph{Biometrika}
\bold{52}, 203--233.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
library(survival)
gehan.surv <- survfit(Surv(time, cens) ~ treat, data = gehan,
     conf.type = "log-log")
summary(gehan.surv)
survreg(Surv(time, cens) ~ factor(pair) + treat, gehan, dist = "exp")
summary(survreg(Surv(time, cens) ~ treat, gehan, dist = "exp"))
summary(survreg(Surv(time, cens) ~ treat, gehan))
gehan.cox <- coxph(Surv(time, cens) ~ treat, gehan)
summary(gehan.cox)
}
\keyword{datasets}

\eof
% file MASS/genotype.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{genotype}
\alias{genotype}
\title{
Rat Genotype Data
}
\description{
Data from a foster feeding experiment with rat mothers and litters of
four different genotypes: \code{A}, \code{B}, \code{I} and \code{J}.  Rat litters were
separated from their natural mothers at birth and given to foster
mothers to rear.
}
\usage{
data(genotype)
}
\format{
The data frame has the following components:
\describe{
\item{\code{Litter}}{
The genotype of the litter
}
\item{\code{Mother}}{
The genotype of the foster mother
}
\item{\code{Wt}}{
Average weight gain of the litter, in grams at age 28 days.
(The source states that the within-litter variability is negligible.)
}}}
\source{
Scheffe, H. (1959)
\emph{The Analysis of Variance.}
Wiley, p. 140.

Bailey, D. W. (1953)
\emph{The Inheritance of Maternal Influences on the Growth of the Rat.}
Unpublished Ph.D. thesis, University of California. Table B of the Appendix.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
\name{geyser}
\alias{geyser}
\non_function{}
\usage{data(geyser)}
\title{Old Faithful Geyser Data}
\description{
  A version of the eruptions data from the \sQuote{Old Faithful} geyser
  in Yellowstone National  Park,  Wyoming. This version comes from
  Azzalini and Bowman (1990) and is of continuous measurement from August
  1 to August 15, 1985.

  Some nocturnal duration measurements were coded as 2, 3 or 4 minutes,
  having originally been described as \sQuote{short}, \sQuote{medium}
  or \sQuote{long}.
}
\format{A data frame with 299 observations on 2 variables.
  \tabular{lll}{
    \code{duration}  \tab numeric  \tab Eruption time in mins \cr
    \code{waiting}   \tab numeric  \tab Waiting time to next eruption \cr
  }
}
\seealso{\code{\link{faithful}}}
\references{
  Azzalini, A. and Bowman, A. W. (1990) A look at some
  data on the Old Faithful geyser.  \emph{Applied Statistics}
  \bold{39}, 357--365.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

}
\keyword{datasets}

\eof
% file MASS/gilgais.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{gilgais}
\alias{gilgais}
\title{
Line Transect of Soil in Gilgai Territory
}
\description{
This dataset was collected on a line transect survey in gilgai
territory in New South Wales, Australia.  Gilgais are natural gentle
depressions in otherwise flat land, and sometimes seem to be regularly
distributed. The data collection was stimulated by the question: are
these patterns reflected in soil properties?  At each of 365 sampling
locations on a linear grid of 4 meters spacing, samples were taken at
depths 0-10 cm, 30-40 cm and 80-90 cm below the surface. pH, electrical
conductivity and chloride content were measured on a 1:5 soil:water
extract from each sample.
}
\usage{
data(gilgais)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{pH00}}{
pH at depth 0-10cm
}
\item{\code{pH30}}{
pH at depth 30-40cm
}
\item{\code{pH80}}{
pH at depth 80-90cm
}
\item{\code{e00}}{
electrical conductivity in mS/cm  (0-10 cm)
}
\item{\code{e30}}{
electrical conductivity in mS/cm (30-40 cm)
}
\item{\code{e80}}{
electrical conductivity in mS/cm (80-90 cm)
}
\item{\code{c00}}{
chloride content in ppm  (0-10 cm)
}
\item{\code{c30}}{
chloride content in ppm (30-40 cm)
}
\item{\code{c80}}{
chloride content in ppm (80-90 cm)
}}}
\source{
Webster, R. (1977) Spectral analysis of gilgai soil.
\emph{Australian Journal of Soil Research}
\bold{15}, 191--204.

Laslett, G. M. (1994)
Kriging and splines: An empirical comparison of their
predictive performance in some applications (with discussion).
\emph{Journal of the American Statistical Association}
\bold{89}, 391--409.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/ginv.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{ginv}
\alias{ginv}
\title{
Generalized Inverse of a Matrix
}
\description{
Calculates the Moore-Penrose generalized inverse of a matrix
\code{X}.
}
\usage{
ginv(X, tol = sqrt(.Machine$double.eps))
}
\arguments{
\item{X}{
Matrix for which the Moore-Penrose inverse is required.
}
\item{tol}{
A relative tolerance to detect zero singular values.
}}
\value{
A MP generalized inverse matrix for \code{X}.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer. p.100.
}
\seealso{
\code{\link{solve}}, \code{\link{svd}}, \code{\link{eigen}}
}
\examples{
\dontrun{
# The function is currently defined as
function(X, tol = sqrt(.Machine$double.eps))
{
## Generalized Inverse of a Matrix
  dnx <- dimnames(X)
  if(is.null(dnx)) dnx <- vector("list", 2)
  s <- svd(X)
  nz <- s$d > tol * s$d[1]
  structure(
    if(any(nz)) s$v[, nz] \%*\% (t(s$u[, nz])/s$d[nz]) else X,
    dimnames = dnx[2:1])
}
}}
\keyword{algebra}

\eof
% file MASS/glm.convert.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{glm.convert}
\alias{glm.convert}
\title{
Change a Negative Binomial fit to a GLM fit
}
\description{
This function modifies an output object from \code{glm.nb()} to one
that looks like the output from \code{glm()} with a negative binomial
family.  This allows it to be updated keeping the theta parameter
fixed.
}
\usage{
glm.convert(object)
}
\arguments{
\item{object}{
  An object of class \code{"negbin"}, typically the output from
  \code{\link{glm.nb}()}.
}}
\value{
An object of class \code{"glm"} with negative binomial family.  The theta
parameter is then fixed at its present estimate.
}
\details{
Convenience function needed to effect some low level changes to the
structure of the fitted model object.
}
\seealso{
\code{\link{glm.nb}}, \code{\link{negative.binomial}}, \code{\link{glm}}
}
\examples{
quine.nb1 <- glm.nb(Days ~ Sex/(Age + Eth*Lrn), data = quine)
quine.nbA <- glm.convert(quine.nb1)
quine.nbB <- update(quine.nb1, . ~ . + Sex:Age:Lrn)
anova(quine.nbA, quine.nbB)
}
\keyword{regression}
\keyword{models}

\eof
% file MASS/glm.nb.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{glm.nb}
\alias{glm.nb}
\alias{family.negbin}
\alias{logLik.negbin}
\title{
Fit a Negative Binomial Generalized Linear Model
}
\description{
  A modification of the system function \code{\link{glm}()} to include
  estimation of the additional parameter, \code{theta}, for a
  Negative Binomial generalized linear model.
}
\usage{
glm.nb(formula, \dots,  init.theta, link = log)
}
\synopsis{
glm.nb(formula, data, weights, subset, na.action, start = NULL, etastart,
                 control = glm.control(...), method = "glm.fit",
                 model = TRUE, x = FALSE, y = TRUE, contrasts = NULL, ...,
                 init.theta, link = log)
}
\arguments{
\item{formula}{
  Model formula, as for \code{glm()}.
}
\item{\dots}{
  Any other arguments for the \code{\link{glm}()} function except \code{family}
  and, in \R, \code{offset} (but \code{\link{offset}} can be used).
}
\item{init.theta}{
  Optional initial value for the theta parameter.  If omitted a moment
  estimator after an initial fit using a Poisson GLM is used.
}
\item{link}{
  The link function.  Currently must be one of \code{log}, \code{sqrt}
  or \code{identity}.
}}
\value{
  A fitted model object of class \code{negbin} inheriting from \code{glm}
  and \code{lm}.  The object is like the output of \code{glm} but contains
  three additional components, namely \code{theta} for the ML estimate of
  theta, \code{SE.theta} for its approximate standard error (using
  observed rather than expected information), and \code{twologlik} for
  twice the log-likelihood function.
}
\details{
  An alternating iteration process is used.  For given \code{theta} the GLM
  is fitted using the same process as used by \code{glm()}.  For fixed means
  the \code{theta} parameter is estimated using score and information
  iterations.  The two are alternated until convergence of both. (The
  number of alternations and the number of iterations when estimating
  \code{theta} are controlled by the \code{maxit} parameter of
  \code{glm.control}.)

  Setting \code{trace > 0} traces the alternating iteration
  process. Setting \code{trace > 1} traces the \code{glm} fit, and
  setting \code{trace > 2} traces the estimation of \code{theta}.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
  \code{\link{glm}}, \code{\link{negative.binomial}},
  \code{\link{anova.negbin}}, \code{\link{summary.negbin}},
  \code{\link{theta.ml}}, \code{\link{theta.md}}, \code{\link{theta.mm}}
}
\examples{
quine.nb1 <- glm.nb(Days ~ Sex/(Age + Eth*Lrn), data = quine)
quine.nb2 <- update(quine.nb1, . ~ . + Sex:Age:Lrn)
quine.nb3 <- update(quine.nb2, Days ~ .^4)
anova(quine.nb1, quine.nb2, quine.nb3)
\dontshow{## PR#1695
y <- c(7, 5, 4, 7, 5, 2, 11, 5, 5, 4, 2, 3, 4, 3, 5, 9, 6, 7, 10, 6, 12,
6, 3, 5, 3, 9, 13, 0, 6, 1, 2, 0, 1, 0, 0, 4, 5, 1, 5, 3, 3, 4)

lag1 <- c(0, 7, 5, 4, 7, 5, 2, 11, 5, 5, 4, 2, 3, 4, 3, 5, 9, 6, 7, 10,
6, 12, 6, 3, 5, 3, 9, 13, 0, 6, 1, 2, 0, 1, 0, 0, 4, 5, 1, 5, 3, 3)

lag2 <- c(0, 0, 7, 5, 4, 7, 5, 2, 11, 5, 5, 4, 2, 3, 4, 3, 5, 9, 6, 7,
10, 6, 12, 6, 3, 5, 3, 9, 13, 0, 6, 1, 2, 0, 1, 0, 0, 4, 5, 1, 5, 3)

lag3 <- c(0, 0, 0, 7, 5, 4, 7, 5, 2, 11, 5, 5, 4, 2, 3, 4, 3, 5, 9, 6,
7, 10, 6, 12, 6, 3, 5, 3, 9, 13, 0, 6, 1, 2, 0, 1, 0, 0, 4, 5, 1, 5)

(fit <- glm(y ~ lag1+lag2+lag3, family=poisson(link=identity),
            start=c(2, 0.1, 0.1, 0.1)))
try(glm.nb(y ~ lag1+lag2+lag3, link=identity))
glm.nb(y ~ lag1+lag2+lag3, link=identity,  start=c(2, 0.1, 0.1, 0.1))
glm.nb(y ~ lag1+lag2+lag3, link=identity,  start=coef(fit))
glm.nb(y ~ lag1+lag2+lag3, link=identity, etastart=rep(5, 42))
}}
\keyword{regression}
\keyword{models}

\eof
\name{glmmPQL}
\alias{glmmPQL}
\title{
Fit Generalized Linear Mixed Models via PQL
}
\description{
Fit a GLMM model with multivariate normal random effects, using
Penalized Quasi-Likelihood.
}
\usage{
glmmPQL(fixed, random, family, data, correlation, weights,
        control, niter = 10, verbose = TRUE, \dots)
}
\arguments{
\item{fixed}{
a two-sided linear formula giving fixed-effects part of the model.
}
\item{random}{
A formula or list of formulae describing the random effects.
}
\item{family}{
a GLM family.
}
\item{data}{
an optional data frame used as the first place to find
variables in the formulae.
}
\item{correlation}{
an optional correlation structure.
}
\item{weights}{
optional case weights as in \code{glm}.
}
\item{control}{
an optional argument to be passed to \code{lme}.
}
\item{niter}{
maximum number of iterations.
}
\item{verbose}{
logical: print out record of iterations?
}
\item{\dots}{
Further arguments for \code{lme}.
}}
\value{
An object of class \code{"lme"}: see \code{\link[nlme]{lmeObject}}.
}
\details{
  \code{glmmPQL} works by repeated calls to \code{\link[nlme]{lme}}, so
  package \code{nlme} will be loaded at first use if necessary.
}
\references{
Schall, R. (1991) Estimation in generalized linear models with
random effects.
\emph{Biometrika}
\bold{78}, 719--727.

Breslow, N. E. and Clayton, D. G. (1993) Approximate inference in
generalized linear mixed models.
\emph{Journal of the American Statistical Association}
\bold{88}, 9--25.

Wolfinger, R. and O'Connell, M. (1993) Generalized linear mixed models: a
pseudo-likelihood approach.
\emph{Journal of Statistical Computation and Simulation}
\bold{48}, 233--243.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

}
\seealso{
  \code{\link[nlme]{lme}}
}
\examples{
library(nlme) # will be loaded automatically if omitted
summary(glmmPQL(y ~ trt + I(week > 2), random = ~ 1 | ID,
                family = binomial, data = bacteria))
\testonly{ # an example of offset
summary(glmmPQL(y ~ trt + week, random = ~ 1 | ID,
                family = binomial, data = bacteria))
summary(glmmPQL(y ~ trt + week + offset(week), random = ~ 1 | ID,
                family = binomial, data = bacteria))
}}
\keyword{models}

\eof
% file MASS/hills.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{hills}
\alias{hills}
\title{
Record Times in Scottish Hill Races
}
\description{
The record times in 1984 for 35 Scottish hill races.
}
\usage{
data(hills)
}
\format{
The components are:
\describe{
\item{\code{dist}}{
distance in miles (on the map)
}
\item{\code{climb}}{
total height gained during the route, in feet.
}
\item{\code{time}}{
record time in minutes.
}}}
\source{
A.C. Atkinson (1986)
Comment: Aspects of diagnostic regression analysis.
\emph{Statistical Science}
\bold{1}, 397--402.


[A.C. Atkinson (1988)
Transformations unmasked.
\emph{Technometrics}
\bold{30}, 311--318
"corrects" the time for Knock Hill from 78.65 to 18.65. It is unclear
if this is based on the original records.]
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
\name{hist.scott}
\alias{hist.scott}
\alias{hist.FD}
\title{
Plot a Histogram with Automatic Bin Width Selection
}
\description{
Plot a histogram with automatic bin width selection, using the Scott
or Freedman-Diaconis formulae.
}
\usage{
hist.scott(x, prob = TRUE, \dots)
hist.FD(x, prob = TRUE,  \dots)
}
\synopsis{
hist.scott(x, prob = TRUE, xlab = deparse(substitute(x)), ...)
hist.FD(x, prob = TRUE, xlab = deparse(substitute(x)), ...)
}
\arguments{
\item{x}{
A data vector
}
\item{prob}{
Should the plot have unit area, so be a density estimate?
}
\item{\dots}{
Further arguments to \code{hist}.
}}
\value{
For the \code{nclass.*} functions, the suggested number of classes.
}
\section{Side Effects}{
Plot a histogram.
}
\references{
Venables, W. N. and Ripley, B. D. (2002)
\emph{Modern Applied Statistics with S.} Fourth edition.
Springer.
}
\seealso{
\code{\link{hist}}
}
\keyword{hplot}
\keyword{dplot}

\eof
% file MASS/housing.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{housing}
\alias{housing}
\title{
Frequency Table from a Copenhagen Housing Conditions Survey
}
\description{
The \code{housing} data frame has 72 rows and 5 variables.
}
\usage{
data(housing)
}
\format{
\describe{
\item{\code{Sat}}{
Satisfaction of householders with their present housing
circumstances, (High, Medium or Low, ordered factor).
}
\item{\code{Infl}}{
Perceived degree of influence householders have on the
management of the property (High, Medium, Low).
}
\item{\code{Type}}{
Type of rental accommodation, (Tower, Atrium, Apartment, Terrace).
}
\item{\code{Cont}}{
Contact residents are afforded with other residents, (Low, High).
}
\item{\code{Freq}}{
Frequencies: the numbers of residents in each class.
}}}
\source{
Madsen, M. (1976)
Statistical analysis of multiple contingency tables. Two examples.
\emph{Scand. J. Statist.} \bold{3}, 97--106.

Cox, D. R. and Snell, E. J. (1984)
\emph{Applied Statistics, Principles and Examples}.
Chapman & Hall.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
options(contrasts = c("contr.treatment", "contr.poly"))

# Surrogate Poisson models
house.glm0 <- glm(Freq ~ Infl*Type*Cont + Sat, family = poisson,
                  data = housing)
summary(house.glm0, cor = FALSE)

addterm(house.glm0, ~. + Sat:(Infl+Type+Cont), test = "Chisq")

house.glm1 <- update(house.glm0, . ~ . + Sat*(Infl+Type+Cont))
summary(house.glm1, cor = FALSE)

1 - pchisq(deviance(house.glm1), house.glm1$df.resid)

dropterm(house.glm1, test = "Chisq")

addterm(house.glm1, ~. + Sat:(Infl+Type+Cont)^2, test  =  "Chisq")

hnames <- lapply(housing[, -5], levels) # omit Freq
newData <- expand.grid(hnames)
newData$Sat <- ordered(newData$Sat)
house.pm <- predict(house.glm1, newData,
                    type = "response")  # poisson means
house.pm <- matrix(house.pm, ncol = 3, byrow = TRUE,
                   dimnames = list(NULL, hnames[[1]]))
house.pr <- house.pm/drop(house.pm \%*\% rep(1, 3))
cbind(expand.grid(hnames[-1]), round(house.pr, 2))

# Iterative proportional scaling
loglm(Freq ~ Infl*Type*Cont + Sat*(Infl+Type+Cont), data = housing)


# multinomial model
library(nnet)
(house.mult<- multinom(Sat ~ Infl + Type + Cont, weights = Freq,
                       data = housing))
house.mult2 <- multinom(Sat ~ Infl*Type*Cont, weights = Freq,
                        data = housing)
anova(house.mult, house.mult2)

house.pm <- predict(house.mult, expand.grid(hnames[-1]),
                    type = "probs")
cbind(expand.grid(hnames[-1]), round(house.pm, 2))

# proportional odds model
house.cpr <- apply(house.pr, 1, cumsum)
logit <- function(x) log(x/(1-x))
house.ld <- logit(house.cpr[2, ]) - logit(house.cpr[1, ])
(ratio <- sort(drop(house.ld)))
mean(ratio)

(house.plr <- polr(Sat ~ Infl + Type + Cont,
                   data = housing, weights = Freq))

house.pr1 <- predict(house.plr, expand.grid(hnames[-1]),
                   type = "probs")
cbind(expand.grid(hnames[-1]), round(house.pr1, 2))

Fr <- matrix(housing$Freq, ncol  =  3, byrow = TRUE)
2*sum(Fr*log(house.pr/house.pr1))

house.plr2 <- stepAIC(house.plr, ~.^2)
house.plr2$anova
}
\keyword{datasets}

\eof
% file MASS/huber.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{huber}
\alias{huber}
\title{
Huber M-estimator of Location with MAD Scale
}
\description{
Finds the Huber M-estimator of location with MAD scale.
}
\usage{
huber(y, k = 1.5, tol = 1e-06)
}
\arguments{
\item{y}{
vector of data values
}
\item{k}{
Winsorizes at \code{k} standard deviations
}
\item{tol}{
convergence tolerance
}}
\value{
list of location and scale parameters

\item{mu}{
location estimate
}
\item{s}{
MAD scale estimate
}}
\references{
Huber, P. J. (1981)
\emph{Robust Statistics.}
Wiley.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

}
\seealso{
\code{\link{hubers}}, \code{\link{mad}}
}
\examples{
huber(chem)
}
\keyword{robust}

\eof
% file MASS/hubers.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{hubers}
\alias{hubers}
\title{
Huber Proposal 2 Robust Estimator of Location and/or Scale
}
\description{
Finds the Huber M-estimator for location with scale specified, scale
with location specified, or both if neither is specified.
}
\usage{
hubers(y, k = 1.5, mu, s, initmu = median(y), tol = 1e-06)
}
\arguments{
\item{y}{
vector y of data values
}
\item{k}{
Winsorizes at \code{k} standard deviations
}
\item{mu}{
specified location
}
\item{s}{
specified scale
}
\item{initmu}{
initial value of \code{mu}
}
\item{tol}{
convergence tolerance
}}
\value{
list of location and scale estimates

\item{mu}{
location estimate
}
\item{s}{
scale estimate
}}
\references{
Huber, P. J. (1981)
\emph{Robust Statistics.}
Wiley.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{huber}}
}
\examples{
hubers(chem)
hubers(chem, mu=3.68)
}
\keyword{robust}

\eof
% file MASS/immer.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{immer}
\alias{immer}
\title{
Yields from a Barley Field Trial
}
\description{
The \code{immer} data frame has 30 rows and 4 columns.  Five varieties of
barley were grown in six locations in each of 1931 and
1932.
}
\usage{
data(immer)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Loc}}{
The location.
}
\item{\code{Var}}{
The variety of barley (\code{"manchuria"}, \code{"svansota"}, \code{"velvet"},
\code{"trebi"} and \code{"peatland"}).
}
\item{\code{Y1}}{
Yield in 1931
}
\item{\code{Y2}}{
Yield in 1932
}}}
\source{
Immer, F.R., Hayes, H.D. and LeRoy Powers (1934)
Statistical determination of barley varietal adaptation.
\emph{Journal of the American Society for Agronomy}
\bold{26}, 403--407.

Fisher, R.A. (1947)
\emph{The Design of Experiments.}
4th edition.
Edinburgh: Oliver and Boyd.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\examples{
immer.aov <- aov(cbind(Y1,Y2) ~ Loc + Var, data = immer)
summary(immer.aov)

immer.aov <- aov((Y1+Y2)/2 ~ Var + Loc, data = immer)
summary(immer.aov)
model.tables(immer.aov, type = "means", se = TRUE, cterms = "Var")
}
\keyword{datasets}

\eof
% file MASS/man/isoMDS.Rd
% copyright (C) 1994-2004 W. N. Venables and B. D. Ripley
%
\name{isoMDS}
\alias{isoMDS}
\alias{Shepard}
\title{
Kruskal's Non-metric Multidimensional Scaling
}
\description{
One form of non-metric multidimensional scaling.
}
\usage{
isoMDS(d, y = cmdscale(d, k), k = 2, maxit = 50, trace = TRUE,
       tol = 1e-3, p = 2)

Shepard(d, x, p = 2)
}
\arguments{
\item{d}{
  distance structure of the form returned by \code{dist}, or a full,
  symmetric matrix.  Data are assumed to be dissimilarities or relative
  distances, but must be positive except for self-distance.  Both
  missing and infinite values are allowed.
}
\item{y}{
  An initial configuration. If none is supplied, \code{cmdscale} is used
  to provide the classical solution, unless there are missing or
  infinite dissimilarities.
}
\item{k}{
  The desired dimension for the solution, passed to \code{cmdscale}.
}
\item{maxit}{
  The maximum number of iterations.
}
\item{trace}{
  Logical for tracing optimization. Default \code{TRUE}.
}
\item{tol}{
  convergence tolerance.
}
\item{p}{Power for Minkowski distance in the configuration space.}
\item{x}{A final configuration.}
}
\value{
Two components:

\item{points}{
  A \code{k}-column matrix of the fitted configuration (two columns for
  the default \code{k = 2}).
}
\item{stress}{
  The final stress achieved (in percent).
}}
\section{Side Effects}{
  If \code{trace} is true, the initial stress and the current stress
  are printed out every 5 iterations.
}
\details{
  This chooses a k-dimensional (default k = 2) configuration to minimize
  the stress, the square root of the ratio of the sum of squared
  differences between the input distances and those of the configuration
  to the sum of configuration distances squared. However, the input
  distances are allowed a monotonic transformation.

  An iterative algorithm is used, which will usually converge in around
  10 iterations. As this is necessarily an \eqn{O(n^2)} calculation, it is slow
  for large datasets. Further, since for the default \eqn{p = 2}
  the configuration is only determined
  up to rotations and reflections (by convention the centroid is at the
  origin), the result can vary considerably from machine to machine.
}
\references{
  T. F. Cox and M. A. A. Cox (1994, 2001)
  \emph{Multidimensional Scaling}. Chapman & Hall.

  Ripley, B. D. (1996)
  \emph{Pattern Recognition and Neural Networks}. Cambridge University Press.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
  \code{\link{cmdscale}}, \code{\link{sammon}}
}
\examples{
data(swiss)
swiss.x <- as.matrix(swiss[, -1])
swiss.dist <- dist(swiss.x)
swiss.mds <- isoMDS(swiss.dist)
plot(swiss.mds$points, type = "n")
text(swiss.mds$points, labels = as.character(1:nrow(swiss.x)))
swiss.sh <- Shepard(swiss.dist, swiss.mds$points)
plot(swiss.sh, pch = ".")
lines(swiss.sh$x, swiss.sh$yf, type = "S")
}
\keyword{multivariate}

\eof
% file MASS/kde2d.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{kde2d}
\alias{kde2d}
\title{
Two-Dimensional Kernel Density Estimation
}
\description{
Two-dimensional kernel density estimation with an axis-aligned
bivariate normal kernel, evaluated on a square grid.
}
\usage{
kde2d(x, y, h, n = 25, lims = c(range(x), range(y)))
}
\arguments{
\item{x}{
x coordinate of data
}
\item{y}{
y coordinate of data
}
\item{h}{
vector of bandwidths for x and y directions. Defaults to
normal reference bandwidth.
}
\item{n}{
Number of grid points in each direction.
}
\item{lims}{
The limits of the rectangle covered by the grid as \code{c(xl, xu, yl, yu)}.
}}
\value{
A list of three components.

\item{x, y}{
The x and y coordinates of the grid points, vectors of length \code{n}.
}
\item{z}{
An \code{n}  x  \code{n} matrix of the evaluated density.
}}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
attach(geyser)
plot(duration, waiting, xlim = c(0.5,6), ylim = c(40,100))
f1 <- kde2d(duration, waiting, n = 50, lims = c(0.5, 6, 40, 100))
image(f1, zlim = c(0, 0.05))
f2 <- kde2d(duration, waiting, n = 50, lims = c(0.5, 6, 40, 100),
            h = c(width.SJ(duration), width.SJ(waiting)) )
image(f2, zlim = c(0, 0.05))
persp(f2, phi = 30, theta = 20, d = 5)

plot(duration[-272], duration[-1], xlim = c(0.5, 6),
     ylim = c(1, 6),xlab = "previous duration", ylab = "duration")
f1 <- kde2d(duration[-272], duration[-1],
            h = rep(1.5, 2), n = 50, lims = c(0.5, 6, 0.5, 6))
contour(f1, xlab = "previous duration",
        ylab = "duration", levels  =  c(0.05, 0.1, 0.2, 0.4) )
f1 <- kde2d(duration[-272], duration[-1],
            h = rep(0.6, 2), n = 50, lims = c(0.5, 6, 0.5, 6))
contour(f1, xlab = "previous duration",
        ylab = "duration", levels  =  c(0.05, 0.1, 0.2, 0.4) )
f1 <- kde2d(duration[-272], duration[-1],
            h = rep(0.4, 2), n = 50, lims = c(0.5, 6, 0.5, 6))
contour(f1, xlab = "previous duration",
        ylab = "duration", levels  =  c(0.05, 0.1, 0.2, 0.4) )
detach("geyser")
}
\keyword{dplot}

\eof
% file MASS/lda.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{lda}
\alias{lda}
\alias{lda.default}
\alias{lda.data.frame}
\alias{lda.formula}
\alias{lda.matrix}
\alias{model.frame.lda}
\alias{print.lda}
\alias{coef.lda}
\title{
Linear Discriminant Analysis
}
\description{
Linear discriminant analysis.
}
\usage{
lda(x, \dots)

\method{lda}{formula}(formula, data, \dots, subset, na.action)

\method{lda}{default}(x, grouping, prior = proportions, tol = 1.0e-4,
    method, CV = FALSE, nu, \dots)

\method{lda}{data.frame}(x, \dots)

\method{lda}{matrix}(x, grouping, \dots, subset, na.action)
}
\arguments{
  \item{formula}{
    A formula of the form \code{groups ~ x1 + x2 + \dots}.  That is, the
    response is the grouping factor and the right hand side specifies
    the (non-factor) discriminators.
  }
  \item{data}{
    Data frame from which variables specified in \code{formula} are
    preferentially to be taken.
  }
  \item{x}{
    (required if no formula is given as the principal argument.)
    a matrix or data frame or Matrix containing the explanatory variables.
  }
  \item{grouping}{
    (required if no formula principal argument is given.)
    a factor specifying the class for each observation.
  }
  \item{prior}{
    the prior probabilities of class membership.  If unspecified, the
    class proportions for the training set are used.  If present, the
    probabilities should be specified in the order of the factor
    levels.
  }
  \item{tol}{
    A tolerance to decide if a matrix is singular; it will reject variables
    and linear combinations of unit-variance variables whose variance is
    less than \code{tol^2}.
  }
  \item{subset}{
    An index vector specifying the cases to be used in the training
    sample.  (NOTE: If given, this argument must be named.)
  }
  \item{na.action}{
    A function to specify the action to be taken if \code{NA}s are found.
    The default action is for the procedure to fail.  An alternative is
    \code{na.omit}, which leads to rejection of cases with missing values on
    any required variable.  (NOTE: If given, this argument must be named.)
  }
  \item{method}{
    \code{"moment"} for standard estimators of the mean and variance,
    \code{"mle"} for MLEs, \code{"mve"} to use \code{\link{cov.mve}}, or
    \code{"t"} for robust estimates based on a \eqn{t} distribution.
  }
  \item{CV}{
    If true, returns results (classes and posterior probabilities) for
    leave-one-out cross-validation. Note that if the prior is estimated,
    the proportions in the whole dataset are used.
  }
  \item{nu}{
    degrees of freedom for \code{method = "t"}.
  }
  \item{\dots}{
    arguments passed to or from other methods.
}}
\value{
  If \code{CV = TRUE} the return value is a list with components
  \code{class}, the MAP classification (a factor), and \code{posterior},
  posterior probabilities for the classes.

  Otherwise it is an object of class \code{"lda"} containing the
  following components:
  \item{prior}{
    the prior probabilities used.
  }
  \item{means}{
    the group means.
  }
  \item{scaling}{
    a matrix which transforms observations to discriminant functions,
    normalized so that the within-groups covariance matrix is spherical.
  }
  \item{svd}{
    the singular values, which give the ratio of the between- and
    within-group standard deviations on the linear discriminant
    variables.  Their squares are the canonical F-statistics.
  }
  \item{N}{
    The number of observations used.
  }
  \item{call}{
    The (matched) function call.
  }
}
\details{
The function
tries hard to detect if the within-class covariance matrix is
singular. If any variable has within-group variance less than
\code{tol^2} it will stop and report the variable as constant.  This
could result from poor scaling of the problem, but is more
likely to result from constant variables.

Specifying the \code{prior} will affect the classification unless
over-ridden in \code{predict.lda}.  Unlike in most statistical packages, it
will also affect the rotation of the linear discriminants within their
space, as a weighted between-groups covariance matrix is used. Thus
the first few linear discriminants emphasize the differences between
groups with the weights given by the prior, which may differ from
their prevalence in the dataset.

If one or more groups are missing from the supplied data, they are dropped
with a warning, but the classifications produced are with respect to the
original set of levels.
}
\note{
This function may be called giving either a formula and
optional data frame, or a matrix and grouping factor as the first
two arguments.  All other arguments are optional, but \code{subset=} and
\code{na.action=}, if required, must be fully named.

If a formula is given as the principal argument the object may be
modified using \code{update()} in the usual way.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

  Ripley, B. D. (1996)
  \emph{Pattern Recognition and Neural Networks}. Cambridge University Press.
}
\seealso{
\code{\link{predict.lda}}, \code{\link{qda}}, \code{\link{predict.qda}}
}
\examples{
data(iris3)
Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]),
                   Sp = rep(c("s","c","v"), rep(50,3)))
train <- sample(1:150, 75)
table(Iris$Sp[train])
## your answer may differ
##  c  s  v
## 22 23 30
z <- lda(Sp ~ ., Iris, prior = c(1,1,1)/3, subset = train)
predict(z, Iris[-train, ])$class
##  [1] s s s s s s s s s s s s s s s s s s s s s s s s s s s c c c
## [31] c c c c c c c v c c c c v c c c c c c c c c c c c v v v v v
## [61] v v v v v v v v v v v v v v v
(z1 <- update(z, . ~ . - Petal.W.))
}
\keyword{multivariate}

\eof
% file MASS/ldahist.d
% copyright (C) 1998-9 W. N. Venables and B. D. Ripley
%
\name{ldahist}
\alias{ldahist}
\title{
Histograms or Density Plots of Multiple Groups
}
\description{
Plot histograms or density plots of data on a single Fisher linear
discriminant.
}
\usage{
ldahist(data, g, nbins = 25, h, x0 = - h/1000, breaks,
        xlim = range(breaks), ymax = 0, width,
        type = c("histogram", "density", "both"),
        sep = (type != "density"),
        col = 5, xlab = deparse(substitute(data)), bty = "n", \dots)
}
\arguments{
\item{data}{
vector of data. Missing values (\code{NA}s) are allowed and omitted.
}
\item{g}{
factor or vector giving groups, of the same length as \code{data}.
}
\item{nbins}{
Suggested number of bins to cover the whole range of the data.
}
\item{h}{
The bin width (takes precedence over \code{nbins}).
}
\item{x0}{
Shift for the bins - the breaks are at \code{x0 + h * (\dots, -1, 0, 1, \dots)}
}
\item{breaks}{
The set of breakpoints to be used. (Usually omitted, takes precedence
over \code{h} and \code{nbins}).
}
\item{xlim}{
The limits for the x-axis.
}
\item{ymax}{
The upper limit for the y-axis.
}
\item{width}{
Bandwidth for density estimates. If missing, the Sheather-Jones
selector is used for each group separately.
}
\item{type}{
Type of plot.
}
\item{sep}{
Whether there is a separate plot for each group, or one combined plot.
}
\item{col}{
The colour number for the bar fill.
}
\item{xlab}{
label for the plot x-axis. By default, this will be the name of \code{data}.
}
\item{bty}{
The box type for the plot - defaults to none.
}
\item{\dots}{
additional arguments to \code{polygon}.
}}
\section{Side Effects}{
Histogram and/or density plots are plotted on the current device.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{plot.lda}}.
}
\keyword{dplot}
\keyword{hplot}

\eof
% file MASS/leuk.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{leuk}
\alias{leuk}
\title{
Survival Times and White Blood Counts for Leukaemia Patients
}
\description{
A data frame of data from 33 leukaemia patients.
}
\usage{
data(leuk)
}
\format{
A data frame with columns:
\describe{
\item{\code{wbc}}{
white blood count
}
\item{\code{ag}}{
a test result, \code{"present"} or \code{"absent"}
}
\item{\code{time}}{
survival time in weeks
}}}
\details{
  Survival times are given for 33 patients who died from acute
  myelogenous leukaemia.  Also measured was the patient's white blood cell
  count at the time of diagnosis.  The patients were also factored into 2
  groups according to the presence or absence of a morphologic
  characteristic of white blood cells. Patients termed AG positive were
  identified by the presence of Auer rods and/or significant granulation
  of the leukaemic cells in the bone marrow at the time of diagnosis.
}
\source{
  Cox, D. R. and Oakes, D. (1984) \emph{Analysis of Survival Data}.
  Chapman & Hall, p. 9.

Taken from

Feigl, P. & Zelen, M. (1965) Estimation of exponential survival
probabilities with concomitant information.
\emph{Biometrics} \bold{21}, 826--838.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
library(survival)
plot(survfit(Surv(time) ~ ag, data = leuk), lty = 2:3, col = 2:3)

# now Cox models
leuk.cox <- coxph(Surv(time) ~ ag + log(wbc), leuk)
summary(leuk.cox)
}
\keyword{datasets}

\eof
% file MASS/lh.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{lh-MASS}
\alias{lh}
\title{
Luteinizing Hormone in Blood Samples
}
\description{
A regular time series giving the luteinizing hormone in blood
samples at 10-minute intervals from a human female, 48 samples.
}
\usage{
data(lh)
}
\source{
P.J. Diggle (1990)
\emph{Time Series: A Biostatistical Introduction.}
Oxford, table A.1, series 3
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/lm.gls.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{lm.gls}
\alias{lm.gls}
\title{
Fit Linear Models by Generalized Least Squares
}
\description{
Fit linear models by Generalized Least Squares
}
\usage{
lm.gls(formula, data, W, subset, na.action, inverse = FALSE, method = "qr",
       model = FALSE, x = FALSE, y = FALSE, contrasts = NULL, \dots)
}
\arguments{
\item{formula}{
a formula expression as for regression models, of the form
\code{response ~ predictors}.
See the documentation of \code{formula} for other details.
}
\item{data}{
an optional data frame in which to interpret the variables occurring
in \code{formula}.
}
\item{W}{
a weight matrix.
}
\item{subset}{
expression saying which subset of the rows of the data should  be used
in the fit. All observations are included by default.
}
\item{na.action}{
a function to filter missing data.
}
\item{inverse}{
logical: if true \code{W} specifies the inverse of the weight matrix: this
is appropriate if a variance matrix is used.
}
\item{method}{
method to be used by \code{lm.fit}.
}
\item{model}{
should the model frame be returned?
}
\item{x}{
should the design matrix be returned?
}
\item{y}{
should the response be returned?
}
\item{contrasts}{
a list of contrasts to be used for some or all of the factors
appearing as variables in the model formula.
}
\item{\dots}{
additional arguments to \code{\link{lm.fit}}.
}}
\value{
An object of class \code{"lm"}, with additional class \code{"lm.gls"}
}
\details{
  The problem is transformed to uncorrelated form and passed to
  \code{\link{lm.fit}}.
}
\seealso{
  \code{\link[nlme]{gls}}, \code{\link{lm}}, \code{\link{lm.ridge}}
}
\keyword{models}

\eof
% file MASS/lm.ridge.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{lm.ridge}
\alias{lm.ridge}
\alias{plot.ridgelm}
\alias{print.ridgelm}
\alias{select}
\alias{select.ridgelm}
\title{
Ridge Regression
}
\description{
Fit a linear model by ridge regression.
}
\usage{
lm.ridge(formula, data, subset, na.action, lambda = 0, model = FALSE,
         x = FALSE, y = FALSE, contrasts = NULL, \dots)
}
\arguments{
\item{formula}{
a formula expression as for regression models, of the form
\code{response ~ predictors}.
See the documentation of \code{formula} for other details.
}
\item{data}{
an optional data frame in which to interpret the variables occurring
in \code{formula}.
}
\item{subset}{
expression saying which subset of the rows of the data should  be used
in the fit.  All observations are included by default.
}
\item{na.action}{
a function to filter missing data.
}
\item{lambda}{
A scalar or vector of ridge constants.
}
\item{model}{
should the model frame be returned?
}
\item{x}{
should the design matrix be returned?
}
\item{y}{
should the response be returned?
}
\item{contrasts}{
a list of contrasts to be used for some or all of the factors
appearing as variables in the model formula.
}
\item{\dots}{
additional arguments to \code{\link{lm.fit}}.
}}
\value{
A list with components

\item{coef}{
matrix of coefficients, one row for each value of \code{lambda}.
}
\item{scales}{
scalings used on the X matrix.
}
\item{Inter}{
was intercept included?
}
\item{lambda}{
vector of lambda values
}
\item{ym}{
mean of \code{y}
}
\item{xm}{
column means of \code{x} matrix
}
\item{GCV}{
vector of GCV values
}
\item{kHKB}{
HKB estimate of the ridge constant.
}
\item{kLW}{
L-W estimate of the ridge constant.
}}
\references{
Brown, P. J. (1994)
\emph{Measurement, Regression and Calibration.}
Oxford.
}
\seealso{
\code{\link{lm}}
}
\examples{
data(longley)
names(longley)[1] <- "y"
lm.ridge(y ~ ., longley)
plot(lm.ridge(y ~ ., longley,
              lambda = seq(0,0.1,0.001)))
select(lm.ridge(y ~ ., longley,
               lambda = seq(0,0.1,0.0001)))
# modified HKB estimator is 0.0042754
# modified L-W estimator is 0.032295
# smallest value of GCV  at 0.0028
}
\keyword{models}

\eof
% file MASS/loglm.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{loglm}
\alias{loglm}
% \alias{anova.loglm}
% \alias{print.anova.loglm}
% \alias{coef.loglm}
% \alias{extractAIC.loglm}
% \alias{fitted.loglm}
% \alias{print.loglm}
% \alias{residuals.loglm}
% \alias{update.loglm}
\title{
Fit Log-Linear Models by Iterative Proportional Scaling
}
\description{
This function provides a front-end to the standard function,
\code{loglin}, to allow log-linear models to be specified and fitted
in a manner similar to that of other fitting functions, such as
\code{glm}.
}
\usage{
loglm(formula, data, subset, na.action, \dots)
}
\arguments{
  \item{formula}{
    A linear model formula specifying the log-linear model.

    If the left-hand side is empty, the \code{data} argument is required
    and must be a (complete) array of frequencies.  In this case the
    variables on the right-hand side may be the names of the
    \code{dimnames} attribute of the frequency array, or may be the
    positive integers: 1, 2, 3, \dots used as alternative names for the
    1st, 2nd, 3rd, \dots dimension (classifying factor).
    If the left-hand side is not empty it specifies a vector of
    frequencies.  In this case the data argument, if present, must be
    a data frame from which the left-hand side vector and the
    classifying factors on the right-hand side are (preferentially)
    obtained.  The usual abbreviation of a \code{.} to stand for \sQuote{all
    other variables in the data frame} is allowed.  Any non-factors
    on the right-hand side of the formula are coerced to factor.
  }
  \item{data}{
    Numeric array or data frame.  In the first case it specifies the
    array of frequencies; in the second it provides the data frame
    from which the variables occurring in the formula are
    preferentially obtained in the usual way.
    
    This argument may be the result of a call to \code{\link{xtabs}}.
  }
  \item{subset}{
    Specifies a subset of the rows in the data frame to be used.  The
    default is to take all rows.
  }
  \item{na.action}{
    Specifies a method for handling missing observations.  The
    default is to fail if missing values are present.
  }
  \item{\dots}{
    May supply other arguments to the function \code{\link{loglm1}}.
}}
\value{
  An object of class \code{"loglm"} conveying the results of the fitted
  log-linear model.  Methods exist for the generic functions \code{print},
  \code{summary}, \code{deviance}, \code{fitted}, \code{coef},
  \code{resid}, \code{anova} and \code{update}, which perform the expected
  tasks.  Only log-likelihood ratio tests are allowed using \code{anova}.

  The deviance is simply an alternative name for the log-likelihood
  ratio statistic for testing the current model within a saturated
  model, in accordance with standard usage in generalized linear
  models.
}
\details{
  If the left-hand side of the formula is empty the \code{data} argument
  supplies the frequency array and the right-hand side of the
  formula is used to construct the list of fixed faces as required
  by \code{loglin}.  Structural zeros may be specified by giving a
  \code{start} argument with those entries set to zero, as described in
  the help information for \code{loglin}.

  If the left-hand side is not empty, all variables on the
  right-hand side are regarded as classifying factors and an array
  of frequencies is constructed.  If some cells in the complete
  array are not specified they are treated as structural zeros.
  The right-hand side of the formula is again used to construct the
  list of faces on which the observed and fitted totals must agree,
  as required by \code{loglin}.  Hence terms such as
  \code{a:b}, \code{a*b} and \code{a/b} are all equivalent.
}
\section{Warning}{
  If structural zeros are present, the calculation of degrees of
  freedom may not be correct.  \code{loglin} itself takes no action to
  allow for structural zeros.  \code{loglm} deducts one degree of
  freedom for each structural zero, but cannot make allowance for
  gains in error degrees of freedom due to loss of dimension in the
  model space.  (This would require checking the rank of the
  model matrix, but since iterative proportional scaling methods
  are developed largely to avoid constructing the model matrix
  explicitly, the computation is at least difficult.)
  
  When structural zeros (or zero fitted values) are present the
  estimated coefficients will not be available due to infinite
  estimates.  The deviances will normally continue to be correct, though.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
  \code{\link{loglm1}}, \code{\link{loglin}}
}
\examples{
# The data frames  Cars93, minn38 and quine are available
# in the MASS package.

# Case 1: frequencies specified as an array.
sapply(minn38, function(x) length(levels(x)))
## hs phs fol sex f
##  3   4   7   2 0
minn38a <- array(0, c(3,4,7,2), lapply(minn38[, -5], levels))
minn38a[data.matrix(minn38[,-5])] <- minn38$f
fm <- loglm(~1 + 2 + 3 + 4, minn38a)  # numerals as names.
deviance(fm)
##[1] 3711.9
fm1 <- update(fm, .~.^2)
fm2 <- update(fm, .~.^3, print = TRUE)
## 5 iterations: deviation 0.0750732
anova(fm, fm1, fm2)
\dontrun{LR tests for hierarchical log-linear models

Model 1:
  ~  1 + 2 + 3 + 4
Model 2:
 .  ~  1 + 2 + 3 + 4 + 1:2 + 1:3 + 1:4 + 2:3 + 2:4 + 3:4
Model 3:
 .  ~  1 + 2 + 3 + 4 + 1:2 + 1:3 + 1:4 + 2:3 + 2:4 + 3:4 +
	1:2:3 + 1:2:4 + 1:3:4 + 2:3:4


          Deviance  df Delta(Dev) Delta(df) P(> Delta(Dev)
  Model 1 3711.915 155
  Model 2  220.043 108   3491.873        47        0.00000
  Model 3   47.745  36    172.298        72        0.00000
Saturated    0.000   0     47.745        36        0.09114

}
# Case 1. An array generated with xtabs.

loglm(~ Type + Origin, xtabs(~ Type + Origin, Cars93))
\dontrun{Call:
loglm(formula = ~Type + Origin, data = xtabs(~Type + Origin,
    Cars93))

Statistics:
                    X^2 df  P(> X^2)
Likelihood Ratio 18.362  5 0.0025255
         Pearson 14.080  5 0.0151101

}
# Case 2.  Frequencies given as a vector in a data frame
names(quine)
## [1] "Eth"  "Sex"  "Age"  "Lrn"  "Days"
fm <- loglm(Days ~ .^2, quine)
gm <- glm(Days ~ .^2, poisson, quine)  # check glm.
c(deviance(fm), deviance(gm))          # deviances agree
## [1] 1368.7 1368.7
c(fm$df, gm$df)                        # resid df do not!
c(fm$df, gm$df.residual)               # resid df do not!
## [1] 127 128
# The loglm residual degrees of freedom is wrong because of
# a non-detectable redundancy in the model matrix.
}
\keyword{category}
\keyword{models}

\eof
\name{loglm1}
\alias{loglm1}
\alias{loglm1.xtabs}
\alias{loglm1.data.frame}
\alias{loglm1.default}
\title{
Fit Log-Linear Models by Iterative Proportional Scaling -- Internal function
}
\description{
  \code{loglm1} is an internal function used by \code{\link{loglm}}.
  It is a generic function dispatching on the \code{data} argument.
}
\usage{
loglm1(formula, data, \dots)

\method{loglm1}{xtabs}(formula, data, \dots)

\method{loglm1}{data.frame}(formula, data, \dots)

\method{loglm1}{default}(formula, data, start = rep(1, length(data)), fitted = FALSE,
       keep.frequencies = fitted, param = TRUE, eps = 1/10,
       iter = 40, print = FALSE, \dots)
}
\arguments{
  \item{formula}{
    A linear model formula specifying the log-linear model.
    See \code{\link{loglm}} for its interpretation.}

  \item{data}{
    Numeric array or data frame.  In the first case it specifies the
    array of frequencies; in the second it provides the data frame
    from which the variables occurring in the formula are
    preferentially obtained in the usual way.

    This argument may also be the result of a call to \code{\link{xtabs}}.
  }
  \item{start, param, eps, iter, print}{Arguments passed to
    \code{\link{loglin}}.}
  \item{fitted}{logical: should the fitted values be returned?}
  \item{keep.frequencies}{
    If \code{TRUE} specifies that the (possibly constructed) array of
    frequencies is to be retained as part of the fitted model object.  The
    default action is to use the same value as that used for \code{fitted}.
  }
  \item{\dots}{arguments passed to the default method.}
}
\value{
  An object of class \code{"loglm"}. 
}
\seealso{
\code{\link{loglm}}, \code{\link{loglin}}
}
\keyword{internal}

\eof
% file MASS/logtrans.d
% copyright (C) 1994-2004 W. N. Venables and B. D. Ripley
%
\name{logtrans}
\alias{logtrans}
\alias{logtrans.formula}
\alias{logtrans.lm}
\alias{logtrans.default}
\title{
Estimate log Transformation Parameter
}
\description{
Find and optionally plot the marginal (profile) likelihood for alpha
for a transformation model of the form \code{log(y + alpha) ~ x1 + x2 + \dots}.
}
\usage{
logtrans(object, \dots)

\method{logtrans}{default}(object, \dots, alpha = seq(0.5, 6, by = 0.25) - min(y),
         plotit = TRUE, interp = (plotit && (m < 100)),
         xlab = "alpha", ylab = "log Likelihood")

\method{logtrans}{formula}(object, data, \dots)

\method{logtrans}{lm}(object, \dots)
}
\arguments{
\item{object}{
Fitted linear model object, or formula defining the untransformed
model that is \code{y ~ x1 + x2 + \dots}.  The function is generic.
}
\item{\dots}{
If \code{object} is a formula, this argument may specify a data frame
as for \code{lm}.
}
\item{alpha}{
Set of values for the transformation parameter, alpha.
}
\item{plotit}{
Should plotting be done?
}
\item{interp}{
Should the marginal log-likelihood be interpolated with a spline
approximation?   (Default is \code{TRUE} if plotting is to be done and
the number of real points is less than 100.)
}
\item{xlab}{
as for \code{plot}.
}
\item{ylab}{
as for \code{plot}.
}
\item{data}{
  optional \code{data} argument for \code{lm} fit.
}
}
\value{
List with components \code{x} (for alpha) and \code{y} (for the marginal
log-likelihood values).
}
\section{Side Effects}{
A plot of the marginal log-likelihood is produced, if requested,
together with an approximate mle and 95\% confidence interval.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{boxcox}}
}
\examples{
logtrans(Days ~ Age*Sex*Eth*Lrn, data = quine,
         alpha = seq(0.75, 6.5, len=20))
}
\keyword{regression}
\keyword{models}
\keyword{hplot}

\eof
% file lqs/man/lqs.Rd
% copyright (C) 1998-9 B. D. Ripley    (C) 2001-2 R Core Development Team
%
\name{lqs}
\alias{lqs}
\alias{lqs.formula}
\alias{lqs.default}
\alias{lmsreg}
\alias{ltsreg}
%\alias{print.lqs}
\title{
Resistant Regression
}
\description{
  Fit a regression to the \emph{good} points in the dataset, thereby
  achieving a regression estimator with a high breakdown point.
  \code{lmsreg} and \code{ltsreg} are compatibility wrappers.
}
\usage{
lqs(x, \dots)

\method{lqs}{formula}(formula, data, \dots,
    method = c("lts", "lqs", "lms", "S", "model.frame"),
    subset, na.action, model = TRUE,
    x.ret = FALSE, y.ret = FALSE, contrasts = NULL)

\method{lqs}{default}(x, y, intercept = TRUE, method = c("lts", "lqs", "lms", "S"),
    quantile, control = lqs.control(\dots), k0 = 1.548, seed, \dots)

lmsreg(\dots)
ltsreg(\dots)
}
\arguments{
  \item{formula}{a formula of the form \code{y ~ x1 + x2 + \dots}.}
  \item{data}{data frame from which variables specified in
    \code{formula} are preferentially to be taken.}
  \item{subset}{an index vector specifying the cases to be used in
    fitting. (NOTE: If given, this argument must be named exactly.)}
  \item{na.action}{function to specify the action to be taken if
    \code{NA}s are found.  The default action is for the procedure to
    fail.  Alternatives include \code{\link{na.omit}} and
    \code{\link{na.exclude}}, which lead to omission of
    cases with missing values on any required variable.  (NOTE: If
    given, this argument must be named exactly.)
  }
  \item{model, x.ret, y.ret}{logical. If \code{TRUE} the model frame,
    the model matrix and the response are returned, respectively.}
  \item{contrasts}{an optional list.  See the \code{contrasts.arg}
    of \code{\link{model.matrix.default}}.}
  \item{x}{a matrix or data frame containing the explanatory variables.}
  \item{y}{the response: a vector of length the number of rows of \code{x}.}
  \item{intercept}{should the model include an intercept?}
  \item{method}{
    the method to be used. \code{model.frame} returns the model frame: for the
    others see the \code{Details} section. Using \code{lmsreg} or
    \code{ltsreg} forces \code{"lms"} and \code{"lts"} respectively.
  }
  \item{quantile}{
    the quantile to be used: see \code{Details}. This is over-ridden if
    \code{method = "lms"}.
  }
  \item{control}{additional control items: see \code{Details}.}
  \item{k0}{the cutoff / tuning constant used for \eqn{\chi()}{chi()}
    and \eqn{\psi()}{psi()} functions when \code{method = "S"}, currently
    corresponding to Tukey's \dQuote{biweight}.}
  \item{seed}{
    the seed to be used for random sampling: see \code{.Random.seed}. The
    current value of \code{.Random.seed} will be preserved if it is set.
  }
  \item{\dots}{arguments to be passed to \code{lqs.default} or
    \code{lqs.control}, see \code{control} above and \code{Details}.}
}
\value{
  An object of class \code{"lqs"}.  This is a list with components
  \item{crit}{the value of the criterion for the best solution found, in
    the case of \code{method == "S"} before IWLS refinement.}
  \item{sing}{character. A message about the number of samples which
    resulted in singular fits.}
  \item{coefficients}{of the fitted linear model}
  \item{bestone}{the indices of those points fitted by the best sample
    found (prior to adjustment of the intercept, if requested).}
  \item{fitted.values}{the fitted values.}
  \item{residuals}{the residuals.}
  \item{scale}{estimate(s) of the scale of the error. The first is based
    on the fit criterion.  The second (not present for \code{method ==
      "S"}) is based on the variance of those residuals whose absolute
    value is less than 2.5 times the initial estimate.}
}
\details{
  Suppose there are \code{n} data points and \code{p} regressors,
  including any intercept.

  The first three methods minimize some function of the sorted squared
  residuals. For methods \code{"lqs"} and \code{"lms"} it is the
  \code{quantile} squared residual, and for \code{"lts"} it is the sum
  of the \code{quantile} smallest squared residuals. \code{"lqs"} and
  \code{"lms"} differ in the defaults for \code{quantile}, which are
  \code{floor((n+p+1)/2)} and \code{floor((n+1)/2)} respectively.
  For \code{"lts"} the default is \code{floor(n/2) + floor((p+1)/2)}.

  The \code{"S"} estimation method solves for the scale \code{s}
  such that the average of a function chi of the residuals divided
  by \code{s} is equal to a given constant.

  The \code{control} argument is a list with components
  \describe{
    \item{\code{psamp}:}{the size of each sample. Defaults to \code{p}.}
    \item{\code{nsamp}:}{the number of samples or \code{"best"} (the
      default) or \code{"exact"} or \code{"sample"}.
      If \code{"sample"} the number chosen is \code{min(5*p, 3000)},
      taken from Rousseeuw and Hubert (1997).
      If \code{"best"} exhaustive enumeration is done up to 5000 samples;
      if \code{"exact"} exhaustive enumeration will be attempted however
      many samples are needed.}
    \item{\code{adjust}:}{should the intercept be optimized for each
      sample?  Defaults to \code{TRUE}.}
    }
}
\note{
  There seems no reason other than historical to use the \code{lms} and
  \code{lqs} options.  LMS estimation is of low efficiency (converging
  at rate \eqn{n^{-1/3}}) whereas LTS has the same asymptotic efficiency
  as an M estimator with trimming at the quartiles (Marazzi, 1993, p.201).
  LQS and LTS have the same maximal breakdown value of
  \code{(floor((n-p)/2) + 1)/n} attained if
  \code{floor((n+p)/2) <= quantile <= floor((n+p+1)/2)}.
  The only drawback mentioned of LTS is greater computation, as a sort
  was thought to be required (Marazzi, 1993, p.201) but this is not
  true as a partial sort can be used (and is used in this implementation).

  Adjusting the intercept for each trial fit does need the residuals to
  be sorted, and may be significant extra computation if \code{n} is large
  and \code{p} small.

  Opinions differ over the choice of \code{psamp}.  Rousseeuw and Hubert
  (1997) only consider p; Marazzi (1993) recommends p+1 and suggests
  that more samples are better than adjustment for a given computational
  limit.

  The computations are exact for a model with just an intercept and
  adjustment, and for LQS for a model with an intercept plus one
  regressor and exhaustive search with adjustment. For all other cases
  the minimization is only known to be approximate.
}
\author{B. D. Ripley}
\references{
  P. J. Rousseeuw and A. M. Leroy (1987)
  \emph{Robust Regression and Outlier Detection.}
  Wiley.

  A. Marazzi (1993)
  \emph{Algorithms, Routines and S Functions for Robust Statistics.}
  Wadsworth and Brooks/Cole.

  P. Rousseeuw and M. Hubert (1997) Recent developments in PROGRESS. In
  \emph{L1-Statistical Procedures and Related Topics},
  ed Y. Dodge, IMS Lecture Notes volume \bold{31}, pp. 201--214.
}
\seealso{
\code{\link{predict.lqs}}
}
\examples{
data(stackloss)
set.seed(123)
lqs(stack.loss ~ ., data = stackloss)
lqs(stack.loss ~ ., data = stackloss, method = "S", nsamp = "exact")
}
\keyword{models}
\keyword{robust}

\eof
% file MASS/mammals.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{mammals}
\alias{mammals}
\title{
Brain and Body Weights for 62 Species of Land Mammals
}
\description{
A data frame with average brain and body weights for 62 species
of land mammals.
}
\usage{
data(mammals)
}
\format{
\describe{
\item{\code{body}}{
body weight in kg
}
\item{\code{brain}}{
brain weight in g
}
\item{\code{name}}{
Common name of species.
Rock hyrax-a =
\emph{Heterohyrax brucei.}
Rock hyrax-b =
\emph{Procavia habessinica.}
}}}
\source{
Weisberg, S. (1985)
\emph{Applied Linear Regression.}
2nd edition.
Wiley, pp. 144--5.

Selected from:
Allison, T. and Cicchetti, D. V. (1976)
Sleep in mammals: ecological and constitutional correlates.
\emph{Science}
\bold{194}, 732--734.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/mca.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{mca}
\alias{mca}
\alias{print.mca}
\title{
Multiple Correspondence Analysis
}
\description{
Computes a multiple correspondence analysis of a set of factors.
}
\usage{
mca(df, nf = 2, abbrev = FALSE)
}
\arguments{
\item{df}{
A data frame containing only factors
}
\item{nf}{
The number of dimensions for the MCA. Rarely 3 might be useful.
}
\item{abbrev}{
Should the vertex names be abbreviated?  By default these are of the
form \dQuote{factor.level} but if \code{abbrev = TRUE} they are just
\dQuote{level} which will suffice if the factors have distinct levels.
}}
\value{
An object of class \code{"mca"}, with components

\item{rs}{
The coordinates of the rows, in \code{nf} dimensions.
}
\item{cs}{
The coordinates of the column vertices, one for each level of each factor.
}
\item{fs}{
Weights for each row, used to interpolate additional factors in \code{predict.mca}.
}
\item{p}{
The number of factors
}
\item{d}{
The singular values for the \code{nf} dimensions.
}
\item{call}{
The matched call.
}}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{predict.mca}}, \code{\link{plot.mca}}, \code{\link{corresp}}
}
\examples{
farms.mca <- mca(farms, abbrev=TRUE)
farms.mca
plot(farms.mca)
}
\keyword{category}
\keyword{multivariate}

\eof
% file MASS/mcycle.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{mcycle}
\alias{mcycle}
\title{
Data from a Simulated Motorcycle Accident
}
\description{
A data frame giving a series of measurements of head acceleration
in a simulated motorcycle
accident, used to test crash helmets.
}
\usage{
data(mcycle)
}
\format{
\describe{
\item{\code{times}}{
in milliseconds after impact
}
\item{\code{accel}}{
in g
}}}
\source{
Silverman, B. W. (1985) Some aspects of the spline smoothing approach to
non-parametric curve fitting.
\emph{Journal of the Royal Statistical Society series B} \bold{47}, 1--52.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/menarche.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{menarche}
\alias{menarche}
\title{
Age of Menarche data
}
\description{
Proportions of female children at various ages during adolescence
who have reached menarche.
}
\usage{
data(menarche)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Age}}{
Average age of the group.  (The groups are reasonably age homogeneous.)
}
\item{\code{Total}}{
Total number of children in the group.
}
\item{\code{Menarche}}{
Number who have reached menarche.
}}}
\source{
Milicer, H. and Szczotka, F. (1966) Age at Menarche in Warsaw girls in
1965.
\emph{Human Biology}
\bold{38}, 199--203.

The data are also given in\cr
Aranda-Ordaz, F.J. (1981)
On two families of transformations to additivity for binary response data.
\emph{Biometrika}
\bold{68}, 357--363.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
mprob <- glm(cbind(Menarche, Total - Menarche) ~ Age,
             binomial(link = probit), data = menarche)
}
\keyword{datasets}

\eof
% file MASS/michelson.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{michelson}
\alias{michelson}
\title{
Michelson's Speed of Light Data
}
\description{
Measurements of the speed of light in air, made between 5th June
and 2nd July, 1879.  The data consists of five experiments, each
consisting of 20 consecutive runs.  The response is the speed
of light in km/s, less 299000.  The currently accepted value, on
this scale of measurement, is 734.5.
}
\usage{
data(michelson)
}
\format{
The data frame contains the following components:
\describe{
\item{\code{Expt}}{
The experiment number, from 1 to 5
}
\item{\code{Run}}{
The run number within each experiment
}
\item{\code{Speed}}{
Speed-of-light measurement
}}}
\source{
A.J. Weekes (1986)
\emph{A Genstat Primer.}
Edward Arnold.

S. M. Stigler (1977) Do robust estimators work with real data?
\emph{Annals of Statistics}
\bold{5}, 1055--1098.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/minn38.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{minn38}
\alias{minn38}
\title{
Minnesota High School Graduates of 1938
}
\description{
The Minnesota high school graduates of 1938 were classified according to
four factors, described below.  The \code{minn38} data frame has 168
rows and 5 columns.
}
\usage{
data(minn38)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{hs}}{
  High school rank: \code{"L"}, \code{"M"} and \code{"U"} for lower,
  middle and upper third.
}
\item{\code{phs}}{
Post high school status: Enrolled in college, (\code{"C"}), enrolled in
non-collegiate school, (\code{"N"}), employed full-time, (\code{"E"})
and other,
(\code{"O"}).
}
\item{\code{fol}}{
  Father's occupational level, (seven levels, \code{"F1"}, \code{"F2"},
  \dots, \code{"F7"}).
}
\item{\code{sex}}{
Sex factor, \code{"F"} or \code{"M"}.
}
\item{\code{f}}{
Frequency.
}}}
\source{
Quoted from
R. L. Plackett, (1974)
\emph{The Analysis of Categorical Data.}
London: Griffin

who quotes the data from

Hoyt, C. J., Krishnaiah, P. R.
and Torrance, E. P. (1959) Analysis of complex contingency tables,
\emph{J. Exp. Ed.}
\bold{27}, 187--194.
}
\keyword{datasets}

\eof
% file MASS/motors.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{motors}
\alias{motors}
\title{
Accelerated Life Testing of Motorettes
}
\description{
The \code{motors} data frame has 40 rows and 3 columns.  It describes an
accelerated life test at each of four temperatures of 10 motorettes,
and has rather discrete times.
}
\usage{
data(motors)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{temp}}{
the temperature (degrees C) of the test
}
\item{\code{time}}{
the time in hours to failure or censoring at 8064 hours (= 336 days).
}
\item{\code{cens}}{
an indicator variable for failure (rather than censoring)
}}}
\source{
Kalbfleisch, J. D. and Prentice, R. L. (1980)
\emph{The Statistical Analysis of Failure Time Data.}
New York: Wiley.

taken from

Nelson, W. D. and Hahn, G. J. (1972)
Linear estimation of a regression relationship from censored data.
Part 1 -- simple methods and their application.
\emph{Technometrics}, \bold{14}, 247--276.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
library(survival)
plot(survfit(Surv(time, cens) ~ factor(temp), motors), conf.int = FALSE)
# fit Weibull model
motor.wei <- survreg(Surv(time, cens) ~ temp, motors)
summary(motor.wei)
# and predict at 130C
unlist(predict(motor.wei, data.frame(temp=130), se.fit = TRUE))

motor.cox <- coxph(Surv(time, cens) ~ temp, motors)
summary(motor.cox)
# predict at temperature 200
plot(survfit(motor.cox, newdata = data.frame(temp=200),
     conf.type = "log-log"))
summary( survfit(motor.cox, newdata = data.frame(temp=130)) )
}
\keyword{datasets}

\eof
% file MASS/muscle.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{muscle}
\alias{muscle}
\title{
Effect of Calcium Chloride on Muscle Contraction in Rat Hearts
}
\description{
The purpose of this experiment was to assess the influence of
calcium in solution on the contraction of heart muscle in rats.
The left auricle of 21 rat hearts was isolated and on several
occasions a constant-length strip of tissue was electrically
stimulated and dipped into various concentrations of calcium
chloride solution, after which the shortening of the strip was
accurately measured as the response.
}
\usage{
data(muscle)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Strip}}{
which heart muscle strip was used?
}
\item{\code{Conc}}{
concentration of calcium chloride solution, in multiples of 2.2 mM.
}
\item{\code{Length}}{
the change in length (shortening) of the strip, (allegedly) in mm.
}}}
\source{
Linder, A., Chakravarti, I. M. and Vuagnat, P. (1964)
Fitting asymptotic regression curves with different asymptotes.
In
\emph{Contributions to Statistics. Presented to Professor P. C. Mahalanobis }
\emph{on the occasion of his 70th birthday,}
ed. C. R. Rao, pp. 221--228. Oxford: Pergamon Press.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth Edition. Springer.
}
\examples{
A <- model.matrix(~ Strip - 1, data=muscle)
rats.nls1 <- nls(log(Length) ~ cbind(A, rho^Conc),
   data = muscle, start = c(rho=0.1), algorithm="plinear")
B <- coef(rats.nls1)
B

st <- list(alpha = B[2:22], beta = B[23], rho = B[1])
(rats.nls2 <- nls(log(Length) ~ alpha[Strip] + beta*rho^Conc,
                  data = muscle, start = st))

attach(muscle)
Muscle <- expand.grid(Conc = sort(unique(Conc)),
                     Strip = levels(Strip))
Muscle$Yhat <- predict(rats.nls2, Muscle)
Muscle <- cbind(Muscle, logLength = rep(as.numeric(NA), 126))
ind <- match(paste(Strip, Conc),
            paste(Muscle$Strip, Muscle$Conc))
Muscle$logLength[ind] <- log(Length)
detach()

require(lattice)
xyplot(Yhat ~ Conc | Strip, Muscle, as.table = TRUE,
   ylim = range(c(Muscle$Yhat, Muscle$logLength), na.rm = TRUE),
   subscripts = TRUE, xlab = "Calcium Chloride concentration (mM)",
   ylab = "log(Length in mm)", panel =
   function(x, y, subscripts, ...) {
      lines(spline(x, y))
      panel.xyplot(x, Muscle$logLength[subscripts], ...)
   })
}
\keyword{datasets}

\eof
% file MASS/mvrnorm.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{mvrnorm}
\alias{mvrnorm}
\title{
Simulate from a Multivariate Normal Distribution
}
\description{
Produces one or more samples from the specified
multivariate normal distribution.
}
\usage{
mvrnorm(n = 1, mu, Sigma, tol = 1e-6, empirical = FALSE)
}
\arguments{
\item{n}{
the number of samples required.
}
\item{mu}{
a vector giving the means of the variables.
}
\item{Sigma}{
a positive-definite symmetric matrix specifying the covariance matrix
of the variables.
}
\item{tol}{
tolerance (relative to largest variance) for numerical lack of
positive-definiteness in \code{Sigma}.
}
\item{empirical}{
logical. If true, mu and Sigma specify the empirical not population
mean and covariance matrix.
}}
\value{
If \code{n = 1} a vector of the same length as \code{mu}, otherwise an
\code{n} by \code{length(mu)} matrix with one sample in each row.
}
\section{Side Effects}{
Causes creation of the dataset \code{.Random.seed} if it does
not already exist, otherwise its value is updated.
}
\details{
The matrix decomposition is done via \code{eigen}; although a Choleski
decomposition might be faster, the eigendecomposition is
stabler.
}
\references{
B. D. Ripley (1987)
\emph{Stochastic Simulation.}
Wiley. Page 98.
}
\seealso{
\code{\link{rnorm}}
}
\examples{
Sigma <- matrix(c(10,3,3,2),2,2)
Sigma
var(mvrnorm(n=1000, rep(0, 2), Sigma))
var(mvrnorm(n=1000, rep(0, 2), Sigma, empirical = TRUE))
}
\keyword{distribution}
\keyword{multivariate}

\eof
% file MASS/negative.binomial.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{negative.binomial}
\alias{negative.binomial}
\title{
Family function for Negative Binomial GLMs
}
\description{
Specifies the information required to fit a Negative Binomial generalized
linear model, with known \code{theta} parameter, using \code{glm()}.
}
\usage{
negative.binomial(theta = stop("theta must be specified"), link = "log")
}
\arguments{
\item{theta}{
The known value of the additional parameter, \code{theta}.
}
\item{link}{
  The link function.  Currently must be one of \code{log}, \code{sqrt}
  or \code{identity}.
}}
\value{
A list of functions and expressions needed by \code{glm()} to fit a Negative
Binomial generalized linear model.
}
\seealso{
  \code{\link{glm.nb}}, \code{\link{anova.negbin}},
  \code{\link{summary.negbin}}
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\examples{
# Fitting a Negative Binomial model to the quine data
#   with theta = 2 assumed known.
#
glm(Days ~ .^4, family = negative.binomial(2), data = quine)
}
\keyword{regression}
\keyword{models}

\eof
% file MASS/newcomb.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{newcomb}
\alias{newcomb}
\title{
Newcomb's Measurements of the Passage Time of Light
}
\description{
A numeric vector giving the Third Series of measurements of the
passage time of light recorded
by Newcomb in 1882. The given values divided by 1000 plus 24 give the
time in millionths of a second for light to traverse a known distance.
The \dQuote{true} value is now considered to be 33.02.
}
\usage{
data(newcomb)
}
\source{
S. M. Stigler (1973)
Simon Newcomb, Percy Daniell, and the history of robust estimation
1885--1920.
\emph{Journal of the American Statistical Association}
\bold{68}, 872--879


R. G. Staudte and S. J. Sheather (1990)
\emph{Robust Estimation and Testing.}
Wiley
}
\keyword{datasets}

\eof
\name{nlschools}
\alias{nlschools}
\title{
Eighth-Grade Pupils in the Netherlands
}
\description{
Snijders and Bosker (1999) use as a running example a study of 2287
eighth-grade pupils (aged about 11) in 132 classes in 131 schools in
the Netherlands.  Only the variables used in our examples are supplied.
}
\usage{
data(nlschools)
}
\format{
This data frame contains 2287 rows and the following columns:
\describe{
\item{\code{lang}}{
language test score
}
\item{\code{IQ}}{
Verbal IQ
}
\item{\code{class}}{
class ID
}
\item{\code{GS}}{
Class size: number of eighth-grade pupils recorded in the class (there
may be others: see \code{COMB}, and some may have been omitted with missing
values).
}
\item{\code{SES}}{
Social-economic status of pupil's family.
}
\item{\code{COMB}}{
were the pupils taught in a multi-grade class (\code{0/1})?  Classes which
contained pupils from grades 7 and 8 are coded \code{1}, but only
eighth-graders were tested.
}}}
\source{
Snijders, T. A. B. and Bosker, R. J. (1999)
\emph{Multilevel Analysis. An Introduction to Basic and Advanced Multilevel Modelling.}
London: Sage.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
library(nlme)
nl1 <- nlschools
attach(nl1)
classMeans <- tapply(IQ, class, mean)
nl1$IQave <- classMeans[as.character(class)]
nl1$IQ <- nl1$IQ - nl1$IQave
detach()
cen <- c("IQ", "IQave", "SES")
nl1[cen] <- scale(nl1[cen], center = TRUE, scale = FALSE)

nl.lme <- lme(lang ~ IQ*COMB + IQave + SES,
              random = ~ IQ | class, data = nl1)
summary(nl.lme)
}
\keyword{datasets}

\eof
% file MASS/nottem.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{nottem-MASS}
\alias{nottem}
\title{
Average Monthly Temperatures at Nottingham, 1920-1939
}
\description{
A regular time-series object containing average air temperatures at Nottingham
Castle in degrees F for 20 years.
}
\usage{
data(nottem)
}
\source{
Anderson, O.D. (1976)
\emph{Time Series Analysis and Forecasting: The Box-Jenkins approach.}
Butterworths. Series R.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/npk.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{npk}
\alias{npk}
\title{
Classical N, P, K Factorial Experiment
}
\description{
A classical N, P, K (nitrogen, phosphate, potassium) factorial
experiment on the growth of peas conducted on 6 blocks. Each half of a
fractional factorial design confounding the NPK interaction was used
on 3 of the blocks.
}
\usage{
data(npk)
}
\format{
The \code{npk} data frame has 24 rows and 5 columns:
\describe{
\item{\code{block}}{
which block (label 1 to 6).
}
\item{\code{N}}{
indicator (0/1) for the application of nitrogen.
}
\item{\code{P}}{
indicator (0/1) for the application of phosphate.
}
\item{\code{K}}{
indicator (0/1) for the application of potassium.
}
\item{\code{yield}}{
Yield of peas, in pounds/plot (the plots were (1/70) acre).
}}}
\source{
Imperial College, London, M.Sc. exercise sheet.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
npk.aov <- aov(yield ~ block + N*P*K, npk)
npk.aov
summary(npk.aov)
alias(npk.aov)
coef(npk.aov)
options(contrasts = c("contr.treatment", "contr.poly"))
npk.aov1 <- aov(yield ~ block + N + K, data = npk)
summary.lm(npk.aov1)
se.contrast(npk.aov1, list(N=="0", N=="1"), data = npk)
model.tables(npk.aov1, type = "means", se = TRUE)
}
\keyword{datasets}

\eof
% file MASS/npr1.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{npr1}
\alias{npr1}
\title{
US Naval Petroleum Reserve No. 1 data
}
\description{
Data on the locations, porosity and permeability (a measure of oil flow)
on 104 oil wells in the US Naval Petroleum Reserve No. 1 in California.
}
\usage{
data(npr1)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{x, y}}{
x and y coordinates, in miles from an arbitrary origin
}
\item{\code{perm}}{
permeability in milli-Darcies
}
\item{\code{por}}{
porosity (\%)
}}}
\source{
Maher, J.C., Carter, R.D. and Lantz, R.J. (1975)
Petroleum geology of Naval Petroleum Reserve No. 1, Elk Hills,
Kern County, California.
\emph{USGS Professional Paper} \bold{912}.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/oats.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{oats}
\alias{oats}
\title{
Data from an Oats Field Trial
}
\description{
The yield of oats from a split-plot field trial using three varieties and
four levels of manurial treatment.  The experiment was laid out in 6 blocks
of 3 main plots, each split into 4 sub-plots.  The varieties were applied
to the main plots and the manurial treatments to the sub-plots.
}
\usage{
data(oats)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{B}}{
Blocks, levels I, II, III, IV, V and VI
}
\item{\code{V}}{
Varieties, 3 levels.
}
\item{\code{N}}{
Nitrogen (manurial) treatment, levels  0.0cwt, 0.2cwt, 0.4cwt and 0.6cwt,
showing the application in cwt/acre.
}
\item{\code{Y}}{
Yields in 1/4lbs per sub-plot, each of area 1/80 acre.
}}}
\source{
Yates, F. (1935) Complex experiments,
\emph{Journal of the Royal Statistical Society Suppl.}
\bold{2}, 181--247.

Also given in
Yates, F. (1970)
\emph{Experimental design: Selected papers of Frank Yates, C.B.E, F.R.S.}
 London: Griffin.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
oats$Nf <- ordered(oats$N, levels = sort(levels(oats$N)))
oats.aov <- aov(Y ~ Nf*V + Error(B/V), data = oats, qr = TRUE)
summary(oats.aov)
summary(oats.aov, split = list(Nf=list(L=1, Dev=2:3)))
par(mfrow = c(1,2), pty = "s")
plot(fitted(oats.aov[[4]]), studres(oats.aov[[4]]))
abline(h = 0, lty = 2)
oats.pr <- proj(oats.aov)
qqnorm(oats.pr[[4]][,"Residuals"], ylab = "Stratum 4 residuals")
qqline(oats.pr[[4]][,"Residuals"])

par(mfrow = c(1,1), pty = "m")
oats.aov2 <- aov(Y ~ N + V + Error(B/V), data = oats, qr = TRUE)
model.tables(oats.aov2, type = "means", se = TRUE)
}
\keyword{datasets}

\eof
% file MASS/painters.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{painters}
\alias{painters}
\title{
The Painter's Data of de Piles
}
\description{
The subjective assessment, on a 0 to 20 integer scale, of 54
classical painters.  The painters were assessed on four characteristics:
composition, drawing, colour and expression.  The data is due to the
Eighteenth century art critic, de Piles.
}
\usage{
data(painters)
}
\format{
The row names of the data frame are the painters. The components are:
\describe{
\item{\code{Composition}}{
Composition score
}
\item{\code{Drawing}}{
Drawing score
}
\item{\code{Colour}}{
Colour score
}
\item{\code{Expression}}{
Expression score
}
\item{\code{School}}{
The school to which a painter belongs, as indicated by a factor level
code as follows:
\code{"A"}: Renaissance;
\code{"B"}: Mannerist;
\code{"C"}: Seicento;
\code{"D"}: Venetian;
\code{"E"}: Lombard;
\code{"F"}: Sixteenth Century;
\code{"G"}: Seventeenth Century;
\code{"H"}: French.
}}}
\source{
A. J. Weekes (1986)
\emph{A Genstat Primer.}
Edward Arnold.

M. Davenport and G. Studdert-Kennedy (1972) The statistical
analysis of aesthetic judgement: an exploration.
\emph{Applied Statistics}
\bold{21},  324--333.

I. T. Jolliffe (1986)
\emph{Principal Component Analysis.}
Springer.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/pairs.lda.d
% copyright (C) 1998-9 W. N. Venables and B. D. Ripley
%
\name{pairs.lda}
\alias{pairs.lda}
\title{
Produce Pairwise Scatterplots from an 'lda' Fit
}
\description{
Pairwise scatterplot of the data on the linear discriminants.
}
\usage{
\method{pairs}{lda}(x, labels = colnames(x), panel = panel.lda,
     dimen, abbrev = FALSE, \dots, cex=0.7, type = c("std", "trellis"))
}
\arguments{
\item{x}{
Object of class \code{"lda"}.
}
\item{labels}{
vector of character strings for labelling the variables.
}
\item{panel}{
panel function to plot the data in each panel.
}
\item{dimen}{
The number of linear discriminants to be used for the plot; if this
exceeds the number determined by \code{x} the smaller value is used.
}
\item{abbrev}{
whether the group labels are abbreviated on the plots. If \code{abbrev > 0}
this gives \code{minlength} in the call to \code{abbreviate}.
}
\item{\dots}{
additional arguments for \code{pairs.default}.
}
\item{cex}{
graphics parameter \code{cex} for labels on plots.
}
\item{type}{
type of plot. The default is in the style of \code{\link{pairs.default}}; the
style \code{"trellis"} uses the Trellis function \code{\link[lattice]{splom}}.
}}
\details{
This function is a method for the generic function
\code{pairs()} for class \code{"lda"}.
It can be invoked by calling \code{pairs(x)} for an
object \code{x} of the appropriate class, or directly by
calling \code{pairs.lda(x)} regardless of the
class of the object.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{pairs}}
}
\keyword{hplot}
\keyword{multivariate}

\eof
\name{parcoord}
\alias{parcoord}
\title{
Parallel Coordinates Plot
}
\description{
Parallel coordinates plot
}
\usage{
parcoord(x, col = 1, lty = 1, \dots)
}
\arguments{
\item{x}{
a matrix or data frame whose columns represent variables.
}
\item{col}{
A vector of colours, recycled as necessary for each observation.
}
\item{lty}{
A vector of line types, recycled as necessary for each observation.
}
\item{\dots}{
Further graphics parameters which are passed to \code{matplot}.
}}
\section{Side Effects}{
a parallel coordinates plot is drawn.
}
\references{
  Wegman, E. J. (1990) Hyperdimensional data analysis using parallel
  coordinates.
  \emph{Journal of the American Statistical Association}
  \bold{85}, 664--675.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
data(state)
parcoord(state.x77[, c(7, 4, 6, 2, 5, 3)])

data(iris3)
ir <- rbind(iris3[,,1], iris3[,,2], iris3[,,3])
parcoord(log(ir)[, c(3, 4, 2, 1)], col = 1 + (0:149)\%/\%50)
}
\keyword{hplot}

\eof
% file MASS/petrol.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{petrol}
\alias{petrol}
\title{
N. H. Prater's Petrol Refinery Data
}
\description{
The yield of a petroleum refining process with four covariates.
The crude oil appears to come from only 10 distinct samples.


These data were originally used by Prater (1956) to
build an estimation equation for the yield of the refining
process of crude oil to gasoline.
}
\usage{
data(petrol)
}
\format{
The variables are as follows
\describe{
\item{\code{No}}{
Crude oil sample identification label. (factor)
}
\item{\code{SG}}{
Specific gravity, degrees API.  (Constant within sample.)
}
\item{\code{VP}}{
Vapour pressure in psi. (Constant within sample.)
}
\item{\code{V10}}{
Volatility of crude; ASTM 10\% point. (Constant within sample.)
}
\item{\code{EP}}{
Desired volatility of gasoline. (The end point.  Varies within sample.)
}
\item{\code{Y}}{
Yield as a percentage of crude.
}}}
\source{
N. H. Prater (1956)
Estimate gasoline yields from crudes.
\emph{Petroleum Refiner}
\bold{35}, 236--238.

This dataset is also given in
D. J. Hand, F. Daly, K. McConway, D. Lunn and E. Ostrowski (eds) (1994)
\emph{A Handbook of Small Data Sets.}
Chapman & Hall.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
library(nlme)
Petrol <- petrol
Petrol[, 2:5] <- scale(as.matrix(Petrol[, 2:5]), scale = FALSE)
pet3.lme <- lme(Y ~ SG + VP + V10 + EP,
                random = ~ 1 | No, data = Petrol)
pet3.lme <- update(pet3.lme, method = "ML")
pet4.lme <- update(pet3.lme, fixed = Y ~ V10 + EP)
anova(pet4.lme, pet3.lme)
}
\keyword{datasets}

\eof
% file MASS/phones.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{Belgian-phones}
\alias{phones}
\title{
Belgium Phone Calls 1950-1973
}
\usage{
data(phones)
}
\description{
A list object with the annual numbers of telephone calls, in
Belgium.  The components are:
\describe{
\item{\code{year}}{
The last two digits of the year.
}
\item{\code{calls}}{
The number of telephone calls made (in millions of calls).
}}
}
\source{
P. J. Rousseeuw and A. M. Leroy (1987)
\emph{Robust Regression and Outlier Detection.}
Wiley.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/plot.lda.d
% copyright (C) 1998-9 W. N. Venables and B. D. Ripley
%
\name{plot.lda}
\alias{plot.lda}
\title{
Plot Method for Class 'lda'
}
\description{
Plots a set of data on one, two or more linear discriminants.
}
\usage{
\method{plot}{lda}(x, panel = panel.lda, \dots, cex = 0.7, dimen,
     abbrev = FALSE, xlab = "LD1", ylab = "LD2")
}
\arguments{
\item{x}{
An object of class \code{"lda"}.
}
\item{panel}{
the panel function used to plot the data.
}
\item{\dots}{
additional arguments to \code{pairs}, \code{ldahist} or \code{eqscplot}.
}
\item{cex}{
graphics parameter \code{cex} for labels on plots.
}
\item{dimen}{
The number of linear discriminants to be used for the plot; if this
exceeds the number determined by \code{x} the smaller value is used.
}
\item{abbrev}{
whether the group labels are abbreviated on the plots. If \code{abbrev > 0}
this gives \code{minlength} in the call to \code{abbreviate}.
}
\item{xlab}{
label for the x axis
}
\item{ylab}{
label for the y axis
}}
\details{
This function is a method for the generic function
\code{plot()} for class \code{"lda"}.
It can be invoked by calling \code{plot(x)} for an
object \code{x} of the appropriate class, or directly by
calling \code{plot.lda(x)} regardless of the
class of the object.

The behaviour is determined by the value of \code{dimen}. For
\code{dimen > 2}, a \code{pairs} plot is used. For \code{dimen = 2}, an
equiscaled scatter plot is drawn. For \code{dimen = 1}, a set of
histograms or density plots are drawn.  Use argument \code{type} to
match \code{"histogram"} or \code{"density"} or \code{"both"}.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{pairs.lda}}, \code{\link{ldahist}}, \code{\link{lda}}, \code{\link{predict.lda}}
}
\keyword{hplot}
\keyword{multivariate}

\eof
% file MASS/plot.mca.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{plot.mca}
\alias{plot.mca}
\title{
Plot Method for Objects of Class 'mca'
}
\description{
Plot a multiple correspondence analysis.
}
\usage{
\method{plot}{mca}(x, rows = TRUE, col, cex = par("cex"), \dots)
}
\arguments{
\item{x}{
An object of class \code{"mca"}.
}
\item{rows}{
Should the coordinates for the rows be plotted, or just the vertices
for the levels?
}
\item{col, cex}{
The colours and \code{cex} to be used for the row points and level vertices
respectively.
}
\item{\dots}{
Additional parameters to \code{plot}.
}}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{mca}}, \code{\link{predict.mca}}
}
\examples{
plot(mca(farms, abbrev = TRUE))
}
\keyword{hplot}
\keyword{multivariate}

\eof
% file MASS/polr.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{polr}
\alias{polr}
% \alias{extractAIC.polr}
% \alias{predict.polr}
% \alias{print.polr}
% \alias{summary.polr}
% \alias{print.summary.polr}
% \alias{vcov.polr}
% \alias{model.frame.polr}

\title{
  Proportional Odds Logistic Regression
}
\description{
  Fits a proportional odds logistic regression model to an ordered factor
  response.
}
\usage{
polr(formula, data, weights, start, \dots, subset, na.action,
     contrasts = NULL, Hess = FALSE, model = TRUE)
}
\arguments{
\item{formula}{
  a formula expression as for regression models, of the form
  \code{response ~ predictors}. The response should be a factor
  (preferably an ordered
  factor), which will be interpreted as an ordinal response, with levels
  ordered as in the factor.  A proportional odds model will be
  fitted. The model must have an intercept: attempts to remove one will
  lead to a warning and be ignored. An offset may be used. See the
  documentation of \code{formula} for other details.
}
\item{data}{
  an optional data frame in which to interpret the variables occurring
  in \code{formula}.
}
\item{weights}{
  optional case weights in fitting.  Defaults to 1.
}
\item{start}{
  initial values for the parameters.
}
\item{\dots}{
  additional arguments to be passed to \code{\link{optim}}, most often a
  \code{control} argument.
}
\item{subset}{
  expression saying which subset of the rows of the data should  be used
  in the fit. All observations are included by default.
}
\item{na.action}{
  a function to filter missing data.
}
\item{contrasts}{
  a list of contrasts to be used for some or all of
  the factors appearing as variables in the model formula.
}
\item{Hess}{
  logical for whether the Hessian (the observed information matrix)
  should be returned.
}
\item{model}{
  logical for whether the model matrix should be returned.
}
}
\value{
An object of class \code{"polr"}.

\item{coefficients}{
  the coefficients of the linear predictor.
}
\item{zeta}{
  the intercepts for the class boundaries.
}
\item{deviance}{
  the residual deviance.
}
\item{fitted.values}{
  a matrix, with a column for each level of the response.
}
\item{lev}{
  the names of the response levels.
}
\item{terms}{
  the \code{terms} structure describing the model.
}
\item{df.residual}{
  the number of residual degrees of freedom, calculated using the weights.
}
\item{edf}{
  the (effective) number of degrees of freedom used by the model
}
\item{n}{
  the (effective) number of observations,  calculated using the weights
}
\item{call}{
  the matched call.
}
\item{convergence}{
  the convergence code returned by \code{optim}.
}
\item{niter}{
  the number of function and gradient evaluations used by \code{optim}.
}
\item{Hessian}{
  (if \code{Hess} is true).
}
\item{model}{
  (if \code{model} is true).
}}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{optim}}, \code{\link{glm}}, \code{\link[nnet]{multinom}}.
}
\examples{
options(contrasts = c("contr.treatment", "contr.poly"))
house.plr <- polr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing)
house.plr
summary(house.plr)
predict(house.plr, housing, type = "p")
addterm(house.plr, ~.^2, test = "Chisq")
house.plr2 <- stepAIC(house.plr, ~.^2)
house.plr2$anova
}
\keyword{models}

\eof
\name{predict.glmmPQL}
\alias{predict.glmmPQL}
\title{Predict Method for glmmPQL Fits}
\description{
  Obtains predictions from a fitted generalized linear model
  with random effects.
}
\usage{
\method{predict}{glmmPQL}(object, newdata = NULL, type = c("link", "response"),
       level, na.action = na.pass, ...)
}
\arguments{
  \item{object}{a fitted object of class inheriting from \code{"glmmPQL"}.}
  \item{newdata}{optionally, a new data frame from which to make the
    predictions.}
  \item{type}{the type of prediction required.  The default is on the
    scale of the linear predictors; the alternative \code{"response"}
    is on the scale of the response variable.  Thus for a default
    binomial model the default predictions are of log-odds (probabilities
    on logit scale) and \code{type = "response"} gives the predicted
    probabilities.}
  \item{level}{an optional integer vector giving the level(s) of grouping
   to be used in obtaining the predictions. Level values increase from
   outermost to innermost grouping, with level zero corresponding to the
   population predictions. Defaults to the highest or innermost level of
   grouping.}  
  \item{na.action}{function determining what should be done with missing
    values in \code{newdata}.  The default is to predict \code{NA}.}
  \item{\dots}{further arguments passed to or from other methods.}
}
\value{
  If \code{level} is a single integer, a vector; otherwise a data frame.
}
\seealso{
  \code{\link{glmmPQL}}, \code{\link[nlme]{predict.lme}}.
}
\examples{
fit <- glmmPQL(y ~ trt + I(week > 2), random = ~1 |  ID,
               family = binomial, data = bacteria)
predict(fit, bacteria, level = 0, type="response")
predict(fit, bacteria, level = 1, type="response")
}
\keyword{models}

\eof
% file MASS/predict.lda.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{predict.lda}
\alias{predict.lda}
\title{
Classify Multivariate Observations by Linear Discrimination
}
\description{
Classify multivariate observations in conjunction with \code{lda}, and also
project data onto the linear discriminants.
}
\usage{
\method{predict}{lda}(object, newdata, prior = object$prior, dimen,
        method = c("plug-in", "predictive", "debiased"), \dots)
}
\arguments{
\item{object}{
object  of class \code{"lda"}
}
\item{newdata}{
data frame of cases to be classified or, if object
has a formula, a data frame with columns of the same names as the
variables used. A vector will be interpreted
as a row vector.  If \code{newdata} is missing, an attempt will be
made to retrieve the data used to fit the \code{lda} object.
}
\item{prior}{
The prior probabilities of the classes, by default the proportions in the
training set or what was set in the call to \code{lda}.
}
\item{dimen}{
the dimension of the space to be used. If this is less than \code{min(p, ng-1)},
only the first \code{dimen} discriminant components are used (except for
\code{method="predictive"}), and only those dimensions are returned in \code{x}.
}
\item{method}{
This determines how the parameter estimation is handled. With \code{"plug-in"}
(the default) the usual unbiased parameter estimates are used and
assumed to be correct. With \code{"debiased"} an unbiased estimator of
the log posterior probabilities is used, and with \code{"predictive"} the
parameter estimates are integrated out using a vague prior.
}
\item{\dots}{
arguments passed from or to other methods
}}
\value{
a list with components

\item{class}{
The MAP classification (a factor)
}
\item{posterior}{
posterior probabilities for the classes
}
\item{x}{
the scores of test cases on up to \code{dimen} discriminant variables
}}
\details{
This function is a method for the generic function \code{predict()} for
class \code{"lda"}.  It can be invoked by calling \code{predict(x)} for
an object \code{x} of the appropriate class, or directly by calling
\code{predict.lda(x)} regardless of the class of the object.

Missing values in \code{newdata} are handled by returning \code{NA} if the
linear discriminants cannot be evaluated. If \code{newdata} is omitted and
the \code{na.action} of the fit omitted cases, these will be omitted on the
prediction.

This version centres the linear discriminants so that the
weighted mean (weighted by \code{prior}) of the group centroids is at
the origin.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

  Ripley, B. D. (1996)
  \emph{Pattern Recognition and Neural Networks}. Cambridge University Press.
}
\seealso{
\code{\link{lda}}, \code{\link{qda}}, \code{\link{predict.qda}}
}
\examples{
data(iris3)
tr <- sample(1:50, 25)
train <- rbind(iris3[tr,,1], iris3[tr,,2], iris3[tr,,3])
test <- rbind(iris3[-tr,,1], iris3[-tr,,2], iris3[-tr,,3])
cl <- factor(c(rep("s",25), rep("c",25), rep("v",25)))
z <- lda(train, cl)
predict(z, test)$class
}
\keyword{multivariate}

\eof
% file lqs/man//predict.lqs.Rd
% copyright (C) 1999 B. D. Ripley
%
\name{predict.lqs}
\alias{predict.lqs}
\title{
  Predict from an lqs Fit
}
\description{
  Predict from a resistant regression fitted by \code{lqs}.
}
\usage{
\method{predict}{lqs}(object, newdata, na.action = na.pass, \dots)
}
\arguments{
  \item{object}{
    object inheriting from class \code{"lqs"}
  }
  \item{newdata}{
    matrix or data frame of cases to be predicted or, if object
    has a formula, a data frame with columns of the same names as the
    variables used. A vector will be interpreted
    as a row vector.  If \code{newdata} is missing, an attempt will be
    made to retrieve the data used to fit the \code{lqs} object.
  }
  \item{na.action}{function determining what should be done with missing
    values in \code{newdata}.  The default is to predict \code{NA}.}
  \item{\dots}{arguments to be passed from or to other methods.}
}
\value{
  A vector of predictions.
}
\details{
  This function is a method for the generic function
  \code{predict()} for class \code{"lqs"}.
  It can be invoked by calling \code{predict(x)} for an
  object \code{x} of the appropriate class, or directly by
  calling \code{predict.lqs(x)} regardless of the
  class of the object.

  Missing values in \code{newdata} are handled by returning \code{NA} if the
  linear fit cannot be evaluated. If \code{newdata} is omitted and
  the \code{na.action} of the fit omitted cases, these will be omitted on the
  prediction.
}
\author{B.D. Ripley}
\seealso{
\code{\link{lqs}}
}
\examples{
data(stackloss)
set.seed(123)
fm <- lqs(stack.loss ~ ., data = stackloss, method = "S", nsamp = "exact")
predict(fm, stackloss)
}
\keyword{models}

\eof
% file MASS/predict.mca.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{predict.mca}
\alias{predict.mca}
\title{
Predict Method for Class 'mca'
}
\description{
Used to compute coordinates for additional rows or additional factors
in a multiple correspondence analysis.
}
\usage{
\method{predict}{mca}(object, newdata, type = c("row", "factor"), \dots)
}
\arguments{
\item{object}{
An object of class \code{"mca"}, usually the result of a call to \code{mca}.
}
\item{newdata}{
A data frame containing \emph{either} additional rows of the factors used to
fit \code{object} \emph{or} additional factors for the cases used in the
original fit.
}
\item{type}{
Are predictions required for further rows or for new factors?
}
\item{\dots}{
Additional arguments from \code{predict}: unused.
}}
\value{
If \code{type = "row"}, the coordinates for the additional rows.


If \code{type = "factor"}, the coordinates of the column vertices for the
levels of the new factors.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{mca}}, \code{\link{plot.mca}}
}
\keyword{category}
\keyword{multivariate}

\eof
% file MASS/predict.qda.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{predict.qda}
\alias{predict.qda}
\title{
Classify from Quadratic Discriminant Analysis
}
\description{
Classify multivariate observations in conjunction with \code{qda}
}
\usage{
\method{predict}{qda}(object, newdata, prior = object$prior,
        method = c("plug-in", "predictive", "debiased", "looCV"), \dots)
}
\arguments{
\item{object}{
object of class \code{"qda"}
}
\item{newdata}{
data frame of cases to be classified or, if object
has a formula, a data frame with columns of the same names as the
variables used. A vector will be interpreted
as a row vector.  If \code{newdata} is missing, an attempt will be
made to retrieve the data used to fit the \code{qda} object.
}
\item{prior}{
The prior probabilities of the classes, by default the proportions in the
training set or what was set in the call to \code{qda}.
}
\item{method}{
This determines how the parameter estimation is handled. With \code{"plug-in"}
(the default) the usual unbiased parameter estimates are used and
assumed to be correct. With \code{"debiased"} an unbiased estimator of
the log posterior probabilities is used, and with \code{"predictive"} the
parameter estimates are integrated out using a vague prior.  With
\code{"looCV"} the leave-one-out cross-validation fits to the original
dataset are computed and returned.
}
\item{\dots}{
arguments passed from or to other methods
}}
\value{
a list with components

\item{class}{
The MAP classification (a factor)
}
\item{posterior}{
posterior probabilities for the classes
}}
\details{
This function is a method for the generic function
\code{predict()} for class \code{"qda"}.
It can be invoked by calling \code{predict(x)} for an
object \code{x} of the appropriate class, or directly by
calling \code{predict.qda(x)} regardless of the
class of the object.

Missing values in \code{newdata} are handled by returning \code{NA} if the
quadratic discriminants cannot be evaluated. If \code{newdata} is omitted and
the \code{na.action} of the fit omitted cases, these will be omitted on the
prediction.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

  Ripley, B. D. (1996)
  \emph{Pattern Recognition and Neural Networks}. Cambridge University Press.
}
\seealso{
\code{\link{qda}}, \code{\link{lda}}, \code{\link{predict.lda}}
}
\examples{
data(iris3)
tr <- sample(1:50, 25)
train <- rbind(iris3[tr,,1], iris3[tr,,2], iris3[tr,,3])
test <- rbind(iris3[-tr,,1], iris3[-tr,,2], iris3[-tr,,3])
cl <- factor(c(rep("s",25), rep("c",25), rep("v",25)))
zq <- qda(train, cl)
predict(zq, test)$class
}
\keyword{multivariate}

\eof
% file MASS/qda.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{qda}
\alias{qda}
\alias{qda.data.frame}
\alias{qda.default}
\alias{qda.formula}
\alias{qda.matrix}
\alias{model.frame.qda}
\alias{print.qda}
\title{
Quadratic Discriminant Analysis
}
\description{
Quadratic discriminant analysis.
}
\usage{
qda(x, \dots)

\method{qda}{formula}(formula, data, \dots, subset, na.action)

\method{qda}{default}(x, grouping, prior = proportions,
    method, CV = FALSE, nu, \dots)

\method{qda}{data.frame}(x, \dots)

\method{qda}{matrix}(x, grouping, \dots, subset, na.action)
}
\arguments{
\item{formula}{
A formula of the form \code{groups ~ x1 + x2 + \dots}.  That is, the
response is the grouping factor and the right hand side specifies
the (non-factor) discriminators.
}
\item{data}{
Data frame from which variables specified in \code{formula} are
preferentially to be taken.
}
\item{x}{
(required if no formula is given as the principal argument.)
a matrix or data frame or Matrix containing the explanatory variables.
}
\item{grouping}{
(required if no formula principal argument is given.)
a factor specifying the class for each observation.
}
\item{prior}{
the prior probabilities of class membership.  If unspecified, the class
proportions for the training set are used.  If specified, the
probabilities should be specified in the order of the factor levels.
}
\item{subset}{
An index vector specifying the cases to be used in the training
sample.  (NOTE: If given, this argument must be named.)
}
\item{na.action}{
A function to specify the action to be taken if \code{NA}s are found.
The default action is for the procedure to fail.  An alternative is
\code{na.omit}, which leads to rejection of cases with missing values on
any required variable.  (NOTE: If given, this argument must be named.)
}
\item{method}{
\code{"moment"} for standard estimators of the mean and variance,
\code{"mle"} for MLEs, \code{"mve"} to use \code{cov.mve}, or \code{"t"} for robust
estimates based on a t distribution.
}
\item{CV}{
If true, returns results (classes and posterior probabilities) for
leave-one-out cross-validation. Note that if the prior is estimated,
the proportions in the whole dataset are used.
}
\item{nu}{
degrees of freedom for \code{method = "t"}.
}
\item{\dots}{
arguments passed to or from other methods.
}}
\value{
an object of class \code{"qda"} containing the following components:

\item{prior}{
the prior probabilities used.
}
\item{means}{
the group means.
}
\item{scaling}{
for each group \code{i}, \code{scaling[,,i]} is an array which transforms observations
so that within-groups covariance matrix is spherical.
}
\item{ldet}{
a vector of half log determinants of the dispersion matrix.
}
\item{lev}{
the levels of the grouping factor.
}
\item{terms}{
(if formula is a formula)
an object of mode expression and class term summarizing
the  formula.
}
\item{call}{
the (matched) function call.
}

unless \code{CV=TRUE}, when the return value is a list with components:
\item{class}{
The MAP classification (a factor)
}
\item{posterior}{
posterior probabilities for the classes
}}
\details{
Uses a QR decomposition which will give an error message if the
within-group variance is singular for any group.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.

  Ripley, B. D. (1996)
  \emph{Pattern Recognition and Neural Networks}. Cambridge University Press.
}
\seealso{
\code{\link{predict.qda}}, \code{\link{lda}}
}
\examples{
data(iris3)
tr <- sample(1:50, 25)
train <- rbind(iris3[tr,,1], iris3[tr,,2], iris3[tr,,3])
test <- rbind(iris3[-tr,,1], iris3[-tr,,2], iris3[-tr,,3])
cl <- factor(c(rep("s",25), rep("c",25), rep("v",25)))
z <- qda(train, cl)
predict(z,test)$class
}
\keyword{multivariate}

\eof
% file MASS/quine.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{quine}
\alias{quine}
\title{
Absenteeism from School in Rural New South Wales
}
\description{
The \code{quine} data frame has 146 rows and 5 columns.
Children from Walgett, New South Wales, Australia, were classified by
Culture, Age, Sex and Learner status and the number of days absent from
school in a particular school year was recorded.
}
\usage{
data(quine)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Eth}}{
Ethnic background: Aboriginal or Not, (\code{"A"} or \code{"N"}).
}
\item{\code{Sex}}{
Sex factor: (\code{"F"} or \code{"M"}).
}
\item{\code{Age}}{
Age group: Primary (\code{"F0"}), or forms \code{"F1"}, \code{"F2"} or \code{"F3"}.
}
\item{\code{Lrn}}{
Learner status factor: Average or Slow learner, (\code{"AL"} or \code{"SL"}).
}
\item{\code{Days}}{
Days absent from school in the year.
}}}
\source{
S. Quine, quoted in Aitkin, M. (1978) The analysis of unbalanced cross
classifications (with discussion).
\emph{Journal of the Royal Statistical Society series A}
\bold{141}, 195--223.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/rational.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{rational}
\alias{rational}
\alias{.rat}
\title{
Rational Approximation
}
\description{
Find rational approximations to the components of a real numeric
object using a standard continued fraction method.
}
\usage{
rational(x, cycles = 10, max.denominator = 2000, \dots)
}
\synopsis{
rational(x, ...)
}
\arguments{
\item{x}{
Any object of mode numeric. Missing values are now allowed.
}
\item{cycles}{
The maximum number of steps to be used in the continued fraction
approximation process.
}
\item{max.denominator}{
An early termination criterion.  If any partial denominator
exceeds \code{max.denominator} the continued fraction stops at that point.
}
\item{\dots}{
arguments passed to or from other methods.
}}
\value{
A numeric object with the same attributes as \code{x} but with entries
rational approximations to the values.  This effectively rounds
relative to the size of the object and replaces very small
entries by zero.
}
\details{
Each component is first expanded in a continued fraction of the
form


\code{x = floor(x) + 1/(p1 + 1/(p2 + \dots))}


where \code{p1}, \code{p2}, \dots are positive integers, terminating either
at \code{cycles} terms or when a \code{pj > max.denominator}.  The
continued fraction is then re-arranged to retrieve the numerator
and denominator as integers and the ratio returned as the value.
}
\seealso{
\code{\link{fractions}}
}
\examples{
X <- matrix(runif(25), 5, 5)
solve(X, X/5)
##             [,1]        [,2]       [,3]        [,4]        [,5]
## [1,]  2.0000e-01  3.7199e-17 1.2214e-16  5.7887e-17 -8.7841e-17
## [2,] -1.1473e-16  2.0000e-01 7.0955e-17  2.0300e-17 -1.0566e-16
## [3,]  2.7975e-16  1.3653e-17 2.0000e-01 -1.3397e-16  1.5577e-16
## [4,] -2.9196e-16  2.0412e-17 1.5618e-16  2.0000e-01 -2.1921e-16
## [5,] -3.6476e-17 -3.6430e-17 3.6432e-17  4.7690e-17  2.0000e-01

## rational(solve(X, X/5))
##      [,1] [,2] [,3] [,4] [,5]
## [1,]  0.2  0.0  0.0  0.0  0.0
## [2,]  0.0  0.2  0.0  0.0  0.0
## [3,]  0.0  0.0  0.2  0.0  0.0
## [4,]  0.0  0.0  0.0  0.2  0.0
## [5,]  0.0  0.0  0.0  0.0  0.2
}
\keyword{math}

\eof
% file MASS/renumerate.d
% copyright (C) 2000 W. N. Venables and B. D. Ripley
%
\name{renumerate}
\alias{renumerate}
\alias{renumerate.formula}
\title{
Convert a Formula Transformed by 'denumerate'
}
\description{
\code{\link{denumerate}} converts a formula written using the conventions of
\code{\link{loglm}} into one that \code{\link{terms}} is able to process.  \code{renumerate}
converts it back again to a form like the original.
}
\usage{
renumerate(x)
}
\arguments{
\item{x}{
A formula, normally as modified by \code{\link{denumerate}}.
}}
\value{
A formula where all variables with names of the form \code{.vn}, where
\code{n} is an integer, are converted to numbers, \code{n}, as allowed by the
formula conventions of \code{\link{loglm}}.
}
\details{
This is an inverse function to \code{\link{denumerate}}.  It is only needed
since \code{\link{terms}} returns an expanded form of the original formula
where the non-marginal terms are exposed.  This expanded form is
mapped back into a form corresponding to the one that the user
originally supplied.
}
\seealso{
\code{\link{denumerate}}
}
\examples{
denumerate(~(1+2+3)^3 + a/b)
## ~ (.v1 + .v2 + .v3)^3 + a/b
renumerate(.Last.value)
## ~ (1 + 2 + 3)^3 + a/b
}
\keyword{models}

\eof
% file MASS/rlm.d
% copyright (C) 1998,9, 2004 B. D. Ripley
%
\name{rlm}
\alias{rlm}
\alias{rlm.default}
\alias{rlm.formula}
\alias{print.rlm}
\alias{predict.rlm}
\alias{psi.bisquare}
\alias{psi.hampel}
\alias{psi.huber}
\title{
Robust Fitting of Linear Models
}
\description{
Fit a linear model by robust regression using an M estimator.
}
\usage{
rlm(x, \dots)

\method{rlm}{formula}(formula, data, weights, \dots, subset, na.action,
    method = c("M", "MM", "model.frame"),
    wt.method = c("case", "inv.var"),
    model = TRUE, x.ret = TRUE, y.ret = FALSE, contrasts = NULL)

\method{rlm}{default}(x, y, weights, \dots, w = rep(1, nrow(x)),
    init, psi = psi.huber, scale.est, k2 = 1.345,
    method = c("M", "MM"), wt.method = c("inv.var", "case"),
    maxit = 20, acc = 1e-4, test.vec = "resid")
}
\arguments{
\item{formula}{
a formula of the form \code{y ~ x1 + x2 + \dots}.
}
\item{data}{
data frame from which variables specified in \code{formula} are
preferentially to be taken.
}
\item{weights}{
prior weights for each case.
}
\item{subset}{
An index vector specifying the cases to be used in
fitting.
}
\item{na.action}{
A function to specify the action to be taken if \code{NA}s are found. The
default action is for the procedure to fail. An alternative is
\code{na.omit}, which leads to omission of cases with missing values on any
required variable.
}
\item{x}{
a matrix or data frame containing the explanatory variables.
}
\item{y}{
the response: a vector of length the number of rows of \code{x}.
}
\item{method}{
currently either M-estimation, MM-estimation or (for the formula method
only) find the model frame. MM-estimation is M-estimation with Tukey's
biweight initialized by a specific S-estimator. See the details section.
}
\item{wt.method}{
are the weights case weights (giving the relative importance of case,
so a weight of 2 means there are two of these) or the inverse of the
variances, so a weight of two means this error is half as variable?
}
\item{model}{
should the model frame be returned in the object?
}
\item{x.ret}{
should the model matrix be returned in the object?
}
\item{y.ret}{
should the response be returned in the object?
}
\item{contrasts}{
optional contrast specifications: see \code{lm}.
}
\item{w}{
(optional) initial down-weighting for each case.
}
\item{init}{
(optional) initial values for the coefficients OR a method to find
initial values OR the result of a fit with a \code{coef} component. Known
methods are \code{"ls"} (the default) for an initial least-squares fit
using weights \code{w*weights}, and \code{"lts"} for an unweighted least-trimmed
squares fit with 200 samples.
}
\item{psi}{
the psi function is specified by this argument. It must give
(possibly by name) a function \code{g(x, \dots, deriv)} that for \code{deriv=0}
returns psi(x)/x and for \code{deriv=1} returns psi'(x). Tuning constants
will be passed in via \code{\dots}.
}
\item{scale.est}{
method of scale estimation: re-scaled MAD of the residuals or Huber's
proposal 2.
}
\item{k2}{
tuning constant used for Huber proposal 2 scale estimation.
}
\item{maxit}{
the limit on the number of IWLS iterations.
}
\item{acc}{
the accuracy for the stopping criterion.
}
\item{test.vec}{
the stopping criterion is based on changes in this vector.
}
\item{\dots}{
additional arguments to be passed to \code{rlm.default} or to the \code{psi}
function.
}}
\value{
An object of class \code{"rlm"} inheriting from \code{"lm"}.
The additional components not in an \code{lm} object are

\item{s}{
the robust scale estimate used
}
\item{w}{
the weights used in the IWLS process
}
\item{psi}{
the psi function with parameters substituted
}
\item{conv}{
the convergence criteria at each iteration
}
\item{converged}{
did the IWLS converge?
}}
\details{
  Fitting is done by iterated re-weighted least squares (IWLS).


  Psi functions are supplied for the Huber, Hampel and Tukey bisquare
  proposals as \code{psi.huber}, \code{psi.hampel} and
  \code{psi.bisquare}. Huber's corresponds to a convex optimization
  problem and gives a unique solution (up to collinearity). The other
  two will have multiple local minima, and a good starting point is
  desirable.


  Selecting \code{method = "MM"} selects a specific set of options which
  ensures that the estimator has a high breakdown point. The initial set
  of coefficients and the final scale are selected by an S-estimator
  with \code{k0 = 1.548}; this gives (for \eqn{n \gg p}{n >> p}) breakdown point 0.5.
  The final estimator is an M-estimator with Tukey's biweight and fixed
  scale that will inherit this breakdown point provided \code{c > k0};
  this is true for the default value of \code{c} that corresponds to
  95\% relative efficiency at the normal.
}
\references{
P. J. Huber (1981)
\emph{Robust Statistics}.
Wiley.

F. R. Hampel, E. M. Ronchetti, P. J. Rousseeuw and W. A. Stahel (1986)
\emph{Robust Statistics: The Approach based on Influence Functions}.
Wiley.

A. Marazzi (1993)
\emph{Algorithms, Routines and S Functions for Robust Statistics}.
Wadsworth & Brooks/Cole.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{lm}}, \code{\link{lqs}}.
}
\examples{
data(stackloss)
summary(rlm(stack.loss ~ ., stackloss))
rlm(stack.loss ~ ., stackloss, psi = psi.hampel, init = "lts")
rlm(stack.loss ~ ., stackloss, psi = psi.bisquare)
}
\keyword{models}
\keyword{robust}

\eof
% file MASS/rms.curv.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{rms.curv}
\alias{rms.curv}
\alias{print.rms.curv}
\title{
Relative Curvature Measures for Non-Linear Regression
}
\description{
Calculates the root mean square parameter effects and intrinsic relative
curvatures, \eqn{c^\theta} and \eqn{c^\iota}, for a fitted nonlinear regression, as
defined in Bates & Watts, section 7.3, p. 253 et seq.
}
\usage{
rms.curv(obj)
}
\arguments{
\item{obj}{
Fitted model object of class \code{"nls"}.  The model must be fitted using the
default algorithm.
}
}
\value{
A list of class \code{rms.curv} with components \code{pc} and \code{ic}
for parameter effects and intrinsic relative curvatures multiplied by
sqrt(F), \code{ct} and \code{ci} for \eqn{c^\theta} and \eqn{c^\iota} (unmultiplied),
and \code{C} the C-array as used in section 7.3.1 of Bates & Watts.
}
\details{
The method of section 7.3.1 of Bates & Watts is implemented.  The
function \code{deriv3} should be used to generate a model function with first
derivative (gradient) matrix and second derivative (Hessian) array
attributes.  This function should then be used to fit the nonlinear
regression model.

A print method, \code{print.rms.curv}, prints the \code{pc} and
\code{ic} components only, suitably annotated.

If either \code{pc} or \code{ic} exceeds some threshold (0.3 has been
suggested) the curvature is unacceptably high for the planar assumption.
}
\references{
Bates, D. M. and Watts, D. G. (1988)
\emph{Nonlinear Regression Analysis and its Applications.}
Wiley, New York.
}
\seealso{
\code{\link{deriv3}}
}
\examples{
# The treated sample from the Puromycin data
data(Puromycin)
mmcurve <- deriv3(~ Vm * conc/(K + conc), c("Vm", "K"),
                  function(Vm, K, conc) NULL)
Treated <- Puromycin[Puromycin$state == "treated", ]
(Purfit1 <- nls(rate ~ mmcurve(Vm, K, conc), data = Treated,
                start = list(Vm=200, K=0.1)))
rms.curv(Purfit1)
##Parameter effects: c^theta x sqrt(F) = 0.2121
##        Intrinsic: c^iota  x sqrt(F) = 0.092
}
\keyword{nonlinear}

\eof
% file MASS/rnegbin.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{rnegbin}
\alias{rnegbin}
\title{
Simulate Negative Binomial Variates
}
\description{
Function to generate random outcomes from a Negative Binomial distribution,
with mean \code{mu} and variance \code{mu + mu^2/theta}.
}
\usage{
rnegbin(n, mu = n, theta = stop("theta must be given"))
}
\arguments{
\item{n}{
If a scalar, the number of sample values required.  If a vector,
\code{length(n)} is the number required and \code{n} is used as the mean vector if
\code{mu} is not specified.
}
\item{mu}{
The vector of means.  Short vectors are recycled.
}
\item{theta}{
Vector of values of the \code{theta} parameter.  Short vectors are recycled.
}}
\value{
Vector of random Negative Binomial variate values.
}
\section{Side Effects}{
Changes \code{.Random.seed} in the usual way.
}
\details{
The function uses the representation of the Negative Binomial distribution
as a continuous mixture of Poisson distributions with Gamma distributed means.
Unlike \code{rnbinom} the index can be arbitrary.
}
\examples{
# Negative Binomials with means fitted(fm) and theta = 4.5
fm <- glm.nb(Days ~ ., data = quine)
dummy <- rnegbin(fitted(fm), theta = 4.5)
}
\keyword{distribution}

\eof
% file MASS/road.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{road}
\alias{road}
\title{
Road Accident Deaths in US States
}
\description{
A data frame with the annual deaths in road accidents for half
the US states. Components are:
}
\usage{
data(road)
}
\format{
\describe{
\item{\code{state}}{
name
}
\item{\code{deaths}}{
number of deaths
}
\item{\code{drivers}}{
number of drivers (in 10,000's)
}
\item{\code{popden}}{
population density in people per square mile
}
\item{\code{rural}}{
length of rural roads, in 1000's of miles
}
\item{\code{temp}}{
average daily maximum temperature in January
}
\item{\code{fuel}}{
fuel consumption in 10,000,000 US gallons per year
}}}
\source{
Imperial College, London M.Sc. exercise
}
\keyword{datasets}

\eof
% file MASS/rock.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{rock-MASS}
\alias{rock}
\title{
  Measurements on Petroleum Rock Samples
}
\description{
Twelve core samples from petroleum reservoirs were sampled by 4
cross-sections.  Each core sample was measured for permeability, and
each cross-section has total area of pores, total perimeter of pores,
and shape.
}
\usage{
data(rock)
}
\format{
    A data frame with 48 rows and 4 numeric columns.
    \tabular{ll}{
      \code{area}  \tab area of pore space, in pixels
      out of 256 by 256 \cr
     	\code{peri}  \tab perimeter in pixels \cr
      \code{shape} \tab perimeter/sqrt(area) \cr
      \code{perm}  \tab permeability in milli-Darcies
    }
}
\source{
  Data from BP Research, image analysis by Ronit Katz, U. Oxford.

  Katz, R. (1995) \emph{Spatial Analysis of Pore Images}. D.Phil. thesis,
  Department of Statistics, University of Oxford.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/rotifer.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{rotifer}
\alias{rotifer}
\title{
Numbers of Rotifers by Fluid Density
}
\description{
The data give the numbers of rotifers falling out of suspension for
different fluid densities. There are two species, \code{pm}
\emph{Polyarthra major}
and \code{kc},
\emph{Keratella cochlearis}
and for each species the number falling out and the total number are given.
}
\usage{
data(rotifer)
}
\format{
\describe{
\item{\code{density}}{
specific density of fluid
}
\item{\code{pm.y}}{
number falling out for
\emph{P. major}
}
\item{\code{pm.tot}}{
total number of
\emph{P. major}
}
\item{\code{kc.y}}{
number falling out for
\emph{K. cochlearis}
}
\item{\code{kc.tot}}{
total number of
\emph{K. cochlearis}
}}}
\source{
D. Collett (1991)
\emph{Modelling Binary Data.}
Chapman & Hall.
p.217
}
\keyword{datasets}

\eof
% file MASS/sammon.d
% copyright (C) 1994-2003 W. N. Venables and B. D. Ripley
%
\name{sammon}
\alias{sammon}
\title{
Sammon's Non-Linear Mapping
}
\description{
One form of non-metric multidimensional scaling.
}
\usage{
sammon(d, y = cmdscale(d, k), k = 2, niter = 100, trace = TRUE,
       magic = 0.2, tol = 1e-4)
}
\arguments{
\item{d}{
distance structure of the form returned by \code{dist}, or a full, symmetric
matrix.  Data are assumed to be dissimilarities or relative distances,
but must be positive except for self-distance.  This can contain missing
values.
}
\item{y}{
  An initial configuration. If none is supplied, \code{cmdscale}
  is used to provide the classical solution.  (If there are missing
  values in \code{d}, an initial configuration must be provided.)
}
\item{k}{
The dimension of the configuration.
}
\item{niter}{
The maximum number of iterations.
}
\item{trace}{
Logical for tracing optimization. Default \code{TRUE}.
}
\item{magic}{
initial value of the step size constant in diagonal Newton method.
}
\item{tol}{
Tolerance for stopping, in units of stress.
}}
\value{
Two components:

\item{points}{
A matrix with \code{k} columns (two by default) giving the fitted configuration.
}
\item{stress}{
The final stress achieved.
}}
\section{Side Effects}{
If trace is true, the initial stress and the current stress are printed
out every 10 iterations.
}
\details{
This chooses a two-dimensional configuration to minimize the stress,
the sum of squared differences between the input distances and those
of the configuration, weighted by the distances, the whole sum being
divided by the sum of input distances to make the stress scale-free.

An iterative algorithm is used, which will usually converge in around
50 iterations.  As this is necessarily an \eqn{O(n^2)} calculation, it is slow
for large datasets.  Further, since the configuration is only determined
up to rotations and reflections (by convention the centroid is at the
origin), the result can vary considerably from machine to machine.
In this release the algorithm has been modified by adding a step-length
search (\code{magic}) to ensure that it always goes downhill.
}
\references{
  Sammon, J. W. (1969)
  A non-linear mapping for data structure analysis.
  \emph{IEEE Trans. Comput.}, \bold{C-18} 401--409.

  Ripley, B. D. (1996)
  \emph{Pattern Recognition and Neural Networks}. Cambridge University Press.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{cmdscale}}, \code{\link{isoMDS}}
}
\examples{
data(swiss)
swiss.x <- as.matrix(swiss[, -1])
swiss.sam <- sammon(dist(swiss.x))
plot(swiss.sam$points, type = "n")
text(swiss.sam$points, labels = as.character(1:nrow(swiss.x)))
}
\keyword{multivariate}

\eof
% file MASS/ships.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{ships}
\alias{ships}
\title{
Ships Damage Data
}
\description{
Data frame giving the number of damage incidents and aggregate
months of service by ship type,
year of construction, and period of operation.
}
\usage{
data(ships)
}
\format{
\describe{
\item{\code{type}}{
type: \code{"A"} to \code{"E"}
}
\item{\code{year}}{
year of construction: 1960-64, 65-69, 70-74, 75-79
(coded as \code{"60"}, \code{"65"}, \code{"70"}, \code{"75"})
}
\item{\code{period}}{
period of operation : 1960-74, 75-79
}
\item{\code{service}}{
aggregate months of service
}
\item{\code{incidents}}{
number of damage incidents
}}}
\source{
P. McCullagh and J. A. Nelder, (1983),
\emph{Generalized Linear Models.}
Chapman & Hall, section 6.3.2, page 137
}
\keyword{datasets}

\eof
% file MASS/shoes.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{shoes}
\alias{shoes}
\title{
Shoe wear data of Box, Hunter and Hunter
}
\description{
A list of two vectors, giving the wear of shoes of materials A and B
for one foot each of ten boys.
}
\usage{
data(shoes)
}
\source{
G. E. P. Box, W. G. Hunter and J. S. Hunter (1978)
\emph{Statistics for Experimenters.}
Wiley, p. 100
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/shrimp.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{shrimp}
\alias{shrimp}
\title{
Percentage of Shrimp in Shrimp Cocktail
}
\description{
A numeric vector with 18 determinations by different laboratories
of the amount (percentage of the declared total weight) of shrimp
in shrimp cocktail.
}
\usage{
data(shrimp)
}
\source{
F. J. King and J. J. Ryan (1976)
Collaborative study of the determination of the amount of shrimp in
shrimp cocktail. \emph{J. Off. Anal. Chem.} \bold{59}, 644--649

R. G. Staudte and S. J. Sheather (1990)
\emph{Robust Estimation and Testing.}
Wiley
}
\keyword{datasets}

\eof
% file MASS/shuttle.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{shuttle}
\alias{shuttle}
\title{
Space Shuttle Autolander Problem
}
\description{
The \code{shuttle} data frame has 256 rows and 7 columns.
The first six columns are categorical variables giving example
conditions; the seventh is the decision. The first 253 rows are the
training set, the last 3 the test conditions.
}
\usage{
data(shuttle)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{stability}}{
Stable positioning or not (\code{stab / xstab})
}
\item{\code{error}}{
Size of error (\code{MM / SS / LX / XL})
}
\item{\code{sign}}{
Sign of error, positive or negative (\code{pp / nn})
}
\item{\code{wind}}{
Wind sign (\code{head / tail})
}
\item{\code{magn}}{
Wind strength (\code{Light / Medium / Strong / Out of Range})
}
\item{\code{vis}}{
Visibility (\code{yes / no})
}
\item{\code{use}}{
Use the autolander or not
}}}
\source{
D. Michie (1989)
Problems of computer-aided concept formation. In
\emph{Applications of Expert Systems 2},
ed. J. R. Quinlan, Turing Institute Press / Addison-Wesley, pp. 310--333.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/snails.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{snails}
\alias{snails}
\title{
Snail Mortality Data
}
\description{
Groups of 20 snails were held for periods of 1, 2, 3 or 4 weeks
in carefully controlled conditions of temperature and relative
humidity.  There were two species of snail, A and B, and the
experiment was designed as a 4 by 3 by 4 by 2 completely randomized
design.  At the end of the exposure time the snails were tested to see if
they had survived; the process itself is fatal for the animals.  The
object of the exercise was to model the probability of survival in terms of
the stimulus variables, and in particular to test for differences between
species.

The data are unusual in that in most cases fatalities during the experiment
were fairly small.
}
\usage{
data(snails)
}
\format{
The data frame contains the following components:
\describe{
\item{\code{Species}}{
Snail species A (1) or B (2)
}
\item{\code{Exposure}}{
Exposure in weeks
}
\item{\code{Rel.Hum}}{
Relative humidity (4 levels)
}
\item{\code{Temp}}{
Temperature, in degrees Celsius (3 levels)
}
\item{\code{Deaths}}{
Number of deaths
}
\item{\code{N}}{
Number of snails exposed
}}}
\source{
Zoology Department, The University of Adelaide.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/stdres.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{stdres}
\alias{stdres}
\alias{lmwork}
\title{
Extract Standardized Residuals from a Linear Model
}
\description{
The standardized residuals.  These are normalized to unit
variance, fitted including the current data point.
}
\usage{
stdres(object)
}
\arguments{
\item{object}{
any object representing a linear model.
}}
\value{
The vector of appropriately transformed residuals.
}
\seealso{
\code{\link{residuals}}, \code{\link{studres}}
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{models}

\eof
% file MASS/steam.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{steam}
\alias{steam}
\title{
The Saturated Steam Pressure Data
}
\description{
Temperature and pressure in a saturated steam driven experimental
device.
}
\usage{
data(steam)
}
\format{
The data frame contains the following components:
\describe{
\item{\code{Temp}}{
Temperature, in degrees Celsius
}
\item{\code{Press}}{
Pressure, in Pascals
}}}
\source{
N.R. Draper  and H. Smith (1981)
\emph{Applied Regression Analysis.}
Wiley, pp. 518--9.
}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/stepAIC.d
% copyright (C) 1994-2002 W. N. Venables and B. D. Ripley
%
\name{stepAIC}
\alias{stepAIC}
\alias{extractAIC.gls}
\alias{terms.gls}
\alias{extractAIC.lme}
\alias{terms.lme}
\title{
  Choose a model by AIC in a Stepwise Algorithm
}
\description{
  Performs stepwise model selection by exact AIC.
}
\usage{
stepAIC(object, scope, scale = 0,
        direction = c("both", "backward", "forward"),
        trace = 1, keep = NULL, steps = 1000, use.start = FALSE, k = 2, \dots)
}
\arguments{
  \item{object}{
    an object representing a model of an appropriate class.
    This is used as the initial model in the stepwise search.
  }
  \item{scope}{
    defines the range of models examined in the stepwise search.
    This should be either a single formula, or a list containing
    components \code{upper} and \code{lower}, both formulae.  See the
    details for how to specify the formulae and how they are used.
  }
  \item{scale}{
    used in the definition of the AIC statistic for selecting the models,
    currently only for \code{\link{lm}}, \code{\link{aov}} and
    \code{\link{glm}} models.
  }
  \item{direction}{
    the mode of stepwise search, can be one of \code{"both"},
    \code{"backward"}, or \code{"forward"}, with a default of \code{"both"}.
    If the \code{scope} argument is missing the default for
    \code{direction} is \code{"backward"}.
  }
  \item{trace}{
    if positive, information is printed during the running of
    \code{stepAIC}.
    Larger values may give more information on the fitting process.
  }
  \item{keep}{
    a filter function whose input is a fitted model object and the
    associated \code{AIC} statistic, and whose output is arbitrary.
    Typically \code{keep} will select a subset of the components of
    the object and return them. The default is not to keep anything.
  }
  \item{steps}{
    the maximum number of steps to be considered.  The default is 1000
    (essentially as many as required).  It is typically used to stop the
    process early.
  }
  \item{use.start}{
    if true the updated fits are done starting at the linear predictor for
    the currently selected model. This may speed up the iterative
    calculations for \code{glm} (and other fits), but it can also slow them
    down. \bold{Not used} in \R.
  }
  \item{k}{
    the multiple of the number of degrees of freedom used for the penalty.
    Only \code{k = 2} gives the genuine AIC: \code{k = log(n)} is
    sometimes referred to as BIC or SBC.
  }
  \item{\dots}{
    any additional arguments to \code{extractAIC}. (None are currently used.)
}}
\value{
  the stepwise-selected model is returned, with up to two additional
  components.  There is an \code{"anova"} component corresponding to the
  steps taken in the search, as well as a \code{"keep"} component if the
  \code{keep=} argument was supplied in the call. The
  \code{"Resid. Dev"} column of the analysis of deviance table refers
  to a constant minus twice the maximized log likelihood: it will be a
  deviance only in cases where a saturated model is well-defined
  (thus excluding \code{lm}, \code{aov} and \code{survreg} fits,
  for example).
}
\details{
  The set of models searched is determined by the \code{scope} argument.
  The right-hand-side of its \code{lower} component is always included
  in the model, and right-hand-side of the model is included in the
  \code{upper} component.  If \code{scope} is a single formula, it
  specifies the \code{upper} component, and the \code{lower} model is
  empty.  If \code{scope} is missing, the initial model is used as the
  \code{upper} model.

  Models specified by \code{scope} can be templates to update
  \code{object} as used by \code{\link{update.formula}}.
  
  There is a potential problem in using \code{\link{glm}} fits with a
  variable \code{scale}, as in that case the deviance is not simply
  related to the maximized log-likelihood. The \code{glm} method for
  \code{\link{extractAIC}} makes the
  appropriate adjustment for a \code{gaussian} family, but may need to be
  amended for other cases. (The \code{binomial} and \code{poisson}
  families have fixed \code{scale} by default and do not correspond
  to a particular maximum-likelihood problem for variable \code{scale}.)

  Where a conventional deviance exists (e.g. for \code{lm}, \code{aov}
  and \code{glm} fits) this is quoted in the analysis of variance table:
  it is the \emph{unscaled} deviance.
}
\note{
  The model fitting must apply the models to the same dataset. This may
  be a problem if there are missing values and an \code{na.action} other than
  \code{na.fail} is used (as is the default in \R).
  We suggest you remove the missing values first.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{addterm}}, \code{\link{dropterm}}, \code{\link{step}}
}
\examples{
quine.hi <- aov(log(Days + 2.5) ~ .^4, quine)
quine.nxt <- update(quine.hi, . ~ . - Eth:Sex:Age:Lrn)
quine.stp <- stepAIC(quine.nxt,
    scope = list(upper = ~Eth*Sex*Age*Lrn, lower = ~1),
    trace = FALSE)
quine.stp$anova

cpus1 <- cpus
attach(cpus)
for(v in names(cpus)[2:7])
  cpus1[[v]] <- cut(cpus[[v]], unique(quantile(cpus[[v]])),
                    include.lowest = TRUE)
detach()
cpus0 <- cpus1[, 2:8]  # excludes names, authors' predictions
cpus.samp <- sample(1:209, 100)
cpus.lm <- lm(log10(perf) ~ ., data = cpus1[cpus.samp,2:8])
cpus.lm2 <- stepAIC(cpus.lm, trace = FALSE)
cpus.lm2$anova

example(birthwt)
birthwt.glm <- glm(low ~ ., family = binomial, data = bwt)
birthwt.step <- stepAIC(birthwt.glm, trace = FALSE)
birthwt.step$anova
birthwt.step2 <- stepAIC(birthwt.glm, ~ .^2 + I(scale(age)^2)
    + I(scale(lwt)^2), trace = FALSE)
birthwt.step2$anova

quine.nb <- glm.nb(Days ~ .^4, data = quine)
quine.nb2 <- stepAIC(quine.nb)
quine.nb2$anova
}
\keyword{models}

\eof
% file MASS/stormer.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{stormer}
\alias{stormer}
\title{
The Stormer Viscometer Data
}
\description{
The stormer viscometer measures the viscosity of a fluid by measuring the
time taken for an inner cylinder in the mechanism to perform a fixed number
of revolutions in response to an actuating weight.  The viscometer is
calibrated by measuring the time taken with varying weights while the
mechanism is suspended in fluids of accurately known viscosity.  The data
comes from such a calibration, and theoretical considerations suggest a
nonlinear relationship between time, weight and viscosity, of the form
\code{Time = (B1*Viscosity)/(Weight - B2) + E}
where \code{B1} and \code{B2}
are unknown parameters to be estimated, and \code{E} is error.
}
\usage{
data(stormer)
}
\format{
The data frame contains the following components:
\describe{
\item{\code{Viscosity}}{
Viscosity of fluid
}
\item{\code{Wt}}{
Actuating weight
}
\item{\code{Time}}{
Time taken
}}}
\source{
E. J. Williams (1959)
\emph{Regression Analysis.}
Wiley.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/studres.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{studres}
\alias{studres}
\title{
Extract Studentized Residuals from a Linear Model
}
\description{
The Studentized residuals.  Like standardized residuals, these are
normalized to unit variance, but the Studentized version is fitted
ignoring the current data point. (They are sometimes called jackknifed
residuals).
}
\usage{
studres(object)
}
\arguments{
\item{object}{
any object representing a linear model.
}}
\value{
The vector of appropriately transformed residuals.
}
\seealso{
\code{\link{residuals}}, \code{\link{stdres}}
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{models}

\eof
% file MASS/summary.loglm.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{summary.loglm}
\alias{summary.loglm}
\alias{print.summary.loglm}
\title{
Summary Method Function for Objects of Class 'loglm'
}
\description{
Returns a summary list for log-linear models fitted by
iterative proportional scaling using \code{loglm}.
}
\usage{
\method{summary}{loglm}(object, fitted = FALSE, \dots)
}
\arguments{
\item{object}{
a fitted loglm model object.
}
\item{fitted}{
if \code{TRUE} return observed and expected frequencies in the result.
Using \code{fitted = TRUE} may necessitate re-fitting the object.
}
\item{\dots}{
arguments to be passed to or from other methods.
}}
\value{
a list is returned for use by \code{print.summary.loglm}.
This has components

\item{formula}{
the formula used to produce \code{object}
}
\item{tests}{
the table of test statistics (likelihood ratio, Pearson) for the fit.
}
\item{oe}{
if \code{fitted = TRUE}, an array of the observed and expected frequencies,
otherwise \code{NULL}.
}}
\details{
This function is a method for the generic function
\code{summary()} for class \code{"loglm"}.
It can be invoked by calling \code{summary(x)} for an
object \code{x} of the appropriate class, or directly by
calling \code{summary.loglm(x)} regardless of the
class of the object.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{loglm}}, \code{\link{summary}}
}
\keyword{models}

\eof
% file MASS/summary.negbin.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{summary.negbin}
\alias{summary.negbin}
\alias{print.summary.negbin}
\title{
Summary Method Function for Objects of Class 'negbin'
}
\description{
Identical to \code{summary.glm}, but with three lines of additional output: the
ML estimate of theta, its standard error, and twice the log-likelihood
function.
}
\usage{
\method{summary}{negbin}(object, dispersion = 1, correlation = TRUE, \dots)
}
\arguments{
\item{object}{
Fitted model object of class \code{negbin} inheriting from \code{glm} and \code{lm}.
Typically the output of \code{glm.nb}.
}
\item{dispersion}{
as for \code{summary.glm}, with a default of 1.
}
\item{correlation}{
as for \code{summary.glm}.
}
\item{\dots}{
arguments passed to or from other methods.
}}
\value{
As for \code{summary.glm}; the additional lines of output are not included in
the resultant object.
}
\section{Side Effects}{
A summary table is produced as for \code{summary.glm}, with the additional
information described above.
}
\details{
\code{summary.glm} is used to produce the majority of the output and supply the
result.
This function is a method for the generic function
\code{summary()} for class \code{"negbin"}.
It can be invoked by calling \code{summary(x)} for an
object \code{x} of the appropriate class, or directly by
calling \code{summary.negbin(x)} regardless of the
class of the object.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{summary}}, \code{\link{glm.nb}}, \code{\link{negative.binomial}}, \code{\link{anova.negbin}}
}
\keyword{models}
\examples{
summary(glm.nb(Days ~ Eth*Age*Lrn*Sex, quine, link = log))
}

\eof
% file MASS/summary.rlm.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{summary.rlm}
\alias{summary.rlm}
\alias{print.summary.rlm}
\title{
Summary Method for Robust Linear Models
}
\description{
\code{summary} method for objects of class \code{"rlm"}
}
\usage{
\method{summary}{rlm}(object, method = c("XtX", "XtWX"), correlation = TRUE, \dots)
}
\arguments{
\item{object}{
the fitted model.
This is assumed to be the result of some fit that produces
an object inheriting from the class \code{rlm}, in the sense that
the components returned by the \code{rlm} function will be available.
}
\item{method}{
Should the weighted (by the IWLS weights) or unweighted cross-products
matrix be used?
}
\item{correlation}{
logical. Should correlations be computed (and printed)?
}
\item{\dots}{
arguments passed to or from other methods.
}}
\value{
If printing takes place, only a null value is returned.
Otherwise, a list is returned with the following components.
Printing always takes place if this function is invoked automatically
as a method for the \code{summary} function.

\item{correlation}{
The computed correlation coefficient matrix for the coefficients in the model.
}
\item{cov.unscaled}{
The unscaled covariance matrix; i.e., a matrix such that multiplying it by
an estimate of the error variance produces an estimated covariance matrix
for the coefficients.
}
\item{sigma}{
The scale estimate.
}
\item{df}{
The number of degrees of freedom for the model and for residuals.
}
\item{coefficients}{
A matrix with three columns, containing the coefficients, their standard errors
and the corresponding t statistic.
}
\item{terms}{
The terms object used in fitting this model.
}}
\details{
This function is a method for the generic function
\code{summary()} for class \code{"rlm"}.
It can be invoked by calling \code{summary(x)} for an
object \code{x} of the appropriate class, or directly by
calling \code{summary.rlm(x)} regardless of the
class of the object.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{summary}}
}
\examples{
summary(rlm(calls ~ year, data = phones, maxit = 50))
\dontrun{
Call:
rlm(formula = calls ~ year, data = phones, maxit = 50)

Residuals:
   Min     1Q Median     3Q    Max
-18.31  -5.95  -1.68  26.46 173.77

Coefficients:
            Value    Std. Error t value
(Intercept) -102.622   26.553   -3.86
year           2.041    0.429    4.76

Residual standard error: 9.03 on 22 degrees of freedom

Correlation of Coefficients:
[1] -0.994

}}
\keyword{robust}

\eof
% file MASS/survey.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{survey}
\alias{survey}
\title{
Student Survey Data
}
\description{
This data frame contains the responses of 237 Statistics I students at
the University of Adelaide to a number of questions.
}
\usage{
data(survey)
}
\format{
The components of the data frame are:
\describe{
\item{\code{Sex}}{
The sex of the student. (Factor with levels \code{"Male"} and \code{"Female"}.)
}
\item{\code{Wr.Hnd}}{
The span (distance from tip of thumb to tip of little finger of spread
hand) of writing hand, in centimetres.
}
\item{\code{NW.Hnd}}{
Span of non-writing hand
}
\item{\code{W.Hnd}}{
Writing hand of student. (Factor, with levels \code{"Left"} and \code{"Right"}.)
}
\item{\code{Fold}}{
  \dQuote{Fold your arms! Which is on top?} (Factor, with levels
  \code{"R on L"}, \code{"L on R"}, \code{"Neither"}.)
}
\item{\code{Pulse}}{
Pulse rate of student (beats per minute).
}
\item{\code{Clap}}{
  \sQuote{Clap your hands! Which hand is on top?} (Factor, with levels
  \code{"Right"}, \code{"Left"}, \code{"Neither"}.)
}
\item{\code{Exer}}{
How often the student exercises. (Factor, with levels \code{"Freq"}
(frequently), \code{"Some"}, \code{"None"}.)
}
\item{\code{Smoke}}{
How much the student smokes. (Factor, levels \code{"Heavy"}, \code{"Regul"}
(regularly), \code{"Occas"} (occasionally), \code{"Never"}.)
}
\item{\code{Height}}{
The height of the student, in centimetres.
}
\item{\code{M.I}}{
Indicates whether the student expressed height in imperial
(feet/inches) or metric (centimetres/metres) units. (Factor, levels
\code{"Metric"}, \code{"Imperial"}.)
}
\item{\code{Age}}{
Age of the student, in years.
}}}
\references{
  Venables, W. N. and Ripley, B. D. (1999)
  \emph{Modern Applied Statistics with S-PLUS.} Third
  Edition. Springer.
}
\keyword{datasets}

\eof
% file MASS/synth.tr.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{synth.tr}
\alias{synth.tr}
\alias{synth.te}
\title{
Synthetic Classification Problem
}
\description{
The \code{synth.tr} data frame has 250 rows and 3 columns.
The \code{synth.te} data frame has 100 rows and 3 columns.
It is intended that \code{synth.tr} be used for training and
\code{synth.te} for testing.
}
\usage{
data(synth.tr)
data(synth.te)
}
\format{
These data frames contain the following columns:
\describe{
\item{\code{xs}}{
x-coordinate
}
\item{\code{ys}}{
y-coordinate
}
\item{\code{yc}}{
class, coded as 0 or 1.
}}}
\source{
Ripley, B.D. (1994)
Neural networks and related methods for
classification (with discussion).
\emph{Journal of the Royal Statistical Society series B}
\bold{56}, 409--456.


Ripley, B.D. (1996)
\emph{Pattern Recognition and Neural Networks.}
Cambridge: Cambridge University Press.
}
\keyword{datasets}

\eof
% file MASS/theta.md.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{theta.md}
\alias{theta.md}
\title{
Estimate theta of the Negative Binomial by Deviance
}
\description{
Given the estimated mean vector, estimate \code{theta} of the Negative Binomial
Distribution by equating the deviance to the residual degrees of freedom.
Analogue of a moment estimator.
}
\usage{
theta.md(y, u, dfr, limit = 20, eps = .Machine$double.eps^0.25)
}
\arguments{
\item{y}{
Vector of observed values from the Negative Binomial.
}
\item{u}{
Estimated mean vector.
}
\item{dfr}{
Residual degrees of freedom (assuming \code{theta} known).
}
\item{limit}{
Limit on the number of iterations.
}
\item{eps}{
Tolerance to determine convergence.
}}
\value{
The required estimate of \code{theta}, as a scalar.
}
\seealso{
\code{\link{glm.nb}}, \code{\link{theta.ml}}, \code{\link{theta.mm}}
}
\examples{
\dontrun{theta <- theta.md(y, fitted(fm), dfr = 123)}
}
\keyword{models}

\eof
% file MASS/theta.ml.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{theta.ml}
\alias{theta.ml}
\title{
Estimate theta of the Negative Binomial by Maximum Likelihood
}
\description{
Given the estimated mean vector, estimate \code{theta} of the Negative Binomial
Distribution.
}
\usage{
theta.ml(y, mu, n, limit = 10, eps = .Machine$double.eps^0.25, trace = FALSE)
}
\arguments{
\item{y}{
Vector of observed values from the Negative Binomial.
}
\item{mu}{
Estimated mean vector.
}
\item{n}{
Number of data points (defaults to length of \code{y})
}
\item{limit}{
Limit on the number of iterations.
}
\item{eps}{
Tolerance to determine convergence.
}
\item{trace}{
logical: should iteration progress be printed?
}}
\value{
The required estimate of \code{theta}, as a scalar.
}
\seealso{
\code{\link{glm.nb}}, \code{\link{theta.md}}, \code{\link{theta.mm}}
}
\examples{
\dontrun{theta <- theta.ml(y, fitted(fm))}
}
\keyword{models}

\eof
% file MASS/theta.mm.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{theta.mm}
\alias{theta.mm}
\title{
Estimate theta of the Negative Binomial by Moments
}
\description{
Given the mean estimate, calculate the moment estimator of \code{theta} by
equating \eqn{\sum((y-\mu)^2/(\mu+\mu^2/\theta))}{sum((y-mu)^2/(mu+mu^2/theta))}
to the residual degrees of freedom.
}
\usage{
theta.mm(y, u, dfr, limit = 10, eps = .Machine$double.eps^0.25)
}
\arguments{
\item{y}{
Vector of observed values from the Negative Binomial.
}
\item{u}{
Estimated mean vector.
}
\item{dfr}{
Residual degrees of freedom (assuming \code{theta} known).
}
\item{limit}{
Limit on the number of iterations.
}
\item{eps}{
Tolerance to determine convergence.
}}
\value{
The required estimate of \code{theta}, as a scalar.
}
\seealso{
\code{\link{glm.nb}}, \code{\link{theta.ml}}, \code{\link{theta.md}}
}
\examples{
\dontrun{theta <- theta.mm(y, fitted(fm), dfr = 123)}
}
\keyword{models}

\eof
% file MASS/topo.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{topo}
\alias{topo}
\title{
Spatial Topographic Data
}
\description{
The \code{topo} data frame has 52 rows and 3 columns, of
topographic heights within a 310 feet square.
}
\usage{
data(topo)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{x}}{
x coordinates (units of 50 feet)
}
\item{\code{y}}{
y coordinates (units of 50 feet)
}
\item{\code{z}}{
heights (feet)
}}}
\source{
Davis, J.C. (1973)
\emph{Statistics and Data Analysis in Geology.}
Wiley.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\keyword{datasets}

\eof
% file MASS/truehist.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{truehist}
\alias{truehist}
\title{
Plot a Histogram
}
\description{
Creates a histogram on the current graphics device.
}
\usage{
truehist(data, nbins = "Scott", h, x0 = -h/1000,
         breaks, prob = TRUE, xlim = range(breaks),
         ymax = max(est), col,
         xlab = deparse(substitute(data)), bty = "n", \dots)
}
\arguments{
\item{data}{
numeric vector of data for histogram.  Missing values (\code{NA}s)
are allowed and omitted.
}
\item{nbins}{
The suggested number of bins.  Either a number, or a character string
naming a rule: \code{"Scott"} or \code{"Freedman-Diaconis"} or \code{"FD"}.  (Case is
ignored.)
}
\item{h}{
The bin width (takes precedence over \code{nbins}).
}
\item{x0}{
Shift for the bins - the breaks are at \code{x0 + h * (\dots, -1, 0, 1, \dots)}
}
\item{breaks}{
The set of breakpoints to be used. (Usually omitted, takes precedence
over \code{h} and \code{nbins}).
}
\item{prob}{
If true (the default) plot a true histogram.
The vertical axis has a
\emph{relative frequency density}
scale, so the product of the dimensions of any panel gives the
relative frequency.  Hence the total area under the histogram
is 1 and it is directly comparable with most other estimates
of the probability density function.
If false plot the counts in the bins.
}
\item{xlim}{
The limits for the x-axis.
}
\item{ymax}{
The upper limit for the y-axis.
}
\item{col}{
The colour number for the bar fill.
}
\item{xlab}{
label for the plot x-axis. By default, this will be the name of \code{data}.
}
\item{bty}{
The box type for the plot - defaults to none.
}
\item{\dots}{
  additional arguments to \code{\link{rect}} or \code{\link{plot}}.
}}
\section{Side Effects}{
A histogram is plotted on the current device.
}
\details{
  This plots a true histogram, a density estimate of total area 1.  If
  \code{breaks} is specified, those breakpoints are used. Otherwise if
  \code{h} is specified, a regular grid of bins is used with width
  \code{h}.  If neither \code{breaks} nor \code{h} is specified,
  \code{nbins} is used to select a suitable \code{h}.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{hist}}
}
\keyword{hplot}
\keyword{dplot}

\eof
% file MASS/ucv.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{ucv}
\alias{ucv}
\title{
Unbiased Cross-Validation for Bandwidth Selection
}
\description{
Uses unbiased cross-validation to select the bandwidth of a Gaussian
kernel density estimator.
}
\usage{
ucv(x, nb = 1000, lower, upper)
}
\arguments{
\item{x}{
a numeric vector
}
\item{nb}{
number of bins to use.
}
\item{lower, upper}{
Range over which to minimize.  The default is almost always satisfactory.
}}
\value{
a bandwidth.
}
\references{
Scott, D. W. (1992)
\emph{Multivariate Density Estimation: Theory, Practice, and  Visualization.}
Wiley.

  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
\code{\link{bcv}}, \code{\link{width.SJ}}, \code{\link{density}}
}
\examples{
data(geyser)
ucv(geyser$duration)
}
\keyword{dplot}

\eof
% file MASS/waders.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{waders}
\alias{waders}
\title{
Counts of Waders at 15 Sites in South Africa
}
\description{
The \code{waders} data frame has 15 rows and 19 columns.
The entries are counts of waders in summer.
}
\usage{
data(waders)
}
\format{
This data frame contains the following columns (species)
\describe{
\item{\code{S1}}{
Oystercatcher
}
\item{\code{S2}}{
White-fronted Plover
}
\item{\code{S3}}{
Kittlitz's Plover
}
\item{\code{S4}}{
Three-banded Plover
}
\item{\code{S5}}{
Grey Plover
}
\item{\code{S6}}{
Ringed Plover
}
\item{\code{S7}}{
Bar-tailed Godwit
}
\item{\code{S8}}{
Whimbrel
}
\item{\code{S9}}{
Marsh Sandpiper
}
\item{\code{S10}}{
Greenshank
}
\item{\code{S11}}{
Common Sandpiper
}
\item{\code{S12}}{
Turnstone
}
\item{\code{S13}}{
Knot
}
\item{\code{S14}}{
Sanderling
}
\item{\code{S15}}{
Little Stint
}
\item{\code{S16}}{
Curlew Sandpiper
}
\item{\code{S17}}{
Ruff
}
\item{\code{S18}}{
Avocet
}
\item{\code{S19}}{
Black-winged Stilt
}}

The rows are the sites:
\tabular{rl}{
A \tab Namibia North coast \cr
B \tab Namibia North wetland \cr
C \tab Namibia South coast \cr
D \tab Namibia South wetland \cr
E \tab Cape North coast \cr
F \tab Cape North wetland \cr
G \tab Cape West coast \cr
H \tab Cape West wetland \cr
I \tab Cape South coast \cr
J \tab Cape South wetland \cr
K \tab Cape East coast \cr
L \tab Cape East wetland \cr
M \tab Transkei coast \cr
N \tab Natal coast \cr
O \tab Natal wetland
}
}
\source{
J.C. Gower and D.J. Hand (1996)
\emph{Biplots.}
Chapman & Hall, Table 9.1. Quoted as from:


R.W. Summers, L.G. Underhill, D.J. Pearson and D.A. Scott (1987)
Wader migration systems in south and eastern Africa and western Asia.
\emph{Wader Study Group Bulletin}
\bold{49} Supplement, 15--34.
}
\examples{
plot(corresp(waders, nf=2))
}
\keyword{datasets}

\eof
% file MASS/whiteside.d
% copyright (C) 1999 W. N. Venables and B. D. Ripley
%
\name{whiteside}
\alias{whiteside}
\title{
House Insulation: Whiteside's Data
}
\description{
Mr Derek Whiteside of the UK Building Research Station recorded the
weekly gas consumption and average external temperature at his own
house in south-east England for two heating seasons, one of 26 weeks
before, and one of 30 weeks after cavity-wall insulation was
installed. The object of the exercise was to assess the effect of the
insulation on gas consumption.
}
\usage{
data(whiteside)
}
\format{
The \code{whiteside} data frame has 56 rows and 3 columns:
\describe{
\item{\code{Insul}}{
A factor, before or after insulation.
}
\item{\code{Temp}}{
Purportedly the average outside temperature in degrees Celsius. (These
values are far too low for any 56-week period in the 1960s in
South-East England. It might be the weekly average of daily minima.)
}
\item{\code{Gas}}{
The weekly gas consumption in 1000s of cubic feet.
}}}
\source{
A data set collected in the 1960s by Mr Derek Whiteside of the
UK Building Research Station. Reported by

Hand, D. J., Daly, F., McConway, K., Lunn, D. and Ostrowski, E. eds (1993)
\emph{A Handbook of Small Data Sets.}
Chapman & Hall, p. 69.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
require(lattice)
xyplot(Gas ~ Temp | Insul, whiteside, panel =
  function(x, y, ...) {
    panel.xyplot(x, y, ...)
    panel.lmline(x, y, ...)
  }, xlab = "Average external temperature (deg. C)",
  ylab = "Gas consumption  (1000 cubic feet)", aspect = "xy",
  strip = function(...) strip.default(..., style = 1))

gasB <- lm(Gas ~ Temp, whiteside, subset = Insul=="Before")
gasA <- update(gasB, subset = Insul=="After")
summary(gasB)
summary(gasA)
gasBA <- lm(Gas ~ Insul/Temp - 1, whiteside)
summary(gasBA)

gasQ <- lm(Gas ~ Insul/(Temp + I(Temp^2)) - 1, whiteside)
summary(gasQ)$coef

gasPR <- lm(Gas ~ Insul + Temp, whiteside)
anova(gasPR, gasBA)
options(contrasts = c("contr.treatment", "contr.poly"))
gasBA1 <- lm(Gas ~ Insul*Temp, whiteside)
summary(gasBA1)$coef
}
\keyword{datasets}

\eof
% file MASS/width.SJ.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{width.SJ}
\alias{width.SJ}
\title{
Bandwidth Selection by Pilot Estimation of Derivatives
}
\description{
Uses the method of Sheather & Jones (1991) to select the bandwidth of
a Gaussian kernel density estimator.
}
\usage{
width.SJ(x, nb = 1000, lower, upper, method = c("ste", "dpi"))
}
\arguments{
\item{x}{
a numeric vector
}
\item{nb}{
number of bins to use.
}
\item{upper, lower}{
range over which to search for solution if \code{method = "ste"}.
}
\item{method}{
  Either \code{"ste"} ("solve-the-equation") or \code{"dpi"}
  ("direct plug-in").
}}
\value{
a bandwidth.
}
\references{
Sheather, S. J. and Jones, M. C. (1991) A reliable data-based bandwidth
selection method for kernel density estimation.
\emph{Journal of the Royal Statistical Society series B}
\bold{53}, 683--690.

Scott, D. W. (1992)
\emph{Multivariate Density Estimation: Theory, Practice, and  Visualization.}
Wiley.

Wand, M. P. and Jones, M. C. (1995)
\emph{Kernel Smoothing.}
Chapman & Hall.
}
\seealso{
\code{\link{ucv}}, \code{\link{bcv}}, \code{\link{density}}
}
\examples{
attach(geyser)
width.SJ(duration, method = "dpi")
width.SJ(duration)
detach()

width.SJ(galaxies, method = "dpi")
width.SJ(galaxies)
}
\keyword{dplot}

\eof
% file MASS/write.matrix.d
% copyright (C) 1994-2002 W. N. Venables and B. D. Ripley
%
\name{write.matrix}
\alias{write.matrix}
\title{
Write a Matrix or Data Frame
}
\description{
  Writes a matrix or data frame to a file or the console, using column
  labels and a layout respecting columns.
}
\usage{
write.matrix(x, file = "", sep = " ", blocksize)
}
\arguments{
\item{x}{
  matrix or data frame.
}
\item{file}{
  name of output file. The default (\code{""}) is the console.
}
\item{sep}{
  The separator between columns.
}
\item{blocksize}{
  If supplied and positive, the output is written in blocks of
  \code{blocksize} rows.  Choose as large as possible consistent with
  the amount of memory available.
}}
\details{
  If \code{x} is a matrix, supplying \code{blocksize} is more
  memory-efficient and enables larger matrices to be written, but each
  block of rows might be formatted slightly differently.

  If \code{x} is a data frame, the conversion to a matrix may negate the
  memory saving.
}
\section{Side Effects}{
  A formatted file is produced, with column headings (if \code{x} has them)
  and columns of data.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\seealso{
  \code{\link{write.table}}
}
\keyword{file}
\keyword{print}

\eof
% file MASS/wtloss.d
% copyright (C) 1994-9 W. N. Venables and B. D. Ripley
%
\name{wtloss}
\alias{wtloss}
\title{
Weight Loss Data from an Obese Patient
}
\description{
The data frame gives the weight, in kilograms, of an obese patient at 52
time points over an 8 month period of a weight rehabilitation programme.
}
\usage{
data(wtloss)
}
\format{
This data frame contains the following columns:
\describe{
\item{\code{Days}}{
Time in days since the start of the programme.
}
\item{\code{Weight}}{
Weight in kilograms of the patient.
}}}
\source{
Dr T. Davies, Adelaide.
}
\references{
  Venables, W. N. and Ripley, B. D. (2002)
  \emph{Modern Applied Statistics with S.} Fourth edition.  Springer.
}
\examples{
wtloss.fm <- nls(Weight ~ b0 + b1*2^(-Days/th),
    data = wtloss, start = list(b0=90, b1=95, th=120),
    trace = TRUE)
}
\keyword{datasets}

\eof
