Contents

1 Strategies To Avoid Overfitting
  1.1 Method of Sieves (Grenander, 1981)
  1.2 Complexity Penalization Methods
    1.2.1 Bayesian Methods (Bayes, 1764)
    1.2.2 Description Length Methods (Rissanen, 1978)
    1.2.3 Vapnik-Cervonenkis Dimension (Vapnik \& Cervonenkis, 1971)
  1.3 Hold-out Methods
    1.3.1 Leave-one-out Cross-Validation (Wahba, 1971)
2 Summary
3 Consistency

List of Figures

Figure 1: Illustration of the tradeoff between estimation and approximation errors as a function of the size (complexity) of the class $\mathcal{F}$.

Figure 2: Illustration of empirical risk and the problem of overfitting to the data.

Figure 3: Fitting a linear classifier to two-dimensional data. There are infinitely many such classifiers. (a) We can generate a linear classifier by choosing two data points, drawing a line with both points on one side, and declaring all points on or above the line to be ``$+1$'' (or ``$-1$'') and all points below the line to be ``$-1$'' (or ``$+1$''). From this perspective, the two linear classifiers depicted in (b) are equivalent for this set of data points; hence, relative to a set of $n$ training points, there are only on the order of $n^2$ unique linear classifiers.
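The $O(n^2)$ counting argument in the caption of Figure 3 can be checked numerically. The sketch below is a minimal illustration, not code from the notes: it assumes $n$ random points in general position (no three collinear), and the helper name \texttt{halfplane\_labelings} is ours. Any separating line can be translated and rotated, without relabeling any other point, until it passes through two data points, so it suffices to enumerate lines through pairs; for each pair, small tilts and shifts place the two touched points on either side, and the orientation decides which side is labeled $+1$.

\begin{verbatim}
import itertools
import random

def halfplane_labelings(points):
    """Distinct labelings of `points` achievable with a linear classifier.

    Assumes the points are in general position (no three collinear), so
    that only the two chosen points lie on the line through each pair.
    """
    n = len(points)
    labelings = set()
    for i, j in itertools.combinations(range(n), 2):
        (ax, ay), (bx, by) = points[i], points[j]
        # Signed side of the line through points i and j, for every point.
        side = [(bx - ax) * (py - ay) - (by - ay) * (px - ax)
                for (px, py) in points]
        for o in (+1, -1):                 # which side is labeled "+1"
            base = [o if s > 0 else -o for s in side]
            for si in (+1, -1):            # a small tilt or shift can push
                for sj in (+1, -1):        # points i and j to either side
                    lab = list(base)
                    lab[i], lab[j] = si, sj
                    labelings.add(tuple(lab))
    return labelings

random.seed(0)
n = 10
pts = [(random.random(), random.random()) for _ in range(n)]
labs = halfplane_labelings(pts)
print(len(labs))        # distinct labelings induced by linear classifiers
print(n * n - n + 2)    # Cover's exact count for general position: 92
print(2 ** n)           # 1024 arbitrary labelings of 10 points
\end{verbatim}

For $n = 10$ this enumeration should agree with Cover's count $n^2 - n + 2 = 92$, far below the $2^{10} = 1024$ arbitrary labelings; this is the sense in which only on the order of $n^2$ linear classifiers are distinguishable on the training data.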