Contents

1 Strategies To Avoid Overfitting
  1.1 Method of Sieves (Grenander, 1981)
  1.2 Complexity Penalization Methods
    1.2.1 Bayesian Methods (Bayes, 1764)
    1.2.2 Description Length Methods (Rissanen, 1978)
    1.2.3 Vapnik-Cervonenkis Dimension (Vapnik \& Cervonenkis, 1971)
  1.3 Hold-out Methods
    1.3.1 Leave-one-out Cross-Validation (Wahba, 1971)
2 Summary
3 Consistency

List of Figures

Figure 1: Illustration of the tradeoff between estimation and approximation errors as a function of the size (complexity) of the class $\mathcal{F}$.

Figure 2: Illustration of empirical risk and the problem of overfitting to the data.

Figure 3: Fitting a linear classifier to two-dimensional data. There are infinitely many such classifiers. (a) We can generate a linear classifier by choosing two data points, drawing a line with both points on one side, and declaring all points on or above the line to be ``$+1$'' (or ``$-1$'') and all points below the line to be ``$-1$'' (or ``$+1$''). From this perspective, the two linear classifiers depicted in (b) are equivalent for this set of data points; hence, relative to a set of $n$ training points, there are only on the order of $n^2$ unique linear classifiers.
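The $O(n^2)$ counting argument in the caption of Figure 3 can be checked numerically. The sketch below is a minimal illustration, not code from the notes: it assumes $n$ random points in general position (no three collinear), and the helper name \texttt{halfplane\_labelings} is ours. Any separating line can be translated and rotated, without relabeling any other point, until it passes through two data points, so it suffices to enumerate lines through pairs; for each pair, small tilts and shifts place the two touched points on either side, and the orientation decides which side is labeled $+1$.

\begin{verbatim}
import itertools
import random

def halfplane_labelings(points):
    """Distinct labelings of `points` achievable with a linear classifier.

    Assumes the points are in general position (no three collinear), so
    that only the two chosen points lie on the line through each pair.
    """
    n = len(points)
    labelings = set()
    for i, j in itertools.combinations(range(n), 2):
        (ax, ay), (bx, by) = points[i], points[j]
        # Signed side of the line through points i and j, for every point.
        side = [(bx - ax) * (py - ay) - (by - ay) * (px - ax)
                for (px, py) in points]
        for o in (+1, -1):                 # which side is labeled "+1"
            base = [o if s > 0 else -o for s in side]
            for si in (+1, -1):            # a small tilt or shift can push
                for sj in (+1, -1):        # points i and j to either side
                    lab = list(base)
                    lab[i], lab[j] = si, sj
                    labelings.add(tuple(lab))
    return labelings

random.seed(0)
n = 10
pts = [(random.random(), random.random()) for _ in range(n)]
labs = halfplane_labelings(pts)
print(len(labs))        # distinct labelings induced by linear classifiers
print(n * n - n + 2)    # Cover's exact count for general position: 92
print(2 ** n)           # 1024 arbitrary labelings of 10 points
\end{verbatim}

For $n = 10$ this enumeration should agree with Cover's count $n^2 - n + 2 = 92$, far below the $2^{10} = 1024$ arbitrary labelings; this is the sense in which only on the order of $n^2$ linear classifiers are distinguishable on the training data.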