-
Notifications
You must be signed in to change notification settings - Fork 0
/
workshop.tex
132 lines (105 loc) · 4.53 KB
/
workshop.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
\documentclass[a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage{amssymb}
\usepackage{enumerate}
\usepackage{hyperref}
\usepackage{mythm}
\usepackage{aixi}
\def\X{\mathcal{X}}
\def\C{\mathcal{C}}
%opening
\title{Workshop}
\author{MIRIx people}
\begin{document}
\maketitle
\begin{abstract}
This is a loose compilation of notes from the MIRIx workshop at ANU on
September 6--7, 2014.
\end{abstract}
Things we have shown:\\
We spent a lot of time talking about $S$, the Speed prior~\cite{Schmidhuber:2002}.
$S$ is a computable semi-measure.
AI$S$ is like AI$\xi$ but with $S$ instead of $\xi$.\\
%
\begin{enumerate}
\item $S$ is not universal, because it is computable (and there is no universal computable prior).
\item $S$ is not a measure, for exactly the same reason that the Solomonoff prior is not a measure
(some programs just print a finite string and stop).
\item However, we made up our own algorithm which does the same thing. Basically, it's clear that $S$ is lower semi-computable. So we show that $S(x)- S(x0) - S(x1)$ is also lower semi-computable, which leads to $S(x0)$ being computable.
\item $\varepsilon$-optimal AIS is computable.
\item $S$ effectively dominates (see \autoref{def:effective-domination}) all lower semicomputable semimeasures.
\item $S(x) \leq 1 / |x|$
\item $S(x) \leq c_{S'} S'(x)$~\cite[Eq.\ 7]{Schmidhuber:2002}.
\end{enumerate}
\begin{definition}[Effective Domination]
\label{def:effective-domination}
A semimeasure $\nu$ \emph{effectively dominates} a semimeasure $\rho$ iff
there is a function $f: \mathbb{N} \to \mathbb{R}_+$ such that
\[
\nu(x) \geq 2^{-f(|x|)} \rho(x).
\]
\end{definition}
Effective domination implies absolute continuity on cylinder sets
(is it equivalent?).
We have that $S$ effectively dominates all lower semicomputable semimeasures,
but we are not sure whether this is useful at all.
For example, if we use $S$ for prediction of a lower semicomputable semimeasure $\mu$,
Hutter's loss bounds do not apply.
Instead we get
\begin{equation}
\label{eq:KL}
KL(\mu \dmid S) \leq - \ln w_\mu = f(|x|).
\end{equation}
In our case, $f(|x|) \to \infty$ as $|x| \to \infty$.
The proof for \eqref{eq:KL} is identical to \cite[Thm.\ 3.19]{Hutter:2005}
except for the application of effective domination in the last step.
For deterministic $\mu$,
this gives the error bound~\cite[Thm.\ 3.36]{Hutter:2005}
\[
E_t^{\Theta_S} \leq 2f(t),
\]
where $E_t^{\Theta_S}$ is the number of errors made up to time $t$
when using $S$ for prediction of the sequence generated by $\mu$.
This bound is only useful if $f(t) \in o(t)$.
%\begin{definition}[{Monotone Turing Machine~\cite[Def.\ 4.5.2 \& Def.\ 4.5.3]{LV:2008}\cite[Def.\ 2.6]{Hutter:2005}}]
%\label{def:monotone-TM}
%A \emph{monotone Turing machine}
%is a Turing machine with
%one unidirectional read-only input tape,
%one unidirectional write-only output tape,
%a finite number of work tapes, and no final states.
%A monotone Turing machine implements a function $q$
%that maps $x \in \X^\sharp$ to $y \in \X^\sharp$:
%the input tape is initialized with $x$
%and $y$ is read from the output tape according to the following cases.
%\begin{enumerate}[(i)]
%\item $x \in \X^*$ is finite and
% $y \in \X^*$ is to the left of the output tape's head when
% the head of the input tape reads the next character right of $x$.
%\item The head of the output tape writes $y \in \X^*$ but no more
% as the machine runs forever where
% $x$ is infinite or
% the head on the read-only input tape never reads any characters right of $x$.
%\item The machine writes $y \in \X^\omega$ to the output tape
% as it runs forever where
% $x$ is infinite or
% the head on the read-only input tape never reads any characters right of $x$.
%\end{enumerate}
%\end{definition}
\include{sonmonotone}
\section{Properties of S}
We want to prove that Postulate 1 ~\cite{Schmidhuber:2002} holds for $S$ i.e.\ if $\C_t$ is the set of all strings that are computable in time $t$,
\begin{equation}
\sum_{x\notin \C_t} S(x) \leq \frac{1}{t} \\
\end{equation}
This is straightforward,
\begin{align*}
\sum_{x\notin \C_t} S(x) &= \sum_{x\notin \C_t} \sum_{i=1}^{\infty}\sum_{p\rightarrow_i x} 2^{-|p|}\\
&= \sum_{x\notin \C_t} \sum_{i=\log{t+1}}^{\infty} 2^{-i} \sum_{p\rightarrow_i x} 2^{-|p|}\\
&= 2^{-\log t}\sum_{x\notin \C_t} \sum_{i=1}^{\infty} 2^{-i} \sum_{p\rightarrow_{i+\log t} x} 2^{-|p|}\\
&= \frac{1}{t} \sum_{i=1}^{\infty} 2^{-i} \sum_{x\notin \C_t} \sum_{p\rightarrow_{i+\log t} x} 2^{-|p|}\\
&\leq \frac{1}{t} (\text{Kraft's inequality})
\end{align*}
\bibliographystyle{alpha}
\bibliography{aixi}
\end{document}