%
%        On a transfer theorem for the P<>NP Conjecture
%        == = ======== ======= === === ===== ==========
%
%                    Gregorio Malajovich
%
%           gregorio@labma.ufrj.br, gregorio@msri.org
%
%                         Dec 8, 1998
%
%
%        ABSTRACT:   A model of computation is defined over the 
%        algebraic numbers and over number fields.  This  model 
%        is non-uniform, and the cost of operations depends  on 
%        the height of the operands and on the  degree  of  the 
%        extension of the rational defined by those operands.  
%	
%	 A transfer theorem for the P<>NP Conjecture is  proved, 
%        namely: P<>NP in this model over  the  real  algebraic 
%        numbers if and only if P<>NP in the classical setting.
%
%        This is a LaTeX2e file.
%
\documentclass[final]{amsart}

\usepackage{amsmath, amssymb, amsthm}
\usepackage[all]{xy}
%   \usepackage{colordvi}
\usepackage{blackdvi}
\usepackage{latexsym}

\headheight=8pt     \topmargin=0pt
\textheight=624pt   \textwidth=432pt
\oddsidemargin=18pt \evensidemargin=18pt
\let\cal\mathcal


\newtheorem{theorem}      {Theorem}
\newtheorem{maintheorem}  {Theorem}
\newtheorem{lemma}        {Lemma}
\newtheorem{proposition}  {Proposition}
\newtheorem{corollary}    {Corollary}
\theoremstyle{definition}
\newtheorem{example}      {Example}
\newtheorem{remark}       {Remark}
\newtheorem{definition}   {Definition}
\newtheorem{induction}    {Induction Hypothesis}
\newtheorem{question}     {Question}
\newtheorem{problem}      {Problem}

\renewcommand{\themaintheorem}{\Alph{maintheorem}}


\newcommand{\binomial}[2]  {{\left(\begin{array}{c}#1\\#2\end{array}\right)}}
\newcommand{\pnp}          {$\mathcal P \ne \mathcal {NP}$\ }
\newcommand{\pisnp}        {$\mathcal P = \mathcal {NP}$\ }
\newcommand{\qalg}	   {\mathbb Q^{\mathrm{a}}}
\newcommand{\qralg}	   {\mathbb Q^{\mathrm{ra}}}
\newcommand{\syl}	   {\mathrm{Syl}}
\newcommand{\res}	   {\mathrm{Res}}
\newcommand{\size}	   {\mathrm{Size}}
\newcommand{\absval}       {|\,\cdot\,|}



\newcommand{\MyBlue}[1]  {\Color{ 0.5 0.3  0    0.2}{#1}}
\newcommand{\MyRed}[1]   {\Color{ 0   0.5  0.3  0.2}{#1}}
\newcommand{\MyGreen}[1] {\Color{ 0.5 0    0.8  0.1}{#1}}


\newcommand{\tapepos}[1]{\makebox[2.5em]{ $#1$ } }

\newcommand{\tape}[7]{
  \begin{array}{c|c|c|c|c|c|c|c|c}
  \hline
  \cdots &
  \tapepos{#1} &
  \tapepos{#2} &
  \tapepos{#3} &
  \tapepos{#4} &
  \tapepos{#5} &
  \tapepos{#6} &
  \tapepos{#7} &
  \cdots
  \\
  \hline
  \end{array}}

\newcommand{\halftape}[4]{
  \begin{array}{|c|c|c|c|c}
  \hline
  \tapepos{#1} &
  \tapepos{#2} &
  \tapepos{#3} &
  \tapepos{#4} &
  \cdots
  \\
  \hline
  \end{array}}


\newcommand{\redemph}[1]{\MyRed{\underline{\Black{\em #1}}}}

\newcommand{\compnode}[1]{\MyGreen{
   \begin{picture}(100,50)(0,0)
   \put(0,0){\framebox(100,50){\Black{#1}}}
   \end{picture}
   }}

\newcommand{\decnode}[1]{\MyGreen{
   \begin{picture}(100,50)
   \put(50,0){\line(2,1){50}}
   \put(50,0){\line(-2,1){50}}
   \put(50,50){\line(2,-1){50}}
   \put(50,50){\line(-2,-1){50}}
   \put(50,25){\makebox(0,0){\Black{#1}}}
   \end{picture}
   }}

\newcommand{\ionode}[1]{\MyGreen{
   \begin{picture}(100,50)(0,0)
   \put(50,25){\oval(100,50)}
   \put(50,25){\makebox(0,0){\Black{#1}}}
   \end{picture}
   }}
\newcommand{\connector}[1]{\MyGreen{
   \begin{picture}(100,50)(0,0)
   \put(50,25){\circle{50}}
   \put(50,25){\makebox(0,0){\Black{#1}}}
   \end{picture}
   }}

\newcommand{\downedge}[1]{\MyGreen{
   \begin{picture}(100,25)
   \put(50,25){\vector(0,-1){25}}
   \put(55,12){\makebox(0,0)[l]{\Black{#1}}}
   \end{picture}
   }}

\newcommand{\rightedge}[1]{\MyGreen{
   \begin{picture}(50,50)
   \put(0,25){\vector(1,0){50}}
   \put(25,35){\makebox(0,0){\Black{#1}}}
   \end{picture}
   }}

\newcommand{\newtext}[2]{\marginpar{#2}{\boldmath \bf \large{#1}}}
\newcommand{\oldtext}[2]{[\marginpar{#2}{\tiny {#1}}]}



\title{On a transfer theorem for the \pnp Conjecture}
\author{Gregorio Malajovich}
\address{
Departamento de Matem\'atica Aplicada, Universidade Federal
do Rio de Janeiro. Caixa Postal 68530, CEP 21945, Rio de Janeiro,
RJ, Brasil}
\email{gregorio@labma.ufrj.br}
\curraddr{On leave at MSRI, 1000 Centennial Drive, Berkeley CA
94720-5070, e-mail: \texttt{gregorio@msri.org}}

\date{\today}
\begin{document}

\maketitle

\begin{abstract}
	A model of computation is defined over the algebraic
	numbers and over number fields. 
	This model is non-uniform, and the cost of
	operations depends on the height of the operands and
	on the degree of the extension of the rational defined
	by those operands.
	\par
	A transfer theorem for the \pnp Conjecture is proved,
	namely: \pnp in this model over the real algebraic
	numbers if and only if \pnp in the classical setting.
\end{abstract}

%\newpage
\vspace{\stretch{1}} \ \\
\tableofcontents
\vspace{\stretch{1}}
%\newpage
\section{Introduction}

	A model of computation over a ring was introduced
	by Blum, Shub and Smale in ~\cite{BSS}. This model
	preserves the main features of the classical theory
	of Computation. Turing computability appears as a
	particular case (computation over $\mathbb F_2 =
	\mathbb Z \mod  2 \mathbb Z$). 
\medskip
\par
	One of the main objectives of this theory was to 
	generalize the \pnp conjecture by relating it
        to more traditional mathematical ideas and problems. 
\medskip
\par
	A model of computation in the sense of ~\cite{BSS} is
	given essentially by:
\begin{enumerate}
	\item A ring $R$ with unity and without zero divisors.
	\item A subset $o$ of $R$. Machines will be allowed
	to decide (to branch on) $x \in o$. Natural choices
	are $o=\{0\}$ or, when $R$ is ordered, the positive
	numbers $o=\{x \in R: x > 0 \}$.
	\item A cost function $c: R^{\infty} \rightarrow \mathbb R$.
	Here, $R^{\infty}$ stands for the disjoint union of $R^k$,
	where $k \in \mathbb N$. (i.e., $R^{\infty}$ is the set of
	finite lists of elements in $R$).
\end{enumerate}
\par
	We will write for short: $(R, =, c)$ and $(R, >, c)$ for the
	models of computation over a ring $R$, with cost function $c$
	and branching on $o=\{0\}$ or on  $o=\{x \in R: x > 0 \}$, 
	respectively. 
\par
	In this paper, the `size' of some $x \in R^k \subset R^{\infty}$ 
	is $k c(x)$, and the `cost' of a computation is the size of the
	output, {\bf plus} the number of ring operations times
	$\max c(s)$ where $s$ ranges over the operands of all the ring
	operations performed. 
	Concepts of `polynomial time' and `nondeterministic
	polynomial time' arise naturally.
\par
        The main reference for computations over a ring is~\cite{BCSS98}.
        In the notation above, the main examples described in ~\cite{BCSS98}
        are $(\mathbb F_2,=,1)$,    
        $(\mathbb Z,>,x \mapsto \lceil \log |x| \rceil+1)$,    
        $(\mathbb R,>,1)$, $(\mathbb C,=,1)$.    
\par
	Given a model of computation, there is a \pnp conjecture
	associated to it. A `transfer' theorem is a theorem relating
	the \pnp conjecture in different models.
\par
\begin{sloppypar}
	A few `transfer' theorems are known for unit-cost models. The 
        conjecture \pnp is 
	equivalent over the ring of complex numbers $(\mathbb C,=,1)$, over
	the algebraic closure of the rationals $(\qalg,=,1)$, or over
	any extension in-between~\cite{BCSS96}. See also
	~\cite{BCSS98,CG,CK,KOIRAN94,KOIRAN97,MICHAUX94}. 
	For a few consequences 
	related to the transfer principle see~\cite{MM99}.
\end{sloppypar}
\medskip
\par
	In most literature on Computability over a ring, the cost 
	function (or the
	cost of an arithmetic operation over the ring $R$ (complex, real, 
        ...) is usually fixed as $c \equiv 1$. 
	Uniform cost is related to consequences such as
	the existence of `wisdom numbers' over the reals, encoding
	an infinite sequence of 0's and 1's. Or such as the fact that
	\pnp is true over $(K,=,1)$ for all infinite field $K$
	not algebraically closed~(See \cite{BCSS98} Chapter 7 Theorem 9). 
\medskip
\par
	In this paper, a `weighted' model of computation is introduced.
	The cost function will be defined as the 
	`weight', a measure of the complexity of the operands. The 
	definition of
	weight will encode some natural number-theoretical invariants,
	such as the height of algebraic numbers or the degree of an
	extension.
\medskip
\par
	More precisely, if $x_1, \cdots, x_k$ are algebraic over
	$\mathbb Q$, then the weight of $x$
	is defined as
\[
	w(x) = [\mathbb Q[x_1, \cdots x_k]: \mathbb Q]
		    \left( 1 + \lceil \log_2 H(1:x_1:\cdots :x_k)\rceil \right)
\]
	where $H$ is the projective, multiplicative height function as
	defined in ~\cite{LANG97} and~\cite{SILVERMAN86}.
\par
	In the particular case $x \in \mathbb Z$, the model 
	$(\mathbb Z, >, w)$ is precisely the model of computation
	over the integers `with height' in \cite{BSS} or
	`with respect to logarithmic cost' in \cite{BCSS98}. This model
	is known to be equivalent to the Turing model, up to polynomial
	time. 
\medskip
\par
	Under the weighted model of computation, we will obtain a few
	conditions for the equivalence of the \pnp conjecture over
	two rings; we will use those facts to prove that
\begin{maintheorem} \label{mainA}
	Let $K$ be a real number field, and let $\qralg$
	be the real algebraic closure of rationals. The following are
	equivalent:
\begin{enumerate}
	\item \pnp over the Turing model
	\item \pnp over $(\mathbb Z, >, w)$.
	\item \pnp over $(\mathbb Q, >, w)$.
	\item \pnp over $(K, >, w)$.
	\item \pnp over $(\qralg, >, w)$. 
\end{enumerate}
\end{maintheorem}
\medskip
\par
	In view of the $\tau$-conjecture~\cite{BCSS98, SMALE98} it is
	interesting to ask what happens if one removes `order' from the
	models above. Is deciding $x>0$ hard over $(\qralg,=,w)$ ?
	Actually, $\{ x > 0 \}$ will turn out to be undecidable, and
	the following theorem will follow:
\begin{maintheorem} \label{mainB}
	\pnp over $(\qalg, =, w)$ 
	and over $(\qralg, =, w)$
\end{maintheorem}

\medskip
\par


	The proof of Theorem~\ref{mainA} requires some results on
	the complexity of `effective' diophantine approximation. 
	Algebraic numbers can be approximated by rationals generated 
        by a machine over $(\mathbb Z, >, w)$ (or equivalently, a 
        Turing machine). This machine is `universal' in the sense that
	it can be used to approximate any algebraic number $x$, given 
	a suitable description $y$ of $x$.
\par
	When given $k \in \mathbb N$ and $y \in \mathbb Z^{\infty}$,
	where $y$ is a suitable description
	of $x$, that machine will produce
	$a, b\in \mathbb Z$ and $c \in \mathbb N$ such that
\[
	|x - (a + i b) 2^{-c} | < 2^{-k}
\]
\par
	The `suitable information' on $x$ may be its minimal polynomial
	and an approximation of $x$. 

\begin{maintheorem}\label{mainC}
	There are a machine $M$ over $(\mathbb Z, >, w)$ 
	and a function $y: \qalg \rightarrow \mathbb Z^{\infty}$
	with the following properties:
\par
	If $x$ is an algebraic number, then $M$ with input $k, y(x)$
	will output $a, b\in \mathbb Z$ and $c \in \mathbb N$ such that 
\[
        |x - (a + i b) 2^{-c} | < 2^{-k}
\]
	The running time for $k>5$ is 
	$k \log k \ w(x) \log w(x) \
	\mathrm{Poly} (\deg [\mathbb Q[x]: \mathbb Q])$.
	Also, $w(x) \le \mathrm{Bitsize}(y)$.
	Moreover, if $x$ is real,
	then $b$ is always zero in $M(y(x),k)$.
\end{maintheorem}

	The running time above is better than the $O(k^2)$ 
	in the proof of Theorem~(1.4.7)(a) in ~\cite{LOVASZ}
	or in Algorithm~2 Step~3 in~\cite{LOOS}.
	This theorem gives also an implicit bound for the bit-size
	of $y(x)$. Given $y(x)$, this can be used to decide if an
	algebraic number is $> 0$, for instance.
\medskip
\par
	Theorem~\ref{mainC} makes sense in the Turing model of computation.
	However, it also makes sense in the weighted model over
	$\qralg$, with or without order. In that model, Turing computations 
        can be simulated by using sequences of $0$'s and $1$'s. It is
	possible to compute `mixed' expressions, like $f(x)$ where $f$ is
	an integer polynomial and $x \in \qralg$. 

\begin{maintheorem}\label{mainD}
	The restriction $y:\qralg \rightarrow \mathbb Z^{\infty}$ of
	the function $y$ of Theorem~\ref{mainD} can be computed
	in polynomial time over $(\qralg, >, w)$.
\end{maintheorem}


       Unlike Theorem~(1.4.7)(b) in~\cite{LOVASZ}, the machine in 
       Theorem~\ref{mainD} does not know {\em a priori}
       the weight of the input. 
       Theorem~\ref{mainD} implies that it is possible
       to decide in polynomial time whether $x \in \mathbb Q$.
       The proof relies heavily on the Lenstra-Lenstra-
       Lovasz algorithm~\cite{LLL}. 

\medskip
\par
       A non-uniform model of computation over number fields was 
       introduced by Adler and Beling~\cite{AB} in the realm of
       linear programming. This model was used to bound the running
       time of a simplex algorithm, with input defined over the
       algebraic numbers. The running time depends (among other
       parameters) on the degree of the extension and on a measure
       related to the Height of the input.
\par
       I wish to thank Ilan Adler, Lenore Blum, Felipe Cucker,
       Jim Demmel, Benjamin Diament, Steve Smale, Felipe Voloch, Zhu Hui 
       for their comments and suggestions.
\par
       Part of this work was supported by CNPq (Brasil). Another part
       of this work was done while visiting Mathematical Sciences
       Research Institute, Berkeley, CA. 


\section{Heights and Weights}

	The {\em height function} is an invariant
	of $\qalg$, the algebraic closure of the
	rationals, under automorphisms. It extends naturally
	to a projective invariant of $\mathbb P ^n (\qalg)$.
\par
	The motivation for the introduction of the height function comes
	from two (related) goals. One of them is counting rational points
	(or algebraic points) in algebraic varieties. The other goal is
	bounding above or below algebraic expressions.
\par
	The main standard references are Lang~\cite{LANG86,LANG97} and
	Silverman~\cite{SILVERMAN86}. This section is intended to be
	an introduction for readers not acquainted with the technicalities
	of the construction of the height function; those readers coming
	from a number-theoretical background may skip most of this section
	and proceed to Subsection~\ref{heights:advanced} 
\medskip
\par
\subsection{Heights in $\mathbb Z$ and $\mathbb Q$}

	\label{intro:heights}
	Let $x \in \mathbb Z$. We may define $H(x) = \max ( |x|,1 )$,
	where $\absval$ is the ordinary absolute value. The following
	properties are obvious:
\begin{itemize}
	\item [(H1)] $H(0) = H(1) = H(-1) = 1$
	\item [(H2)] $H(xy) \le H(x) H(y)$
	\item [(H3)] $H(x+y) \le 2 H(x) H(y)$
\end{itemize}
\par
	The reason for writing (H3) instead of $H(x+y) \le H(x)+H(y)$ will
	become clear below.
\par 
	The function $H$ may be extended to $\mathbb Z^n$ by
	putting:
\[
	H(\vec x) = \max \left( H(x_1), \dots , H(x_n) \right)
\]
\par
	It becomes a projective invariant if one sets:
\[
	H(x_0: \cdots : x_n) = \max \left(
           H(\lambda x_0), \cdots , H(\lambda x_n) \right)
\]
	where $\lambda \in \mathbb Q$ is such that all the $\lambda x_i$ are
	relatively prime integers.
\par
	Note that $H(\vec x) = H(1:x_1: \cdots : x_n)$, so there is a
	close relationship between those two formulations. In the sequel,
	we will prefer the projective one.
\medskip
\par
	We can extend the height function to rational numbers by
	stating:
\[
	H\left(\frac{p}{q}\right) = H(p:q)
\]
\par
	If $p$ and $q$ are relatively prime, this last expression is also
	$\max (|p|, |q|)$. In case $q_1, q_2 \in \mathbb Z$ are relatively
	prime, we have also:
\begin{eqnarray*}
	H\left(\frac{1}{q_1} + \frac{1}{q_2}\right) &=& 
	H\left(\frac{q_1+q_2}{q_1 q_2}\right) \\
	&=&
	\max \left( |q_1 + q_2|, |q_1 q_2| \right) \\
	&=& |q_1| |q_2| \\
	&=&
	H\left(\frac{1}{q_1}\right) H\left(\frac{1}{q_2}\right)
\end{eqnarray*}
	This is why the height of the sum needs to be bounded in terms
	of the product of the heights. Property (H3) is sharp,
	in the sense that $H(1+1) = 2 = 2 H(1) H(1)$.
\medskip
\par
	It is natural to further generalize the height function to
	$\mathbb P^n (\mathbb Q)$ by:
\[
	H(x_0: \cdots : x_n)
	=
	\max |\lambda x_0|, \cdots , |\lambda x_n| 
\]
	and $\lambda$ is chosen so that the $\lambda x_i$ are 
	integers and relatively prime.
\medskip
\par
	This is {\bf not} the same as $\max H(x_i)$ and should not be.
	Indeed, the objective of introducing the Height function was to 
	obtain reasonable estimates for expressions.
\par
\begin{example}
	Consider the function: $f(\vec x) = \sum x_i$. For instance,
\par
\[
	f( \frac{1}{2} , -\frac{1}{3} , -\frac{1}{7} , \cdots )
	=
	\frac{1}{2 \times 3 \times 7 \cdots}
\]
	so $H(f(\vec x)) \le \left ( \max H(x_i) \right) ^n$ is
	almost sharp ! However, we have also the more elegant bound: 
\[
	H(f(\vec x)) \le n H(\vec x)
\]
\end{example}
 
\medskip
\par
	The above definition of height in $\mathbb P^n (\mathbb Q)$
        allows us to bound many integer or algebraic expressions:
\begin{example}\label{ex:cauchy}
	\label{ex3}
	Let $a(x) = a_d x^d + a_{d-1} x^{d-1} + \cdots + a_0$ be a 
	polynomial with rational coefficients, $a(x) \not \equiv 0$. 
	The following
	bound is due to Cauchy: if $\alpha \ne 0$ is a root of $a$, then:
\[
	|\alpha| > \frac{1}{1+H(a)}
\]
	Indeed, we can assume without loss of generality (and without
	changing the height) that the $a_i$ are relatively prime integers.
	Also, we can assume that $a_0 \ne 0$.
	If $|\alpha|<1$, $a(\alpha)=0$ implies:
\begin{eqnarray*}
	|a_0| 
        &\le& 
        |a_1| |\alpha| + |a_2| |\alpha|^2 + \cdots + |a_d| |\alpha|^d \\
	&\le& |\alpha| H(a) (1 + |\alpha| + \cdots + |\alpha|^{d-1}) \\
	&<& H(a) \frac{|\alpha|}{1 - |\alpha|}
\end{eqnarray*}
\par
	Hence, $|a_0| < (H(a)+|a_0|) |\alpha|$ and
	hence,
\[
	|\alpha| > \frac{1}{\frac{H(a)}{|a_0|}+1} > \frac{1}{H(a)+1}
\]
\end{example}
\medskip
\par
	The height function $H$ can be extended to the algebraic closure
	$\qalg$ of the rationals. The estimate in
	Example~\ref{ex3} can be generalized to polynomials with algebraic
	coefficients (the actual statement is a little more subtle, see
	Theorem ~\ref{cauchy} below.) 
\par
	Also, the height of a minimal polynomial of $\alpha$ can be
	bounded in terms of $H(\alpha)$ (see Corollary~\ref{minimal} 
        below). In that
	sense, the height function will capture the `complexity' of
	algebraic numbers. 

\subsection{Absolute values}

	In order to generalize the Height into $\qalg$,
	a few definitions are needed. First of all, let $K$ be a
	finite algebraic extension of $\mathbb Q$. An absolute value
	on $K$ is a function
\[
	\absval_{\nu} : K \rightarrow \mathbb R_{+}
\]
	such that:
\begin{itemize}
	\item [(A1)] $|x|_{\nu} = 0 \Leftrightarrow x = 0$.
	\item [(A2)] $|xy|_{\nu} = |x|_{\nu} |y|_{\nu}$
	\item [(A3)] $|x+y|_{\nu} \le |x|_{\nu} + |y|_{\nu}$
\end{itemize} 
\par
	If $\absval_{\nu}$ satisfies the condition 
\begin{itemize}
	\item [(A3')] $|x+y|_{\nu} \le \max( |x|_{\nu} , |y|_{\nu})$
\end{itemize} 
	then $\absval_{\nu}$ is called a {\em non-archimedian} absolute
	value, or {\em valuation}. Otherwise, $\absval_{\nu}$ is said
	to be archimedian.
\medskip
\par
	In the sequel, we will need to consider the set of all possible
	valuations of $K$, modulo the following equivalence: $\absval_{\mu}$
	and $\absval_{\nu}$ are equivalent if and only if there are $a$, $b
	> 0$
	such that
\begin{eqnarray*}
	|x|_{\nu} &\le& a |x|_{\mu}^b \\
	|x|_{\mu} &\le& a |x|_{\nu}^b 
\end{eqnarray*}
	Each of the classes of equivalence above will make $K$ into
	a metric space, with distance function $d(x,y) = |x-y|_{\nu}$.
	Equivalent absolute values give rise to equivalent metric spaces.
	We will denote by $M_K$ a (carefully chosen) system of 
	representatives of each class of absolute values. 
	There is 
	precisely one class of archimedian absolute value
	associated to every automorphism $\sigma$ of $K$ over $\mathbb Q$,
	namely:
\[
	|x|_{\sigma} = |\sigma(x)|
\]
	where $\absval$ is the ordinary absolute value.
	The case of non-archimedian absolute values is more subtle:
\begin{example}
	Let $K = \mathbb Q$. It is a famous theorem by Ostrowski 
	(See~\cite{CASSELS}) that
	the absolute values of $\mathbb Q$ are precisely, up to 
	equivalence:
\begin{itemize}
	\item The usual (archimedian) absolute value $|x|$.
	\item For each prime number $r \in \mathbb N$, a valuation
		$|r^k \frac{p}{q}|_{r} = r^{-k}$, where it is assumed
	        that $r,p,q$ are relatively prime integers.
\end{itemize}	
	Furthermore, we have the {\em product formula}
\[
	\prod _{\nu \in M_{\mathbb Q}} |x|_{\nu} = 1
\]
	where $M_{\mathbb Q}$ is the set of absolute values of $\mathbb Q$
	defined above. A generalization of this product formula will
	hold in the more general setting of number fields.
\end{example}

\begin{example}
	Let $K = \mathbb Q[\sqrt{2}]$. In this case, it is easy to
	show that the ring of integers of $K$ (i.e., the set of $x \in
	K$ s.t. $f(x)=0$ for some monic polynomial $f$ with coefficients
	in $\mathbb Z$) is given by 
\[
	{\mathfrak o}_{K} =
	\{ a + b \sqrt{2} : a, b \in \mathbb Z \}
\]	 	
\medskip
\par
	The {\em norm} $N(a+b \sqrt{2})$ is defined by:
\[
	N(a+b \sqrt{2}) = \prod_{\sigma \in \mathrm{Gal}(K:\mathbb Q)}
	              \sigma(a + b \sqrt{2}) = a^2 - 2b^2
\]
\par
	where $\sigma$ ranges over the automorphisms of $K$.
\medskip
\par
	The units $u$ of the ring of integers of $K$ are characterized
	by the equation: $N(u) = \pm 1$. Those can be proved to be
	precisely the expressions of the form:
\[
	\pm (1 + \sqrt{2})^r \ , \ r \in \mathbb Z
\]		
	(This follows from a theorem by Dirichlet. See 
	~\cite{BOREVICH-SHAFAREVICH} Section II.4 Theorem 5,
	and ibid. Table 1). 
\par
	The ring of integers of $K$ is an Euclidian domain (hint:
	Euclidian Algorithm for the division, with `degree' function
	$|N(.)|$), hence it admits a unique prime factorization
\[
	x = u p_1 ^{a_1} \cdots p_s^{a_s}
\]
\par
	where $p_1, \cdots, p_s$ are prime numbers in ${\mathfrak o}_{K}$
	and $u$ is a unit.
\medskip
\par
	If we are given $x \in {\mathfrak o}_K$, we can compute the prime
	number factorization of $N(x)$ over $\mathbb Z$:
\[
	N(x) = \pm q_1 ^{b_1} q_2^{b_2} \cdots q_s^{b_s}
\]
\par
	Then for each of the $q_i$'s, two cases may arise:
\par
	{\bf Case 1:} $q_i$ is prime over ${\mathfrak o}_K$. This will
	be the case if the equation $N(a+b \sqrt{2}) = q_i$ has no
	solution. For instance, it is easy to check by enumeration
	that $N(a+b\sqrt{2}) \equiv 0 \mod 3$ has no solution, hence
	$3$ is prime in $\mathfrak o_K$. In this case, $3$ divides 
	$x$. Also, $\absval_3$ is a valuation on $K$ and $|x|_3 = 
	\sqrt{|N(x)|_{3}}$.
\par
	{\bf Case 2:} The prime number $q_i$ factors over
	${\mathfrak o}_K$, so that $\pm q_i = p_i p_i'$. Example:
	$-7 = (1+2 \sqrt{2}) (1-2 \sqrt{2})$ so that $p_i$ or
	$p_i'$ divide $x$. There will be valuations associated
	to $p_i$ and $p_i'$, normalized such that:
\[
	|p_i^a \frac{q}{r}|_{p_i} = p_i^{-a}
\]
	for relatively prime $p_i$, $q$, $r$.
\par
	We have also two absolute values at infinity,
\begin{eqnarray*}
	|a + b \sqrt{2}|_{\sigma_1} &=& | a + b \sqrt{2} | \\
	|a + b \sqrt{2}|_{\sigma_2} &=& | a - b \sqrt{2} | 
\end{eqnarray*}
	where $\sigma_1$ is the identity and $\sigma_2: a+b\sqrt{2}
	\mapsto a - b \sqrt{2}$.
\end{example} 

\begin{example}
	Let $K = \mathbb Q [\sqrt{-5}]$. The ring of integers of K is given
	by ${\mathfrak o}_K = \{ a + b \sqrt{-5}, a, b, \in \mathbb Z\}$ 
	and its units are $\pm 1$. 
\par
	This example is interesting because ${\mathfrak o}_K$ is not 
        an Euclidian Domain. The
	closest we can get from prime number factorization is the
	primary decomposition into ideals. 
        For instance, the ideal $(3)$ splits in:
\[
	(3) = {\mathfrak p}_1 {\mathfrak p}_2
\]
	with ${\mathfrak p}_1 = ( 3 \sqrt{-5}, 1 + \sqrt{-5} )$ and
	${\mathfrak p}_2 = ( 3 \sqrt{-5}, 1 - \sqrt{-5} )$. Those are
	prime ideals (Proof: ${\mathfrak o}_K \mod {\mathfrak p}_1$ has
	three equivalence classes ${\mathfrak p}_1$, ${\mathfrak p}_1 + \sqrt{-5}$ 
	and ${\mathfrak p}_1 + 2\sqrt{-5}$. By writing down all the possibilities
	for $x \mod {\mathfrak p}_1$ and $y \mod {\mathfrak p}_1$, we conclude that
	$xy \equiv 0 \mod {\mathfrak p}_1$ implies
	that $x$ or $y \in {\mathfrak p}_1$. Same proof for ${\mathfrak p}_2$).   
\par
	To each prime ideal ${\mathfrak p}$, we associate the absolute
	value
\[
	|x|_{\mathfrak p} = \left(\mathrm N{\mathfrak p}\right) ^{-a}
\]
	where $\mathrm N{\mathfrak p}$ is the number of elements of the field
	$K / {\mathfrak p}$ and 
        $(x) = ({\mathfrak p})^a ({\mathfrak p}_2)^{a_2} \dots$
	is the prime ideal decomposition of $(x)$ in $\mathfrak o_K$.
	This definition can be extended to $K$ by setting
	$|\frac{y}{z}|_{\mathfrak p} =
        |y|_{\mathfrak p} - |z|_{\mathfrak p}$.
\medskip
\par
	There are also two archimedian valuations, $|x|$ and $|\overline x|$. 
\end{example}
\medskip
\par
	{\bf General case:} Let $K$ be a number field (i.e. a finite
	extension of $\mathbb Q$). Then its ring of integers $\mathfrak o_K$
	is a Dedekind domain (See ~\cite{ATIYAH-MACDONALD}, Chapter 9
	for definitions and theory on Dedekind domains. This last fact 
	is stated as Theorem 9.5).  It is a classical result that~:
\begin{theorem}[Corollary 9.4 in~\cite{ATIYAH-MACDONALD}] 
	In a Dedekind domain, every non-zero ideal has a
	unique factorization as a product of prime ideals.
\end{theorem}
\par
	This theorem was proved by Dedekind (c. 1871) in its
	original formulation (number fields), and later generalized to  
	Dedekind domains by Noether (1921, 1927).
\par
	Most modern textbooks provide just an existential proof of that 
        factorization. An effective algorithm to compute the prime ideal 
        decomposition of a principal ideal in a number field appears in
	classical textbooks such as ~\cite{BOREVICH-SHAFAREVICH}. 
	More recent algorithms are discussed in ~\cite{COHEN93}.
\medskip
\par
	A non-archimedian valuation $\absval_{\mathfrak p}$ is associated
	to each prime ideal $\mathfrak p$. 
	Once we know that $(x) = {\mathfrak p}_1 ^{a_1} 
        \cdots {\mathfrak p}_s ^{a_s}$, it is possible to compute the 
	$|x|_{\mathfrak p_i}$.
	 
\subsection{The product formula}

	Given a finite extension $K$ of $\mathbb Q$, let $M_K$ be the
	set of absolute values constructed as in the examples above:
\begin{enumerate}
	\item The archimedian absolute values $\absval_\sigma$ associated
	      to all $\sigma \in \text{Gal}_{\mathbb Q} K$
	\item The valuations $\absval_{\mathfrak p}$ associated to the
	      prime ideals $\mathfrak p$ of ${\mathfrak o}_K$.
\end{enumerate}
\par
	For each of those valuations $\absval_{\nu}$, we can define
	a new metric space $K_{\nu}$ as the completion of $K$ by
	the metric associated to $\absval_{\nu}$. This completion is
	a field, it is indeed a (possibly infinite) field extension of $K$.
\begin{example}
	The archimedian completion of the rationals is
	$\mathbb Q_{\infty} = \mathbb R$. The non-archimedian 
	completion associated to prime number $p$ is given by
	all sequences of the form
\[
	(q_i) = p^{n} (a_0 + p a_1 + p^2 a_2 + \cdots p^i a_i + \cdots)
\]
	for $n \in \mathbb Z$ and $a_i \in \{0 ; \cdots ; p-1 \}$.
\end{example}

\begin{example}
	If $K= \mathbb Q[\sqrt{2}]$ then $K_{\infty} = \mathbb R$.
	If $K = \mathbb Q[i]$, we have two archimedian valuations.
	For any of those, $K_{\infty} = \mathbb C$.
\end{example}

	If $L$ is a finite extension of $K$ and $\nu \in M_K$,
	then $\nu$ extends to at least one absolute value 
        $\nu_0$ of $M_L$. 
\par
	The completion $L_{\nu_0}$ of $L$ is clearly an extension of
	the completion $K_{\nu}$ of $K$. This extension turns to be
        finite. Indeed, let $\alpha$ be a primitive element of $L$
	with respect to $K$. A sequence $(l_i)$ with values in $L$ can
	always be written as $l_i = \sum k_{ij} \alpha^j$, $k_{ij} \in K$.

\begin{definition}
	The {\em local degree} $n_{\nu_0}$ of the extension $L:K$ in $\nu_0$ is
	the degree of the extension $[L_{\nu_0}:K_{\nu}]$.  
\end{definition}		

	The local degrees are the multiplicity values that make the product
	formula true. Indeed,
\begin{theorem}
	Let $K$ be a number field and let $M_K$ be constructed as
	above. Then for any $x \in K$,
\[
	\prod_{\nu \in M_K} |x|_{\nu} ^{n_{\nu}} = 1	
\] 
\end{theorem}

	This fact comes from the formula
\begin{theorem}\label{sumformula}
	Let $L$ and $K$ be number fields, $L$ a finite extension of $K$.
	Let $\nu \in M_K$. Then 
\[
	\sum_{\nu_{0} \mathrm{\ extends\ } \nu}
	n_{\nu_0}
	=
	[L:K]
\]
\end{theorem}
	For proofs, refer to ~\cite{LANG97}.
\medskip
\par
\begin{example}
	Let $K = \mathbb Q[\sqrt{2}]$. Let $\absval_{\sigma_1}$ and 
	$\absval_{\sigma_2}$ be the archimedian absolute values. Then
\[
	n_{\sigma_1} = [K_{\sigma_1} : \mathbb Q_{\sigma_1}] =
	[\mathbb R : \mathbb R] = 1
\]
	and same for $n_{\sigma_2}$. 
\par
	As we saw, $3$ is a prime in $K$. $\mathbb Q_3$ is the set
	of expressions $3^a (p_0 + 3 p_1 + 3^2 p_2 + \cdots)$ with
	$p_i$ in $\{0; 1; 2\}$. On the other hand, $K_3$ is the
	set of expressions $3^a (r_0 + 
        3 r_1 + \cdots)$ with $r_i$ defined modulo $3$. Possible
	values are $0,1,2,\sqrt{2}, 1+\sqrt{2}, 2+\sqrt{2}$. So $n_3 = 2$.
\par
	Now, let us compute $n_{1 + 2 \sqrt{2}}$. As usual,
	$\mathbb Q_7$ is the set
        of expressions $7^a (p_0 + 7 p_1 + 7^2 p_2 + \cdots)$ with
        $p_i$ in $\{0; 1; \cdots ;  7\}$. On the other hand,
	$K_{1 + 2 \sqrt{2}}$ contains the expressions 
        $(1 + 2 \sqrt{2})^a (r_0 + (1 + 2 \sqrt{2}) r_1 + \cdots)$.
	Since the $r_i$ are defined modulo $1 + 2 \sqrt{2}$, we
	have only $7$ possible values (recall that $7 \equiv 0$ and
	that $1 \equiv 2\sqrt{2}$). Therefore $K_{1 + 2 \sqrt{2}}$
        is indeed $\mathbb Q_{7}$  and $n_{1 + 2 \sqrt{2}} = 1$.
\end{example}

\subsection{Heights}

	Let $x = (x_0 : \cdots : x_n) \in \mathbb P^n (K)$, where
	$x$ is a number field. If $x_0, \cdots, x_n \in K$ we 
	define relative height as:
\[
	H_K(x) = \prod_{\nu \in M_K} \max_i |x_i|_{\nu}^{n_{\nu}}
\]
\par
	Because of the product formula, this definition is invariant
	with respect to multiplication of $x$ by any $\lambda \in
	K$:
\begin{eqnarray*}
	H_K( \lambda x) 
        &=& 
        \prod_{\nu \in M_K} \max_i |\lambda x_i|_{\nu}^{n_{\nu}}a\\
        &=&
        \prod_{\nu \in M_K} \max_i |\lambda|_{\nu}^{n_{\nu}} 
                                   |x_i|_{\nu}^{n_{\nu}} \\
        &=&
        \left( \prod_{\nu \in M_K} |\lambda|_{\nu}^{n_{\nu}} \right)
        \left( \prod_{\nu \in M_K} |x_i|_{\nu}^{n_{\nu}} \right)\\
        &=&
        \left( \prod_{\nu \in M_K} |x_i|_{\nu}^{n_{\nu}} \right)\\
        &=&
	H_K(x)
\end{eqnarray*} 
\medskip
\par
	However, this definition is still not invariant under multiplication
	by $\lambda \in \qalg$, $\lambda \not \in K$. In 
	order to obtain an invariant definition in $\qalg$,
	it is customary to set:
\[
	H(x) = H_K(x) ^{\frac{1}{[K:\mathbb Q]}}
\]


	In the particular case $\mathfrak o_K$ is an Euclidian domain,
	we can choose representatives $x_0, \cdots, x_n$ of 
	$(x_0: \cdots : x_n)$ that belong to $\mathfrak o_K$ and are
	relatively prime. Afterwards, only the Archimedian absolute
	values of $x$ can be different of 1.

\subsection{Properties of Heights}

	The basic properties we stated in section ~\ref{intro:heights} can be
	generalized to a convenient bound on $H(f(P))$ in terms of
	$H(f)$ and $H(P)$, where $f$ is a polynomial or a system of 
	polynomials. That bound is stated below as Theorem~\ref{heights}.
	This statement generalizes Theorem~5.6 in ~\cite{SILVERMAN86}.
	Theorem~\ref{heights} seems well-known to people working in number
	theory. However, the precise statement does not seem to be written 
        down in the literature, so it will be stated and proved below.
	A lot of useful properties of heights will then be stated as
	corollaries.
\par
	Before giving the precise statement, it is convenient to clarify
	some notations: we will reserve the notation $H(x)$ for the 
	projective height of $x \in \mathbb P^n (\qalg)$, 
        as we defined  
	in section~\ref{intro:heights}. 
        If we want to write down the affine height
	of some $y \in (\qalg)^n$ we will write instead:
	$H(y,1)$. 
\par
	This convention can be extended to polynomials or systems of
	polynomials. If $f = (f_1, \cdots, f_k)$ is a system of
	multivariate polynomials, then its height $H(f)$ is the height of the
	projective point $( \cdots : f_{iI} : \cdots ) \in
	\mathbb P^{S(f)-1} (\qalg)$. Above, $S(f) =
	S(f_1) + \cdots + S(f_k)$ where $S(f_i)$ is the number of
	non-zero coefficients of $f_i$. Of course, when we need the
	`affine' height of $f$, we will write $H(f,1)$.
\par
	With those conventions, we may state the
\begin{theorem} \label{heights}
	Let
\[
	\begin{array}{rrcl}
	  F=(F_0, \cdots, F_m): & 
          \mathbb C^{n_1} \times \mathbb C^{n_2} \times \cdots 
                  \times \mathbb C^{n_k} & 
	  \rightarrow &
	  \mathbb C^{m+1} \\
          & P^1, \cdots , P^k & \mapsto & F(P^1, \cdots, P^k)
	\end{array}
\]
	be a system of multi-homogeneous polynomials with algebraic
	coefficients, where each $F_i$ has degree $d_j$ in variables
	$P^j$. Let the $P^j$ be algebraic. Then
\[
	H(F(P)) \le (\max S(f_i)) H(F) H(P^1)^{d_1} \cdots H(P^k)^{d_k}
\]
\end{theorem}

	In the particular case $k=1$, $F$ is a morphism from
	$\mathbb P^n (\qalg)$ into 
        $\mathbb P^m (\qalg)$ and Theorem~\ref{heights}
	is Theorem 5.7 in ~\cite{SILVERMAN86}. In the full statement,
	Theorem~\ref{heights} generalizes that fact to morphisms from
	$\mathbb P^{n_1} (\qalg) \times \cdots \times
        \mathbb P^{n_k} (\qalg)$ into
	$\mathbb P^m (\qalg)$.


        The proof below is a modification of the proof of Theorem~5.6 in
        ~\cite{SILVERMAN86}.
\begin{proof}[Proof of Theorem~\ref{heights}]
        Let $K \subset \qalg$ be a finite extension of 
	$\mathbb Q$,
        containing all the coefficients of $F$ and all the coordinates
        of $P$. We have:
\begin{eqnarray*}
        H_K \left(F(P)\right)
        &=&
        \prod_{\nu \in M_K}
        \max_i |F_i(P)|_{\nu}^{n_{\nu}}
        \text {\ by definition}
        \\
        &=&
        \prod_{\nu \in M_K}
        \max_i |\sum_J F_{iJ}(P^1)^{J_1} \cdots (P^k)^{J_k}|_{\nu}^{n_{\nu}}
\end{eqnarray*}
        where the $J_k$ are multi-indices, $|J_j|=d_j$ and
        $J=(J_1; \cdots ; J_k)$.
\medskip
        Let $\epsilon(\nu)$ be equal to 1 when $\nu$ is archimedian,
        and to 0 when $\nu$ is a valuation. The triangular inequality
        implies now: $|\sum_{1 \le i\le n} a_i|_{\nu}
        \le n^{\epsilon(\nu)} \max |a_i|_{\nu}$. Hence.
\begin{eqnarray*}
        H_K \left(F(P)\right)
        &\le&
        \prod_{\nu \in M_K}
        S^{\epsilon(\nu) n_{\nu}}
        \max_{i,J} | F_{iJ}(P^1)^{J_1} \cdots (P^k)^{J_k}|_{\nu}^{n_{\nu}}
        \\
        &\le&
        S^{\sum \epsilon(\nu) n_{\nu}}
        \prod_{\nu \in M_K}
        \max_{i,J} | F_{iJ}(P^1)^{J_1} \cdots (P^k)^{J_k}|_{\nu}^{n_{\nu}}
        \\
        &\le&
        S^{\deg[K:\mathbb Q]}
        \prod_{\nu \in M_K}
        \max_{i,J} | F_{iJ}(P^1)^{J_1} \cdots (P^k)^{J_k}|_{\nu}^{n_{\nu}}
\end{eqnarray*}
        using the fact $\sum_{\nu} \epsilon(\nu) n_{\nu}
        = \sum_{\nu \text{\ archimedian}} n_{\nu} = \deg[K:\mathbb Q]$.
\medskip

        Now, since the degree of $(P^j)^{J_j}$ is precisely $d_j$,
        we can write:
\[
        | (P^j)^{J} |_{\nu}
        \le
        \max_l
        \left( | (P^j_l) |_{\nu} \right)^{|J|}
\]
\medskip


        We can use homogeneity to bound:
\begin{sloppypar}
\begin{eqnarray*}
        H_K \left(F(P)\right)
        &\le&
        S^{\deg[K:\mathbb Q]}
        \prod_{\nu \in M_K}
        \max_{i,J} | F_{iJ} |_{\nu}^{n_{\nu}}
        \max_l | (P^1_l) |_{\nu} ^{d_1 n_{\nu}}
                \cdots
        \max_l | (P^k_l) |_{\nu} ^{d_k n_{\nu}}
        \\
        &\le&
        S^{\deg[K:\mathbb Q]}
        \left(
        \prod_{\nu \in M_K}
        \max_{i,J} | F_{iJ} |_{\nu}^{n_{\nu}}
        \right)
        \left(
        \prod_{\nu \in M_K}
        \max_l \left( | (P^1_l) |_{\nu} \right)^{d_1 n_{\nu}}
        \right)
                \cdots
        \\
        & &
                \cdots
        \left(
        \prod_{\nu \in M_K}
        \max_l \left( | (P^k_l) |_{\nu} \right)^{d_k n_{\nu}}
        \right)
        \\
        &=&
        S^{\deg[K:\mathbb Q]} H_K(F) H_K(P^1)^{d_1} \cdots  H_K(P^k)^{d_k}
\end{eqnarray*}
\end{sloppypar}

        So, after taking the $\deg[K:\mathbb Q]$-th root, we obtain:
\[
        H(F(P)) \le S H(F) H(P^1)^{d_1} \cdots  H(P^k)^{d_k}
\]
        \end{proof}
\medskip



\par
	Also, we may want an affine version of Theorem~\ref{heights}:
\begin{corollary} \label{heights-affine}
	Let
\[
	\begin{array}{rrcl}
	  G=(G_1, \cdots, G_m): & 
          \mathbb C^{n_1} \times \mathbb C^{n_2} \times \cdots 
                  \times \mathbb C^{n_k} & 
	  \rightarrow &
	  \mathbb C^{m} \\
          & Q^1, \cdots , Q^k & \mapsto & G(Q^1, \cdots, Q^k)
	\end{array}
\]
	be a system of polynomials with algebraic
	coefficients, where each $G_i$ has degree at most $d_j$ in variables
	$Q^j$. Let the $Q^j$ be algebraic. Then
\[
	H(G(Q),1) \le (\max S(G_i)) H(G,1) H(Q^1,1)^{d_1} \cdots H(Q^k,1)^{d_k}
\]
\end{corollary}
\begin{proof}[Proof of Corollary~\ref{heights-affine}]
	Introduce homogenizing variables $q_1, \cdots, q_k$ so that for each
	$G_i$, its homogenization $F_i( Q^1, q_1 ; \cdots ; Q^k, q_k)$
	has degree precisely $d_j$ in variables $(Q^j, q_j)$. Also,
	introduce the polynomial $F_0 = q_1^{d_1} \cdots q_k^{d_k}$.
	Then we have $H(F) = H(G,1)$. Moreover, $\max S(F_i) = \max S(G_i)$.
	Specialize $q_1 = \cdots = q_k = 1$, and apply 
	Theorem~\ref{heights} to obtain
\begin{eqnarray*}
	H(G(Q),1) &\le& H(F(P)) \\
		  &\le& (\max S(f_i)) H(F) H(P^1)^{d_1} \cdots H(P^k)^{d_k}\\
		  &\le& 
        (\max S(G_i)) H(G,1) H(Q^1,1)^{d_1} \cdots H(Q^k,1)^{d_k}
\end{eqnarray*}
\end{proof}

\medskip
\par

\begin{corollary}
	Let $x_1$, \dots, $x_k \in \qalg$. Then
\[
H(x_1 x_2 \cdots x_k) \le H(x_1) H(x_2) \cdots H(x_k)
\]
\end{corollary}

\begin{corollary}
	Let $x_1$, \dots, $x_k \in \qalg$. Then
\[
H(x_1 + x_2 +\cdots + x_k) \le n H(x_1) H(x_2) \cdots H(x_k)
\]
\end{corollary}


\begin{corollary}\label{minimal}
	Let $\alpha \in \qalg$ and let $p(x)$ be
	the minimal polynomial of $\alpha$ over $\mathbb Q$.
	Then
\[
	H(p) = H(p,1) \le (2 H(\alpha))^{[\mathbb Q[\alpha]:\mathbb Q]}
\]
\end{corollary}
\begin{proof}
	By definition $p$ is an irreducible polynomial and its coefficients
	are relatively prime integers hence $H(P)=H(P,1)$.
\medskip
\par
	We will use the following fact (Lemma 5.10 in \cite{SILVERMAN86}):
	If $\bar \alpha$ is a conjugate of $\alpha$ by the Galois group
	of $\qalg$ over $\mathbb Q$, then $H(\bar \alpha) =
	H(\alpha)$.
\par
	In our case, the roots of the minimal polynomial $p(x)$ are all
	the conjugates of $\alpha$ by the Galois group of 
	$\mathbb Q[\alpha]$ over $\mathbb Q$. Hence they have the same
	height as $\alpha$. 
\par
	Consider now the polynomial system:
\[
	\sigma(x_1, \cdots, x_n) =
	\left[
	\begin{array}{c}
	\sigma_0(x_1, \cdots, x_n) \\
	\sigma_1(x_1, \cdots, x_n) \\
	\vdots \\
	\sigma_n(x_1, \cdots, x_n) 
	\end{array}
	\right]
	=
        \left[
	\begin{array}{c}
	1 \\
	x_1+ \cdots+ x_n \\
	\vdots \\
	x_1 \cdots x_n 
	\end{array}
	\right]
\]
\par
	Then $\sigma$ can be interpreted as having degree at most 1
	in each variable $x_j$. According to Corollary~\ref{heights-affine},
\[
	H(\sigma(\alpha_1, \cdots , \alpha_n)) \le
	\max S(\sigma_i) H(\sigma,1) H(\alpha)^n
\]
	where $\alpha = \alpha_1, \cdots, \alpha_n$ are all the 
	Galois conjugates of $\alpha$. Furthermore, $H(\sigma,1)=1$
	and $S(\sigma_i) = \binomial{n}{i} < 2^n$.
\end{proof}

	We can generalize the Cauchy bound in example~\ref{ex:cauchy}
into the following statement using Heights:

\begin{theorem}\label{cauchy}
	Let $p(x)$ be a polynomial with algebraic coefficients,
	and let $\alpha$ be a root of $p$. Then
\[
	H(\alpha) < 2 H(p)
\]
\end{theorem}

\begin{proof}[Proof of Theorem~\ref{cauchy}]
	
	Let $d = \deg p$ and 
        assume without loss of generality that $p_d = 1$.
        Let $K = \mathbb Q[p_0, \cdots, p_d]$.
	Let $M_K$ be a system of absolute values of $K$. 
\par
	We claim that if $\nu \in M_K$, 
\[
	\max ( |\alpha|_{\nu} , 1) \le s + \max |p_i|_{\nu}
\]
	where $s=1$ is $\nu$ is Archimedian and $s=0$ otherwise.
	It will follow that $\max ( |\alpha|_{\nu} , 1) $
        $\le (1+s) \max |p_i|_{\nu}$.
\par
	Indeed, we write
\[
	\alpha ^d = - p_{d-1} \alpha^{d-1} - \cdots - p_0
\]
\par
	If $\nu$ is a valuation, this implies that
\[
	|\alpha|_{\nu} ^d \le \max |p_i|_{\nu} |\alpha|_{\nu}^i
\]
\par
	Hence,
\[
        \max(|\alpha|_{\nu}, 1) ^d 
        \le ( \max |p_i|_{\nu}) 
        \max(|\alpha|_{\nu}, 1) ^{d -1}
\]
	and
\[
        \max(|\alpha|_{\nu}, 1) 
        \le ( \max |p_i|_{\nu}) 
\]
\medskip
\par
	On the other hand, if $\nu$ is an Archimedian absolute value,
\[
        \max(|\alpha|_{\nu}, 1) ^d 
        \le 
        ( \max |p_i|_{\nu}) 
	\sum_{j=1}^{d-1}
        \max(|\alpha|_{\nu}, 1) ^{j}
\]
\par
	Therefore, if $|x|_{\nu} > 1$, 
\[
        \max(|\alpha|_{\nu}, 1) ^d 
        < 
        ( \max |p_i|_{\nu}) 
	\frac{ \max(|\alpha|_{\nu}, 1) ^{d}}
             { \max(|\alpha|_{\nu}, 1) -1 }
\]
\par
	It follows at once that
\[
        \max(|\alpha|_{\nu}, 1)  
        < 
        1+ \max |p_i|_{\nu} 
\]	
\par
	If $|x|_{\nu} \le 1$, then inequality $\max ( |\alpha|_{\nu} , 1)$
        $ \le 1 + \max |p_i|_{\nu}$ is trivial.
\medskip
\par
	We can now estimate the height of $x$:
\begin{eqnarray*}
	H_K (x,1) &<&
        \prod_{\nu \in M_K} (1+s(\nu))  \max |p_i|_{\nu}^{n_{\nu}} \\
	&\le& 2^{\sum_{\nu } n_{\nu}} H_K(p)
\end{eqnarray*}
	where the last sum ranges over all archimedian valuations
	in $M_K$.
\par
	We can invoke now Theorem~\ref{sumformula} to replace
	$\sum_{\nu} n_{\nu}$ by $[K:\mathbb Q]$.
	Taking roots, we obtain:
\[
	H(x) < 2 H(p)
\]
\end{proof}

\subsection{Weights}
\label{heights:advanced}

\begin{definition}
	Let $\alpha \in (\qalg)^k$.
	We define the weight function as:
\[
	w(\alpha) = [\mathbb Q[\alpha_1, \cdots, \alpha_k] : \mathbb Q]
		    (1 + \lceil \log_2 H(\alpha,1) \rceil)
\]
\end{definition}

	Weights can be used to bound absolute values:
\begin{lemma}\label{absonweight}
	Let $\alpha \in \qalg$, $\alpha \ne 0$.
	Then $2^{-w(\alpha)} < |\alpha| < 2^{w(\alpha)}$.
\end{lemma}

\begin{proof}
	Let $K = \mathbb Q[\alpha]$. Recall that
\[
	H_K(\alpha) = \prod_{\nu \in M_K} \max (1, |\alpha|_{\nu}^{n_{\nu}})
\]
\par
	There is one archimedian valuation equal to the absolute
	value, so $H_K(\alpha) \ge |\alpha|$. Hence $H(\alpha) \ge
	|\alpha|^{\frac{1}{[K:\mathbb Q]}}$. It follows that
	$|\alpha| < 2^{w(\alpha)}$. 
	Since $H(\alpha)=H(\alpha^{-1})$, the first inequality also
	holds.
\end{proof}




\begin{theorem}\label{minonweight}
	Let $x$ be an algebraic number, and let $p$ be its minimal
	polynomial over $\mathbb Q$. Let $d = \deg p = \deg_{\mathbb Q}(x)$.
	Then:
\[
	w(p)-2 \le w(x) \le d (1+w(p)) \le w(x)^2
\]
\end{theorem}

\begin{proof}[Proof of Theorem ~\ref{minonweight}]
	Theorem~\ref{cauchy} implies that
\[
	w(x) \le d (1+ \lceil\log_2 2H(p) \rceil) \le d(1+w(p))
\]
\par
	On the other hand, Corollary~\ref{minimal} gives:
\[
	H(p) \le (2 H(x))^d
\]
	hence
\[
	\log_2 H(p) \le d \log_2 (1+H(x)) \le w(x)
\]
	and therefore $w(p) \le 2 + w(x)$.
\end{proof}


\par
	The hypothesis of irreducibility of $p$ is essential here.



	An interesting question is how to bound the height of an
	isolated root of a system of polynomial equations.	
	We conclude this section by quoting, for further reference,
	the following result by Krick and Pardo:

\begin{theorem}[Krick and Pardo~\cite{KP}, Corollary 6]
	\label{KP}
	Let $f_1, \cdots , f_r$ $r \le n$ be polynomials
	in $\mathbb Z[x_1, \cdots , x_r]$ of degree and
	height bounded by $d \ge n$ and $\eta$, respectively,
	and let $V$ denote the algebraic affine variety defined
	by: $V = \{ x: f_1(x) = \cdots f_r(x) = 0 \}$.
\par
	Then $V$ has at most $d^n$ isolated points, and their
	height verifies:
\[
	\log_2 H(P) \in d^{O(n)} (\log_2 r + \log_2 \eta)
\]
\end{theorem}






\section{Weighted model of Computation}

\label{machines}

\subsection{Machines over a ring}

	Let $R$ be a commutative ring with unity and without zero
	divisors.
	The following examples will be of interest here:

\begin{itemize}
	\item The ring $\mathbb F_2$ consisting of elements $0$ and $1$.
	\item The ring $\mathbb Z$ of rational integers.
	\item The rings $\qalg$ and 
              $\qralg$ of algebraic and
              real algebraic numbers, resp.
	\item A number field $K$, i.e. a finite algebraic 
              extension of $\mathbb Q$. However, for simplicity,
	      we consider $K$ as a ring.
\end{itemize}
\medskip
\par
	Also, let $o$ be a subset of $R$ and let $c: R^{\infty} \rightarrow
	\mathbb R^{+}$ be a cost function.
\medskip
\par 

	Essentially, a {\em machine} $M$ maps an input space into
	an output space. For input and output space, we want
	to allow all finite, arbitrarily large sequences in
	our base ring $R$. Those sequences are denoted by
	$R^{\infty}$. (In earlier literature, the notation was
	$R^{*}$). Eventually, we use the same notation for the
	machine $M$ and for the function $M: R^{\infty}
	\rightarrow R^{\infty}$, $x \mapsto M(x)$. 
\par
	Intermediate calculations are stored in a {\em State Space}
	{$\mathcal S$}, that can contain arbitrarily large, finite 
	bisequences over $R$. This is denoted  
	$R_{\infty}$.
\par
	A polynomial map $f : R_{\infty} \rightarrow R_{\infty}$ is
	a finite sequence of polynomials in $R_{\infty}$, and 
	therefore it is described by a finite set of coefficients.


\medskip
\par 

	One can think of a machine as a {\em flowchart}, i.e., as a 
	labelled directed graph consisting of {\em nodes} and
	{\em edges}. A first, {\em initial} node is `executed'.
	Then the machine follows and outgoing edge, and finds
	a new node to `execute', and so on. 

\par
	A machine $M$ over $(R,o,c)$ is a defined as a
	labelled
	directed graph $\Gamma$. The nodes of $\Gamma$, indexed
	$1$ to $N$, are of the following five types:

\medskip
\par
\begin{minipage}{1.5in}
\[
\xymatrix{
 \ar[d] \\
 \compnode{$s \leftarrow g_{\gamma} (s)$} \ar[d]\\
 \\
}
\]
\end{minipage}
\hspace{\stretch {1}}
\begin{minipage}{2.5in}
	\redemph{Computation nodes}
                have exactly one outgoing
                edge, and Computation Node $\gamma$ is associated to
		a (finite) polynomial system $g_{\gamma}: \cal S \rightarrow
		\cal S$. The effect of executing $\gamma$ is to `replace'
		$s \in \cal S$ by $g_{\gamma}(s) \in (\cal S)$.



		Replacing means the following. Suppose that $g_{\gamma} = 
		(g_{-1}, g_0, g_1, g_2)$ where each of the $g_{-1}$, \dots,
		$g_2$ is a polynomial in $s$. Then we replace $s_{-1}$,
		\dots , $s_2$ by the values of $g_{-1} (s)$, \dots,
                $g_2 (s)$ and leave all the other coordinates invariant: 

\end{minipage}\\

\[
\xymatrix{
{\tape{s_{-3}}{s_{-2}}{\MyBlue{s_{-1}}}{\MyBlue{s_{0}}}{\MyBlue{s_{1}}} {\MyBlue{s_{2}}}{s_{3}}}
\ar[d]^{f_{\gamma}}\\
{\tape{s_{-3}}{s_{-2}}{\MyBlue{g_{-1}(s)}}{\MyBlue{g_{0}(s)}}{\MyBlue{g_{1}(s)}}{\MyBlue{g_{2}(s)}}{s_{3}}} 
\\}
\]


\medskip
\par

\begin{minipage}{2.0in}
        \redemph{Branching nodes} have exactly two outgoing
                edges labelled TRUE and FALSE, and Branching Node $\gamma$ 
                is associated to
		the expression $s_0 \in o$. Execution branches accordingly.
\end{minipage}
\hspace{\stretch {1}}
\begin{minipage}{2.0in}
\[
\xymatrix{
 \ar[d] \\
 {\decnode{$s_0 \in o$}} \ar[r]^-{\text{FALSE}} 
 \ar[d]^-{\text{TRUE}} & \\ \\
}
\]
\end{minipage}\\

\medskip
\par
\begin{minipage}{1.5in}
\[
\xymatrix{
 \ionode{$s \leftarrow I(\text{Input})$} 
 \ar[d] \\ \\
}
\]
\end{minipage}
\hspace{\stretch {1}}
\begin{minipage}{2.5in}
	The \redemph{Input node} has exactly one outgoing
                edge, and is associated to the input map
		$I: \cal I \rightarrow \cal S$. $I$ just copies 
		the input into non-negative positions in the state space.
		Input node is indexed 1.	
\end{minipage} \\

\[
\xymatrix{
{\halftape {\MyBlue{i_0}}{\MyBlue{i_1}}{\MyBlue{i_2}}{\MyBlue{i_3}}}
 \ar[d]^{I}\\
{\tape {0}{0}{0}{\MyBlue{i_0}}{\MyBlue{i_1}}{\MyBlue{i_2}}{\MyBlue{i_3}}}
}
\]

\medskip
\par

\begin{minipage}{2.5in}
	The \redemph{Output node} has one outgoing
                edge to itself, and is associated to an output map
		$O: \cal S \rightarrow \cal O$. $O$ copies the 
		non-negative state space into the output space. 
		The output node is indexed $N$.
\end{minipage}
\hspace{\stretch {1}}
\begin{minipage}{1.5in}
\[
\xymatrix{
 \ar[d]\\
 {\ionode{Output $O(s)$}} 
}
\]
\end{minipage} \\


\[
\xymatrix{
{\tape{s_{-3}}{s_{-2}}{s_{-1}}{\MyBlue{s_{0}}}{\MyBlue{s_{1}}}
{\MyBlue{s_{2}}}{\MyBlue{s_{3}}} }
 \ar[d]^{O}
\\
{\halftape{\MyBlue{s_{0}}}{\MyBlue{s_{1}}}{\MyBlue{s_{2}}}{\MyBlue{s_3}} }
}
\]

\medskip
\par
\centerline{Fifth node}
\begin{minipage}{1.5in}
\[
\xymatrix{ \ar[d]\\
 {\connector{$\circlearrowright$}} \ar[d]\\
 \\
}
\]
\end{minipage}
\hspace{\stretch {1}}
\begin{minipage}{2.5in}
       	The \redemph{Fifth node} shifts $s \in \cal S$ by 1 to the
                left or to the right.      
\end{minipage}
\\
\[
\xymatrix{
{\tape{\MyBlue{s_{-3}}}{\MyBlue{s_{-2}}}{\MyBlue{s_{-1}}}
{\MyBlue{s_{0}}}{\MyBlue{s_{1}}}
{\MyBlue{s_{2}}}{\MyBlue{s_{3}}}} \ar[d] \\
{\tape{\MyBlue{s_{-2}}}{\MyBlue{s_{-1}}}{\MyBlue{s_{0}}}{\MyBlue{s_{1}}}
{\MyBlue{s_{2}}}{\MyBlue{s_{3}}}{\MyBlue{s_4}}}
}
\]

	The formal definition goes as follows: Let $M$ be a machine with
	input $x$, and let $\gamma_0$ be the input node. Let $\beta(\gamma)$
	denote the node associated to the outgoing edge of $\gamma$ if
	$\gamma$ is not a decision node. In
	case $\gamma$ is a decision node, set $\beta_{\mathrm{true}}$ and
	$\beta_{\mathrm{false}}$ be the corresponding nodes. Let 
	$\sigma_{\mathrm{left}}$
	and $\sigma_{\mathrm{right}}$ be the left shift and right shift,
	respectively. We can define the recurrence:
\begin{equation}
\begin{aligned}
	s^{t+1} &\leftarrow \left\{ 
		\begin{array}{ll}
		g_{\gamma^t} (s^t) & \text{if $\gamma^{t}$ is a computation
						node.}\\
		\sigma_{\mathrm{right}} (s^t) & \text{if $\gamma^{t}$ is a right
						shift node.}\\
		\sigma_{\mathrm{left}} (s^t) & \text{if $\gamma^{t}$ is a left
						shift node.}\\
		s^t &\text{otherwise}	
		\end{array} \right.\\
	\gamma^{t+1} &\leftarrow \left\{
                \begin{array}{ll}
		\beta_{\mathrm{true}}(\gamma^t) & 
			\text{if $\gamma^{t}$ is a decision node
			and $s_0^t \in o$.}\\
		\beta_{\mathrm{false}}(\gamma^t)& 
			\text{if $\gamma^{t}$ is a decision node
			and $s_0^t \not \in o$.}\\
		\beta(\gamma^t) & \text{Otherwise.}
        \end{array} \right. 
\end{aligned}\tag{R$_1$}
\end{equation}
	with initial condition
\begin{equation}
\begin{aligned}
	\gamma^{1} &= \gamma\\
	s^1 &= I(x)
\end{aligned}\tag{R$_2$}
\end{equation}
	
\begin{definition}
	The machine $M$ with input $x$ stops after $T$ steps if $\gamma^T$ is
	an output node. The output, also denoted as $M(x)$, is
	$O(s^T)$. The function $x \mapsto M(x)$ is called the
	input-output mapping, and is defined only for those $x$ such that
	the machine stops at some time $T$. The set $\Omega_M$ 
	$=\{ x : M(x)\  \text{stops}
	\}$ is said to be the set recognized by machine $M$, or
	`halting set' of $M$.
\end{definition}

\begin{definition}
	A set $X \in K^{\infty}$ belongs to class $re$ over
	$(K,o,w)$ if
	and only if it is the halting set of a machine over $(K,o,w)$. A set
	$X$ belongs to class $r$ if both $X$ and its complement
	are in class $re$
\end{definition}

	Note that the definitions of classes $re$ and
	$r$ do not depend on the weighting function. They
	do depend on the branching set $o$.
\par
	Sets in the class $r$ are also said to be `decidable'.
	Indeed, it is easy to construct a machine that `decides' a
	set $X \in r$, i.e., $M(x)=0$ for $x \in X$ and $1$
	otherwise, in finite time.
\medskip
\par
\begin{definition} \label{defsize}
	The {\em size} of some $x \in R^k \subset R^{\infty}$ is defined 
	as:
\[
	\text{Size }( x ) = k c(x)	
\]
\end{definition}

\begin{definition}
	The {\em cost} or {\em running time} of computing $M(x)$ is
\[
\text{Cost } (M,x) = T \max c(s_j^t) + \text{Size }(M(x)) 
\]
	where $T$ is the number of steps 
	(formally, $T = \min t$ s.t. $\gamma^t$ is an output node)
	and $s$ is the vector of all $s_j^t$ for all values of $j \in \mathbb Z$
	and $t \in \mathbb N$. Note that it suffices to take $1 \le t \le T$
	and $-t-m \le j \le t+m$ where $m$ is the maximum of the length of
	input $x$ and a constant depending on the machine $M$.
\end{definition}


\begin{definition}
	A machine $M$ is {\em polynomial time} if and only if there are 
	constants $a, b$ such that for any $x$, if $M(x)$ is defined,
	then
\[
	\text{Cost } (M,x) \le a (\text{Size } x)^b
\]
\end{definition}

	By defining the size of the input as in Definition~\ref{defsize}
	for the cost function $c (x) = w(x)$,
	the sum: 
\[
	k, x_1, \cdots, x_k \mapsto \sum_{i=1}^k x_i
\]
	can be performed in polynomial time. If we had defined the
	size as $k \max c(x_i)$ instead, with $c= w$, the
	weight of the output (and of intermediate results) would
	be exponentially large in the input size.
\par
	The motivation for including the size of the output in the
	definition of the cost is the following property:
	Under the definitions above, if $M$ and $N$ are polynomial
	time machines, their composition
	$M \circ N$ is also polynomial time.
	
\begin{definition}
	Two machines $M$ and $M'$ are {\em polynomially equivalent} 
	if and only if
	they have the same input-output map and the running time of
	computing $M(x)$ and $M'(x)$ are related by
\begin{eqnarray*}
	\text{Cost } (M,x) &\le& a \text{Cost }(M',x)^b\\
	\text{Cost } (M',x) &\le& a \text{Cost }(M,x)^b
\end{eqnarray*}
	where $a$ and $b$ are constants depending on $M$ and $M'$ only.
\end{definition}

	The definition of `machine' given above corresponds
	to a `machine in normal form' in the terminology of~\cite{BCSS98}.
	It is easy to see that for any machine in the sense of ~\cite{BCSS98}
	there is a polynomially equivalent machine in `normal form'.
\par
	Moreover, given $M$, there is a polynomially equivalent machine
	$M'$ with all polynomials $g_\gamma$ of 
        the form $s_0 \leftarrow
	x + y$ or $s_0 \leftarrow x - y$ or $s_0 \leftarrow xy$ where
	$x$ and $y$ can be state variables $s_j$ or constants.
\par
	It is easy to see that for any machine $M$, there is a 
	constant $C_M$ independent of the input 
	such that $\max_{i,t} w(s_i^t) \le C_M \max_{i,t}w((s')_i^t)$ where 
	$s'$ corresponds to an equivalent machine with the
	restrictions above.  

\subsection{The register equations}

	Assume a model $(R,o,c)$ of computation.
	As in~\cite{BSS} or ~\cite{BCSS98}, the recurrence (R$_1$-R$_2$) 
	can be rewritten in a more concise form
\begin{equation}
	\left\{
		\begin{array}{ccl}
		s^{t+1} & = & G_t (\gamma^t, s^t ) \\
		\gamma^{t+1} & = & B (\gamma^t, \chi( s_0^t))
		\end{array}
	\right.
\tag{R'}
\end{equation}

	with initial conditions $\gamma^1 = 1$ and $s^1=I(x)$.
	Above, $G_t$ and $B$ are polynomials and 
	$\chi$ is the characteristic function of the 
	branching-set $o$:
\par
	In the ordered case, $\chi (x) = 1$ if $x > 0$ and $0$ otherwise.
	In the unordered case, $\chi (x) = 1$ if $x=0$ and $0$ otherwise.
	For a p-adic analogue, see~\cite{MW}.
\par
	Polynomials $G_t$ and $B$ are polynomials with coefficients in
	the field of fractions of $R$. They can be constructed 
	explicitly. Assume that the machine M has $N$ nodes.
\par
	Let $p(i,j)$ be the $N$-th Lagrange interpolating polynomial:
\[
	p(i,j) = \frac{ \prod_{1\le j \le N, k \ne j} (i-k) }
	{ \prod_{1\le j \le N, k \ne j} (j-k) }
\]
\par
	This is a degree $N$ polynomial in $i$. If $j \in
	\{ 1 ; \cdots N \}$, its denominator
	divides $N-1\,!$. If $i, j \in \{ 1 ; \cdots N \}$
	then $p(i,j)=1$ if $i=j$ and zero otherwise.
\par
	Let
\[
	g_i^t (s) = 
	\left \{
	\begin{array}{ll}
	g_i(s) & \text{if $i$ is a computation node} \\
	s_{-t+1}, \cdots , s_{t+1} 
	       & \text{if $i$ is a left shift node} \\
	s_{-t-1}, \cdots , s_{t-1} 
	       & \text{if $i$ is a right shift node} \\
	\text{Empty polynomial} & \text{otherwise}
	\end{array}
	\right.
\]

	Then we can set:
\[
	G_t (\gamma, s) =
	\sum_{i=1}^N p(\gamma,i) g_i^t(s)
\]
	Also, if we set
\[
	\beta(i,\chi) =
	\left \{
	\begin{array}{ll}
	  \beta(i) & \text{if $i$ is not a branching node}\\
	  \chi \beta_{\mathrm{true}}(i) + (1-\chi) \beta_{\mathrm{false}} (i) &
		\text{if $i$ is a branching node}
	\end{array}
	\right.
\]
	then we can set
\[
	B(\gamma, \chi)
	=
	\sum_{i=1}^N p(\gamma,i) \beta(i,\chi)
\]

	It is also possible to rewrite (R') as a system
	of algebraic equations with coefficients in $R$, by introducing 
	new variables $u^t$ and $v^t$:
\begin{equation}
	\left\{
	\begin{array}{ccl}
	  N-1\,! s^{t+1} & = & N-1\,!  G_t(\gamma^t, s^t) \\
	  N-1\,! \gamma^{t+1} & = & N-1\,!  B(\gamma^t, u^t) \\
	\end{array}
	\right.
	\tag{R$_1$"}
\end{equation}
\par
	In the ordered case $o = \{x \in R: x>0 \}$, 
	variables $u^t$ should satisfy:
\begin{equation}
	\left\{
	\begin{array}{ccl}
	  s_0^t \, (1-s_0^t (v^t)^2 )\,  (1 + s_0^t (v^t)^2) & = & 0\\
	  u^t - s_0^t (v^t)^2 & = & 0 \\
	\end{array}
	\right.
	\tag{R$_{2a}$"}
\end{equation}
\par
	In the unordered case $o=\{0\}$, one needs instead:
\begin{equation}
	\left\{
	\begin{array}{ccl}
	  s_0^t (1-s_0^t v^t )  & = & 0\\
	  u^t  - s_0^t v^t & = & 0 \\
	\end{array}
	\right.
	\tag{R$_{2b}$"}
\end{equation}

	We may summarize all this as:
\begin{theorem}
	Let $M$ be a machine (in normal form)
        over $(R,=,w)$ for any $R$ or over $(\qralg,>,w)$. Let
	$N$ be the number of nodes of $M$.
	Then there are systems $\varphi_T$ of polynomial
	equations such that $(\gamma^t, s^t)$ satisfy
	equation (R") for $t=1, \cdots, T$ if and
	only if there are $u_j \in \mathbb Z$, $v_j$ such that
	$v_j^2 \in R$, such that 
	$\varphi_T(\gamma, s, u, v) = 0$. Moreover
\begin{enumerate}
	\item The number of equations is $\le 4 T (2T+1+C)$
	where $C$ is a constant depending on $M$. 
	\item The coefficients of $\varphi_T$ are all in $R$.
	\item $\deg \varphi_T \le \max ( \max_{\gamma} ( \deg f_{\gamma} ) +
	      N - 1, 7)$ 
	\item The number of non-zero coefficients in each equation of
		$\varphi_T$ is bounded by $(N-1) \sum_{\gamma}
		S(g_{\gamma})$ where $S(g)$ is the number of non-zero
		coefficients of $g$ and the sum ranges over all the
		computation nodes.
	\item The height of the coefficients of $\varphi_T$ is bounded
	      by $N! 2^N H(M)$, where $H(M)$ is the height of the vector
	      of all the coefficients of each $g_{\gamma}$.
	\item $H(u)=1$ and $H(v^t_j) \le \sqrt{H(s)}$, $H(\gamma) \le N$.
	      Moreover, $u \in \{-1;0;1\}$ and $v \in R$ or $v^2 \in R$,
	      respectively.
\end{enumerate}
\end{theorem}

	This extends Theorem 2 in Chapter 3 of~\cite{BCSS98} to the
	weighted model.


\subsection{Non-deterministic machines}

\begin{definition}
	Let $(R, o, c)$ be a model of computation. Let
	$X \subset R^{\infty}$. The set $X$ belongs to
	the class $\mathcal P$ over $(R, o, c)$ if and only if there are
	a machine $M$ with input $x$ and constants $a$, $b$ such that
\[
	x \in X \Leftrightarrow M(x) = 1
\] 
	and  for any $x \in R^{\infty}$, $M(x)$ is defined and
	the cost of computing $M(x)$ is
	bounded above by $a ( w(x)\text{Length}(x) )^b$ 	
\end{definition}

	The class $\mathcal P$ stands for Polynomial Time. A lot
	of sets are known to belong to the possibly larger class
	$\mathcal {NP}$ (Nondeterministic Polynomial Time). Those
	are the sets that can be recognized by a `nondeterministic'
	machine.
\par
	A non-deterministic machine $M$ to recognize a set $X$ is allowed to 
	input some $x$ and a guess $g$. If $x$ belongs to the set $X$,
	then there is a guess that makes $M(x;g) = 1$. Moreover, this
	computation can be performed in polynomial time with respect
	to the size of $x$ (for at least one guess). On the other hand,
	given any guess $g$, if $M(x;g)=1$ then $x \in X$. 
	More formally:
\begin{definition}
        Let $(R, o, c)$ be a model of computation. Let
        $X \subset R^{\infty}$. The set $X$ belongs to
        the class $\mathcal {NP}$ over $(R, o, c)$ if and only if there are
        a machine $M$ with inputs $x$ and $g$ and constants $a$, $b$ such that:
\begin{enumerate}
\item $\forall x \in R^{\infty}, \ x \in X 
	   \Leftrightarrow \exists \ g \ \text{s.t.} \ M(x;g)=1$
\item $ \forall x \in X, \exists \ g \ \text{s.t.}  \ M(x;g)=1 
	\text{ and } \text{Cost} (M(x;g)) \le  
	        a ( w(x)\text{Length}(x) )^b$
\end{enumerate}
\end{definition}

	A now classical example is Hilbert Nullstellensatz over
	unit cost models (See~\cite{BCSS98} Ch. 5). 
	Let $\mathrm{HN}(K)$ be the set
	of all the $(r,s,f_1, \cdots, f_r)$ where $f_i$'s are polynomials
	in $x_1, \cdots, x_s$ in sparse representation, and where
	there is $x \in K^s$ s.t. $f_1(x) = \cdots = f_r(x) = 0$
\par
	In the unit cost model, the weight of $x$ 
	does not need to be polynomially bounded
	by the length of the input $(r,s,f_1, \cdots, f_r)$.
	(Theorem~\ref{KP} seems quite sharp).
	However, this is necessary in the weighted-cost model,
	so we need to give another example of a $\mathcal {NP}$ set.
	That example will be a subset of the Nullstellensatz, obtained
	by intersecting the Yes-set of the Nullstellensatz with a set
	in~$\mathcal P$.

\subsection{The 3-Nullstellensatz problem}


	
\begin{definition}
	The restricted 3-Nullstellensatz problem over $\qralg$,
        or $\mathrm{HN3}$ for short, is the set of all
	$(m,n,a,f_1, \cdots, f_m)$ where
\begin{enumerate}
	\item $m$, $n$ and $a$ are integers, in unary representation
	(i.e., they are represented by 
        a sequence of $1$'s terminated by a $0$. This representation
	has the following feature: the `bit-size' of $a$ is
	now $a$). 
	\item $f_1, \cdots, f_m$ are polynomials in variables
              $x_1, \cdots , x_n$, of the form:
\[
	f_i(x) = x_{j_i} - \left( x_{k_i} 
{\left\{ \begin{array}{c} +\\-\\\times \end{array} \right\}}
x_{l_i} \right)
\]
	or of the form
\[
	f_i(x) = x_{j_i} - \text{constant}
\]
	\item There is $x$ in $(\qralg)^n$ so that $f(x)=0$ and
	the size of a multiple of the minimal polynomial of 
	each $x_i$ is $< a$. 
\end{enumerate}
\end{definition}

	It is also very natural to rewrite conditions 1 and 3 in
	terms of a few extra polynomial equations, with integer
	(and uniformly bounded) coefficients. This leads to an
	interpretation of HN3 as a subset of HN. There are no
	heights in the new definition.


\begin{lemma} \label{qralginnp}
              HN3 is in $\mathcal{NP}$ over $(\qralg,>,w)$.
\end{lemma}

\begin{proof}[Proof of Lemma ~\ref{qralginnp}] 
\begin{sloppypar}
        We consider the machine $N = N(m,n,a,f_1, \cdots, f_m;$ 
	$ x_1, \cdots$  $ x_n,$
        $p_1, \cdots, p_m)$. The last part of the input (the $x_i$'s and
	the $p_i$'s should be interpreted as the guess in a non-deterministic
	machine). The machine $N$ will check in polynomial time
        the following facts:
\end{sloppypar}
\begin{enumerate}
	\item $f_i(x) = 0$ for all $i$.
	\item $p_i(x_i) = 0$
	\item The bit-size of $p_i$ is smaller than $a$.
\end{enumerate} 

	Clearly, if the answer is yes to all the 3 items, then
	$(m,n,a,f) \in$ HN3. On the other hand, if  $(m,n,a,f) \in$ HN3,
	then the bit-size of the minimal polynomials of each of the $x_i$'s
	is bounded by $a$; furthermore, due to Theorem~\ref{minonweight}, 
	$w(x_i)$ is
	polynomially bounded, hence steps 1 to 3 can be performed in
	polynomial time on  $(m,n,a,f)$.
\end{proof}




\begin{theorem} \label{qralgnpc}
	HN3 is $\mathcal{NP}$-Complete over $(\qralg, >, w)$.
\end{theorem}

	It follows from Lemma~\ref{qralgnpc} that both HN3 and HN
	are $\mathcal{NP}$-hard over $(\qralg, >, w)$. However, HN is not 
	likely to be in $\mathcal{NP}$. Indeed, the usual `guess' for a
	nondeterministic machine in
	the unit cost model is the solution of the equations; that solution
	can have a very large weight. (The bound in Theorem~\ref{KP} seems
	to be quite sharp). Therefore, building a nondeterministic
	polynomial time machine to check HN seems a difficult, maybe
	impossible task.

\begin{proof}[Proof of Theorem \ref{qralgnpc}]
	Let $X \in \mathcal{NP}$ be recognized by the `nondeterministic' 
	machine $M = M(x; g)$ in time $< a \mathrm{\ size}(x)^b$.
\par
	Let $T = a ( \mathrm{length}(x) \max W(x_i)) ^b$ where 
	$W$ is a weight estimator, i.e., a polynomial time
	function satisfying:
\[
	w(x) \le W(x) \le \mathrm{poly}(w(x))
\]
	
	Existence of the weight estimator follows from Theorem~\ref{mainD}).
	Therefore, machine $M$ does terminate in time $\le T$.
\medskip
\par
	Let $\Phi^T (x; g,s,u,v)$ be the register equations ~(R").
	This is a system in $g,s,u,v$, since $x$ is fixed.
\par
	Clearly, there is a polynomial time machine $R_1$ that given
	$T$, $x$, produces the system $R_1(T,x) = \Phi^T (x; g,s,u,v)$	
\par
	Also, there is a polynomial time machine $R_2$ that transforms
	a system $\varphi(z)$ into a HN3 sort of system $(R_2 \varphi)
        (\hat z)$; $\varphi$ has a solution $z$ if and only if 
	$R_2 \varphi$ has a solution $\hat z$. Moreover, because
	of Corollary~\ref{heights-affine}, $\max w(z_j)$ and
	$\max w(\hat z_j)$ are related as follows:
\[
	\max w(z_j) \le \max w(\hat z_j) \le \max w(z_j) ^A
\]
\medskip
\par
	We consider the reduction $x \mapsto R(x) = R_2(R_1(T(x),x))$.
	If $x \in X$, then $R(x)$ has a solution of weight $<T^A$, so
	the bit-size of the minimal polynomials of the solutions is
	bounded by $T^{2A}$.
\par
	Reciprocally, if $R(x)$ has a solution $\hat z$, then 
	$\Phi^T (x; g,s,u,v)=1$ and hence there is $g$ s.t. $M(x,g)=0$.
	This implies $x \in X$.
 	

\end{proof}

	The situation over $(\mathbb Z,=,w)$ is interesting too.
	The set $\mathbb N$ is clearly in $\mathcal {NP}$ over that
	model, since it is possible to guess the `bits' of a number
	$x$ and then decide if $x \ge 0$. (A similar argument works
	with $(\mathbb Z,=,1)$)~\cite{BSS}.
\par	
 	However, assume that the
	$\tau$-conjecture~\cite{SMALE98} is true. Then it is hard 
	to decide
	$x \in \{1 ; \cdots ; n \}$. The input size is defined as
	$\log n$, both for unit cost or for logarithmic cost.
	Hence $\mathbb N \not \in
	\mathcal{P}$ with any of those cost models. Thus,  
	the $\tau$-conjecture implies \pnp over $(\mathbb Z,=,w)$. 


\medskip
\par


\section{A transfer theorem for the \pnp Conjecture}.


\subsection{Morphisms and Transfer Theorem}

	Let $(R_1, o_1, c_1)$ and $(R_2, o_2, c_2)$ be models of
	computation. We do not assume $R_2$ to be an extension of
	$R_1$ or whatsoever. Instead, we will relate those models
        by a partial mapping of the form
\[
	\sigma: R_1 \leftarrow R_2^k
\]
	with $k \in \mathbb N \cup \{ \infty \}$. This mapping will
	be used to `embed' the model of computation $(R_1, o_1, c_1)$ 
	into the model of computation $(R_2, o_2, c_2)$.
\par
	This `covariant' language calls for an explanation: one of
	the main features of this `embedding' should be to allow for
	the simulation of machines in $(R_1, o_1, c_1)$ by machines
	in $(R_2, o_2, c_2)$. In that case, elements of $R_1$ should
	be represented by sequences of elements in $R_2$. 
\par
	For instance, if
	$R_1 = \mathbb Z$ and $R_2 = \mathbb F_2$, then an integer
	in $R_1$ should be represented by a sequence of bits (i.e.
	a finite sequence in $\mathbb F_2$). 
\par
	If $x \in R_1$, the set $\sigma^{-1}(x)$ will contain all the
	valid representations of $x$. Not all the possible lists of
	elements of $R_2$ need to make sense, hence $\sigma$ can be
	a partial map. 
\par
	Moreover, representation of a given $x \in R_1$ does not need to be
	unique. For instance, if $R_1 = \qralg$ is the ring of real
	algebraic numbers, and $R_2 = \mathbb Z$, then
	$x \in R_1$ may be represented by its minimal polynomial, together
	with a good approximation of $x$ (In the sense of Theorem~\ref{mainC}).
\par
	By the use of coupling functions, $\sigma$ can be naturally 
	extended to a map $R_1^{\infty} \leftarrow R_2^{\infty}$. We will
	use the same name for $\sigma$ and for its extension, and specify
	domain and range when necessary.
\par
	An important conceptual point should be stressed: $\sigma$ is a map,
	not a computable map. It makes no sense to `compute' $\sigma$ in
	some model of computation. This map is introduced so we can work
	`in coordinates', much as in linear algebra or differential
	geometry. Of course, since $\sigma$ should transfer a certain
	`computable polynomial time' structure from one model of computation
	into another, a few constraints will be necessary. 
\par
	Let us fix the notation $\size_i(x) = \text{Length}(x) c_i(x)$
	for $x$ a sequence in $(R_i, o_i,$ $c_i)$ and $i=1, 2$.

\begin{definition}
	A map $\varphi: (R_1)^{\infty} \supseteq X \rightarrow Y \subseteq
	(R_2)^{\infty}$ is polynomially bounded
	if and only if there are $a$, $b$ such that for any $x \in X$,
\[
	\size (\varphi (x)) < a \size(x)^b
\]
\end{definition}
\medskip
\par
	A reciprocal of the condition of "polynomially bounded" will be
	often necessary. This motivates the definitions below:
\medskip
\par
\begin{definition}
	A map $\sigma: R_1^{\infty} \leftarrow R_2^{\infty}$ is
	{\em honest} {\bf over} $Y \subset R_1^{\infty}$ if and only
	if there are $a$, $b > 0$ such that 
\[
	\forall y \in Y, 
	\exists x \in \sigma^{-1}(y)
	\text{ such that }
	\size_1(x) < a \, \size_2(y) ^b
\]
	A map will said to be honest if and only if it is honest
	{\bf over} its image.
\end{definition}
\par
	This definition is closely related to the one of a honest map
	{\bf on} a set $X$, in Chapter 5 of~\cite{BCSS98}. Here,
	we do not require honest maps to be computable. It is possible
	also to define
\begin{definition}
	A map $\sigma: R_1^{\infty} \leftarrow R_2^{\infty}$ is
	{\em honest} {\bf on} $X \subset R_2^{\infty}$ if and only
	if there are $a$, $b > 0$ such that 
\[
	\forall x \in X, 
	\text{ such that }
	\size_1(x) < a \, \size_2(\sigma(x)) ^b
\]
\end{definition}

	Clearly, a map $\sigma$ honest {\bf on} $X$ is always honest
	{\bf over} $\sigma(X)$. Also, a map $\sigma$ honest {\bf over} $Y$ 
	is always
	honest {\bf on} some $X$ contained in $\sigma^{-1}(Y)$. 
\par
	An example: the projection $\pi : R^2 \rightarrow R$, 
	$(x,y) \mapsto x$ is honest {\bf over} $R$ but is not
	honest {\bf on} $R^2$. However it is honest on
	$\{(x,y) \text{ s.t. } \size(x) < a i\, \size(y)^b \}$ for
	$a$ and $b$ fixed.
\par
	An alternative definition for the class $\mathcal{NP}$ over
	$(R, o, c)$ can be given in terms of honest maps. The statement
	below is somewhat simpler than Proposition 2 in Chapter 5 of
	~\cite{BCSS98}. However, only one direction seems to be valid in general:
\begin{proposition}
        Let a model of computation $(R,o,c)$ be fixed.
	Let $Y$ be a set of $R^{\infty}$. If
	there exist a set $X \in \mathcal P$ and a honest map $\varphi$
	{\bf over} $Y$, computable in polynomial time, and such that
	$\varphi(X) = Y$, then
	the set $Y$ is in $\mathcal {NP}$ over $(R,o,c)$. 
\end{proposition}

	The proof is immediate, so it is omitted. The reciprocal is
	true (and easy) when $c=1$. A candidate counterexample for the
	reciprocal over $(\qralg, =, w)$ is the following set:
	$\{ (k,x_1, \cdots, x_k): x_i \ge 0\}$. Obvious guesses are the
	square roots of the $x_i$'s, but the weight of the guess vector
	may be exponential in the input size.
\medskip
\par

	In order to transfer `computable polynomial time' 
	structure from the model of computation $(R_1,o_1,c_1)$
	into the model of computation $(R_2,o_2,c_2)$, we can
	still follow the analogy with differential geometry and
	define the `pull-back' by $\sigma$ of computable functions
	and machines over $(R_1,o_1,c_1)$.
\begin{definition}
        Let $\sigma: (R_2)^\infty \rightarrow (R_1)^{\infty}$ and
	let $f$ be a $n$-ary function,
\[
\begin{array}{crcl}
	f: & (R_1)^n &\rightarrow & R_1 \\
	   & y_1, \cdots y_n &\mapsto& f(y_1, \cdots, y_n)
\end{array}
\]
	Then a function $f^{*} : (R_2)^\infty \rightarrow (R_2i)^{\infty}$ is a 
	{\em pull-back} of $f$ if and only if, 
\[
	\forall x_1, \cdots x_n \in R_2 ,
	\sigma( f^{*}( x_1, \cdots, x_n) ) = 
	f( \sigma(x_1), \cdots, \sigma(x_n) )
\]
\end{definition}

	The pull-back of a function does not need to be unique.
	A similar definition, this time unique, can be produced
	for a total function $g$ from $R$ into $\{ 0, 1 \}$:
	(i.e. a set): its pull-back $g^{*}$ is the unique function such that
	$g^*(x) = g(\sigma(x))$. For example, `$y \in o_1$' has 
	pull-back `$\sigma(x) \in o_1'$.
\par
	We can summarize that information in the following commutative
	diagrams:
%\[
%\begin{CD}
%(R_2)^{\infty} @>{(\sigma, \sigma, \cdots \sigma)}>> (R_1)^n \\
%@V{f^*}VV @AAfA \\
%(R_2)^{\infty} @>>\sigma> R_1\\ 
%\end{CD}
%\hspace{1cm}
%\begin{CD}
%(R_2)^{\infty} @>{\sigma}>> R_1 \\
%@V{g^*}VV @AAgA \\
%\{ 0;1 \}  @>>=> \{ 0;1 \} \\
%\end{CD}
%\]

\[
\xymatrix{
(R_2)^{\infty} 
  \ar[rr]^{(\sigma, \sigma, \cdots \sigma)} 
  \ar@{.>}[d]_{f^*}& &
(R_1)^n \ar[d]^{f}\\
(R_2)^{\infty} \ar[rr]_{\sigma}& &
R_1 
} \hspace{1cm}
\xymatrix{
(R_2)^{\infty} 
 \ar[rr]^{\sigma}
 \ar@{.>}[drr]_{g^*}
& & R_1 
 \ar[d]^{g}\\
 & & { \{0;1\} }
}
\]

	It is also reasonable to speak of the pull-back of a machine. 
	We will need the following condition in the sequel:

\begin{definition}
	Let $\sigma: R_1^{\infty} \leftarrow R_2 ^{\infty}$. We say that
	$\sigma$ has the pull-back property if and only if for any 
	machine
	$M$ over $(R_1,o_1,c_1)$ there are $a, b >0$ and a 
	machine
	$M^*$ over $(R_2,o_2,c_2)$ such that
\begin{enumerate}
	\item The function computed by $M^*$ is a pull-back of the function 
	      computed by $M$.
	\item Cost $(M^* , x)$ $<$ $a$ Cost$(M, \sigma(x))^b $
\end{enumerate}
\end{definition}

\par
	It is now clear that
\begin{lemma}\label{trP}
	Let $\sigma: R_1^{\infty} \leftarrow R_2 ^{\infty}$ is 
	polynomially bounded and has the 
	pull-back property. Then 
	$Y \in \mathcal P$ over $(R_1,o_1,c_1)$ implies that 
	its preimage $\sigma^{-1}(Y)$ is in $\mathcal P$ over
	$(R_2,o_2,c_2)$
\end{lemma}
\par
	If $M$ is a nondeterministic polynomial time machine, the
	situation is more subtle: Guesses $g$ taken by $M$ should 
	correspond to guesses $g'$ taken by the pull-back, so that
	$g = \sigma(g')$ and the size of $g'$ is polynomially bounded
	on the size of $g$. Moreover, it is necessary to ensure
	that the guess $g'$ is indeed in the domain of $\sigma$.
	
	

\begin{definition}
	A {\em morphism} from $(R_1,o_1, c_1)$ into $(R_2, o_2,c_2)$
	is a polynomially bounded, partial mapping 
        $\sigma: R_1 \leftarrow R_2^k$,
	with $k \in \mathbb N \cup \{ \infty \}$, so that: 
\begin{itemize}
	\item [(M1)] $\sigma$ is surjective.
	\item [(M2)] $\sigma$ is honest.
	\item [(M3)] The domain of definition of $\sigma$ belongs to 
              $\mathcal{NP}$ over $(R_2, o_2, w_2)$.
	\item [(M4)] $\sigma$ has the pull-back property.
\end{itemize}
\end{definition} 


	The main feature of morphisms is to allow for a `transfer' of
	the class $\mathcal NP$ (and of the class $\mathcal P$, also).
\begin{lemma}\label{trNP}
	Let $\sigma$ be a morphism from $(R_1,o_1, c_1)$ 
	into $(R_2, o_2,c_2)$.
	Let $X$ 
	be in ${\mathcal {NP}}$ over $(R_1,o_1, c_1)$. Then
	$\sigma^{-1}(X)$ is in ${\mathcal {NP}}$ over $(R_2,o_2, c_2)$
\end{lemma}

\begin{proof}[Proof of Lemma~\ref{trNP}]
	Let $M = M(x,g)$ be a nondeterministic polynomial time machine
	to recognize $X$. Let $M^*$ be a pull-back of $M$ by $\sigma$,
	and let $Y = \sigma^{-1}(X)$.
\par
	If $y \in Y$, then $\sigma(y) \in X$ and there is a guess $g$
	such that $M$ accepts $\sigma(y)$ in polynomial time (with
	respect to the size of $\sigma(y)$). Since $\sigma$ is
	polynomially bounded, $\sigma(y)$ is accepted in polynomial
	time with respect to the size of $y$ indeed. 
\par
	Since $\sigma$ is surjective, there is $g'$ in
	$\sigma^{-1}(g)$ such that $M^*(y,g')$
	`accepts' $y$ in polynomial time, again with respect to the
	size of $y$.
\par
	It is necessary to take a closer look at the preimage $g'$ of
	the guess $g$. It
	can be decomposed into sub-guesses $g_1', \cdots, g_s' \in
	(R_2)^{\infty}$, such that for each coordinate $g_i \in R_1$ of $g$,
	we have:
\[
	\sigma(g_i') = g_i
\]
	By Hypothesis (M2) we can assume without loss of generality that
	$\size_2 (g_i') < \size_1 (g_i) = c_1(g_i)$. Moreover, since
	$M$ is polynomial time w.r.t. $\size_2(y)$, $c_1(g_i)$ is 
	polynomially bounded w.r.t. $\size_2(y)$. 
\par
	By Hypothesis (M3), there is a non-deterministic polynomial time
	machine $D=D(z,g'')$ that recognizes the domain of $\sigma$. Setting
	$z=g_i'$, one can use $D$ to recognize valid guesses $g_i'$ is
	nondeterministic polynomial time w.r.t. $\size_2(y)$.
\par
	We define a machine $N$ as follows: $N$ inputs $y$ and guesses
	$g'$, $g_i''$. It first tests $D(g_i', g_i'')$. If all tests
	succeed (i.e. $g'$ is in the domain of $\sigma$), the machine
	tests $M^*(y, g')$ and returns `Yes' on success.
\par
	Clearly, if $y \in Y$, there will be guesses $g'$ and $g''$ such
	that $N(y,g',g'')$ will succeed in polynomial time w.r.t.
	$\size_2(y)$.
\par
	Assume now
	that $y$ gets accepted by $N$, with guess $(g', g'')$.
	Then $g'$ was accepted by $D$, hence $g'$ is in the domain
	of definition of $\sigma$ and $g=\sigma(g')$ is well-defined.
	It follows from the pull-back property that $M$ accepts 
	$\sigma(y)$, with guess $g$. Hence, $y \in Y$.
\end{proof}

	A few remarks:

\begin{remark}
	Identity is a morphism from $(R_1, o_1, c_1)$ into itself.
\end{remark}

\begin{remark}
	The composition of two morphisms is again a morphism
\end{remark}

\begin{remark}
	In order to define a Category, we should specify the objects.
	Those can be chosen to be the class of all $(R,o,c)$ where
	$R$ is a commutative ring with unity and no zero divisors,
	$o$ is a proper subset of $R$ and $c$ is a function from $R$
	into the positive reals. One can also restrict this to $o=\{0\}$
	or to reasonable cost functions in some sense... 
\end{remark}

\begin{remark}
	The usual category theory functors do not seem to make any
	sense in this context. However, the word morphism was selected
	in absence of a better name. $\mathcal P$-morphisms are taken,
	maybe $\mathcal{NP}$-morphisms would be a good name~?
\end{remark}

	Of the properties in the definition of morphisms, the
	pull-back property may be the hardest to check. In some
	conditions, it may be enough to check the pull-back
	condition for a few basic functions:

\begin{definition}
	A map $\sigma: R_1^{\infty} \leftarrow R_2^{\infty}$ is
        {\em very honest} {\bf over} $Y \subset R_1^{\infty}$ if
	and only if there are $a$, $b > 0$ and a polynomial time
	machine $S$ over $(R_2,o_2,c_2)$ such that $\sigma(x) = 
	\sigma(S(x))$ and, 
\[
        \forall x \in \sigma^{-1}(Y), 
        \size_1(S(x)) < a
	\size_2(\sigma(x)) ^b
\]
        A map is `very honest' if and only if it is very honest {\bf over}
	its image.
\end{definition}
	
	Clearly, a very honest map is always honest; the machine $S$ can
	be seen as a simplification procedure for the representation
	$x$ of some $y = \sigma(x)$.  
\par
\begin{lemma}\label{vh}
	Let $\sigma$ be a very honest map, such that the following 
	functions have a polynomial time pull-back: $x, y \mapsto x+y$,
	$x, y \mapsto xy$, $x \mapsto -x$, $x \mapsto (x \in o_1)$.
	Then $\sigma$ has the pull-back property.
\end{lemma}

	We can now define the following relation of equivalence between
	two models:

\begin{definition}
	Computation models $(R_1, o_1, c_1)$ and $(R_2, o_2, c_2)$
	are {\em polynomially equivalent} if and only if there are morphisms:
\begin{eqnarray*}
	\sigma_1&:& (R_1, o_1, c_1) \rightarrow (R_2, o_2, c_2) \\
	\sigma_2&:& (R_2, o_2, c_2) \rightarrow (R_1, o_1, c_1)
\end{eqnarray*} 
	such that:
	\begin{itemize}
	\item [(T1)] 
	There is a polynomial time machine $A_1$ over
	$(R_1, o_1, c_1)$ such that 
	$\sigma_1 \circ \sigma_2 \circ A_1$ is the identity
	in $(R_1)^{\infty}$.
	\item [(T2)] 
	There is a polynomial time machine $A_2$ over
	$(R_2, o_2, c_2)$ such that 
	$\sigma_2 \circ \sigma_1 \circ A_2$ is the identity
	in $(R_2)^{\infty}$.
	\end{itemize}
\end{definition}
%\[
%\begin{array}{ccccccc}
%R_1 & \stackrel{\tiny \sigma_1}{\longleftarrow} & (R_2)^{k}   &\hspace{1cm}&
%(R_1)^k & \stackrel{\tiny \sigma_2}{\longrightarrow} & R_2  \\
%\downarrow {\scriptscriptstyle A_1} & {\scriptscriptstyle \sigma_2} \nearrow&       &&
%& \nwarrow {\scriptscriptstyle \sigma_1} & \downarrow {\scriptscriptstyle A_2}        \\
%R_1^{k'} & & &
%& & & R_2^{k'}  
%\end{array}
%\]

\[
\xymatrix{
R_1
 \ar@(ur,ul)[]_{Id}
 \ar[dd]_{A_1} \\
 & 
(R_2)^\infty 
 \ar[ul]^{\sigma_1}
\\
(R_1)^\infty
 \ar[ur]_{\sigma_2}
}
\hspace{1cm}
\xymatrix{
&
R_2
  \ar@(ul,ur)[]^{Id}
  \ar[dd]^{A_2}
\\
(R_1)^\infty
  \ar[ur]^{\sigma_2} \\
&
(R_2)^\infty
 \ar[ul]_{\sigma_1}
}
\]

	We can now state a transfer theorem:
\begin{theorem} \label{transfer}
	Let computation models $(R_1, o_1, c_1)$ and $(R_2, o_2, c_2)$
	be polynomially equivalent. 
	Then \pnp over $(R_1, o_1, c_1)$ if and only if
	\pnp over $(R_2, o_2, c_2)$.
\end{theorem}

	Note that Theorem~\ref{transfer} does not require the existence
	of a $\mathcal {NP}$-complete problem over any of  $(R_1, o_1, c_1)$ 
	or $(R_2, o_2, c_2)$.
\par
	It is clearly enough to prove only one sense, viz. 
	\pnp over $(R_1, o_1, c_1)$ implies
        \pnp over $(R_2, o_2, c_2)$. 

\begin{proof}[Proof of Theorem ~\ref{transfer}]
	Assume that  \pnp over $(R_1, o_1, c_1)$.
\par
	{\bf Step 1:} By hypothesis, there is a problem $X$
	that is in $\mathcal{NP}$ over $(R_1, o_1, c_1)$,
	but not in $\mathcal P$.
\par
	{\bf Step 2:} Let $Y = \sigma_1^{-1}(X)$. $Y$ is in
	$\mathcal{NP}$ over $(R_2, o_2, c_2)$ (Lemma~\ref{trNP}).
\par
	{\bf Step 3:} Assume by
	absurd that \pisnp over $(R_2, o_2, c_2)$. Then $Y$ is
	in $\mathcal{P}$ over $(R_2, o_2, c_2)$.
\par
	{\bf Step 4:} $Z = \sigma_2^{-1} (Y)$ is in
	$\mathcal{P}$ over $(R_1, o_1, c_1)$ (Lemma~\ref{trP}).
\par
	{\bf Step 5:} $Z$ contains the image of $X$ by 
	the polynomial time machine $A_1$ of property (T1). 
\par
	{\bf Step 6:} On the other hand, if $A_1(x)$ is in
	$Z$, then $x$ is in $X$. 
\par
	{\bf Step 7:} Therefore, machine $A_1$ reduces the
	decision of membership in $X$ to membership in $Z$.
	This later problem can be solved in polynomial time.
	Hence \pisnp over $(R_1, o_1, c_1)$, contradiction.
\end{proof}

	As a matter of fact, the same proof shows a more general, 
	weaker statement:
\begin{theorem} \label{oneway}
	Let $(R_1, o_1, c_1)$ and $(R_2, o_2, c_2)$ be computation
	models, such that there exist mappings 
	\begin{eqnarray*}
        \sigma_1&:& (R_1, o_1, c_1) \rightarrow (R_2, o_2, c_2) \\
        \sigma_2&:& (R_2, o_2, c_2) \rightarrow (R_1, o_1, c_1)
	\end{eqnarray*}
        such that $\sigma_1$ is a morphism, $\sigma_2$ is a polynomially
	bounded partial mapping that
	has the pull-back property (M4). 
	Assume that (T1) holds. Then   
        \pnp over $(R_1, o_1, c_1)$ implies \pnp over $(R_2, o_2, c_2)$.
\end{theorem}
\medskip
\par
	A further remark: the technology of Theorem~\ref{transfer}
	allows to transfer the \pnp Conjecture between models where
	$R_1^\infty$ and $R_2^\infty$ have same cardinality. 
\par
	This is different from the argument in ~\cite{BCSS98} for transferring
	between algebraic and complex numbers, that used Tarski's strong
	transfer principle.

\subsection{Examples}

\begin{proposition}\label{tr1}
	Models 
	$(\mathbb Z, \ge, w)$ 
        and 
	$(\mathbb F_2, =, 1)$ (the Turing model)
	are polynomially equivalent.
\end{proposition}

\begin{proof}[Proof of Proposition~\ref{tr1}]
	Let
\[
	\begin{array}{rccl}
	b:& \mathbb F_2^{\infty} & \rightarrow & \mathbb Z \\
	& (s,0,x_1,0, \cdots, x_k, 1) & \mapsto &
	(-1)^s \sum_{i=0}^k 2^i x_i
	\end{array}
\]
	This is clearly a morphism from  $(\mathbb Z, \ge, w)$
	into $(\mathbb F_2, =, 1)$. We can also define a morphism
	from $(\mathbb F_2, =, 1)$ into $(\mathbb Z, \ge, w)$ by:
\[
	\begin{array}{crcl}
	i:& \mathbb Z \supseteq \{ 0;1 \}& \rightarrow & \mathbb F_2 \\
	& 0 & \mapsto & 0 \\
	& 1 & \mapsto & 1 
	\end{array}
\]
	Machines $A_1$ and $A_2$ are essentially similar. They associate
	to any input $x$ its binary expansion. In the case of $A_2$, the
	input $x$ is supposed to belong to $\mathbb F_2$, hence the only
	possible values are 0 and 1. In the case of $A_1$, $x \in \mathbb Z$.
\par
	Since $i \circ b \circ A_2$ and $b \circ i \circ A_1$ are the
	identity in $\mathbb F_2$ and in $\mathbb Z$ respectively, those
	two models are polynomially equivalent.
\end{proof}


\begin{proposition} \label{tr2}
	Models 
	$(\mathbb Q, \ge, w)$ 
        and 
	$(\mathbb Z, \ge, w)$
	are polynomially equivalent.
\end{proposition}


\begin{proof}[Proof of Proposition~\ref{tr2}]
	In the same spirit, we define:
\[
	\begin{array}{crcl}
	b:& \mathbb Z^2 \supseteq \mathbb Z \times \mathbb Z_{*}
	 & \rightarrow & \mathbb Q \\
	& p,q & \mapsto & \frac{p}{q} 
	\end{array}
\]
\[
	\begin{array}{crcl}
	i:& \mathbb Q \supseteq \mathbb Z& \rightarrow & \mathbb Z \\
	& x & \mapsto & x
	\end{array}
\]
\par
	Then we have to define $A_1: x \mapsto (x,1)$ and $A_2:
	p/q \mapsto (p,q)$ where $p$ and $q$ are relatively prime.
	This last machine will just perform Euclidian algorithm
	(For its running time, see the discussion in~\cite{KNUTH} 
	Section 4.5.3)
\end{proof}


\subsection{Is $x \ge 0 $ hard ?}

	Towards the proof of Theorem~\ref{mainB}, we will
	assume Theorem~\ref{mainC} and show its Corollary:
	
\begin{corollary} \label{risnp}
                The set $\qralg$ is in
		$\mathcal {NP}$ over the algebraic numbers $(\qalg, =, w)$
\end{corollary}

\begin{proof}[Proof of Corollary~\ref{risnp}]
	Given $x \in \qalg$, guess $y(x)$ where $y$ is the function
	defined in Theorem~\ref{mainC}. Then compute a $2^{-k}$-approximation
	of $x$, of the form $a+bi$. By Theorem~\ref{mainC}, $b=0$ if and
	only if $x$ is real.
\end{proof}


\begin{lemma}
	\pnp over $(\qralg, =, w)$ implies
	\pnp over $(\qalg, =, w)$.
\end{lemma}

\begin{proof}
	Define:
\[
	\begin{array}{crcl}
	\sigma_1:& \qralg& \rightarrow & \qalg\\
	& x &\mapsto& \sigma_1(x) = x
	\end{array}
\]
	This is clearly a morphism. (Property (I3) follows from
	Corollary~\ref{risnp}). Define also the morphism: 
\[
	\begin{array}{crcl}
	\sigma_2:& \qalg& \rightarrow & 
                   (\qralg)^2\\
	& x + iy &\mapsto& \sigma_2(x+iy) = (x,y)
	\end{array}
\]
	Then $\sigma_2 \circ \sigma_1$ is the identity map in $R_1$,
	so (T1) holds. Note that $\sigma_1 \circ \sigma_2$ maps $x+iy$ 
        into $x,y$, so it is not clear if (T2) hold.
\par
	Hence, we are under the conditions of Theorem~\ref{oneway} and
        \pnp over $(\qralg, =, w)$ implies
        \pnp over $(\qalg, =, w)$	
\end{proof}

\par
	Now we can show that

\begin{theorem}
	\pnp over $(\qralg, =, w)$.
\end{theorem}

	This implies Theorem~\ref{mainB}



\begin{proof}
	Let $\qralg_{+}$ be the set of 
	$\{ x \in \qralg : x \ge 0 \}$. Clearly,
	$\qralg_{+}$ is in $\mathcal {NP}$, since
	the non-deterministic machine $M(x,y)$ that 
	checks $x - y^2 = 0$ decides $x \in \qralg_{+}$
	in polynomial time. (Recall that $H(\sqrt{x}) = \sqrt{H(x)}$
	and $[\mathbb Q[\sqrt{x}] : \mathbb Q] \le 2 [\mathbb Q[x] : \mathbb Q]$).
\par
	Now, suppose that there is a deterministic machine $M$ that decides
	$x \in \qralg_{+}$ in polynomial time. Let $K$ be the number
	field generated over $\mathbb Q$ by all the coefficients of $M$.
\par
	There is some $\alpha >0$ algebraic over $K$ with the following property:
	there is an automorphism $\sigma$ of $K[\alpha]$ over $K$ such that
	$\alpha^{\sigma} < 0$. Choose $z \in K$ such that $\sqrt{z} \not \in K$,
	and set $\alpha = \sqrt{z}$.
\par
	The machine $M$ cannot distinguish between $\alpha$ and $\alpha^{\sigma}$.
	Indeed, let $\gamma^t$ be the path associated to input $\alpha$ 
	and let $G^t(\alpha) = g_{\gamma^t}(s^t)$ be the polynomial in $\alpha$
	associated to each branching node in $\gamma^t$. The coefficients of
	$G^t$ are in $K$, so $\sigma$ fixes $G^t$ and
\[
	G^t (x) = 0 \Leftrightarrow G^t(x^{\sigma})=0
\]
\par
	This means that replacing $\alpha$ by $\alpha^{\sigma}$ does not
	modify the path $\gamma^t$, hence the output of $M$ is the same. 
\end{proof}

	The same argument is true for the unit cost model
        $(\qralg, =, 1)$. However, in the unit cost case,
	a more general theorem holds, 
	as discussed in ~\cite{BCSS98} Chapter 7. Namely \pnp over all
	$(K, >, 1)$ where $K$ is an
	infinite field not real closed, and over
	all $(K, =, 1)$ where $K$ is an infinite field not algebraically
	closed.



 


\section{Complexity of diophantine approximation}


	The main goal of this section is to prove Theorem~\ref{mainC}.
	A few remarks concerning that Theorem:
\par
	It is possible to represent an algebraic number $x$ by its minimal
	polynomial and an interval containing $x$. This interval should
	be taken smaller than the distance to the nearest root. See
	~\cite{MIGNOTTE} and ~\cite{LOOS} for a bound. See also
	Corollary 1 of Ch. 8 in \cite{BCSS98}, and \cite{MALAJOVICH93} for
	other `gap' bounds. However, in
	order to refine that interval up to $2^{-k}$ 
        by a method such as bisection,
	it takes $k$ steps with $k$-bits arithmetic. This is why
	Theorem~\ref{mainC} was stated.
\par
	In order to obtain this sharper estimate, we use Newton Iteration
	instead. Newton iteration is known to converge quadratically
	once close enough to the root. ~\cite{KANTOROVICH, PE, BEZ1}.
	See also Chapter 8 in ~\cite{BCSS98}.
\par
	If we use some sort of approximate computation, we have
	to allow for some sort of `approximate' Newton iteration.
	Theorems on quadratic convergence of `approximate' Newton 
	iteration appear in ~\cite{MALAJOVICH94}.
\par
	Also, it is necessary to bound the precision necessary to
	specify the `starting point' of the Newton iteration. Those
	are the main ingredients of the proof of Theorem~\ref{mainC}.

\subsection{Approximate Newton Iteration}

	Let $f$ be a univariate polynomial. The following invariants 
	~\cite{PE} are associated to $f$:

\begin{eqnarray*}
	\beta(x) = \beta(f,x) &=& \frac{|f(x)|}{|f'(x)|} \\
	\gamma(x) = \gamma(f,x) &=& \max_{k \ge 2} 
		\left( \frac{|f^{(k)}(x)|}{k ! \ |f'(x)|} 
		\right) ^{\frac{1}{k-1}}\\
	\alpha(x) = \alpha(f,x) &=& \beta(f,x) \ \gamma(f,x)
\end{eqnarray*}

	The following theorem is a particular case of some
	earlier results in~\cite{MALAJOVICH94}. It generalizes the
	convergence theorems in~\cite{PE} to the case of
	{\em approximate} Newton iteration. It is stronger than Theorem~4
	in Chapter~8 of~\cite{BCSS98}, which only provides linear 
	convergence for approximate Newton iteration.

\begin{theorem}\label{apconv}
	Let $f$ be a univariate polynomial, and let $s \in \mathbb N$,
	$s \ge 2$. Let $x_0$ be such that $\alpha(f,x_0) < 2^{-5}$.
	Let $\delta$ be such that $\delta \gamma(f,x_0) < 2^{-4-2^{s+2}}$.
	Let the sequence $x_i$ satisfy
\[
	\left| x_{i+1} - \left( x_i - \frac{f(x_i)}{f'(x_i)}\right)
           \right| < \delta
\]
	Then there is a zero $x^*$ of $f$ such that
\[
	|x_i - x^* | < \frac{ 2^{1-2 ^{\min(i,s)}} }{\gamma(f,x_0)} 
\]
\end{theorem}

	Some of the Lemmas below are standard and may have appeared
	in ~\cite{PE,BEZ1,BCSS98}.
	For later use, we define
	the function $\psi(t) = 1 - 4 t + 2 t^2$. The Lemma below is
	slightly sharper and implies Proposition~3 in Chapter~8 of
	~\cite{BCSS98}.

	
\begin{lemma} \label{beta}
	Let $f$ be a univariate polynomial, and let $x$, $h \in \mathbb C$.
	Let $f'(x) \ne 0$ and assume that $|h| \gamma(f,x) < 1-\sqrt{2}/2$.
	Then:
\[
	\beta (f, x+h)
	\le
	\left| \frac{f(x)}{f'(x)} + h \right|
	\frac{ \left(\, 1 - \gamma(f,x) |h| \,\right) ^2 }
	{\psi(\,\gamma(f,x) |h|\,)}
		+ |h|^2 \gamma(f,x) 
		\frac{ 1 - \gamma(f,x) |h| }{\psi(\,\gamma(f,x) |h|\,)}
\]
	In particular,
\begin{enumerate}
	\item If $h = -\frac{f(x)}{f'(x)} + \delta$, we get:
\[
	\beta (x+h)
	\le
	\left| \delta \right|
	\frac{ \left(\, 1 - \gamma(x) |h| \,\right) ^2 }
	{\psi(\,\gamma(x) |h|\,)}
		+ \left(\,|\beta(x)| + |\delta|\,\right)^2 \gamma(x) 
		\frac{ 1 - \gamma(x) |h| }{\psi(\,\gamma(x) |h|\,)}
\]
	\item If $\beta(f,x)=0$, we get:
\[
	\beta (x+h)
	\le
	\left| h \right|
	\frac{ \left(\, 1 - \gamma(x) |h| \,\right) ^2 }
	{\psi(\,\gamma(x) |h|\,)}
		+ |h|^2 \gamma(x) 
		\frac{ 1 - \gamma(x) |h| }{\psi(\,\gamma(x) |h|\,)}
\]
\end{enumerate}
\end{lemma}

\begin{proof}[Proof of Lemma~\ref{beta}]

We can break
\[
	\beta(x+h) = \frac{|f(x+h)|}{|f'(x+h)|} =
	\frac{|f'(x)|}{|f'(x+h)|}\frac{|f(x+h)|}{|f'(x)|}
\]

	Using Taylor development, it is easy to estimate, since $f'(x)\ne 0$:
\begin{eqnarray*}
\frac{|f(x+h)|}{|f'(x)|}
	&\le&
        \left| \frac{f(x)}{f'(x)} + h \right|	
	+
	\sum_{k \ge 2} \left| \frac{f^{(k)}(x)}{k! f'(x)} h^k \right|
	\\
	&\le&
        \left| \frac{f(x)}{f'(x)} + h \right|	
	+
	|h| \sum_{k \ge 2} (\, \gamma(f,x) |h|\, ) ^{k-1}
\end{eqnarray*}
\par
        and hence
\begin{equation}\label{bound1}
       \frac{|f(x+h)|}{|f'(x)|}
	\le
        \left| \frac{f(x)}{f'(x)} + h \right|	
	+
	\frac {|h|^2 \gamma(x)}{1 - \gamma(x) |h| }
\end{equation}
\medskip
\par
	On the other hand, using again the hypothesis $f'(x) \ne 0$,
\[
	\frac{f'(x+h)}{f'(x)} = 1 + \sum_{k \ge 2} 
           \frac{f^{(k)}(x)}{k-1! f'(x)} h^{k-1}
\]
\par
	hence
\[
	\left| \frac{f'(x+h)}{f'(x)} - 1 \right| 
	\le
	\sum_{k \ge 2} 
          (\,k \gamma(x) |h|\,) ^{k-1}
	\le
	\frac{1}{(\,1 - \gamma(x) |h|\,)^2} - 1
\]
\par
	Thus,
\begin{equation}\label{bound2}
	\frac{|f'(x)|}{|f'(x+h)|}
	\le
	\frac{1}{1 - \left| \frac{f'(x+h)}{f'(x)} -1\right| }
	\le
	\frac{1}{2 - \frac{1}{(\,1 - \gamma(x) |h|\,)^2}}
	\le
	\frac{ 1 - \gamma(x) |h| }{\psi(\,\gamma(x)|h|\,)}
\end{equation}
\par
	Combining bounds ~\ref{bound1} and ~\ref{bound2} together, we obtain
\[
        \beta (x+h)
        \le
        \left| \frac{f(x)}{f'(x)} + |h| \right|
        \frac{ \left(\, 1 - \gamma(x) |h| \, \right) ^2 }
	{\psi(\,\gamma(x) |h|\,)}
                + |h|^2 \gamma(x)
                \frac{ 1 - \gamma(x) |h| }{\psi(\,\gamma(x) |h|\,)}
\]
\end{proof}

\begin{lemma} \label{gamma}
	Let $f$ be a univariate polynomial, and let $x$, $h \in \mathbb C$.
	Let $f'(x) \ne 0$. Assume also that $\gamma(f,x)|h| < 1-\sqrt{2}/2$.
	Then:
\[
	\gamma (f, x+h)
	\le
	\frac{\gamma(f,x)}{(\,1 - \gamma(f,x) |h|\,)\psi(\,\gamma(f,x) |h|\,)}
\]
\end{lemma}

	Since this is Proposition~3 in Chapter ~8 of ~\cite{BCSS98},
	the proof is omitted.



	From Lemma~\ref{gamma} we can deduce that:

\begin{lemma}\label{nbhd}
	Let $f$ be a univariate polynomial. Let $C$ be a constant,
	$C<\frac{1}{10}$. 
	Let $x^* \in \mathbb C$ be a zero of $f$. 
	Let $U$ be a neighborhood of $x^*$, of radius 
         $\rho < \frac{C}{\gamma(f,x^*)}$.
	Then for any $x \in U$, 
\[
	(1-10C) \gamma(f,x^*) < \gamma(f,x) < \frac{1}{1-5C} \gamma(f,x^*)
\]
\end{lemma}

\begin{proof}[Proof of Lemma~\ref{nbhd}]

	From Lemma~\ref{gamma} centered at $x^*$, we have
\begin{eqnarray*}
\gamma(x) &\le&
        \frac{\gamma(x^*)}{(1 - \gamma(x^*)\rho)\psi(\gamma(x^*)\rho)}
	\\
	&\le& 
        \frac{\gamma(x^*)}{(1 - C)\psi(C)}\\
	&\le& 
        \frac{\gamma(x^*)}{1 - 5C}
\end{eqnarray*}

	On the other hand, Lemma~\ref{gamma} also tells us that
\begin{eqnarray*}
\gamma(x^*) &\le&
        \frac{\gamma(x)}{(1 - \gamma(x)\rho)\psi(\gamma(x)\rho)}
	\\
	&\le& 
        \frac{\gamma(x)}{(1 - \frac{C}{1-5C})\psi
	 \left( \frac{C}{1-5C}\right)}\\
	&\le& 
	\frac{1-5C}{1-10C} \gamma(x)
\end{eqnarray*}
\end{proof}

\begin{lemma}\label{dist}
	Let $f$ be a univariate polynomial.
	Let $x$ be such that $\alpha(f,x) < 2^{-4}$. 
	Then there is $x^*$ such that $f(x^*) = 0$ and
	$|x - x^*| <  1.1 \beta(f,x)$
\end{lemma}

\begin{proof}
	First of all, let $x_0 = x$ and $x_{i+1} = x_i - f(x_i)/f'(x_i)$.
	Then $\alpha(x_i) < 2^{-2^{i+1} - 2}$. Indeed, by putting
	Lemma~\ref{beta} and Lemma~\ref{gamma} together, one gets:
\[
	\alpha(x_{i+1}) \le
		\alpha(x_i)^2 
		\frac{1}{\psi(\alpha(x_i))^2}
		< \frac{16}{9} \alpha(x)^2
		< 2 \alpha(x)^2
\]
	so 
\[
	\alpha(x_{i+1}) <  2 \times 2^{-4-2^{i+2}} = 
          \times 2^{-3-2^{i+2}}
\]
	Besides, $\lim \alpha(x_i) = 0$ so $x_i$ converges. Let
	$x^*$ = $\lim x_i$.
\[
	|x - x^*| \le \sum \beta(x_i) 
\]
\par
	From Lemma ~\ref{beta},
\[
	\beta(x_{i+1}) <
		\beta(x_i) \alpha(x_i) 
		\frac{ 1 - \alpha(x_i)}{\psi(\alpha(x_i)}
		< 0.078\cdots \beta(x_i) < 0.08 \beta(x_i)
\]
\par
	Therefore,
\[
        |x - x^*| < \frac{1}{1-0.08} \beta(x_0) < 
	1.087\cdots \beta(x_0) < 1.1 \beta(x_0)
\]
\end{proof}



\begin{lemma}\label{nbhd2}
	Let $f$ be a univariate polynomial.
	Let $x^* \in \mathbb C$ be a zero of $f$. 
	Let $U$ be a neighborhood of $x^*$, of radius 
         $\rho < \frac{1}{100 \gamma(f,x^*)}$.
	Then for any $x_0 \in U$, set $x_1 = x_0 - f(x_0)/f'(x_0)$ 
\[
	|x_1 - x^* | < 0.1 \rho 
\]
\end{lemma}
\begin{proof}
	First of all, we can use Lemma~\ref{nbhd} to bound
	$\gamma$ in $U$. We obtain, using $C=\frac{1}{100}$, that
	for all $x \in U$,
\[
	0.9 \gamma(x^*) < \gamma(x) < \frac{20}{19} \gamma(x^*)
\]

	Also, from Lemma~\ref{beta} item 2, we obtain:
\[
	\beta(x)  
        \le
        \rho 
        \frac{\left( 1 - \gamma(x^*) \rho \right) ^2 }
             {\psi(\gamma(x^*) \rho)}
        + \rho^2 \gamma(x^*)
                \frac{ 1 - \gamma(x^*) \rho }{\psi(\gamma(x^*)\rho)}
	\le 1.031\cdots \rho < 1.05 \rho
\]
\par
	Hence,
\[
	\alpha(x) \le \beta(x) \gamma(x) < (1.05 \rho) \frac{20}{19}
	\gamma(x^*) \le 0.01105 < 0.012
\]
\par
	We can also bound
\[
	\alpha(x_1) < 2 \alpha(x)^2 
\] 
\par
	Thus,
\begin{eqnarray*}
	\beta(x_1) &\le& \frac{2 \alpha(x)^2 }{\gamma(x)} \\
	&\le& 2 \alpha(x) \beta(x) \\
	&\le& 2 \times 0.012 \times 1.05 \rho \\
	&\le& 0.0252 \rho
\end{eqnarray*}
	Hence, using lemma~\ref{dist},
\[
	|x_1 - x^*| < 0.028 \rho
\]
\end{proof}


\begin{proof}[Proof of Theorem~\ref{apconv}]


	Lemma~\ref{dist}, together with the hypothesis $\alpha(x_0)<\frac
	{1}{32}$, implies the existence of a point $x^*$ s.t. $f(x^*)=0$,
	with
\[
	|x_0 - x^*| < 1.1 \beta(x) < \frac{1.1}{32 \gamma(x)}
\]
\par
	We apply Lemma~\ref{gamma} to estimate $\gamma(x^*)$:
\begin{eqnarray*}
	\gamma(x^*)
	&\le&
	\frac{\gamma(x_0)}{
	(\, 1 - \gamma(x_0) |x^*-x_0|\,)
	\psi (\, \gamma(x_0) |x^*-x_0|\,)} \\
	&\le&
        \frac{\gamma(x_0)}{1 - 5 \frac{1.1}{32}} \\
	&\le& 1.21 \gamma(x_0)
\end{eqnarray*}
\par
	Let $U$ be a disc centered in $x^*$ and with radius 
	$\frac{1}{20 \gamma(x^*)}$. Clearly, $x_0 \in U$. 
	(Proof: $\frac{1.1}{32 \gamma(x_0)} < \frac{1.1 \times 1.21}
	{32 \gamma(x^*)} < \frac{1}{20 \gamma(x^*)}$. Lemma~\ref{nbhd}
	provides the following uniform bound for any $x \in U$:
\[
	\frac{1}{2} \gamma(x^*) < \gamma(x) < \frac{4}{3}\gamma(x^*)
	< 2 \gamma(x_0)
\]

\medskip
\par
	Our induction hypothesis is: $\alpha(x_i) < 2^{-3-2^{i+1}}$ for
	all $i \le s$. We also assume by induction that $x_i \in U$ so
	that $\gamma(x_i) \delta < 2^{-4 -2^{s+2}}$. 
        Combining Lemma~\ref{beta} and Lemma~\ref{gamma} we get: 

\[
	\alpha(x_{i+1}) <
	\frac{1}{\psi(\alpha(x_i) + \gamma(x_i) \delta)^2} 
	\left( \gamma(x_i) \delta + (\alpha(x_i) + \gamma(x_i) \delta )^2
	\right)
\]
\par
	Using the induction hypothesis, we can further simplify:
\[
	\alpha(x_{i+1}) <
          2 \alpha(x_i)^2 + 2 \gamma(x_i) \delta
\]

\par
	Replacing $\alpha(x_i)$ by $2^{-3-2^{i+1}}$ and
        $\gamma(x_i) \delta$ by $2^{-4-2^{s+2}}$ one obtains
\[
	\alpha(x_{i+1}) <
        2^{-5-2^{i+2}} + 2^{-4-2^{s+2}}
	< 2^{-3-2^{i+2}}
\]
\par
	since $i \le s$. 
	Then we need to verify that $x_{i+1} \in U$. 
	We use Lemma~\ref{dist} item 1: 
\[
	\beta(x_{i+1}) \le |\delta| 
	\frac{ (\,1 - \gamma(x_i) |h| \,)^2} {\psi(\,\gamma(x_i)|h|\,)}
	+
	|h|^2 \gamma(x_i)
	\frac{ 1 - \gamma(x_i) |h| } {\psi(\,\gamma(x_i)|h|\,)}
\]
\par
	In the right-hand term, we can bound $\gamma(x_i)|h|$ by
	$\alpha(x_i)+\gamma(x_i)\delta$. By the induction hypothesis,
	this is less than $2^{-5}+2^{-8}$. Hence,
\[
	\beta(x_{i+1})
	\le
	\frac{1}{\gamma(x_i)}
	\left(
	2^{-8}
	\frac {\left(1 - (2^{-5}+2^{-8}) \right)^2}{1-4(2^{-5}+2^{-8})}
	+
	(2^{-5}+2^{-8})^2 \frac {1 - (2^{-5}+2^{-8}) }{1-4(2^{-5}+2^{-8})}
	\right)
\]
\par
	Besides, $\gamma(x_i) > \frac{1}{2} \gamma(x^*)$, so
\[
	\beta(x_{i+1}) < \frac{2}{2^7 \gamma(x^*)} < \frac{1}{20 \gamma(x^*)}
\]
\medskip
\par
	This was for $i<s$. When $i=s$, it is easy to bound using
	Lemmas ~\ref{nbhd} and~\ref{dist} that
\[
	|x_i - x^*| < \frac{2^{-2^{s+1}-1}}{\gamma(x^*)}
\]
	and then, since $s\ge 2$, we are in the conditions of
	Lemma~\ref{nbhd2}, for 
\[
	\rho = \frac{2^{-2-2^{s+1}}}{\gamma(x^*)}
\]
	This is mapped into a $\rho/10$ neighborhood of $x^*$. Since
	the error $\delta$ is less than $\rho / 2$, the sequence of
	$x_i$ will never leave the $\rho$-neighborhood.
\end{proof}


	We state the following Lemma, that we will need later on:
\begin{lemma}\label{appalpha}
	Let $f$ be a polynomial and let $x^*$ be a single root of $f$.
	Let $|x-x^*| < 2^{-6} / \gamma(x^*)$.
	Then $\alpha(f,x) < 2^{-5}$.
\end{lemma}

\begin{proof}[Proof of Lemma~\ref{appalpha}]
	By Lemma~\ref{beta} item 2,
\[
	\beta(x) < |x - x^*| \frac{(\,1 - \gamma(x^*) |x-x^*|\,)^2}
	{\psi(\,\gamma(x^*) |x-x^*|\,)}
	+
	\gamma(x) |x - x^*|^2 \frac{1}{\psi(\,\gamma(x^*) |x-x^*|\,)}
\]
\par
	Lemma~\ref{nbhd} implies that $\gamma(x) < \frac{1}{1-4\times 2^{-6}}
	\gamma(x^*)$. Putting all together,
\[
	\alpha(x_i) < \frac{2^{-6}+2^{-12}}
	{\left(1-4 \times 2^{-6}\right)^2}
	< 2^{-5}
\]
\end{proof}


\subsection{Approximation of Algebraic Numbers}

\begin{theorem}\label{dioph3}
	Let $K$ be a number field. Let $f$ be a degree $d$ polynomial
	with coefficients in $K$. Let $x$ be a single root of $f$.
	Let $x_0 \in \mathbb C$ be such that $f'(x)\ne 0$ 
	Then the following bounds are true:
\begin{enumerate}
	\item $ w(\gamma(f,x_0)) \le [K[x_0]:\mathbb Q] \left(
		d^2 \lceil \log_2 d \rceil +
		d^3 \lceil \log_2 H(x_0) \rceil +
		d^2 \lceil \log_2 H(f) \rceil \right)$
	\item $ w(\gamma(f,x)) \le 3 d^4 w(f)$
	\item $ \gamma(f,x) \le 2^{3 d^4 w(f)}$
	\item If $f$ is the minimal polynomial of $x$ over $\mathbb Q$, then
              $\gamma(f,x) \le 2^{3 d^4 w(f)}
			   \le 2^{3 w(x)^5}$
\end{enumerate}
\end{theorem}

\begin{proof}[Proof of Theorem~\ref{dioph3}]

	In order to bound the weight of 
\[
\gamma(x_0) = \max_{k \ge 2} 
		\left( \frac{|f^{(k)}(x_0)|}{k ! |f'(x_0)|} 
		\right) ^{\frac{1}{k-1}}
\]
	we use Theorem
	~\ref{heights} with $F_0 (f,y) = f^{(k)}(y)$ and
	$F_1(y) = k! f'(y)$, $y=(x_0:1)$. We get 
\[
	H( F_0(y): F_1(y) )
	\le
	(d-1) \frac{d!}{d-k\,!} H(f) H(y)^{d-1}
\]
	A gross estimate of $k!$ and of $d-k\,!$ is $d!$. So we deduce that
\begin{eqnarray*}
H(\gamma(x_0)) &\le& \max_{k \ge 2} 
       \left( 
	d! (d-1) H(f) H(x_0:1)^{d-1} 
	\right) ^{\frac{1}{k-1}}\\
	&\le&
	d! (d-1) H(f) H(x_0:1)^{d-1}
\end{eqnarray*}

	We can also bound
\[
	[\mathbb Q[ \gamma(x_0) ] : \mathbb Q] 
        \le (d-1) [K[x_0]:K][K:\mathbb Q]
\]
\par
	Hence,
\[
	w(\gamma(x_0)) \le (d-1) [K[x_0]:K] [K:\mathbb Q] 
	\left( 1 + \lceil \log_2 d! (d-1) H(f) H(x)^{d-1} \rceil
	\right)
\]
\par
	and the first part of the Theorem follows. The second part is
	a consequence of Theorem~\ref{cauchy}. Then we use 
	Lemma~\ref{absonweight} to deduce part 3, and 
	Lemma~\ref{minonweight}
	for part 4. 
\end{proof}

\subsection{Proof of Theorem~\ref{mainC}}


\begin{proof}[Proof of Theorem~\ref{mainC}]

	Let $x$ be an algebraic number, and let $d = [\mathbb Q[x]:
	\mathbb Q]$. Let $f$ be the minimal polynomial of $x$ over
	$\mathbb Q$. By Corollary~\ref{minimal}, 
\[
	H(f) \le 2^{w(x)}
\]
	and therefore $f$ can be represented by $d+1$ numbers of
	$1+w(x)$ bits.
\medskip
\par
	Theorem~\ref{dioph3} item 4 implies:
\[
	\gamma(x) \le 2^{3 d^4 w(f)}
\]
\par
	Let $x_0$ be an approximation of $f$, such that $|x-x_0|
	< \frac{2^{-6}}{\gamma(x)}$. It is enough to choose $x_0$
	within a radius of
\[
	|x - x_0| <  2^{-6- 3 d^4 w(f)}
\]
	from $x$. One possibility is to truncate the real and imaginary
	part into fixed points of $8 + 3 d^4 w(f) + w(x)$ bits of precision
	(using fixed point representation).
\par
	We are under the conditions of Lemma~\ref{appalpha}, so
	$\alpha(x_0) < 2^{-5}$. 
\medskip
\par
	We can now produce the function $y(x)$. It is a list $(d,w,f,x_0)$
	where $w = \lceil w(x) \rceil$, and $d$, $f$ and $x_0$ are as
	above. Theorem~\ref{minonweight} yields 
	$w(x) < \mathrm{Bitsize}(y(x))$.
\par
	It is time to construct the machine $M$ that will provide the
	$2^{-k}$- approximations of $x$. First of all, we can bound
	$\gamma(f,x_0)$ by applying Lemma~\ref{gamma}. The following
	conclusion is not sharp, but is convenient:
\[
	\frac{1}{2} \gamma(x) \le \gamma(x_0) \le 2 \gamma(x)
\]
\par
	The next step is to choose $s$ such that
\[
\frac{2^{1-2^s}}{\gamma(x_0)} < 2^{-k}
\]
\par
	Using $w(\gamma(x_0)) \le Cd^7 w$, we restrict our choice
	to:
\[
	2^{1-2^s} < 2^{-k-C d^7 w}
\]
\par
	A good choice is $s = \lceil \log_2 \left( k+1+C d^7 w \right)
	\rceil$. In that case, $s \le \log_2 k + 7 \log_2 d + \log_2 w
	+ C'$, for some constant $C'$. The machine $M$ will perform
	$s$ approximate Newton iterations, obtaining at each step
	$x_i$ with accuracy
\[
	\delta = 2^{-4 -C d^7 w - 2^{s+2}} <
	\frac{2^{-4-2^{s+2}}}{\gamma(x_0)}
\]

	This corresponds to $4+C d^7 w + 2^{s+2}$ bits of precision
	in each $x_i$. This can be achieved by using precision of
	$k w \mathrm{Poly}(d)$ bits of fixed point, and truncating.
\par
\begin{sloppypar}
	Therefore, the total running time can be bounded in terms
	of $k \log_2 k \ w \log_2 w$ $\mathrm{Poly}(d)$ bit operations,
	or more precisely:
\end{sloppypar}
\[
	O( k \log_2 k \ w \log_2 w \ d^8 \log_2 d )
\]
	where the exponent in $d$ is not intended to be sharp.
\end{proof}



\section{Bit representation of Algebraic Numbers}


	Algebraic numbers are essentially discrete objects.
	They can be represented by a minimal polynomial. We
	have seen in Theorem~\ref{minonweight} and ~\ref{mainC} that
	the bit-length of such a representation is related to
	the weight of the algebraic number.
\par
	We will start this section by proving Theorem~\ref{mainD}.
	The main tool in the proof is the
	basis reduction algorithm by Lenstra, Lenstra and Lovasz 
	~\cite{LLL} (LLL for short). This algorithm provides an effective
	procedure to check for algebraic dependence.
\par
	The main results about that algorithm are only quoted below.
	Since this algorithm is defined over the integers (with the
	bit model), a few extra estimates will be necessary.
\medskip
\par
	Next, we would like to extend the ideas of Theorem~\ref{mainC} 
	to the case
        where we have several algebraic numbers $\alpha_1$,
	\dots, $\alpha_n$. It makes sense to represent them
	as polynomials $q_i(\alpha)$, where $\alpha$ is a 
	primitive element of $\mathbb Q[\alpha_1, \cdots, \alpha_n]$.
\par
\begin{definition} 
	An integer representation of $\alpha_1$, \dots,
	$\alpha_n \in (\qalg)^n$ is a list
	$(y(\alpha), q_1, \cdots, q_n) \in \mathbb Z^{\infty}$ where
	\begin{enumerate}
	\item $\alpha$ is a primitive element of 
              $\mathbb Q[\alpha_1, \cdots, \alpha_n]$
	\item $y : \qalg \rightarrow \mathbb Z^{\infty}$ is the function
	      defined in Theorem ~\ref{mainC}, where $\qalg$ is the ring
	      of algebraic numbers.
	\item $q_1, \cdots, q_n$ are densely represented polynomials 
	      with rational coefficients such that $\alpha_i = q_i(\alpha)$
	\end{enumerate}
\end{definition}
\par
	We will then prove a theorem on the complexity of basic
	arithmetic operations over integer representations of algebraic
	numbers. Namely, 
\begin{theorem}\label{emulation}
	The following operations can be executed in 
	polynomial time over $(\mathbb Z,>,w)$:
\begin{enumerate}
	\item Merging: given an integer representation of $a_1, 
	\cdots, a_n$ and an integer representation of $b_1, 
	\cdots, b_m$, compute one integer representation of
	$a_1, \cdots,$ $a_n,$ $b_1, \cdots, b_m$.
	\item Arithmetic: given integer representation for 
	$x, y$, compute integer representation for $x, y, x+y, xy, x-y$.
	\item Decision: given an integer representation for
	$x$, decide $x=0$, or $x \in \mathbb R$, or if this is the
	case, whether $x>0$.	
	\item Restriction: Given an integer representation for
	$a_1, \cdots, a_n$, produce an integer representation of
	$a_n$, of size polynomially bounded in $w(a_n)$.
\end{enumerate}
\end{theorem}

	Theorem~\ref{emulation} will be proved using
	a modification of well-known symbolic algebra algorithms 
	(namely Collins' SIMPLE algorithm for primitive elements).
\par
	One consequence of Theorem~\ref{emulation} and 
        Theorem~\ref{mainD} together will be that
	computation models over the integers $(\mathbb Z, >, w)$ and
	over the real algebraic numbers $(\qralg, >, w)$ are 
	polynomially equivalent, 
	finishing the proof of
	Theorem~\ref{mainA}.

\subsection{Lattice basis reduction}
\medskip
\par
\begin{definition}
	A Lattice is a subset $\Lambda$ of $\mathbb R^n$ of the
	form:
\[
	\Lambda = \{ A z : z \in \mathbb Z^n \}
\]
	where $A$ is a (real or) integer $n \times n$ matrix.
\end{definition}
	We can restrict ourselves to the case where $A$ is invertible.
	In that case, the columns of $A$ are said to be a basis of
	the Lattice $\Lambda$. Of course a lattice admits an 
	infinity of bases, and it is possible to pass from a basis
	to another basis through elementary column operations (adding
	a multiple of another column, or column permutation).
\par
	Given $A$, the LLL algorithm will perform those operations and output 
	a `reduced' basis. Reduced bases keep some of the good properties
	of orthonormal bases, while maintaining integer arithmetic.
	The price to pay is to replace orthogonality and norm-1 with
	certain bounds. 
\par
	Following the notation of ~\cite{LLL}, let $(b) = (b_1, \cdots, b_n)$
	be a basis for a lattice $\Lambda$. The following vectors and
	numbers are associated to $(b)$:
\begin{eqnarray*}
	b_i ^* &=& b_i - \sum_{j=1}^{i-1} \mu_{ij} b_j \\
	\mu_{ij} &=& \frac{ <b_i, b_j^*> }{\| b_j^* \|^2}
\end{eqnarray*}
\medskip
\par
	Vectors $b_i^*$ correspond to the Gram-Schmidt orthogonalization
	of $(b)$ in $\mathbb R ^n$.
\begin{definition}[LLL]
	A basis $(b)$ for a lattice $\Lambda$ is said to be reduced
	if and only if
\[
\begin{array}{rcll}
	|\mu_{ij}| &\le& \frac{1}{2} &,\ 1 \le j < i \le n \\
	\\
	| b_i^* + \mu_{i,i-1} b_{i-1}^* | &\ge& \frac{3}{4} |b_{i-1}^*|^2
	& , \ 1 < i \le n
\end{array}
\]
\end{definition}
\medskip
\par
	The following result is Proposition 1.11 in~\cite{LLL}:
\begin{proposition}\label{LLLx}
	Let $L \subset \mathbb R^n$ be a lattice with reduced
	basis $b_1, b_2, \cdots, b_n$. Then $|b_1|^2 \le 2^{n-1} |x|^2$
	for any $x \in L$, $x \ne 0$. 
\end{proposition}

	Also, Proposition 1.9 ibid says that:
\begin{proposition}\label{LLLdet}
	Let $L \subset \mathbb R^n$ be a lattice with reduced
	basis $b_1, b_2, \cdots, b_n$. Then $\|b_1\| \le 2^{\frac{n-1}{4}}
	(\det L)^{\frac{1}{n}}$
\end{proposition}
	where the determinant of a lattice $L$ is the determinant of
	any positively oriented basis of $L$.
\medskip
\par
	Complexity of the basis-reduction algorithm goes as follows
	(Proposition 1.26 in ~\cite{LLL}):
\begin{proposition}\label{lll}
	Let $L \in \mathbb Z^n$ be a lattice with basis $b_1, \cdots,
	b_n$ and let $B \in \mathbb R, B \ge 2$ be such that $|b_i|^2
	\le B$ for $1 \le i \le n$. Then the number of arithmetic
	operations needed by the basis reduction algorithm described
	in ~\cite{LLL} equation (1.15) is $O(n^4 \log B)$, and the
	integers on which those operations are performed each have 
	binary length $O(n \log B)$.
\end{proposition}


This was used in~\cite{LLL} to deduce that 
\begin{theorem}\label{LLL}
	The cost of factorizing $f \in \mathbb Z[x]$ into irreducible
	factors, in the bit model, is 
\[
O((\deg f)^{12} + (\deg f)^9 (\log \|f\|_2)^3)
\]
\end{theorem}



	Before giving the proof of Theorem~\ref{mainD}, we will
	proceed like it is suggested in ~\cite{LLL} 1.39 to obtain
	the following, preliminary result:

\begin{lemma}\label{prlemma}
	There is a polynomial time machine in $(\qralg,
	\ge, w)$ accepting input $w_0, n \in \mathbb N$ and
        $\alpha_0=1, \alpha_1, \cdots \alpha_n \in \qralg$
	with output $p \in \mathbb Z^{\infty}$ with the
	following properties:
	\begin{enumerate}
	\item If $w(\alpha_1, \cdots, \alpha_n) \le w_0$, and there
		is a relation $\sum \hat p_i \alpha_i = 0$, not all
		the $\hat p_i$ equal to zero,
		then the machine returns some $p \ne 0$ such that
		$\sum p_i \alpha_i = 0$.
	\item If the machine returns $p \ne 0$, then $\sum p_i \alpha_i = 0$.
	\item The machine always terminates.
	\end{enumerate}
\end{lemma}




\begin{proof}[Proof of Lemma~\ref{prlemma}] 
	{\ }
	\par
	{\bf Step 1}. Let $b$ be a constant in $\mathbb N^*$, to be
	precised later. Each of the $\alpha_i$ can be approximated
	by $\frac{a_i}{b}$, $a_i \in \mathbb Z$, with error
	$| \alpha_i - \frac{a_i}{b} | \le \frac{1}{2b}$. The $a_i$'s
	can be found by computing $b \alpha_i$ and rounding to the
	nearest integer.
	\par
	{\bf Step 2}. let $m \in \mathbb Z^{n+1}$, $m_i < B$ where $B$
	is another constant to be computed later. We assume here that
	$w(\alpha) < w_0$. Then we have two possibilities, namely
\[
	\sum_{i=0}^n m_i \alpha_i = 0
\]
	or
\[
	| \sum_{i=0}^n m_i \alpha_i | >\delta = ( B(n+1) )^{-w_0}
\]
\par
	Indeed, 
\[
	H(\sum m_i \alpha_i) \le (n+1) \prod H(m_i \alpha_i)
	= n \prod m_i \prod H(\alpha_i)
\]
\par
	Let $d = \deg [ \mathbb Q[\alpha_1, \cdots, \alpha_n] ]$ and 
	assume that $\sum m_i \alpha_i \ne 0$. Then
\begin{eqnarray*}
	|\sum m_i \alpha_i| &>& H(\sum m_i \alpha_i)^{-d} \\
	&>& (n+1)^{-d} B^{-nd} \left( \prod H(\alpha_i) \right)^{-d} \\
	&>& (n+1)^{-d} B^{-nd} 2^{-n w_0}
\end{eqnarray*}


	Also, setting $b > \frac{(n+1)B}{\delta}$, so that
	$\frac{(n+1) B}{2b} < \frac{\delta}{2}$, one
	obtains:
\[
	\left| 
        \sum_{i=0}^n m_i \alpha_i - 
	\sum_{i=0}^n m_i \frac{a_i}{b} 
	\right| < \frac{\delta}{2}
\]

	Hence, under the assumptions $w(\alpha) < w_0$, $|m_i| < B$
	one obtains:
\begin{eqnarray*}
	| \sum m_i \frac{a_i}{b} | < \frac{\delta}{2}
	&\Rightarrow&
	\sum m_i \alpha_i = 0 \\ 
	| \sum m_i \frac{a_i}{b} | \ge \frac{\delta}{2}
	&\Rightarrow&
	\sum m_i \alpha_i \ne 0 
\end{eqnarray*}
\par
	{\bf Step 3} We consider now the lattice $\Lambda$ given by
	the basis:
\[
	U = 
	\left[
	\begin{array}{cccc}
	1 \\
	& 1 \\
	& & \ddots \\
	\frac{a_0 C}{b} &
	\frac{a_1 C}{b} &
	\cdots &
	\frac{a_n C}{b} 
	\end{array}
	\right]
\]
	where $C$ is a multiple of $b$ to be given later.
	Suppose that $u_1$ = $U \left[ \begin{array}{c}
	m_0 \\ m_1 \\ \vdots \\ m_n \end{array} \right]$ is
	the first vector of a reduced basis of $\Lambda$.
	According to Proposition~\ref{LLLdet},
\begin{eqnarray*}
	\| u_1 \| &\le& 2^{\frac{n}{4}} (\det U)^{\frac{1}{n+1}} \\
	          & = & 2^{\frac{n}{4}} (\frac{C a_n}{b})^{\frac{1}{n+1}}
\end{eqnarray*}
\par
	{\bf Step 4}. Now, since $u_1 = \left[ \begin{array}{c}
        m_0 \\ \vdots \\ m_{n-1} \\ \sum_{i=0}^n \frac{C a_i}{b} m_i 
	\end{array} \right]$ we can estimate:
\[
	\left|
	\sum_{i=0}^{n} \frac{C a_i}{b} m_i 
	\right|
	<
	2^{ \frac{n}{4} }
	\left(
	\frac{C a_n}{b}
	\right)^{\frac{1}{n+1}}
\]
	hence
\[
	\left|
	\sum_{i=0}^{n} \frac{a_i}{b} m_i 
	\right|
	<
	2^{ \frac{n}{4} }
	C^{-\frac{n}{n+1}}
	\left(
	\frac{a_n}{b}
	\right)^{\frac{1}{n+1}}
\]
	so if we fix $C > \left( \frac{2}{\delta} 2^{-\frac{n}{4}}
	|\alpha_n + \frac{1}{2b}|^{-\frac{1}{n+1}} \right)
	^{1+\frac{1}{n}}$ we obtain
\[
	\left|
	\sum_{i=0}^n \frac{a_i}{b} m_i 
	\right| < \frac{\delta}{2}
\]
\par
	{\bf Step 5}. In order to guarantee the condition in Step 2, we
	need to give $B$ where $|m_i| < B$ for the reduced basis we
	will obtain. Let us assume that $w(\alpha) \le w_0$. Also,
	we assume that $\sum \hat p_i \alpha_i = 0$, with $\hat p_i$
	minimal, $\hat p_i \ne 0$. Then according to Theorem
	~\ref{minonweight}, $|\hat p_i| < 2^{w_0}$. Proposition ~\ref{LLLx}
	lets us choose 
\[
B=2^{n+w_0}
\]
	Then we choose $\delta$ and $b$ as
	above and $C$ as in step 4. All the operations done so far 
	involve integer values with polynomially bounded size. The
	$a_i$'s can be obtained by binary search (also in polynomial
	time). The LLL algorithm is also polynomial time.
	At the end of the program, we check if $\sum p_i \alpha_i = 0$.
\end{proof} 

\begin{remark}
	Felipe Cucker pointed out that in the argument above,
	only the binary search and the 
	final test $\sum p_i \alpha_i=0$
	involve computations over $\qralg$. All other computations can
	be performed in a Turing machine.
\end{remark}

\begin{proof}[Proof of Theorem~\ref{mainD}]

	Let $x \in \qralg$. If we knew $d=[\mathbb Q[x]:\mathbb Q]$
	and a reasonable bound for $w_0$, we could just apply
	Lemma~\ref{prlemma} to input $1,x, x^2, \cdots, x^d$ to
	obtain the minimal polynomial of $x$. Since we don't, we
	proceed as follows:
\par
	Start with bounds $d=1, w_0=2$, and apply Lemma~\ref{prlemma}.
	In case of failure ($p=0$), multiply $d$ and $w_0$ by $2$ and
	start again.
\par
	The machine in Lemma~\ref{prlemma} will eventually succeed, 
	with some $d < 2 [\mathbb Q[x]:\mathbb Q]$ and some 
	$w_0 < 2 d w(x)$. Those bounds are polynomial in $w(x)$.
\par
	Next, we use Theorem ~\ref{LLL} to factorize $p$ over
	$\mathbb Q$, so we obtain the minimal polynomial of $x$. 
\par
	At that time, we also have an upper bound $w_0$ for $w(x)$.
	Therefore, Theorem ~\ref{dioph3} implies that:
\[
	\gamma(x) < 2^{3 w_0 ^5}
\]
	Recall from Lemma~\ref{appalpha} that if $|x-x'| <
	2^{-6} / \gamma(x)$, then  $\alpha(x') < 2^{-5}$ and
	$x'$ is a suitable approximation of $x$. So all that we
	need is to compute an approximation of $x$ with precision
	$2^{-6-3 w_0 ^5}$. This can be done by bisection, in
	polynomial time, in such a way that the denominator is
	indeed $2^{-6-3 w_0 ^5}$.
\end{proof}


\subsection{Symbolic algebra}

\begin{proof}[Proof of Theorem~\ref{emulation}]
	The most difficult part of the proof of Theorem~\ref{emulation}
	is the first. 
\par
	We will follow Collins'  SIMPLE algorithm, as 
	explained in~\cite{LOOS}. However, instead of using isolating
	intervals, we will use approximate roots. Also, due to the
	estimates on weights, some of
	the steps will have to be modified.
\par
	{\bf Item 1} reduces to the following problem.
\begin{problem}[Extension]
	Let $A$ and $B$ be minimal polynomials of $a$ and
	$b$, respectively. Let $a'$ and $b'$ be approximate
	roots associated to $a$ and $b$. Produce 
	a minimal polynomial $C$
	for a primitive element
        $c$ of $\mathbb Q[a,b]$, together with an approximate root
	$c'$ of $C$ associated to $c$. Also, produce rational
	polynomials $q_1$ 
        and $q_2$ such that
	$a = q_1(c)$ and $b = q_2(c)$. 
\end{problem}

	We need a polynomial time algorithm for solving the Extension
	Problem. Let us fix some notation. If $f$ and $g$ are polynomials
	in $y$, $f=\sum_{i=1}^m f_i y^i$ and $g=\sum_{i=1}^n g_i y^i$,
	$f_m \ne 0$, $g_n \ne 0$, the Sylvester matrix $\syl_y(f,g)$ is
	the matrix associated to the operator:
\[
	a, b \mapsto af + bg
\]
	where $a$ and $b$ are polynomials of degree $n-1$ and $m-1$,
	respectively:
\[
	\syl_y (f,g) =
	\left[
	\begin{array}{ccccccccc}
f_0&   &   &   &g_0 \\
f_1&f_0&   &   &g_1&g_0\\
f_2&f_1&\ddots  &   &\vdots  &g_1 & \ddots\\ 
\vdots  &f_2&\ddots  &f_0& &\vdots  & \ddots &g_0\\
f_m&   & \ddots &f_1& g_n  & &   & g_1 \\
   &f_m&   &f_2&   &g_n   & & \vdots \\
   &   & \ddots & \vdots &   &   & \ddots  \\
   &   &   &f_m&   &   &   & g_n 
	\end{array}
\right]
\]
\par
	The resultant $\res_y (f,g)$ is just 
\[
	\res_y (f,g) = \det \ \syl_y(f,g)
\]
\par
	It is obvious that if $f$ and $g$ have a common root then
	$\res_y(f,g)$ vanishes. The converse is easy (assume that
	$af + bg \equiv 0$ and suppose that $f$ and $g$ have no common
	root. Then $a$ has to vanish at the roots of $g$, with multiplicity,
        etc...)
\par
	This is still valid if the coefficients of $f$ and $g$ belong
	to the transcendental extension $\mathbb Q[x]$.
\medskip
\par
	We can now produce a primitive element $c$ for $\mathbb Q[a,b]$
	as follows. Let
\[
	r(x,t) = \mathrm{Res}_y ( A(x-ty), B(y) )
\]
	where $x$ is a formal variable.
	We need to compute $r(x,t)$ as a polynomial in $x$ 
	for a fixed value of $t$. We will choose $t$ such that                  
        $r(x,t)$ has no multiple root (as a polynomial in $x$).
\par
	Note that the roots of $r(x,t)$ are the expressions
        $c_{ij} = a_i + t b_j$ 
	where $a_i$ and $b_j$ are the roots of $A$ and $B$,
	respectively.
\par
	Therefore, if we set $t$ large enough (namely
	$t > 2^{2+ w(a)+w(b)}$) we get:
\[
	|a_i - a_j| < 2^{w(a_i - a_j)} \le 2^{1+w(a)}
\]
	because of Lemma~\ref{absonweight}.
\[
	|b_k - b_l| > 2^{-1-w(b)}
\]
	by the same reason.
	Thus
\[
	|a_i - a_j| < t |b_k - b_l|
\]
	and hence
\[
	a_i - t b_k \ne a_j - t b_l 
\]
	It follows that $r(x,t)$ has no multiple root.
\par
	We can use Theorem~\ref{LLL} to find out an irreducible
	factor of $r(x,t)$ vanishing in $a_i+tb_k$. Let $C(x)$ be that factor.
\par
	Our primitive element is $c = a_i + t b_k$. The weight of $c$ is
	polynomially bounded on the input size. An approximation $c'$
	can be computed by using estimate on $w(c)$ and Lemma~\ref{nbhd2}.
\par
	The polynomial $r(x,t)$ is a polynomial of degree no more
	than $(\deg A)
	(\deg B)$ in $x$. i
\par
	For any $x$ in $\{ 0; 1; (\deg A) (\deg B) \}$, it is
	possible to compute, in polynomial time, $\res_y(A(x-ty),B(y))$.
	Moreover, 
\[
	\res_y (A(x-ty),B(y))_{|x = i}
	=
	\res_y( A(i - ty), By)
\]
	so that the coefficients of $r(x,t)$ may be obtained by
	interpolation.
\par
	We can use Theorem~\ref{LLL} to deduce that an irreducible
	factor of $r(x,t)$ vanishing in $c$ can be computed in
	polynomial time. Let $C(x)$ be that factor.
\par
	Let $D(x) = \gcd( A(c - tx), B(x) )$. The polynomial
	$D(x)$ can be computed symbolically with arithmetic in
	$\mathbb Q[c]$. Its only root is $b_k$, so it has degree 1,
	and is of the form: $D(x) = x - q_2(c)$.  
\par
	We can also set $q_1(x) = x -t q_2(x)$ so that $q_1(c)=a$.  
	
\medskip
\par
	{\bf Item 2}: Let $x = q_1(a)$ and $y=q_2(a)$. Since $A$
	is the minimal polynomial of $a$,
\begin{eqnarray*}
	x+y &=& (q_1 + q_2) (a) \\
	x-y &=& (q_1 - q_2) (a) \\
	xy  &=& (q_1 q_2 \mod A) (a)
\end{eqnarray*}
\medskip
\par
	{\bf Item 3}: If $x = q(a)$ we have, by Corollary~\ref{heights-affine}
	that:
\[
	H(x) \le S H(q) H(a)^{\deg q}
\]
\par
	Also,
\begin{eqnarray*}
	w(x) &=&
	\left[ \mathbb Q[x] : \mathbb Q \right] 
	\lceil 1 + \log_2 H(x) \rceil \\
	&\le&
        \left[ \mathbb Q[a] : \mathbb Q \right] 
	\lceil 1 + (\deg q) \log_2 H(a) + \log_2 H(q) \rceil \\
	&\le& (\deg q) w(a) + w(q)
\end{eqnarray*}

	If $x \ne 0$ then $|x|>2^{-w(x)}$. Thus it is enough to
	compute $x$ up to accuracy $2^{-w(x)-1}$. If we evaluate
	$a$ with accuracy $2^{-b}$, then $q(a)$ may be evaluated
	with accuracy
	$2^{-b} |a|^{\deg q - 1} \max |q_i|$. All this can be
	done in polynomial time (Theorem~\ref{mainC}).
\medskip
\par
	{\bf Item 4}. Let $q$ be such that $a_n = q(a)$. Using
	arithmetic in $\mathbb Z[x] \mod A$, we can compute
	$1$, $q(x)$, $q^2(x) \mod A$, $q^3(x) \mod A$, etc..., 
	and check for linear independence. Once we get
	linear dependency, we solve the system:
\[
	Q p = 0
\]
	where $Q$ is the matrix with columns $1$, $q(x)$, 
	\dots, $q(x)^i \mod A(x)$, and where each polynomial
	is represented by its coefficients.
\end{proof} 

\subsection{\pnp over $(\mathbb Z, >, w)$ $\Leftrightarrow$
            \pnp over $(\qralg, >, w)$}



	We complete here the proof of Theorem~\ref{mainA}. Define:
\[
\begin{array}{crcl}
b:& \mathbb Z^{\infty} & \rightarrow & \qralg \\
& y & \mapsto & \lim_{k \rightarrow \infty} M(k,y)
\end{array}
\]
	where $M$ is the machine defined in Theorem~\ref{mainC}.
\par
	Let us recall the construction of $M$. The list $y$ is 
	interpreted as the coefficients of an irreducible
	polynomial $p$ and an initial approximate root. $M(k,y)$ computes
	then a certain number of Newton iterations. If the limit is 
	defined, this means that $b(y)$ is a root of $p$. 
\par
	I claim that $b$ is a morphism from $(\qralg, >, w)$ into
	$(\mathbb Z,>,w)$. Indeed, surjectivity (M1) and honesty
	(M2) follow from Theorem~\ref{mainC}. 
\par
	The domain of definition of $b$ is the class of $y$ such that
	Newton iteration will converge to a root of $p$. Combining
	Theorem~\ref{mainC}, Lemma~\ref{appalpha} and Theorem~\ref{dioph3},
	we can check the domain of $b$ in polynomial time. This implies
	property (M3).
\par
	Theorem~\ref{emulation} implies that $b$ is `very honest' and
	satisfies the hypotheses of Lemma~\ref{vh}. Therefore,
	$b$ has the pull-back property (M4).
\medskip
\par
	We define also the inclusion morphism from $(\mathbb Z,>,w)$
	into $(\qralg,>,w)$~:
\[
\begin{array}{rrcl}
i: & \qralg \supseteq \mathbb Z & \rightarrow & \mathbb Z\\
& x & \mapsto & x
\end{array}
\]
\medskip
\par
	It remains to produce machines $A_1$ and $A_2$. The machine
	$A_1$ is fairly trivial: it associates, to every $x \in \mathbb Z$, 
	its corresponding $y(x)$. 
\par
	The machine $A_2$ is the polynomial time machine
	from Theorem~\ref{mainD}. It associates to every $x \in \qralg$,
	its $y(x)$ 
\par
	Thus, models  $(\mathbb Z,>,w)$ and $(\qralg,>,w)$ are 
	polynomially equivalent and we can apply Theorem~\ref{transfer}
	to deduce that
\[
	\text{\pnp over } (\mathbb Z,>,w) \ \Leftrightarrow \
	\text{\pnp over } (\qralg,>,w)
\]
\par
	Almost the same argument shows that \pnp over $(\mathbb Z,>,w)$
	if and only if  \pnp over $(K,>,w)$ where $K$ is a number field,
	considered as a ring. Together with Propositions~\ref{tr1} and
	~\ref{tr2}, this concludes the proof of Theorem~\ref{mainA}









	Define 
\[
	\begin{array}{crcl}
	\sigma_1:& \mathbb Z &\rightarrow &\qralg \\
		 & n &\mapsto &n
	\end{array}
\]
	and
\[
	\begin{array}{crcl}
	\sigma_2:& \qralg &\rightarrow &\mathbb Z \\
		 & x &\mapsto &y(x)
	\end{array}
\]
\par
	where $y(x)$ is the function in Theorem~\ref{mainC}.
\par
	It follows from Theorem ~\ref{emulation} that $\sigma_2$
	is a morphism. Moreover, from Theorem~\ref{mainD} we can
	deduce that $\sigma_1 \circ \sigma_2$ is in polynomial
	time over $(\mathbb Z, >, w)$. Hence, we can apply
	Theorem~\ref{transfer}.


\begin{thebibliography}{BCSS98}

\bibitem[AB94]{AB}
Ilan Adler and Peter~A. Beling.
\newblock Polynomial algorithms for linear programming over the algebraic
  numbers.
\newblock {\em Algorithmica}, 12:436--457, 1994.

\bibitem[AM69]{ATIYAH-MACDONALD}
M.~F. Atiyah and I.~G. Macdonald.
\newblock {\em Introduction to Commutative Algebra}.
\newblock Addison Wesley, Reading MA, 1969.

\bibitem[BCSS96]{BCSS96}
Lenore Blum, Felipe Cucker, Mike Shub, and Steve Smale.
\newblock Algebraic settings for the problem {$\mathcal P \ne \mathcal{NP}$}.
\newblock In Steve~Smale James~Renegar, Michael~Shub, editor, {\em The
  Mathematics of Numerical Analysis}, volume~32 of {\em Lectures in Applied
  Mathematics}, pages 125--144. American Mathematical Society, 1996.

\bibitem[BCSS98]{BCSS98}
Lenore Blum, Felipe Cucker, Mike Shub, and Steve Smale.
\newblock {\em Complexity and Real Computation}.
\newblock Springer, 1998.

\bibitem[BS67]{BOREVICH-SHAFAREVICH}
Z.~I. Borevich and I.~R. Shafarevich.
\newblock {\em Th{\'e}orie des Nombres}.
\newblock Gauthier-Villars, Paris, 1967.
\newblock Reprinted by Editions Jacques Gabay, 1993.

\bibitem[BSS89]{BSS}
Lenore Blum, Mike Shub, and Steve Smale.
\newblock On a theory of computation and complexity over the real numbers:
  {$\mathcal {NP}$}-completeness, recursive functions and universal machines.
\newblock {\em Bulletin of the AMS}, 21(1), 1989.

\bibitem[Cas67]{CASSELS}
J.~W.~S. Cassels.
\newblock Global fields.
\newblock In {\em Algebraic Number Theory (Proc. Instructional Conf., Brighton,
  1965)}, pages 42--84. Thompson, Washington, D.C., 1967.

\bibitem[CG97]{CG}
Felipe Cucker and Dima Grigoriev.
\newblock On the power of real {T}uring machines over binary inputs.
\newblock {\em SIAM Journal on Computing}, 26(1):243--254, 1997.

\bibitem[CK95]{CK}
Felipe Cucker and Pascal Koiran.
\newblock Computing over the reals with addition and order: higher complexity
  classes.
\newblock {\em Journal of Complexity}, 11(3):358--376, 1995.

\bibitem[Coh93]{COHEN93}
Henri Cohen.
\newblock {\em A course in computational algebraic number theory}, volume 138
  of {\em Graduate Texts in Mathematics}.
\newblock Springer-Verlag, Berlin, 1993.

\bibitem[KA82]{KANTOROVICH}
L.~V. Kantorovich and G.~P. Akilov.
\newblock {\em Functional analysis}.
\newblock Pergamon Press, Oxford, second edition, 1982.
\newblock Translated from the Russian by Howard L. Silcock.

\bibitem[Knu98]{KNUTH}
Donald~E. Knuth.
\newblock {\em The Art of Computer Programming}, volume~2.
\newblock Addison-Wesley, Reading MA, third edition edition, 1998.

\bibitem[Koi94]{KOIRAN94}
Pascal Koiran.
\newblock Computing over the reals with addition and order.
\newblock {\em Journal of Complexity}, 133:486--495, 1994.

\bibitem[Koi97]{KOIRAN97}
Pascal Koiran.
\newblock A weak version of the {B}lum, {S}hub, and {S}male model.
\newblock {\em Journal of Computer and System Sciences}, 54(1, part
  2):177--189, 1997.
\newblock 1st Annual Dagstuhl Seminar on Neural Computing (1994).

\bibitem[KP95]{KP}
Teresa Krick and Luis~M Pardo.
\newblock A computational method for diophantine approximation.
\newblock In {\em Proceedings of {MEGA'94}}, Progress in Mathematics.
  Birkhauser, 1995.

\bibitem[Lan86]{LANG86}
Serge Lang.
\newblock {\em Algebraic Number Theory}.
\newblock Springer-Verlag, New York, 1986.

\bibitem[Lan97]{LANG97}
Serge Lang.
\newblock {\em Survey of Diophantine Geometry}.
\newblock Springer, Berlin, 1997.
\newblock Corrected Second Printing.

\bibitem[LLL82]{LLL}
Arjen~K. Lenstra, Hendrik~W. Lenstra, and Laszlo Lovasz.
\newblock Factoring polynomials with rational coefficients.
\newblock {\em Mathematische Annalen}, 261:515--534, 1982.

\bibitem[Loo83]{LOOS}
R.~Loos.
\newblock Computing in algebraic extensions.
\newblock In {\em Computer algebra}, pages 173--187. Springer, Vienna, 1983.

\bibitem[Lov86]{LOVASZ}
L{\'a}szl{\'o} Lov{\'a}sz.
\newblock {\em An Algorithmic Theory of Numbers, Graphs and Convexity}.
\newblock CBMS-NSF Reg. Conference Series in Appl. Math. 50. SIAM,
  Philadelphia, 1986.

\bibitem[Mal93]{MALAJOVICH93}
Gregorio Malajovich.
\newblock {\em On the complexity of path-following Newton algorithms for
  solving systems of polynomial equations with integer coefficients.}
\newblock PhD thesis, Berkeley, 1993.

\bibitem[Mal94]{MALAJOVICH94}
Gregorio Malajovich.
\newblock On generalized {N}ewton algorithms: quadratic convergence,
  path-following and error analysis.
\newblock {\em Theoret. Comput. Sci.}, 133(1):65--84, 1994.
\newblock Selected papers of the Workshop on Continuous Algorithms and
  Complexity (Barcelona, 1993).

\bibitem[Mic94]{MICHAUX94}
Christian Michaux.
\newblock \pnp over the nonstandard reals implies \pnp over {$\mathbb R$}.
\newblock {\em Journal of Complexity}, 133:95--104, 1994.

\bibitem[Mig83]{MIGNOTTE}
M.~Mignotte.
\newblock Some useful bounds.
\newblock In {\em Computer algebra}, pages 259--263. Springer, Vienna, 1983.

\bibitem[MM99]{MM99}
Gregorio Malajovich and Klaus Meer.
\newblock On the structure of {$\mathcal{NP}_{\mathbb C}$}.
\newblock {\em SIAM Journal on Computing}, 28(1), 1999.

\bibitem[MW97]{MW}
Michael Maller and Jennifer Whitehead.
\newblock Computational complexity over the $p$-adic numbers.
\newblock {\em Journal of Complexity}, 13(2):195--207, 1997.

\bibitem[Sil86]{SILVERMAN86}
Joseph~A. Silverman.
\newblock {\em The Arithmetic of Elliptic Curves}.
\newblock Graduate Texts in Mathematics 106. Springer, New York, 1986.

\bibitem[Sma86]{PE}
Steve Smale.
\newblock Newton's method estimates from data at one point.
\newblock In {\em The merging of disciplines: new directions in pure, applied,
  and computational mathematics (Laramie, Wyo., 1985)}, pages 185--196.
  Springer, New York, 1986.

\bibitem[Sma98]{SMALE98}
Steve Smale.
\newblock Mathematical problems for the next century.
\newblock {\em Math. Intelligencer}, 20(2):7--15, 1998.

\bibitem[SS93]{BEZ1}
Michael Shub and Steve Smale.
\newblock Complexity of {B}\'ezout's theorem. {I}. {G}eometric aspects.
\newblock {\em J. Amer. Math. Soc.}, 6(2):459--501, 1993.

\end{thebibliography}


\end{document}
