Library Flocq.Appli.Fappli_IEEE_bits

This file is part of the Flocq formalization of floating-point arithmetic in Coq: http://flocq.gforge.inria.fr/

Copyright (C) 2011 Sylvie Boldo
Copyright (C) 2011 Guillaume Melquiond

This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the COPYING file for more details.

IEEE-754 encoding of binary floating-point data

Require Import Fcore.
Require Import Fcore_digits.
Require Import Fcalc_digits.
Require Import Fappli_IEEE.

Section Binary_Bits.

Number of bits for the fraction and exponent
Variable mw ew : Z.
Hypothesis Hmw : (0 < mw)%Z.
Hypothesis Hew : (0 < ew)%Z.

Let emax := Zpower 2 (ew - 1).
Let prec := (mw + 1)%Z.
Let emin := (3 - emax - prec)%Z.
Let binary_float := binary_float prec emax.

Let Hprec : (0 < prec)%Z.
Qed.

Let Hm_gt_0 : (0 < 2^mw)%Z.
Qed.

Let He_gt_0 : (0 < 2^ew)%Z.
Qed.

Hypothesis Hmax : (prec < emax)%Z.

Definition join_bits (s : bool) m e :=
  (((if s then Zpower 2 ew else 0) + e) * Zpower 2 mw + m)%Z.

Definition split_bits x :=
  let mm := Zpower 2 mw in
  let em := Zpower 2 ew in
  (Zle_bool (mm * em) x, Zmod x mm, Zmod (Zdiv x mm) em)%Z.

Theorem split_join_bits :
  forall s m e,
  (0 <= m < Zpower 2 mw)%Z ->
  (0 <= e < Zpower 2 ew)%Z ->
  split_bits (join_bits s m e) = (s, m, e).

Theorem join_split_bits :
  forall x,
  (0 <= x < Zpower 2 (mw + ew + 1))%Z ->
  let '(s, m, e) := split_bits x in
  join_bits s m e = x.

Theorem split_bits_inj :
  forall x y,
  (0 <= x < Zpower 2 (mw + ew + 1))%Z ->
  (0 <= y < Zpower 2 (mw + ew + 1))%Z ->
  split_bits x = split_bits y ->
  x = y.

Definition bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sx => join_bits sx 0 0
  | B754_infinity sx => join_bits sx 0 (Zpower 2 ew - 1)
  | B754_nan => join_bits false (Zpower 2 mw - 1) (Zpower 2 ew - 1)
  | B754_finite sx mx ex _ =>
    if Zle_bool (Zpower 2 mw) (Zpos mx) then
      join_bits sx (Zpos mx - Zpower 2 mw) (ex - emin + 1)
    else
      join_bits sx (Zpos mx) 0
  end.

Definition split_bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sx => (sx, 0, 0)%Z
  | B754_infinity sx => (sx, 0, Zpower 2 ew - 1)%Z
  | B754_nan => (false, Zpower 2 mw - 1, Zpower 2 ew - 1)%Z
  | B754_finite sx mx ex _ =>
    if Zle_bool (Zpower 2 mw) (Zpos mx) then
      (sx, Zpos mx - Zpower 2 mw, ex - emin + 1)%Z
    else
      (sx, Zpos mx, 0)%Z
  end.

Theorem split_bits_of_binary_float_correct :
  forall x,
  split_bits (bits_of_binary_float x) = split_bits_of_binary_float x.

Definition binary_float_of_bits_aux x :=
  let '(sx, mx, ex) := split_bits x in
  if Zeq_bool ex 0 then
    match mx with
    | Z0 => F754_zero sx
    | Zpos px => F754_finite sx px emin
    | Zneg _ => F754_nan
    end
  else if Zeq_bool ex (Zpower 2 ew - 1) then
    if Zeq_bool mx 0 then F754_infinity sx else F754_nan
  else
    match (mx + Zpower 2 mw)%Z with
    | Zpos px => F754_finite sx px (ex + emin - 1)
    | _ => F754_nan
    end.

Lemma binary_float_of_bits_aux_correct :
  forall x,
  valid_binary prec emax (binary_float_of_bits_aux x) = true.

Definition binary_float_of_bits x :=
  FF2B prec emax _ (binary_float_of_bits_aux_correct x).

Theorem binary_float_of_bits_of_binary_float :
  forall x,
  binary_float_of_bits (bits_of_binary_float x) = x.

Theorem bits_of_binary_float_of_bits :
  forall x,
  (0 <= x < 2^(mw+ew+1))%Z ->
  binary_float_of_bits x <> B754_nan prec emax ->
  bits_of_binary_float (binary_float_of_bits x) = x.

End Binary_Bits.

Specialization for IEEE single precision operations
Section B32_Bits.

Definition binary32 := binary_float 24 128.

Let Hprec : (0 < 24)%Z.
Qed.

Let Hprec_emax : (24 < 128)%Z.
Qed.

Definition b32_opp := Bopp 24 128.
Definition b32_plus := Bplus _ _ Hprec Hprec_emax.
Definition b32_minus := Bminus _ _ Hprec Hprec_emax.
Definition b32_mult := Bmult _ _ Hprec Hprec_emax.
Definition b32_div := Bdiv _ _ Hprec Hprec_emax.
Definition b32_sqrt := Bsqrt _ _ Hprec Hprec_emax.

Definition b32_of_bits : Z -> binary32 := binary_float_of_bits 23 8 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b32 : binary32 -> Z := bits_of_binary_float 23 8.

End B32_Bits.

Specialization for IEEE double precision operations
Section B64_Bits.

Definition binary64 := binary_float 53 1024.

Let Hprec : (0 < 53)%Z.
Qed.

Let Hprec_emax : (53 < 1024)%Z.
Qed.

Definition b64_opp := Bopp 53 1024.
Definition b64_plus := Bplus _ _ Hprec Hprec_emax.
Definition b64_minus := Bminus _ _ Hprec Hprec_emax.
Definition b64_mult := Bmult _ _ Hprec Hprec_emax.
Definition b64_div := Bdiv _ _ Hprec Hprec_emax.
Definition b64_sqrt := Bsqrt _ _ Hprec Hprec_emax.

Definition b64_of_bits : Z -> binary64 := binary_float_of_bits 52 11 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b64 : binary64 -> Z := bits_of_binary_float 52 11.

End B64_Bits.