#******************************************************************************
#*
#* Copyright (c) 2004 Freescale Semiconductor, Inc
#* All rights reserved.
#*
#* Redistribution and use in source and binary forms, with or without
#* modification, are permitted provided that the following conditions are met:
#*     * Redistributions of source code must retain the above copyright
#*       notice, this list of conditions and the following disclaimer.
#*     * Redistributions in binary form must reproduce the above copyright
#*       notice, this list of conditions and the following disclaimer in the
#*       documentation and/or other materials provided with the distribution.
#*     * Neither the name of Freescale Semiconductor nor the
#*       names of its contributors may be used to endorse or promote products
#*       derived from this software without specific prior written permission.
#*
#* THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
#* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#* DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
#* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#*
#*  Functions:    ceil, floor, 
#*                __trunc_ceil_floor, __round_ceil_floor, 
#*                __common_ceil_floor    
#*
#*  Description:  implements CEIL() and FLOOR() functions
#*                from MATH library (libmath)
#*
#*  Notes:        none
#*
#******************************************************************************

#include <sysdep.h>

        .file    "s_ceil.S"

/* Symbolic names for the 32 general-purpose registers.  Each rN expands to
   the bare register number N, which is what the instructions below take as
   a register operand. */
#define r0 0
#define r1 1
#define r2 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31

/* Condition-register bit indices, for instructions that take a single CR bit
   (isel below).  The value is 4 * field + {lt = 0, gt = 1, eq = 2, so = 3}.
   Only fields cr0, cr1, cr5, cr6 and cr7 are named; of these, the code in
   this file only references cr0_eq. */
#define cr0_lt 0
#define cr0_gt 1
#define cr0_eq 2
#define cr0_so 3
#define cr1_lt 4
#define cr1_gt 5
#define cr1_eq 6
#define cr1_so 7
#define cr5_lt 20
#define cr5_gt 21
#define cr5_eq 22
#define cr5_so 23
#define cr6_lt 24
#define cr6_gt 25
#define cr6_eq 26
#define cr6_so 27
#define cr7_lt 28
#define cr7_gt 29
#define cr7_eq 30
#define cr7_so 31

        /*ARGUMENTS AND RESULT COMPONENTS */
        /* The double argument arrives as a register pair: a_hi (r3) holds the
           sign bit, the 11-bit exponent and the top 20 fraction bits; a_lo (r4)
           holds the low 32 fraction bits. */
#define a_hi r3
#define a_lo r4
                
/* The result is built in the same register pair, so writing res_hi/res_lo
   overwrites the argument. */
#define res_hi a_hi
#define res_lo a_lo
                
/* Unpacked argument.  frac_a_hi is a separate scratch register (r6) while
   frac_a_lo is the argument's low word itself. */
#define frac_a_hi r6
#define frac_a_lo a_lo
#define exp_a r7
#define sign_a r8
                
/* Unpacked result.  These are aliases, not new registers: frac_res_hi is r3,
   frac_res_lo is r4, exp_res is exp_a (r7) and sign_res is sign_a (r8). */
#define frac_res_hi res_hi
#define frac_res_lo res_lo
#define exp_res exp_a
#define sign_res sign_a

/* round_mode: 0 = truncate the fraction, -1 = bump the magnitude to the next
   integer when any fraction bit is set.
   rsh: shift count used to build the fraction mask (same register as temp4). */
#define round_mode r5
#define rsh r12

/* Left-shift that moves an exponent value into its field in the high word. */
#define exp_shift 20
        
/* Scratch registers.  temp is r0, which some instructions read as the
   constant 0 when it appears in the rA operand slot, so take care before
   using it as a source.  temp3 and temp4 are not referenced by the code in
   this file; temp4 shares r12 with rsh. */
#define temp r0
#define temp1 r9
#define temp2 r10
#define temp3 r11
#define temp4 r12
                
        .section    ".text"

/****************************************
 *  fast implementation of CEIL         * 
 ****************************************/
 /* __ceil: ceil() of the double passed in a_hi:a_lo; the result is returned
    in the same register pair.
    Selects round_mode = -1 and jumps to the shared code, which keeps -1 when
    the sign bit is clear and flips it to 0 when the sign bit is set.  So:
      - positive arguments are rounded up (to +INF),
      - negative arguments are truncated (to ZERO). */
        .align  2
ENTRY(__ceil)
        li      round_mode, -1
        b       L(__common_ceil_floor)
END(__ceil)

/* Additional names for the same code; weak_alias/strong_alias are macros
   defined outside this file.  The long double names are only emitted when
   NO_LONG_DOUBLE is defined. */
weak_alias (__ceil, ceil)
#ifdef NO_LONG_DOUBLE
strong_alias (__ceil, __ceill)
weak_alias (__ceil, ceill)
#endif

/****************************************
 *  fast implementation of FLOOR        * 
 ****************************************/
 /* __floor: floor() of the double passed in a_hi:a_lo; the result is returned
    in the same register pair.
    Selects round_mode = 0 and jumps to the shared code, which keeps 0 when
    the sign bit is clear and flips it to -1 when the sign bit is set.  So:
      - positive arguments are truncated (to ZERO),
      - negative arguments are rounded down (to -INF). */
        .align  2
ENTRY(__floor)
        li      round_mode, 0
        b       L(__common_ceil_floor)
END(__floor)

/* Additional names for the same code; weak_alias/strong_alias are macros
   defined outside this file.  The long double names are only emitted when
   NO_LONG_DOUBLE is defined. */
weak_alias (__floor, floor)
#ifdef NO_LONG_DOUBLE
strong_alias (__floor, __floorl)
weak_alias (__floor, floorl)
#endif

/****************************************
 *  fast implementation of ROUND        * 
 ****************************************/
 /* Rounds every argument away from zero: with round_mode forced to -1 for
    both signs, any non-zero fraction bumps the magnitude to the next integer
    (positive values toward +INF, negative values toward -INF).
    NOTE: this is not round-to-nearest.
    Local label only; nothing in the visible source branches here. */
    .align  2
L(__round_ceil_floor):
        /* extract the sign here because the entry point below skips the
           sign handling of __common_ceil_floor */
        andis.  sign_a, a_hi, 0x8000
        li      round_mode, -1
        b       L(round_entry_to_common_ceil_floor)

/****************************************
 *  fast implementation of TRUNC        * 
 ****************************************/
 /* Truncates every argument toward zero: round_mode is forced to 0 for both
    signs, so the fraction bits are simply cleared.
    Local label only; nothing in the visible source branches here. */
    .align  2
L(__trunc_ceil_floor):
        /* extract the sign here because the entry point below skips the
           sign handling of __common_ceil_floor */
        andis.  sign_a, a_hi, 0x8000
        li      round_mode, 0
        b       L(trunc_entry_to_common_ceil_floor)

/****************************************
 *  fast implementation of COMMON       * 
 ****************************************/
 /* Shared body of ceil/floor.
    In:  a_hi:a_lo  = argument, round_mode = -1 (ceil) or 0 (floor).
    Out: res_hi:res_lo (the same registers as the argument).
    Step 1 turns the ceil/floor request into a sign-independent mode
    (0 = truncate magnitude, -1 = round magnitude up).
    Step 2 unpacks the exponent and dispatches on its range. */
        .align  2
L(__common_ceil_floor):
        /* sign_a = sign bit only; cr0.eq is set when the sign bit is clear */
        andis.  sign_a, a_hi, 0x8000
        not     temp, round_mode
        /* sign clear: keep round_mode; sign set: use its complement
           (ceil: -1 -> 0, floor: 0 -> -1) */
        isel    round_mode, round_mode, temp, cr0_eq
/* Entry points for callers that have already set sign_a and the final
   round_mode. */
L(trunc_entry_to_common_ceil_floor):
L(round_entry_to_common_ceil_floor):
        /* exp_a = biased exponent; cr0.eq is set when the field is zero
           (argument is zero or denormal) */
        rlwinm. exp_a, a_hi, (32-exp_shift), 21, 31
        /* frac_a_hi = top 20 fraction bits, sign and exponent cleared */
        rlwinm  frac_a_hi, a_hi, 0, 12, 31
        cmpi    cr6, 0, exp_a, (1023+52)        #covers both big numbers and NANs/INFs
        /* cr1: |a| < 1 when exp_a is below the bias */
        cmpi    cr1, 0, exp_a, (1023+0)
        /* cr5: up to 1023+20 every fraction bit of the low word is fractional */
        cmpi    cr5, 0, exp_a, (1023+20)
        
        beq-    cr0, L(zero_denorm_ceil_floor)
        /* exp_a >= 1023+52: return the argument unchanged in r3:r4 */
        bgelr-  cr6
        blt-    cr1, L(tiny_ceil_floor)
        ble+    cr5, L(small_round_ceil_floor)
        /* otherwise fall through with 1023+21 <= exp_a <= 1023+51 */

/* 1023+21 <= exp_a <= 1023+51: the high word holds only integer bits, and the
   fractional bits are the low (32 - rsh) bits of the low word.
   Rounding up is done as (frac + mask) & ~mask on the 64-bit fraction. */
L(big_round_ceil_floor):
        li      temp1, -1
        addi    rsh, exp_a, -1023-20
        /* cr1.eq is set when the mode is "truncate" */
        cmpi    cr1, 0, round_mode, 0
        srw     temp2, temp1, rsh               #mask_lo=(-1)>>(exp-1023-20)
        oris    frac_a_hi, frac_a_hi, 0x0010    #add leading 1.0
        and.    temp, frac_a_lo, temp2          #check if remainder to trunc/round is not zero
        
        /* truncate mode, or already an integer: just clear the fraction bits */
        beq-    cr1, L(big_trunc_ceil_floor)
        beq-    cr0, L(big_trunc_ceil_floor)	

        /* add the mask to the low word and propagate the carry into the
           high word (frac_res_hi is r3, so this overwrites a_hi) */
        addc    frac_res_lo, frac_a_lo, temp2
        addze   frac_res_hi, frac_a_hi
        addi    temp1, exp_a, +1                # candidate exponent if the fraction carried out; at most 1023+52 on this path
        /* cr0.eq is set when the fraction did not carry out past the leading 1 */
        andis.  temp, frac_res_hi, 0x0020
        /* andc has no record form here, so cr0 from andis. stays valid for isel */
        andc    frac_res_lo, frac_res_lo, temp2
        /*if rounding occurred then frac=0x20_0000, i.e. it is ZERO -> no need to shift it back !!! */
        isel    exp_res, exp_a, temp1, cr0_eq
        /*pack & return*/        
        or      res_hi, frac_res_hi, sign_res
        /* inserting the exponent field also overwrites the leading-1 bit and
           the carry bit, leaving only the 20 stored fraction bits */
        rlwimi  res_hi, exp_res, exp_shift, 1, 11
        blr

/* Truncation for 1023+21 <= exp_a <= 1023+51: clear the fractional bits of the
   low word (temp2 = mask built by the caller path) and repack. */
L(big_trunc_ceil_floor):        
        /* exp_res is the same register as exp_a; inserting the exponent field
           overwrites the leading-1 bit added earlier */
        rlwimi  frac_a_hi, exp_res, exp_shift, 1, 11
        andc    frac_res_lo, frac_a_lo, temp2
        or      res_hi, frac_a_hi, sign_res
        blr

/* 1023 <= exp_a <= 1023+20: the whole low word is fractional, and so are the
   bits of the high word selected by temp2.  The shift count ranges from 12 to
   32; a count of 32 makes srw produce 0, i.e. no fractional bits in the high
   word.  Rounding up is (frac + mask) & ~mask with mask = temp2:0xffffffff. */
L(small_round_ceil_floor):
        li      temp1, -1
        addi    rsh, exp_a, -1023+12
        /* cr1.eq is set when the mode is "truncate" */
        cmpi    cr1, 0, round_mode, 0
        srw     temp2, temp1, rsh               #mask_hi=0x000f_ffff>>(exp-1023)=(-1)>>(exp-1023+12)
        oris    frac_a_hi, frac_a_hi, 0x0010    #add leading 1.0
        and     temp, frac_a_hi, temp2          
        or.     temp, frac_a_lo, temp           #check if remainder to trunc/round is not zero
        
        /* truncate mode, or already an integer: just clear the fraction bits */
        beq-    cr1, L(small_trunc_ceil_floor)
        beq-    cr0, L(small_trunc_ceil_floor)
        
        /* only the carry matters here (set exactly when frac_a_lo != 0);
           the low word itself is zeroed below */
        addic   frac_res_lo, frac_a_lo, -1      #add 0xffff_ffff i.e. lower part of round mask
        adde    frac_res_hi, frac_a_hi, temp2   #add higher part of round mask
        addi    temp1, exp_a, +1
        andc    frac_res_hi, frac_res_hi, temp2
        li      frac_res_lo, 0
        /* cr0.eq is set when the fraction did not carry out past the leading 1 */
        andis.  temp, frac_res_hi, 0x0020
        /*if rounding occurred then frac=0x20_0000, i.e. it is ZERO -> no need to shift it back !!! */
        isel    exp_res, exp_a, temp1, cr0_eq   # exp+1 on carry-out; at most 1023+21 on this path
        /*pack & return*/        
        or      res_hi, frac_res_hi, sign_res
        /* inserting the exponent field also overwrites the leading-1 bit and
           the carry bit */
        rlwimi  res_hi, exp_res, exp_shift, 1, 11
        blr

/* Truncation for 1023 <= exp_a <= 1023+20: clear the fractional bits of the
   high word (temp2 = mask built by the caller path), zero the low word and
   repack. */
L(small_trunc_ceil_floor):        
        andc    frac_res_hi, frac_a_hi, temp2
        li      frac_res_lo, 0
        /*pack & return*/        
        /* exp_res is the same register as exp_a; inserting the exponent field
           overwrites the leading-1 bit added earlier */
        rlwimi  frac_res_hi, exp_res, exp_shift, 1, 11
        or      res_hi, frac_res_hi, sign_res
        blr

/* 0 < exp_a < 1023 (normal number with |a| < 1): setting the leading-1 bit
   guarantees the "non-zero" test below succeeds, then fall through. */
L(tiny_ceil_floor):
        oris    frac_a_hi, frac_a_hi, 0x0010    #add leading 1.0
L(zero_denorm_ceil_floor): # round to ZERO or ONE (with correct sign) depending on round mode
        /* temp2 != 0 when the argument is non-zero; this must read a_lo
           before res_lo (the same register) is cleared below */
        or      temp2, frac_a_hi, frac_a_lo
        lis     temp1, 1023 << 4                # shifted exp for 1.0 = 1.0*2^0 = 1.0*2^(1023-bias)
        /* non-zero only when the argument is non-zero AND round_mode is -1 */
        and.    temp, temp2, round_mode
        li      res_lo, 0
        /* the literal 0 in the rA slot selects the constant zero:
           exp_res = 0 (result +-0.0) when cr0.eq, else the high word of 1.0 */
        isel    exp_res, 0, temp1, cr0_eq
        or      res_hi, sign_res, exp_res
        blr

         /*
         c=fmod(a,b);
         c - remainder a/b, 
         with:
         sign(c) = sign(a)
         a = b * integer + c
         
         simple solution: c = a - b*trunc(a/b) -> it needs stack
         complex solution:
         case(exp_a-exp_b+1023)
         >=1023+52 -> return +-ZERO (i.e. a/b==integer && c==0)
         <1023 -> return A (i.e. integer=0 && c==a)
         other -> try simple solution
                  or
                  integer = shorter divide cycle for integer part bits only, c = a-b*integer    
         
         c=modf(a,*b)
         c - fractional part of a,
         *b - integer part of a,
         with:
         sign(a)=sign(*b)=sign(c)
         a = (*b) + c
         
         simple solution: *b = trunc(a), c = a-(*b) -> it needs stack
         complex solution:
         case(exp_a)
         >=1023+52 -> c=+-ZERO(depends on sign(a)), *b=a 
         <1023 -> c=a, *b=+ZERO
         other -> try simple solution
                  or
                  rework round to extract both integer & truncated part
                  (current round simply masks truncated part)
         */
         
