#******************************************************************************
#* ====================================================
#* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
#*
#* Developed at SunPro, a Sun Microsystems, Inc. business.
#* Permission to use, copy, modify, and distribute this
#* software is freely granted, provided that this notice 
#* is preserved.
#* ====================================================
#*
#* Copyright (c) 2004 Freescale Semiconductor, Inc
#* All rights reserved.
#*
#* Redistribution and use in source and binary forms, with or without
#* modification, are permitted provided that the following conditions are met:
#*     * Redistributions of source code must retain the above copyright
#*       notice, this list of conditions and the following disclaimer.
#*     * Redistributions in binary form must reproduce the above copyright
#*       notice, this list of conditions and the following disclaimer in the
#*       documentation and/or other materials provided with the distribution.
#*     * Neither the name of Freescale Semiconductor nor the
#*       names of its contributors may be used to endorse or promote products
#*       derived from this software without specific prior written permission.
#*
#* THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
#* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#* DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
#* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#*
#*  Functions:    atan2
#*
#*  Description:  implements floating point double precision
#*                software emulation for atan2() functions
#*                from MATH library (libmath)
#*
#*  Notes:        none
#*
#******************************************************************************

#include <sysdep.h>

        .file	"e_atan2.S"

/*
 * Symbolic general-purpose register names: rN expands to the bare
 * register number N, which is what the assembler operands below use.
 */
#define r0 0
#define r1 1
#define r2 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31

/*
 * Condition-register bit numbers.  Field crN owns the four consecutive
 * bits 4*N+0 .. 4*N+3, named ltN, gtN, eqN and soN respectively.  They
 * are used as operands of CR-logical instructions (see the crand in the
 * routine below).
 */
#define lt0 0
#define gt0 1
#define eq0 2
#define so0 3
#define lt1 4
#define gt1 5
#define eq1 6
#define so1 7
#define lt2 8
#define gt2 9
#define eq2 10
#define so2 11
#define lt3 12
#define gt3 13
#define eq3 14
#define so3 15
#define lt4 16
#define gt4 17
#define eq4 18
#define so4 19
#define lt5 20
#define gt5 21
#define eq5 22
#define so5 23
#define lt6 24
#define gt6 25
#define eq6 26
#define so6 27
#define lt7 28
#define gt7 29
#define eq7 30
#define so7 31

/*
 * External double-precision arithmetic helpers.  In the code visible in
 * this file only __divdf3 and __subdf3 are actually called (soft-double
 * variant); __muldf3 and __adddf3 are declared but not referenced.
 */
	.globl	__muldf3
	.globl	__divdf3
	.globl	__adddf3
	.globl	__subdf3

/*******************************************
*	fast implementation of atan2       *
*******************************************/
	.align  2

/*
 * __ieee754_atan2: two-argument arctangent on IEEE-754 doubles that are
 * passed and returned in integer register pairs.
 *
 *   In:   r3:r4 = first operand,  high:low word (named x, hx:lx here)
 *         r5:r6 = second operand, high:low word (named y, hy:ly here)
 *   Out:  r3:r4 = result, high:low word
 *
 * Local naming: the ordinary path evaluates atan(|x/y|), i.e. the FIRST
 * operand is the numerator, and then fixes the quadrant from the two
 * sign bits.
 */
ENTRY(__ieee754_atan2)
#ifdef _SOFT_DOUBLE
/* Soft-double variant: arithmetic is done by __divdf3 / __subdf3. */
#define k r0	/* exponent gap between |x| and |y| (r0 is reused after LR is saved) */
#define hx r3	/* high word of x */
#define lx r4	/* low word of x */
#define hy r5	/* high word of y */
#define ly r6	/* low word of y */
#define m r10	/* quadrant selector built from the two sign bits */
#define ix r11	/* hx with the sign bit cleared */
#define iy r12	/* hy with the sign bit cleared */

	clrlwi	ix,	hx,	1		# |x|
	clrlwi	iy,	hy,	1		# |y|

	lis	r7,	0x7ff0			# high word of +Inf

	or.	r8,	ix,	lx		# compare x to zero (cr0.eq <=> x == +-0)
	or	r9,	iy,	ly		# r9 == 0 <=> y == +-0 (tested later)

	cmpw	cr5,	ix,	r7		# compare x to NaN and Inf
	cmpw	cr6,	iy,	r7		# compare y to NaN and Inf
	cmpwi	cr1,	lx,	0		# low mantissa word of x zero or not
	cmpwi	cr7,	ly,	0		# low mantissa word of y zero or not

	/*
	 * NaN screening.  A high word above 0x7ff00000 is always a NaN.  A
	 * high word equal to 0x7ff00000 is Inf only when the low word is
	 * zero; any non-zero low word makes it a NaN.  The low word is an
	 * unsigned mantissa fragment, so it must be tested for "not equal
	 * to zero": a signed greater-than test would miss low words with
	 * bit 31 set (e.g. 0x7ff00000:80000000) and treat that NaN as Inf.
	 */
	bgt-	cr5,	L(atan2_return_nan)
	bgt-	cr6,	L(atan2_return_nan)
	blt+	cr5,	L(atan2_check_y_nan)
	bne-	cr1,	L(atan2_return_nan)
L(atan2_check_y_nan):
	blt+	cr6,	L(atan2_check_x_zero)
	bne-	cr7,	L(atan2_return_nan)

/*
 * Classification of the special operands.  On entry cr0 still holds the
 * result of the earlier "or." on ix|lx, cr5/cr6 the comparisons of ix/iy
 * against 0x7ff00000, and r9 holds iy|ly.
 */
L(atan2_check_x_zero):
	srwi	m,	hx,	31		# bit 0 of the selector = sign of x
	rlwimi	m,	hy,	2,	30,30	# bit 1 of the selector = sign of y

	cmpwi	cr1,	r9,	0		# compare y to zero

	beq-	cr0,	L(atan2_x_zero)
	beq-	cr1,	L(atan2_y_zero_or_x_inf)

	xoris	r7,	hy,	0x3ff0		# compare y to one
	or	r7,	r7,	ly		# r7 == 0 <=> y is exactly +1.0
	cmpwi	cr7,	r7,	0

	beq-	cr6,	L(atan2_y_inf)
	beq-	cr5,	L(atan2_y_zero_or_x_inf)

	bne+	cr7,	1f
	b	atan@plt			# y == 1.0: tail-call atan on x (still in r3:r4)

/*
 * Ordinary operands (soft-double variant).  A 16-byte frame is created,
 * LR is stored at 20(r1) (4 bytes above the new frame) and the quadrant
 * selector at 8(r1), because the helper calls below may clobber r10.
 */
1:
	stwu	r1,	-16(r1)			# make stack frame
	mflr	r0
	stw	r0,	20(r1)

	stw	m,	8(r1)

	/* k = (ix - iy) >> 20 (arithmetic shift), overwriting r0 now that LR is stored */
	subf	k,	iy,	ix
	srawi	k,	k,	20
	cmpwi	cr1,	hy,	0
	cmpwi	cr5,	k,	60
	cmpwi	cr6,	k,	-60
	crand	eq7,	lt1,	lt6		# cr7.eq = (hy < 0) and (k < -60)

	bgt-	cr5,	L(atan2_big_differ)	# k > 60: the result magnitude is taken as Pi/2
	beq-	cr7,	L(atan2_small_differ)	# y negative and k < -60: z is taken as 0

	bl	__divdf3@plt			# x/y
	clrlwi	r3,	r3,	1		# |x/y|
	bl	atan@plt				# atan(|x/y|)

/* r3:r4 = z.  Reload the selector and apply the quadrant fix-up. */
L(atan2_check_m):
	lwz	m,	8(r1)

	cmpwi	cr0,	m,	0
	cmpwi	cr1,	m,	1
	cmpwi	cr5,	m,	2

	beq-	cr0,	L(atan2_exit)		# both signs clear: return z
	beq-	cr1,	L(atan2_invert_result)	# only x negative: return -z
	beq-	cr5,	L(atan2_pos_neg)	# only y negative
	b	L(atan2_neg_neg)		# both negative

/* Common epilogue for every path that created the frame above. */
L(atan2_exit):
	lwz	r0,	20(r1)
	mtlr	r0
	addi	r1,	r1,	+16

	blr

/* |x| dominates |y| by more than 60 binary orders: z = 0x3ff921fb54442d18 (Pi/2). */
L(atan2_big_differ):
	lis	r3,	0x3ff9
	ori	r3, r3,	0x21fb
	lis	r4,	0x5444
	ori	r4, r4,	0x2d18
	b	L(atan2_check_m)

/* z = +0.0 */
L(atan2_small_differ):
	lis	r3,	0
	li	r4,	0
	b	L(atan2_check_m)

/* Flip the sign bit of the result: return -z. */
L(atan2_invert_result):
	xoris	r3,	r3,	0x8000
	b	L(atan2_exit)

/*
 * Return c1 - (z - c0), where c0 = 0x3ca1a62633145c07 (the low-order
 * correction term of Pi) and c1 = 0x400921fb54442d18 (Pi).
 */
L(atan2_pos_neg):
	lis	r5,	0x3ca1
	ori	r5, r5,	0xa626
	lis	r6,	0x3314
	ori	r6, r6,	0x5c07
	bl	__subdf3@plt			# r3:r4 = z - c0

	mr	r5,	r3			# move the difference to the second operand pair
	mr	r6,	r4
	lis	r3,	0x4009
	ori	r3, r3,	0x21fb
	lis	r4,	0x5444
	ori	r4, r4,	0x2d18
	bl	__subdf3@plt			# r3:r4 = c1 - (z - c0)

	b	L(atan2_exit)

/* Return (z - c0) - c1 with the same two constants as above. */
L(atan2_neg_neg):
	lis	r5,	0x3ca1
	ori	r5, r5,	0xa626
	lis	r6,	0x3314
	ori	r6, r6,	0x5c07
	bl	__subdf3@plt			# r3:r4 = z - c0

	lis	r5,	0x4009
	ori	r5, r5,	0x21fb
	lis	r6,	0x5444
	ori	r6, r6,	0x2d18
	bl	__subdf3@plt			# r3:r4 = (z - c0) - c1

	b	L(atan2_exit)

/*
 * Special-operand tables.  None of these paths has created a stack
 * frame, so they end in a plain blr.  The selector holds the sign of x
 * in bit 0 and the sign of y in bit 1, so it is always 0..3 and the
 * fall-through after each four-way dispatch is not expected to be taken.
 */

/* x == +-0 */
L(atan2_x_zero):
	cmpwi	cr0,	m,	0
	cmpwi	cr1,	m,	1
	cmpwi	cr5,	m,	2
	cmpwi	cr6,	m,	3

	beq-	cr0,	L(atan2_return_pzero)	# atan2(+-0, +anything) = +-0
	beq-	cr1,	L(atan2_return_nzero)
	beq-	cr5,	L(atan2_return_ppi)	# atan2(+-0, -anything) = +-Pi
	beq-	cr6,	L(atan2_return_npi)

/* y == +-0 with x non-zero, or x == +-Inf with y finite: sign of x picks +-Pi/2 */
L(atan2_y_zero_or_x_inf):
	cmpwi	cr0,	hx,	0
	blt-	cr0,	L(atan2_return_npio2)	# atan2(+-anything, 0) = +-Pi/2
	b	L(atan2_return_ppio2)

/* y == +-Inf; cr5.eq (set at entry) still tells whether x is Inf as well */
L(atan2_y_inf):
	cmpwi	cr0,	m,	0
	cmpwi	cr1,	m,	1
	cmpwi	cr6,	m,	2
	cmpwi	cr7,	m,	3

	beq-	cr5,	L(atan2_y_inf_and_x_inf)

	beq-	cr0,	L(atan2_return_pzero)	# atan2(+-finite, +Inf) = +-0
	beq-	cr1,	L(atan2_return_nzero)
	beq-	cr6,	L(atan2_return_ppi)	# atan2(+-finite, -Inf) = +-Pi
	beq-	cr7,	L(atan2_return_npi)

/* both operands infinite: +-Pi/4 for y = +Inf, +-3Pi/4 for y = -Inf */
L(atan2_y_inf_and_x_inf):
	beq-	cr0,	L(atan2_return_ppio4)
	beq-	cr1,	L(atan2_return_npio4)
	beq-	cr6,	L(atan2_return_p3pio4)
	beq-	cr7,	L(atan2_return_n3pio4)

/*
 * Constant results.  Pi/4, Pi/2 and Pi share the mantissa
 * 0x921fb54442d18 and differ only in the upper 16 bits (sign and
 * exponent), so each stub loads those bits and joins atan2_return_pi,
 * which fills in the remainder of r3 and all of r4.
 */
L(atan2_return_ppio4):
	lis	r3,	0x3fe9			# +Pi/4
	b	L(atan2_return_pi)

L(atan2_return_npio4):
	lis	r3,	0xbfe9			# -Pi/4
	b	L(atan2_return_pi)

L(atan2_return_ppio2):
	lis	r3,	0x3ff9			# +Pi/2
	b	L(atan2_return_pi)

L(atan2_return_npio2):
	lis	r3,	0xbff9			# -Pi/2
	b	L(atan2_return_pi)

/* +3Pi/4 = 0x4002d97c7f3321d2; different mantissa, so loaded in full */
L(atan2_return_p3pio4):
	lis	r3,	0x4002
	ori	r3, r3,	0xd97c
	lis	r4,	0x7f33
	ori	r4, r4,	0x21d2
	blr

/* -3Pi/4 = 0xc002d97c7f3321d2 */
L(atan2_return_n3pio4):
	lis	r3,	0xc002
	ori	r3, r3,	0xd97c
	lis	r4,	0x7f33
	ori	r4, r4,	0x21d2
	blr

L(atan2_return_ppi):
	lis	r3,	0x4009			# +Pi
	b	L(atan2_return_pi)

L(atan2_return_npi):
	lis	r3,	0xc009			# -Pi
	b	L(atan2_return_pi)

/* shared tail: complete the 0x....21fb:54442d18 part of the constant */
L(atan2_return_pi):
	ori	r3, r3,	0x21fb
	lis	r4,	0x5444
	ori	r4, r4,	0x2d18
	blr

/* +0.0 */
L(atan2_return_pzero):
	lis	r3,	0
	li	r4,	0
	blr

/* -0.0 */
L(atan2_return_nzero):
	lis	r3,	0x8000
	li	r4,	0
	blr

/* NaN result: bit pattern 0x7fff0000:00000000 */
L(atan2_return_nan):
	lis	r3,	0x7fff
	li	r4,	0
	blr
#else
/*
 * SPE variant: the division and the final subtractions use the embedded
 * double-precision instructions (efddiv / efdsub) on 64-bit registers.
 * Operands arrive as in the other variant: x in r3:r4, y in r5:r6
 * (high:low word); the result is returned in r3:r4.
 */
#define hx r3	/* high word of x */
#define lx r4	/* low word of x */
#define hy r5	/* high word of y */
#define ly r6	/* low word of y */
#define k r9	/* exponent correction / exponent gap */
#define m r10	/* quadrant selector built from the two sign bits */
#define ix r11	/* hx with the sign bit cleared */
#define iy r12	/* hy with the sign bit cleared */

	clrlwi	ix,	hx,	1		# |x|
	clrlwi	iy,	hy,	1		# |y|

	lis	r7,	0x7ff0			# high word of +Inf

	or.	r8,	ix,	lx		# compare x to zero (cr0.eq <=> x == +-0)
	or	r9,	iy,	ly		# r9 == 0 <=> y == +-0 (tested later)

	cmpw	cr5,	ix,	r7		# compare x to NaN and Inf
	cmpw	cr6,	iy,	r7		# compare y to NaN and Inf
	cmpwi	cr1,	lx,	0		# low mantissa word of x zero or not
	cmpwi	cr7,	ly,	0		# low mantissa word of y zero or not

	/*
	 * NaN screening.  A high word above 0x7ff00000 is always a NaN.  A
	 * high word equal to 0x7ff00000 is Inf only when the low word is
	 * zero; any non-zero low word makes it a NaN.  The low word is an
	 * unsigned mantissa fragment, so it must be tested for "not equal
	 * to zero": a signed greater-than test would miss low words with
	 * bit 31 set (e.g. 0x7ff00000:80000000) and treat that NaN as Inf.
	 */
	bgt-	cr5,	L(atan2_return_nan)
	bgt-	cr6,	L(atan2_return_nan)
	blt+	cr5,	L(atan2_check_y_nan)
	bne-	cr1,	L(atan2_return_nan)
L(atan2_check_y_nan):
	blt+	cr6,	L(atan2_check_x_zero)
	bne-	cr7,	L(atan2_return_nan)

/*
 * Classification of the special operands.  On entry cr0 still holds the
 * result of the earlier "or." on ix|lx, cr5/cr6 the comparisons of ix/iy
 * against 0x7ff00000, and r9 holds iy|ly.
 */
L(atan2_check_x_zero):
	srwi	m,	hx,	31		# bit 0 of the selector = sign of x
	rlwimi	m,	hy,	2,	30,30	# bit 1 of the selector = sign of y

	cmpwi	cr1,	r9,	0		# compare y to zero

	beq-	cr0,	L(atan2_x_zero)
	beq-	cr1,	L(atan2_y_zero_or_x_inf)

	xoris	r7,	hy,	0x3ff0		# compare y to one
	or	r7,	r7,	ly		# r7 == 0 <=> y is exactly +1.0
	cmpwi	cr7,	r7,	0

	beq-	cr6,	L(atan2_y_inf)
	beq-	cr5,	L(atan2_y_zero_or_x_inf)

	bne+	cr7,	1f
	b	atan@plt			# y == 1.0: tail-call atan on x (still in r3:r4)

/*
 * Ordinary operands (SPE variant).  Both operands are finite and
 * non-zero here.  k (r9, no longer needed as iy|ly) starts at 0 and
 * collects exponent corrections.  cr5/cr6 record whether ix/iy lie below
 * 0x00100000, i.e. whether the biased exponent field is zero (subnormal).
 */
1:
	lis	r7,	0x0010
	li	k,	0
	cmpw	cr5,	ix,	r7
	cmpw	cr6,	iy,	r7

/*
 * Normalise a subnormal x: shift the 52-bit mantissa in ix:lx left by S
 * bits (S ends up in r7) until its leading one reaches bit position 20 of
 * the high word, drop that leading one, and store the temporary exponent
 * 55 - S in the exponent field.  k is lowered by 54 to compensate, so the
 * net exponent is 1 - S.
 */
L(atan2_check_x):
	bge+	cr5,	L(atan2_check_y)	# x is normal: nothing to do

	cmpwi	cr0,	ix,	0
	bne+	cr0,	L(atan2_denorm_x_small_shift)	# mantissa bits present in the high word

	cntlzw	r7,	lx			# r7 = N = number of leading 0s
	cmpwi	cr5,	r7,	11
	blt+	cr5,	L(atan2_denorm_x_middle_shift)

/* high word empty, N >= 11: everything that is left fits in the high word */
L(atan2_denorm_x_big_shift):
	addi	r8,	r7,	-11
	slw	ix,	lx,	r8		# leading one moved to bit position 20
	clrlwi	ix,	ix,	12		# remove leading one
	li	lx,	0
	addi	r7,	r7,	+21		# S = N + 21
	b	L(atan2_denorm_x_calc_exp)

/* high word empty, N < 11: the mantissa straddles both words after the shift */
L(atan2_denorm_x_middle_shift):
	li	r8,	11
	sub	r8,	r8,	r7		# r8 = 11 - N
	srw	ix,	lx,	r8		# upper part of the mantissa into the high word
	clrlwi	ix,	ix,	12		# remove leading one
	addi	r7,	r7,	+21		# S = N + 21 (below 32 in this case)
	slw	lx,	lx,	r7
	b	L(atan2_denorm_x_calc_exp)

/* high word non-empty: a 64-bit left shift by S = N - 11 across ix:lx */
L(atan2_denorm_x_small_shift):
	cntlzw	r7,	ix			# r7 = N = number of leading 0s
	addi	r7,	r7,	-11		# discard zeroes in exp
	slw	ix,	ix,	r7
	li	r8,	32
	sub	r8,	r8,	r7		# r8 = 32 - S
	srw	r8,	lx,	r8		# bits of lx that cross into the high word
	or	ix,	ix,	r8
	clrlwi	ix,	ix,	12		# remove leading one
	slw	lx,	lx,	r7
L(atan2_denorm_x_calc_exp):
	li	r8,	+55			# 54!!!
	sub	r8,	r8,	r7		# calculate exp
	slwi	r8,	r8,	20
	or	ix,	ix,	r8		# insert the temporary exponent 55 - S

	addi	k,	k,	-54		# compensate for the temporary exponent

/*
 * Normalise a subnormal y.  Same scheme as for x: shift the mantissa in
 * iy:ly left by S bits (S in r7) until the leading one reaches bit
 * position 20 of the high word, drop it, and store the temporary exponent
 * 55 - S.  Because y is the divisor, k is RAISED by 54 to compensate.
 * cr6 still holds the comparison of iy against 0x00100000.
 */
L(atan2_check_y):
	bge+	cr6,	L(atan2_perform_division)	# y is normal: nothing to do

	cmpwi	cr0,	iy,	0
	bne+	cr0,	L(atan2_denorm_y_small_shift)	# mantissa bits present in the high word

	cntlzw	r7,	ly			# r7 = N = number of leading 0s
	cmpwi	cr5,	r7,	11
	blt+	cr5,	L(atan2_denorm_y_middle_shift)

/* high word empty, N >= 11: everything that is left fits in the high word */
L(atan2_denorm_y_big_shift):
	addi	r8,	r7,	-11
	slw	iy,	ly,	r8		# leading one moved to bit position 20
	clrlwi	iy,	iy,	12		# remove leading one
	li	ly,	0
	addi	r7,	r7,	+21		# S = N + 21
	b	L(atan2_denorm_y_calc_exp)

/* high word empty, N < 11: the mantissa straddles both words after the shift */
L(atan2_denorm_y_middle_shift):
	li	r8,	11
	sub	r8,	r8,	r7		# r8 = 11 - N
	srw	iy,	ly,	r8		# upper part of the mantissa into the high word
	clrlwi	iy,	iy,	12		# remove leading one
	addi	r7,	r7,	+21		# S = N + 21 (below 32 in this case)
	slw	ly,	ly,	r7
	b	L(atan2_denorm_y_calc_exp)

/* high word non-empty: a 64-bit left shift by S = N - 11 across iy:ly */
L(atan2_denorm_y_small_shift):
	cntlzw	r7,	iy			# r7 = N = number of leading 0s
	addi	r7,	r7,	-11		# discard zeroes in exp
	slw	iy,	iy,	r7
	li	r8,	32
	sub	r8,	r8,	r7		# r8 = 32 - S
	srw	r8,	ly,	r8		# bits of ly that cross into the high word
	or	iy,	iy,	r8
	clrlwi	iy,	iy,	12		# remove leading one
	slw	ly,	ly,	r7
L(atan2_denorm_y_calc_exp):
	li	r8,	+55			# 54!!!
	sub	r8,	r8,	r7		# calculate exp
	slwi	r8,	r8,	20
	or	iy,	iy,	r8		# insert the temporary exponent 55 - S

	addi	k,	k,	+54		# compensate for the temporary exponent

/*
 * Divide the two mantissas with both exponents forced to 0x3ff, so the
 * hardware divide only ever sees values in [1, 2); the true exponent gap
 * is tracked separately in k and put back into the quotient afterwards.
 */
L(atan2_perform_division):
	rlwinm	r7,	ix,	12,20,31	# exp of x in r7
	rlwinm	r8,	iy,	12,20,31	# exp of y in r8
	sub	r7,	r7,	r8		# r7 = exp(x) - exp(y)
	add	k,	k,	r7		# k = k + (exp(x) - exp(y))

	cmpwi	cr1,	hy,	0
	cmpwi	cr5,	k,	54
	cmpwi	cr6,	k,	-54

	crand	eq7,	lt1,	lt6		# cr7.eq = (hy < 0) and (k < -54)

	bgt-	cr5,	L(atan2_big_differ)	# k > 54: the result magnitude is taken as Pi/2
	beq-	cr7,	L(atan2_small_differ)	# y negative and k < -54: z is taken as 0

	/* from here on hx and hy no longer carry the original signs; the selector does */
	clrlwi	hx,	ix,	12		# keep the 20 mantissa bits of the high word
	clrlwi	hy,	iy,	12

	oris	hx,	hx,	0x3ff0		# force exponent 0x3ff
	oris	hy,	hy,	0x3ff0

	evmergelo	r4,	r3,	r4	# merge x to 64 bit
	evmergelo	r6,	r5,	r6	# merge y to 64 bit

	efddiv		r4,	r4,	r6	# x/y
	evmergelohi	r3,	r4,	r4	# split result

	rlwinm	r8,	hx,	12,20,31	# exp of result in r8
	clrlwi	hx,	hx,	12		# mantissa bits of the quotient high word
	add.	r8,	r8,	k		# final biased exponent
	ble-	cr0,	L(atan2_scale_result)	# not positive: the quotient is subnormal

	slwi	r8,	r8,	20
	or	hx,	hx,	r8		# rebuild the high word of |x/y|

	b	L(atan2_calc_atan)

/*
 * The quotient needs a biased exponent E <= 0, so it is rebuilt as a
 * subnormal: the implicit one is made explicit in hx and the 53-bit
 * value hx:lx is shifted right by (-E) + 1 bits with a rounding
 * increment at the last dropped bit.  r8 = -E selects one of three shift
 * routines; above 52 nothing is left and the zero path is taken.
 *
 * NOTE(review): none of the rounding additions below propagates a carry
 * (the small-shift add into lx can wrap; the middle-shift merge uses
 * "or" rather than add) -- confirm whether that loss is acceptable.
 */
L(atan2_scale_result):
	neg	r8,	r8			# r8 = -E >= 0
	oris	hx,	hx,	0x0010		# make the implicit leading one explicit

	cmpwi	cr5,	r8,	52
	cmpwi	cr6,	r8,	32
	cmpwi	cr7,	r8,	20

	bgt-	cr5,	L(atan2_small_differ)
	bge-	cr6,	L(atan2_scale_result_big_shift)
	bge-	cr7,	L(atan2_scale_result_middle_shift)

/* r8 < 20: after the shift both words still hold mantissa bits */
L(atan2_scale_result_small_shift):
	li	r6,	1
	slw	r6,	r6,	r8		# weight of the last bit to be dropped
	addi	r8,	r8,	+1		# total shift count
	add	lx,	lx,	r6		# rounding
	srw	lx,	lx,	r8

	li	r7,	32
	sub	r7,	r7,	r8		# r7 = 32 - r8
	slw	r6,	hx,	r7		# bits moving from the high into the low word
	or	lx,	lx,	r6
	srw	hx,	hx,	r8

	b	L(atan2_calc_atan)

/* 20 <= r8 < 32: the high word becomes zero, the low word gets bits of both */
L(atan2_scale_result_middle_shift):
	li	r7,	31
	sub	r7,	r7,	r8		# r7 = 31 - r8
	srw	r6,	lx,	r8		# old low word, one bit short of the full shift
	slw	lx,	hx,	r7		# old high word moved down into the low word
	addi	r6,	r6,	1		# rounding
	srwi	r6,	r6,	1
	li	hx,	0
	or	lx,	lx,	r6

	b	L(atan2_calc_atan)

/* 32 <= r8 <= 52: only bits of the old high word survive, in the low word */
L(atan2_scale_result_big_shift):
	addi	r8,	r8,	-32
	srw	lx,	hx,	r8		# one bit short of the full shift
	addi	lx,	lx,	1		# rounding
	srwi	lx,	lx,	1
	li	hx,	0

/*
 * r3:r4 = |x/y|.  Call atan inside a temporary 16-byte frame: LR goes to
 * 20(r1) and the quadrant selector to 8(r1), since it lives in r10 and is
 * reloaded after the call.  The frame is released again before the
 * quadrant dispatch, so everything from atan2_check_m on returns with a
 * plain blr.
 */
L(atan2_calc_atan):

	stwu	r1,	-16(r1)			# make stack frame
	mflr	r0
	stw	r0,	20(r1)

	stw	m,	8(r1)

	bl	atan@plt			# atan(|x/y|)

	lwz	m,	8(r1)			# restore m

	lwz	r0,	20(r1)
	mtlr	r0
	addi	r1,	r1,	+16

/* r3:r4 = z, selector in r10 (also entered directly from the two shortcuts below) */
L(atan2_check_m):
	cmpwi	cr0,	m,	0
	cmpwi	cr1,	m,	1
	cmpwi	cr5,	m,	2

	beqlr-	cr0				# both signs clear: return z
	beq-	cr1,	L(atan2_invert_result)	# only x negative: return -z
	beq-	cr5,	L(atan2_pos_neg)	# only y negative
	b	L(atan2_neg_neg)		# both negative

/* exponent gap above 54: z = 0x3ff921fb54442d18 (Pi/2), no atan call needed */
L(atan2_big_differ):
	lis	r3,	0x3ff9
	ori	r3, r3,	0x21fb
	lis	r4,	0x5444
	ori	r4, r4,	0x2d18
	b	L(atan2_check_m)

/* z = +0.0 */
L(atan2_small_differ):
	lis	r3,	0
	li	r4,	0
	b	L(atan2_check_m)

/* Flip the sign bit of the result: return -z. */
L(atan2_invert_result):
	xoris	r3,	r3,	0x8000
	blr					# exit

/*
 * Quadrant fix-ups done with efdsub on 64-bit registers.  The constants
 * are c0 = 0x3ca1a62633145c07 (the low-order correction term of Pi) and
 * c1 = 0x400921fb54442d18 (Pi).  Each 64-bit operand is assembled from a
 * high-word/low-word register pair with evmergelo, and the result is
 * split back into r3 (high word) and r4 (low word) with evmergelohi.
 */

/* return c1 - (z - c0) */
L(atan2_pos_neg):
	evmergelo	r4,	r3,	r4	# merge z (the atan result) to 64 bit

	lis	r6,	0x3ca1
	ori	r6, r6,	0xa626
	lis	r7,	0x3314
	ori	r7, r7,	0x5c07
	evmergelo	r5,	r6,r7		# r5 = c0
	efdsub	r4,	r4,	r5		# r4 = z - c0

	lis	r5,	0x4009
	ori	r5, r5,	0x21fb
	lis	r6,	0x5444
	ori	r6, r6,	0x2d18
	evmergelo	r7,	r5,r6		# r7 = c1
	efdsub	r4,	r7,	r4		# r4 = c1 - (z - c0)

	evmergelohi	r3,	r4,	r4	# split result
	blr					# exit

/* return (z - c0) - c1 */
L(atan2_neg_neg):
	evmergelo	r4,	r3,	r4	# merge z (the atan result) to 64 bit

	lis	r6,	0x3ca1
	ori	r6, r6,	0xa626
	lis	r7,	0x3314
	ori	r7, r7,	0x5c07
	evmergelo	r5,	r6,r7		# r5 = c0
	efdsub	r4,	r4,	r5		# r4 = z - c0

	lis	r5,	0x4009
	ori	r5, r5,	0x21fb
	lis	r6,	0x5444
	ori	r6, r6,	0x2d18
	evmergelo	r7,	r5,r6		# r7 = c1
	efdsub	r4,	r4,	r7		# r4 = (z - c0) - c1

	evmergelohi	r3,	r4,	r4	# split result
	blr					# exit

/*
 * Special-operand tables.  None of these paths has created a stack
 * frame, so they end in a plain blr.  The selector holds the sign of x
 * in bit 0 and the sign of y in bit 1, so it is always 0..3 and the
 * fall-through after each four-way dispatch is not expected to be taken.
 */

/* x == +-0 */
L(atan2_x_zero):
	cmpwi	cr0,	m,	0
	cmpwi	cr1,	m,	1
	cmpwi	cr5,	m,	2
	cmpwi	cr6,	m,	3

	beq-	cr0,	L(atan2_return_pzero)	# atan2(+-0, +anything) = +-0
	beq-	cr1,	L(atan2_return_nzero)
	beq-	cr5,	L(atan2_return_ppi)	# atan2(+-0, -anything) = +-Pi
	beq-	cr6,	L(atan2_return_npi)

/* y == +-0 with x non-zero, or x == +-Inf with y finite: sign of x picks +-Pi/2 */
L(atan2_y_zero_or_x_inf):
	cmpwi	cr0,	hx,	0
	blt-	cr0,	L(atan2_return_npio2)	# atan2(+-anything, 0) = +-Pi/2
	b	L(atan2_return_ppio2)

/* y == +-Inf; cr5.eq (set at entry) still tells whether x is Inf as well */
L(atan2_y_inf):
	cmpwi	cr0,	m,	0
	cmpwi	cr1,	m,	1
	cmpwi	cr6,	m,	2
	cmpwi	cr7,	m,	3

	beq-	cr5,	L(atan2_y_inf_and_x_inf)

	beq-	cr0,	L(atan2_return_pzero)	# atan2(+-finite, +Inf) = +-0
	beq-	cr1,	L(atan2_return_nzero)
	beq-	cr6,	L(atan2_return_ppi)	# atan2(+-finite, -Inf) = +-Pi
	beq-	cr7,	L(atan2_return_npi)

/* both operands infinite: +-Pi/4 for y = +Inf, +-3Pi/4 for y = -Inf */
L(atan2_y_inf_and_x_inf):
	beq-	cr0,	L(atan2_return_ppio4)
	beq-	cr1,	L(atan2_return_npio4)
	beq-	cr6,	L(atan2_return_p3pio4)
	beq-	cr7,	L(atan2_return_n3pio4)

/*
 * Constant results.  Pi/4, Pi/2 and Pi share the mantissa
 * 0x921fb54442d18 and differ only in the upper 16 bits (sign and
 * exponent), so each stub loads those bits and joins atan2_return_pi,
 * which fills in the remainder of r3 and all of r4.
 */
L(atan2_return_ppio4):
	lis	r3,	0x3fe9			# +Pi/4
	b	L(atan2_return_pi)

L(atan2_return_npio4):
	lis	r3,	0xbfe9			# -Pi/4
	b	L(atan2_return_pi)

L(atan2_return_ppio2):
	lis	r3,	0x3ff9			# +Pi/2
	b	L(atan2_return_pi)

L(atan2_return_npio2):
	lis	r3,	0xbff9			# -Pi/2
	b	L(atan2_return_pi)

/* +3Pi/4 = 0x4002d97c7f3321d2; different mantissa, so loaded in full */
L(atan2_return_p3pio4):
	lis	r3,	0x4002
	ori	r3, r3,	0xd97c
	lis	r4,	0x7f33
	ori	r4, r4,	0x21d2
	blr

/* -3Pi/4 = 0xc002d97c7f3321d2 */
L(atan2_return_n3pio4):
	lis	r3,	0xc002
	ori	r3, r3,	0xd97c
	lis	r4,	0x7f33
	ori	r4, r4,	0x21d2
	blr

L(atan2_return_ppi):
	lis	r3,	0x4009			# +Pi
	b	L(atan2_return_pi)

L(atan2_return_npi):
	lis	r3,	0xc009			# -Pi
	b	L(atan2_return_pi)

/* shared tail: complete the 0x....21fb:54442d18 part of the constant */
L(atan2_return_pi):
	ori	r3, r3,	0x21fb
	lis	r4,	0x5444
	ori	r4, r4,	0x2d18
	blr

/* +0.0 */
L(atan2_return_pzero):
	lis	r3,	0
	li	r4,	0
	blr

/* -0.0 */
L(atan2_return_nzero):
	lis	r3,	0x8000
	li	r4,	0
	blr

/* NaN result: bit pattern 0x7fff0000:00000000 */
L(atan2_return_nan):
	lis	r3,	0x7fff
	li	r4,	0
	blr
#endif

END(__ieee754_atan2)
