
#include "macros.inc"

; Register aliases for the three memcpy arguments.  Each 16-bit value sits
; in a register pair: the _hi alias names the high byte, _lo the low byte.
; NOTE(review): presumably mirrors the avr-gcc argument registers -- confirm.
; Keep comments off the define lines themselves: any trailing text would
; become part of the macro expansion.
#define dest_hi r25
#define dest_lo r24
#define src_hi r23
#define src_lo r22
#define len_hi r21
#define len_lo r20

; void *memcpy(void *dest, const void *src, size_t len)
;
; Copies len bytes from src to dest, one byte per load/store pair, walking
; both buffers upwards with post-increment addressing.
;
; In:   dest_hi:dest_lo = dest, src_hi:src_lo = src, len_hi:len_lo = len
; Out:  dest_hi:dest_lo, which this routine never writes (see the ret below)
; Uses: X and Z pointers, __tmp_reg__, len_hi:len_lo (destroyed), flags
;
; NOTE(review): LOAD_X, LOAD_Z, _U and __tmp_reg__ are not defined in this
; file; presumably they come from macros.inc, with LOAD_X and LOAD_Z copying
; a register pair into the X and Z pointers -- confirm there.

	.text
	.global	_U(memcpy)
	.type	_U(memcpy), @function
_U(memcpy):
; Z = source pointer, X = destination pointer
	LOAD_Z(src_lo, src_hi)
	LOAD_X(dest_lo, dest_hi)
#if OPTIMIZE_SPEED
; 15 words, (14 + len * 6 - (len & 1)) cycles
; Speed variant: the loop body copies two bytes per counter update.  An odd
; len enters halfway through the body so that a single byte is copied first.
; sbrs skips the next instruction when bit 0 of the length is set, so an
; even len takes the first rjmp and an odd len takes the second.
	sbrs	len_lo, 0
	rjmp	.memcpy_start
	rjmp	.memcpy_odd
.memcpy_loop:
; first byte of a pair
	ld	__tmp_reg__, Z+
	st	X+, __tmp_reg__
.memcpy_odd:
; second byte of a pair, or the single leading byte of an odd len
	ld	__tmp_reg__, Z+
	st	X+, __tmp_reg__
.memcpy_start:
; 16-bit len -= 2.  A borrow (carry set) means the counter was below 2 and
; the copy is complete.  The low bit of an odd len is never subtracted; it
; only makes the final subtraction borrow from 1 instead of from 0.
	subi	len_lo, lo8(2)
	sbci	len_hi, hi8(2)
#else
; 11 words, (13 + len * 8) cycles
; Size variant: one byte per counter update.  Entry jumps to the counter
; test first, so len == 0 copies nothing.
	rjmp	.memcpy_start
.memcpy_loop:
	ld	__tmp_reg__, Z+
	st	X+, __tmp_reg__
.memcpy_start:
; 16-bit len -= 1.  A borrow (carry set) means the counter was already 0.
	subi	len_lo, lo8(1)
	sbci	len_hi, hi8(1)
#endif
; Shared loop branch for both variants: carry clear means the subtraction
; did not borrow, so another pass through the loop body is due.
	brcc	.memcpy_loop
; return dest (unchanged)
	ret
; .memcpy_end marks the address just past ret; .size records the distance
; from the entry label as the size of the function symbol.
.memcpy_end:
	.size	_U(memcpy), .memcpy_end - _U(memcpy)

