[meego-commits] 8989: Changes to Trunk:Testing/pixman
Liu Xinyun
no_reply at build.meego.com
Thu Oct 28 00:59:15 UTC 2010
Hi,
I have made the following changes to pixman in project Trunk:Testing. Please review and accept ASAP.
Thank You,
Liu Xinyun
[This message was auto-generated]
---
Request #8989:
submit: home:xyl:branches:Trunk:Testing/pixman(r3)(cleanup) -> Trunk:Testing/pixman
Message:
Update to 0.20.0, the latest stable version.
State: new 2010-10-27T17:59:14 xyl
Comment: None
changes files:
--------------
--- pixman.changes
+++ pixman.changes
@@ -0,0 +1,3 @@
+* Thu Oct 28 2010 Liu Xinyun <xinyun.liu at intel.com> - 0.20.0
+- Update to 0.20.0
+
old:
----
pixman-0.19.4.tar.bz2
new:
----
pixman-0.20.0.tar.bz2
spec files:
-----------
--- pixman.spec
+++ pixman.spec
@@ -1,13 +1,13 @@
#
# Do NOT Edit the Auto-generated Part!
-# Generated by: spectacle version 0.19
+# Generated by: spectacle version 0.20
#
# >> macros
# << macros
Name: pixman
Summary: Pixel manipulation library
-Version: 0.19.4
+Version: 0.20.0
Release: 1
Group: System/Libraries
License: MIT
other changes:
--------------
++++++ pixman-0.19.4.tar.bz2 -> pixman-0.20.0.tar.bz2
--- configure
+++ configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65 for pixman 0.19.4.
+# Generated by GNU Autoconf 2.65 for pixman 0.20.0.
#
# Report bugs to <"pixman at lists.freedesktop.org">.
#
@@ -701,8 +701,8 @@
# Identity of this package.
PACKAGE_NAME='pixman'
PACKAGE_TARNAME='pixman'
-PACKAGE_VERSION='0.19.4'
-PACKAGE_STRING='pixman 0.19.4'
+PACKAGE_VERSION='0.20.0'
+PACKAGE_STRING='pixman 0.20.0'
PACKAGE_BUGREPORT='"pixman at lists.freedesktop.org"'
PACKAGE_URL=''
@@ -1473,7 +1473,7 @@
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures pixman 0.19.4 to adapt to many kinds of systems.
+\`configure' configures pixman 0.20.0 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1543,7 +1543,7 @@
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of pixman 0.19.4:";;
+ short | recursive ) echo "Configuration of pixman 0.20.0:";;
esac
cat <<\_ACEOF
@@ -1663,7 +1663,7 @@
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-pixman configure 0.19.4
+pixman configure 0.20.0
generated by GNU Autoconf 2.65
Copyright (C) 2009 Free Software Foundation, Inc.
@@ -2251,7 +2251,7 @@
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by pixman $as_me 0.19.4, which was
+It was created by pixman $as_me 0.20.0, which was
generated by GNU Autoconf 2.65. Invocation command line was
$ $0 $@
@@ -3059,7 +3059,7 @@
# Define the identity of the package.
PACKAGE='pixman'
- VERSION='0.19.4'
+ VERSION='0.20.0'
cat >>confdefs.h <<_ACEOF
@@ -11239,13 +11239,13 @@
-LT_VERSION_INFO="19:4:19"
+LT_VERSION_INFO="20:0:20"
PIXMAN_VERSION_MAJOR=0
-PIXMAN_VERSION_MINOR=19
+PIXMAN_VERSION_MINOR=20
-PIXMAN_VERSION_MICRO=4
+PIXMAN_VERSION_MICRO=0
@@ -12757,6 +12757,78 @@
fi
+ if test "z$support_for_pthread_setspecific" != "zyes"; then
+ save_CFLAGS="$CFLAGS"
+ save_LDFLAGS="$LDFLAGS"
+ save_LIBS="$LIBS"
+ CFLAGS=""
+ LDFLAGS=""
+ LIBS=""
+ CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <pthread.h>
+
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+static pthread_key_t key;
+
+static void
+make_key (void)
+{
+ pthread_key_create (&key, NULL);
+}
+
+int
+main ()
+{
+ void *value = NULL;
+
+ if (pthread_once (&once_control, make_key) != 0)
+ {
+ value = NULL;
+ }
+ else
+ {
+ value = pthread_getspecific (key);
+ if (!value)
+ {
+ value = malloc (100);
+ pthread_setspecific (key, value);
+ }
+ }
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
+ pixman_cc_flag=yes
+else
+ pixman_cc_stderr=`test -f conftest.err && cat conftest.err`
+ pixman_cc_flag=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+
+ if test "x$pixman_cc_stderr" != "x"; then
+ pixman_cc_flag=no
+ fi
+
+ if test "x$pixman_cc_flag" = "xyes"; then
+ PTHREAD_CFLAGS="$CFLAGS"
+ PTHREAD_LIBS="$LIBS"
+ PTHREAD_LDFLAGS="$LDFLAGS"
+ support_for_pthread_setspecific=yes
+ else
+ :
+ fi
+ CFLAGS="$save_CFLAGS"
+ LDFLAGS="$save_LDFLAGS"
+ LIBS="$save_LIBS"
+
+ fi
+
if test $support_for_pthread_setspecific = yes; then
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
@@ -13330,7 +13402,7 @@
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by pixman $as_me 0.19.4, which was
+This file was extended by pixman $as_me 0.20.0, which was
generated by GNU Autoconf 2.65. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -13396,7 +13468,7 @@
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-pixman config.status 0.19.4
+pixman config.status 0.20.0
configured by $0, generated by GNU Autoconf 2.65,
with options \\"\$ac_cs_config\\"
--- configure.ac
+++ configure.ac
@@ -53,8 +53,8 @@
#
m4_define([pixman_major], 0)
-m4_define([pixman_minor], 19)
-m4_define([pixman_micro], 4)
+m4_define([pixman_minor], 20)
+m4_define([pixman_micro], 0)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
@@ -726,7 +726,8 @@
PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"])
PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"])
-
+ PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"])
+
if test $support_for_pthread_setspecific = yes; then
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported])
--- pixman/pixman-arm-neon-asm.S
+++ pixman/pixman-arm-neon-asm.S
@@ -388,6 +388,7 @@
vld1.16 {d4, d5}, [DST_R, :128]!
vst1.16 {d28, d29}, [DST_W, :128]!
pixman_composite_over_n_0565_process_pixblock_head
+ cache_preload 8, 8
.endm
.macro pixman_composite_over_n_0565_init
@@ -495,15 +496,15 @@
/******************************************************************************/
-.macro pixman_composite_add_8000_8000_process_pixblock_head
+.macro pixman_composite_add_8_8_process_pixblock_head
vqadd.u8 q14, q0, q2
vqadd.u8 q15, q1, q3
.endm
-.macro pixman_composite_add_8000_8000_process_pixblock_tail
+.macro pixman_composite_add_8_8_process_pixblock_tail
.endm
-.macro pixman_composite_add_8000_8000_process_pixblock_tail_head
+.macro pixman_composite_add_8_8_process_pixblock_tail_head
vld1.8 {d0, d1, d2, d3}, [SRC]!
PF add PF_X, PF_X, #32
PF tst PF_CTL, #0xF
@@ -523,15 +524,15 @@
.endm
generate_composite_function \
- pixman_composite_add_8000_8000_asm_neon, 8, 0, 8, \
+ pixman_composite_add_8_8_asm_neon, 8, 0, 8, \
FLAG_DST_READWRITE, \
32, /* number of pixels, processed in a single block */ \
10, /* prefetch distance */ \
default_init, \
default_cleanup, \
- pixman_composite_add_8000_8000_process_pixblock_head, \
- pixman_composite_add_8000_8000_process_pixblock_tail, \
- pixman_composite_add_8000_8000_process_pixblock_tail_head
+ pixman_composite_add_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_process_pixblock_tail, \
+ pixman_composite_add_8_8_process_pixblock_tail_head
/******************************************************************************/
@@ -561,8 +562,8 @@
10, /* prefetch distance */ \
default_init, \
default_cleanup, \
- pixman_composite_add_8000_8000_process_pixblock_head, \
- pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_process_pixblock_tail, \
pixman_composite_add_8888_8888_process_pixblock_tail_head
generate_composite_function_single_scanline \
@@ -571,8 +572,8 @@
8, /* number of pixels, processed in a single block */ \
default_init, \
default_cleanup, \
- pixman_composite_add_8000_8000_process_pixblock_head, \
- pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_process_pixblock_tail, \
pixman_composite_add_8888_8888_process_pixblock_tail_head
/******************************************************************************/
@@ -710,6 +711,7 @@
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
pixman_composite_over_8888_8888_process_pixblock_head
+ cache_preload 8, 8
.endm
.macro pixman_composite_over_n_8888_init
@@ -1870,3 +1872,104 @@
10, /* dst_r_basereg */ \
8, /* src_basereg */ \
15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_add_0565_8_0565_process_pixblock_head
+ /* mask is in d15 */
+ convert_0565_to_x888 q4, d2, d1, d0
+ convert_0565_to_x888 q5, d6, d5, d4
+ /* source pixel data is in {d0, d1, d2, XX} */
+ /* destination pixel data is in {d4, d5, d6, XX} */
+ vmull.u8 q6, d15, d2
+ vmull.u8 q5, d15, d1
+ vmull.u8 q4, d15, d0
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q11, q5, #8
+ vrshr.u16 q10, q4, #8
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d1, q5, q11
+ vraddhn.u16 d0, q4, q10
+.endm
+
+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
+ vqadd.u8 q0, q0, q2
+ vqadd.u8 q1, q1, q3
+ /* 32bpp result is in {d0, d1, d2, XX} */
+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
+ vld1.8 {d15}, [MASK]!
+ pixman_composite_add_0565_8_0565_process_pixblock_tail
+ vld1.16 {d8, d9}, [SRC]!
+ vld1.16 {d10, d11}, [DST_R, :128]!
+ cache_preload 8, 8
+ pixman_composite_add_0565_8_0565_process_pixblock_head
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_add_0565_8_0565_process_pixblock_head, \
+ pixman_composite_add_0565_8_0565_process_pixblock_tail, \
+ pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
+ /* mask is in d15 */
+ convert_0565_to_x888 q5, d6, d5, d4
+ /* destination pixel data is in {d4, d5, d6, xx} */
+ vmvn.8 d24, d15 /* get inverted alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+.endm
+
+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vraddhn.u16 d0, q14, q8
+ vraddhn.u16 d1, q15, q9
+ vraddhn.u16 d2, q12, q10
+ /* 32bpp result is in {d0, d1, d2, XX} */
+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
+ vld1.8 {d15}, [SRC]!
+ pixman_composite_out_reverse_8_0565_process_pixblock_tail
+ vld1.16 {d10, d11}, [DST_R, :128]!
+ cache_preload 8, 8
+ pixman_composite_out_reverse_8_0565_process_pixblock_head
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_out_reverse_8_0565_process_pixblock_head, \
+ pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
+ pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 15, /* src_basereg */ \
+ 0 /* mask_basereg */
--- pixman/pixman-arm-neon.c
+++ pixman/pixman-arm-neon.c
@@ -52,7 +52,7 @@
uint8_t, 3, uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
uint32_t, 1, uint32_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8000_8000,
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
uint32_t, 1, uint32_t, 1)
@@ -60,6 +60,8 @@
uint32_t, 1, uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
+ uint8_t, 1, uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
uint16_t, 1)
@@ -82,6 +84,8 @@
PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
+ uint16_t, 1, uint8_t, 1, uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
uint32_t, 1, uint32_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
@@ -256,12 +260,16 @@
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565),
{ PIXMAN_OP_NONE },
};
@@ -347,9 +355,13 @@
pixman_implementation_t *
_pixman_implementation_create_arm_neon (void)
{
- pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
+#ifdef USE_ARM_SIMD
+ pixman_implementation_t *fallback = _pixman_implementation_create_arm_simd ();
+#else
+ pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
+#endif
pixman_implementation_t *imp =
- _pixman_implementation_create (general, arm_neon_fast_paths);
+ _pixman_implementation_create (fallback, arm_neon_fast_paths);
imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
--- pixman/pixman-arm-simd-asm.S
+++ pixman/pixman-arm-simd-asm.S
@@ -56,7 +56,7 @@
* aliases for better readability and maintainability.
*/
-pixman_asm_function pixman_composite_add_8000_8000_asm_armv6
+pixman_asm_function pixman_composite_add_8_8_asm_armv6
push {r4, r5, r6, r7, r8, r9, r10, r11}
mov r10, r1
sub sp, sp, #4
--- pixman/pixman-arm-simd.c
+++ pixman/pixman-arm-simd.c
@@ -33,12 +33,12 @@
#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
void
-pixman_composite_add_8000_8000_asm_armv6 (int32_t width,
- int32_t height,
- uint8_t *dst_line,
- int32_t dst_stride,
- uint8_t *src_line,
- int32_t src_stride)
+pixman_composite_add_8_8_asm_armv6 (int32_t width,
+ int32_t height,
+ uint8_t *dst_line,
+ int32_t dst_stride,
+ uint8_t *src_line,
+ int32_t src_stride)
{
uint8_t *dst, *src;
int32_t w;
@@ -375,7 +375,7 @@
#endif
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
uint32_t, 1, uint32_t, 1)
@@ -397,7 +397,7 @@
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
--- pixman/pixman-bits-image.c
+++ pixman/pixman-bits-image.c
@@ -944,7 +944,8 @@
convert_ ## format, \
PIXMAN_ ## format, \
repeat_mode); \
- }
+ } \
+ extern int no_such_variable
MAKE_BILINEAR_FETCHER (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD);
MAKE_BILINEAR_FETCHER (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE);
--- pixman/pixman-combine.c.template
+++ pixman/pixman-combine.c.template
@@ -133,6 +133,17 @@
}
static void
+combine_dst (pixman_implementation_t *imp,
+ pixman_op_t op,
+ comp4_t * dest,
+ const comp4_t * src,
+ const comp4_t * mask,
+ int width)
+{
+ return;
+}
+
+static void
combine_src_u (pixman_implementation_t *imp,
pixman_op_t op,
comp4_t * dest,
@@ -1296,17 +1307,13 @@
comp4_t s = combine_mask (src, mask, i);
comp2_t a = s >> A_SHIFT;
- if (a != 0x00)
+ if (s != 0x00)
{
- if (a != MASK)
- {
- comp4_t d = *(dest + i);
- a = combine_disjoint_out_part (d >> A_SHIFT, a);
- UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s);
- s = d;
- }
+ comp4_t d = *(dest + i);
+ a = combine_disjoint_out_part (d >> A_SHIFT, a);
+ UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s);
- *(dest + i) = s;
+ *(dest + i) = d;
}
}
}
@@ -2314,7 +2321,7 @@
/* Unified alpha */
imp->combine_width[PIXMAN_OP_CLEAR] = combine_clear;
imp->combine_width[PIXMAN_OP_SRC] = combine_src_u;
- /* dest */
+ imp->combine_width[PIXMAN_OP_DST] = combine_dst;
imp->combine_width[PIXMAN_OP_OVER] = combine_over_u;
imp->combine_width[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
imp->combine_width[PIXMAN_OP_IN] = combine_in_u;
@@ -2330,7 +2337,7 @@
/* Disjoint, unified */
imp->combine_width[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
imp->combine_width[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
- /* dest */
+ imp->combine_width[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_width[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
imp->combine_width[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
imp->combine_width[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
@@ -2344,7 +2351,7 @@
/* Conjoint, unified */
imp->combine_width[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
imp->combine_width[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
- /* dest */
+ imp->combine_width[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_width[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
imp->combine_width[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
imp->combine_width[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
@@ -2390,7 +2397,7 @@
/* Disjoint CA */
imp->combine_width_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
imp->combine_width_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
- /* dest */
+ imp->combine_width_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
@@ -2404,7 +2411,7 @@
/* Conjoint CA */
imp->combine_width_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
imp->combine_width_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
- /* dest */
+ imp->combine_width_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
@@ -2427,10 +2434,10 @@
imp->combine_width_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
imp->combine_width_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
- /* It is not clear that these make sense, so leave them out for now */
- imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = NULL;
- imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = NULL;
- imp->combine_width_ca[PIXMAN_OP_HSL_COLOR] = NULL;
- imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = NULL;
+ /* It is not clear that these make sense, so make them noops for now */
+ imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
+ imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
+ imp->combine_width_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
+ imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
}
--- pixman/pixman-combine32.c
+++ pixman/pixman-combine32.c
@@ -137,6 +137,17 @@
}
static void
+combine_dst (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dest,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
+{
+ return;
+}
+
+static void
combine_src_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
@@ -1300,17 +1311,13 @@
uint32_t s = combine_mask (src, mask, i);
uint16_t a = s >> A_SHIFT;
- if (a != 0x00)
+ if (s != 0x00)
{
- if (a != MASK)
- {
- uint32_t d = *(dest + i);
- a = combine_disjoint_out_part (d >> A_SHIFT, a);
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s);
- s = d;
- }
+ uint32_t d = *(dest + i);
+ a = combine_disjoint_out_part (d >> A_SHIFT, a);
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s);
- *(dest + i) = s;
+ *(dest + i) = d;
}
}
}
@@ -2318,7 +2325,7 @@
/* Unified alpha */
imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
- /* dest */
+ imp->combine_32[PIXMAN_OP_DST] = combine_dst;
imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
@@ -2334,7 +2341,7 @@
/* Disjoint, unified */
imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
- /* dest */
+ imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
@@ -2348,7 +2355,7 @@
/* Conjoint, unified */
imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
- /* dest */
+ imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
@@ -2394,7 +2401,7 @@
/* Disjoint CA */
imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
- /* dest */
+ imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
@@ -2408,7 +2415,7 @@
/* Conjoint CA */
imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
- /* dest */
+ imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
@@ -2431,10 +2438,10 @@
imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
- /* It is not clear that these make sense, so leave them out for now */
- imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = NULL;
- imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = NULL;
- imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = NULL;
- imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = NULL;
+ /* It is not clear that these make sense, so make them noops for now */
+ imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
+ imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
+ imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
+ imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
}
--- pixman/pixman-combine64.c
+++ pixman/pixman-combine64.c
@@ -137,6 +137,17 @@
}
static void
+combine_dst (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint64_t * dest,
+ const uint64_t * src,
+ const uint64_t * mask,
+ int width)
+{
+ return;
+}
+
+static void
combine_src_u (pixman_implementation_t *imp,
pixman_op_t op,
uint64_t * dest,
@@ -1300,17 +1311,13 @@
uint64_t s = combine_mask (src, mask, i);
uint32_t a = s >> A_SHIFT;
- if (a != 0x00)
+ if (s != 0x00)
{
- if (a != MASK)
- {
- uint64_t d = *(dest + i);
- a = combine_disjoint_out_part (d >> A_SHIFT, a);
- UN16x4_MUL_UN16_ADD_UN16x4 (d, a, s);
- s = d;
- }
+ uint64_t d = *(dest + i);
+ a = combine_disjoint_out_part (d >> A_SHIFT, a);
+ UN16x4_MUL_UN16_ADD_UN16x4 (d, a, s);
- *(dest + i) = s;
+ *(dest + i) = d;
}
}
}
@@ -2318,7 +2325,7 @@
/* Unified alpha */
imp->combine_64[PIXMAN_OP_CLEAR] = combine_clear;
imp->combine_64[PIXMAN_OP_SRC] = combine_src_u;
- /* dest */
+ imp->combine_64[PIXMAN_OP_DST] = combine_dst;
imp->combine_64[PIXMAN_OP_OVER] = combine_over_u;
imp->combine_64[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
imp->combine_64[PIXMAN_OP_IN] = combine_in_u;
@@ -2334,7 +2341,7 @@
/* Disjoint, unified */
imp->combine_64[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
imp->combine_64[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
- /* dest */
+ imp->combine_64[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_64[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
imp->combine_64[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
imp->combine_64[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
@@ -2348,7 +2355,7 @@
/* Conjoint, unified */
imp->combine_64[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
imp->combine_64[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
- /* dest */
+ imp->combine_64[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_64[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
imp->combine_64[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
imp->combine_64[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
@@ -2394,7 +2401,7 @@
/* Disjoint CA */
imp->combine_64_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
imp->combine_64_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
- /* dest */
+ imp->combine_64_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
@@ -2408,7 +2415,7 @@
/* Conjoint CA */
imp->combine_64_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
imp->combine_64_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
- /* dest */
+ imp->combine_64_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
@@ -2431,10 +2438,10 @@
imp->combine_64_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
imp->combine_64_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
- /* It is not clear that these make sense, so leave them out for now */
- imp->combine_64_ca[PIXMAN_OP_HSL_HUE] = NULL;
- imp->combine_64_ca[PIXMAN_OP_HSL_SATURATION] = NULL;
- imp->combine_64_ca[PIXMAN_OP_HSL_COLOR] = NULL;
- imp->combine_64_ca[PIXMAN_OP_HSL_LUMINOSITY] = NULL;
+ /* It is not clear that these make sense, so make them noops for now */
+ imp->combine_64_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
+ imp->combine_64_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
+ imp->combine_64_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
+ imp->combine_64_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
}
--- pixman/pixman-compiler.h
+++ pixman/pixman-compiler.h
@@ -191,7 +191,8 @@
value = tls_ ## name ## _alloc (); \
} \
return value; \
- }
+ } \
+ extern int no_such_variable
# define PIXMAN_GET_THREAD_LOCAL(name) \
tls_ ## name ## _get ()
--- pixman/pixman-fast-path.c
+++ pixman/pixman-fast-path.c
@@ -910,19 +910,19 @@
}
static void
-fast_composite_add_8000_8000 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+fast_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
@@ -1399,15 +1399,60 @@
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
-FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER);
-FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
-FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD);
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
+static force_inline void
+scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
+ uint16_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx)
+{
+ uint16_t tmp1, tmp2, tmp3, tmp4;
+ while ((w -= 4) >= 0)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ *dst++ = tmp3;
+ *dst++ = tmp4;
+ }
+ if (w & 2)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ }
+ if (w & 1)
+ *dst++ = src[pixman_fixed_to_int (vx)];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, COVER);
+FAST_NEAREST_MAINLOOP (565_565_none_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, NONE);
+FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, PAD);
+
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
pixman_format_code_t format,
@@ -1602,7 +1647,7 @@
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
--- pixman/pixman-fast-path.h
+++ pixman/pixman-fast-path.h
@@ -381,7 +381,9 @@
OP, repeat_mode) \
FAST_NEAREST_MAINLOOP(scale_func_name##_##OP, \
scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- src_type_t, dst_type_t, repeat_mode)
+ src_type_t, dst_type_t, repeat_mode) \
+ \
+ extern int no_such_variable
#define SCALED_NEAREST_FLAGS \
--- pixman/pixman-image.c
+++ pixman/pixman-image.c
@@ -48,7 +48,6 @@
gradient->n_stops = n_stops;
gradient->stop_range = 0xffff;
- gradient->common.class = SOURCE_IMAGE_CLASS_UNKNOWN;
return TRUE;
}
@@ -363,24 +362,21 @@
flags |=
FAST_PATH_NO_PAD_REPEAT |
FAST_PATH_NO_NONE_REPEAT |
- FAST_PATH_NO_NORMAL_REPEAT |
- FAST_PATH_COVERS_CLIP;
+ FAST_PATH_NO_NORMAL_REPEAT;
break;
case PIXMAN_REPEAT_PAD:
flags |=
FAST_PATH_NO_REFLECT_REPEAT |
FAST_PATH_NO_NONE_REPEAT |
- FAST_PATH_NO_NORMAL_REPEAT |
- FAST_PATH_COVERS_CLIP;
+ FAST_PATH_NO_NORMAL_REPEAT;
break;
default:
flags |=
FAST_PATH_NO_REFLECT_REPEAT |
FAST_PATH_NO_PAD_REPEAT |
- FAST_PATH_NO_NONE_REPEAT |
- FAST_PATH_COVERS_CLIP;
+ FAST_PATH_NO_NONE_REPEAT;
break;
}
@@ -400,8 +396,6 @@
if (image->solid.color.alpha == 0xffff)
flags |= FAST_PATH_IS_OPAQUE;
-
- flags |= FAST_PATH_COVERS_CLIP;
break;
case BITS:
@@ -414,12 +408,6 @@
else
{
code = image->bits.format;
-
- if (!image->common.transform &&
- image->common.repeat == PIXMAN_REPEAT_NORMAL)
- {
- flags |= FAST_PATH_SIMPLE_REPEAT;
- }
}
if (!PIXMAN_FORMAT_A (image->bits.format) &&
--- pixman/pixman-linear-gradient.c
+++ pixman/pixman-linear-gradient.c
@@ -1,3 +1,4 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
@@ -37,58 +38,58 @@
int width,
int height)
{
+ source_image_t *source = (source_image_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
pixman_vector_t v;
pixman_fixed_32_32_t l;
- pixman_fixed_48_16_t dx, dy, a, b, off;
- pixman_fixed_48_16_t factors[4];
- int i;
-
- image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
+ pixman_fixed_48_16_t dx, dy;
+ double inc;
+ source_image_class_t class;
- dx = linear->p2.x - linear->p1.x;
- dy = linear->p2.y - linear->p1.y;
+ class = SOURCE_IMAGE_CLASS_UNKNOWN;
- l = dx * dx + dy * dy;
-
- if (l)
+ if (source->common.transform)
{
- a = (dx << 32) / l;
- b = (dy << 32) / l;
+ /* projective transformation */
+ if (source->common.transform->matrix[2][0] != 0 ||
+ source->common.transform->matrix[2][1] != 0 ||
+ source->common.transform->matrix[2][2] == 0)
+ {
+ return class;
+ }
+
+ v.vector[0] = source->common.transform->matrix[0][1];
+ v.vector[1] = source->common.transform->matrix[1][1];
+ v.vector[2] = source->common.transform->matrix[2][2];
}
else
{
- a = b = 0;
- }
-
- off = (-a * linear->p1.x
- -b * linear->p1.y) >> 16;
-
- for (i = 0; i < 3; i++)
- {
- v.vector[0] = pixman_int_to_fixed ((i % 2) * (width - 1) + x);
- v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
+ v.vector[0] = 0;
+ v.vector[1] = pixman_fixed_1;
v.vector[2] = pixman_fixed_1;
+ }
- if (image->common.transform)
- {
- if (!pixman_transform_point_3d (image->common.transform, &v))
- {
- image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
+ dx = linear->p2.x - linear->p1.x;
+ dy = linear->p2.y - linear->p1.y;
- return image->source.class;
- }
- }
+ l = dx * dx + dy * dy;
- factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
- }
+ if (l == 0)
+ return class;
- if (factors[2] == factors[0])
- image->source.class = SOURCE_IMAGE_CLASS_HORIZONTAL;
- else if (factors[1] == factors[0])
- image->source.class = SOURCE_IMAGE_CLASS_VERTICAL;
+ /*
+ * compute how much the input of the gradient walker changes
+ * when moving vertically through the whole image
+ */
+ inc = height * (double) pixman_fixed_1 * pixman_fixed_1 *
+ (dx * v.vector[0] + dy * v.vector[1]) /
+ (v.vector[2] * (double) l);
+
+ /* check that casting to integer would result in 0 */
+ if (-1 < inc && inc < 1)
+ class = SOURCE_IMAGE_CLASS_HORIZONTAL;
- return image->source.class;
+ return class;
}
static void
@@ -101,7 +102,7 @@
{
pixman_vector_t v, unit;
pixman_fixed_32_32_t l;
- pixman_fixed_48_16_t dx, dy, a, b, off;
+ pixman_fixed_48_16_t dx, dy;
gradient_t *gradient = (gradient_t *)image;
source_image_t *source = (source_image_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
@@ -136,31 +137,31 @@
l = dx * dx + dy * dy;
- if (l != 0)
+ if (l == 0 || unit.vector[2] == 0)
{
- a = (dx << 32) / l;
- b = (dy << 32) / l;
- off = (-a * linear->p1.x
- -b * linear->p1.y) >> 16;
- }
-
- if (l == 0 || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1))
- {
- pixman_fixed_48_16_t inc, t;
-
/* affine transformation only */
- if (l == 0)
+ pixman_fixed_32_32_t t, next_inc;
+ double inc;
+
+ if (l == 0 || v.vector[2] == 0)
{
t = 0;
inc = 0;
}
else
{
- t = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
- inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
+ double invden, v2;
+
+ invden = pixman_fixed_1 * (double) pixman_fixed_1 /
+ (l * (double) v.vector[2]);
+ v2 = v.vector[2] * (1. / pixman_fixed_1);
+ t = ((dx * v.vector[0] + dy * v.vector[1]) -
+ (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
+ inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
}
+ next_inc = 0;
- if (source->class == SOURCE_IMAGE_CLASS_VERTICAL)
+ if (((pixman_fixed_32_32_t )(inc * width)) == 0)
{
register uint32_t color;
@@ -170,81 +171,52 @@
}
else
{
- if (!mask)
- {
- while (buffer < end)
- {
- *buffer++ = _pixman_gradient_walker_pixel (&walker, t);
-
- t += inc;
- }
- }
- else
+ int i;
+
+ i = 0;
+ while (buffer < end)
{
- while (buffer < end)
+ if (!mask || *mask++)
{
- if (*mask++)
- *buffer = _pixman_gradient_walker_pixel (&walker, t);
-
- buffer++;
- t += inc;
+ *buffer = _pixman_gradient_walker_pixel (&walker,
+ t + next_inc);
}
+ i++;
+ next_inc = inc * i;
+ buffer++;
}
}
}
else
{
/* projective transformation */
- pixman_fixed_48_16_t t;
-
- if (source->class == SOURCE_IMAGE_CLASS_VERTICAL)
- {
- register uint32_t color;
-
- if (v.vector[2] == 0)
- {
- t = 0;
- }
- else
- {
- pixman_fixed_48_16_t x, y;
+ double t;
- x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
- y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
- t = ((a * x + b * y) >> 16) + off;
- }
+ t = 0;
- color = _pixman_gradient_walker_pixel (&walker, t);
- while (buffer < end)
- *buffer++ = color;
- }
- else
+ while (buffer < end)
{
- while (buffer < end)
+ if (!mask || *mask++)
{
- if (!mask || *mask++)
+ if (v.vector[2] != 0)
{
- if (v.vector[2] == 0)
- {
- t = 0;
- }
- else
- {
- pixman_fixed_48_16_t x, y;
- x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
- y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
- t = ((a * x + b * y) >> 16) + off;
- }
+ double invden, v2;
- *buffer = _pixman_gradient_walker_pixel (&walker, t);
+ invden = pixman_fixed_1 * (double) pixman_fixed_1 /
+ (l * (double) v.vector[2]);
+ v2 = v.vector[2] * (1. / pixman_fixed_1);
+ t = ((dx * v.vector[0] + dy * v.vector[1]) -
+ (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
}
- ++buffer;
-
- v.vector[0] += unit.vector[0];
- v.vector[1] += unit.vector[1];
- v.vector[2] += unit.vector[2];
+ *buffer = _pixman_gradient_walker_pixel (&walker, t);
}
+
+ ++buffer;
+
+ v.vector[0] += unit.vector[0];
+ v.vector[1] += unit.vector[1];
+ v.vector[2] += unit.vector[2];
}
}
}
@@ -282,7 +254,6 @@
linear->p2 = *p2;
image->type = LINEAR;
- image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
image->common.classify = linear_gradient_classify;
image->common.property_changed = linear_gradient_property_changed;
--- pixman/pixman-mmx.c
+++ pixman/pixman-mmx.c
@@ -2845,19 +2845,19 @@
}
static void
-mmx_composite_add_8000_8000 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+mmx_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
@@ -3268,7 +3268,7 @@
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8000_8000 ),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8_8 ),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mmx_composite_add_n_8_8 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mmx_composite_src_n_8_8888 ),
--- pixman/pixman-private.h
+++ pixman/pixman-private.h
@@ -65,7 +65,6 @@
{
SOURCE_IMAGE_CLASS_UNKNOWN,
SOURCE_IMAGE_CLASS_HORIZONTAL,
- SOURCE_IMAGE_CLASS_VERTICAL,
} source_image_class_t;
typedef source_image_class_t (*classify_func_t) (pixman_image_t *image,
@@ -112,7 +111,6 @@
struct source_image
{
image_common_t common;
- source_image_class_t class;
};
struct solid_fill
@@ -152,10 +150,11 @@
circle_t c1;
circle_t c2;
- double cdx;
- double cdy;
- double dr;
- double A;
+
+ circle_t delta;
+ double a;
+ double inva;
+ double mindr;
};
struct conical_gradient
@@ -554,13 +553,13 @@
#define FAST_PATH_NO_PAD_REPEAT (1 << 3)
#define FAST_PATH_NO_REFLECT_REPEAT (1 << 4)
#define FAST_PATH_NO_ACCESSORS (1 << 5)
-#define FAST_PATH_NARROW_FORMAT (1 << 6)
-#define FAST_PATH_COVERS_CLIP (1 << 7)
+#define FAST_PATH_NARROW_FORMAT (1 << 6)
#define FAST_PATH_COMPONENT_ALPHA (1 << 8)
+#define FAST_PATH_SAMPLES_OPAQUE (1 << 7)
#define FAST_PATH_UNIFIED_ALPHA (1 << 9)
#define FAST_PATH_SCALE_TRANSFORM (1 << 10)
#define FAST_PATH_NEAREST_FILTER (1 << 11)
-#define FAST_PATH_SIMPLE_REPEAT (1 << 12)
+#define FAST_PATH_HAS_TRANSFORM (1 << 12)
#define FAST_PATH_IS_OPAQUE (1 << 13)
#define FAST_PATH_NEEDS_WORKAROUND (1 << 14)
#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
@@ -570,8 +569,6 @@
#define FAST_PATH_Y_UNIT_ZERO (1 << 19)
#define FAST_PATH_BILINEAR_FILTER (1 << 20)
#define FAST_PATH_NO_NORMAL_REPEAT (1 << 21)
-#define FAST_PATH_HAS_TRANSFORM (1 << 22)
-#define FAST_PATH_SAMPLES_OPAQUE (1 << 23)
#define FAST_PATH_PAD_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
@@ -593,29 +590,25 @@
FAST_PATH_NO_NORMAL_REPEAT | \
FAST_PATH_NO_PAD_REPEAT)
-#define _FAST_PATH_STANDARD_FLAGS \
- (FAST_PATH_ID_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NO_CONVOLUTION_FILTER | \
- FAST_PATH_NO_PAD_REPEAT | \
- FAST_PATH_NO_REFLECT_REPEAT | \
+#define FAST_PATH_STANDARD_FLAGS \
+ (FAST_PATH_NO_CONVOLUTION_FILTER | \
FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NARROW_FORMAT | \
- FAST_PATH_COVERS_CLIP)
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NARROW_FORMAT)
-#define FAST_PATH_STD_SRC_FLAGS \
- _FAST_PATH_STANDARD_FLAGS
-#define FAST_PATH_STD_MASK_U_FLAGS \
- (_FAST_PATH_STANDARD_FLAGS | \
- FAST_PATH_UNIFIED_ALPHA)
-#define FAST_PATH_STD_MASK_CA_FLAGS \
- (_FAST_PATH_STANDARD_FLAGS | \
- FAST_PATH_COMPONENT_ALPHA)
#define FAST_PATH_STD_DEST_FLAGS \
(FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NARROW_FORMAT)
+#define SOURCE_FLAGS(format) \
+ (FAST_PATH_STANDARD_FLAGS | \
+ ((PIXMAN_ ## format == PIXMAN_solid) ? \
+ 0 : (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_ID_TRANSFORM)))
+
+#define MASK_FLAGS(format, extra) \
+ ((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
+
#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
PIXMAN_OP_ ## op, \
PIXMAN_ ## src, \
@@ -628,19 +621,19 @@
#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func) \
{ FAST_PATH ( \
- op, \
- src, FAST_PATH_STD_SRC_FLAGS, \
- mask, (PIXMAN_ ## mask) ? FAST_PATH_STD_MASK_U_FLAGS : 0, \
- dest, FAST_PATH_STD_DEST_FLAGS, \
- func) }
+ op, \
+ src, SOURCE_FLAGS (src), \
+ mask, MASK_FLAGS (mask, FAST_PATH_UNIFIED_ALPHA), \
+ dest, FAST_PATH_STD_DEST_FLAGS, \
+ func) }
#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func) \
{ FAST_PATH ( \
- op, \
- src, FAST_PATH_STD_SRC_FLAGS, \
- mask, FAST_PATH_STD_MASK_CA_FLAGS, \
- dest, FAST_PATH_STD_DEST_FLAGS, \
- func) }
+ op, \
+ src, SOURCE_FLAGS (src), \
+ mask, MASK_FLAGS (mask, FAST_PATH_COMPONENT_ALPHA), \
+ dest, FAST_PATH_STD_DEST_FLAGS, \
+ func) }
/* Memory allocation helpers */
void *
--- pixman/pixman-radial-gradient.c
+++ pixman/pixman-radial-gradient.c
@@ -1,3 +1,4 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
*
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
@@ -33,6 +34,100 @@
#include <math.h>
#include "pixman-private.h"
+static inline pixman_fixed_32_32_t
+dot (pixman_fixed_48_16_t x1,
+ pixman_fixed_48_16_t y1,
+ pixman_fixed_48_16_t z1,
+ pixman_fixed_48_16_t x2,
+ pixman_fixed_48_16_t y2,
+ pixman_fixed_48_16_t z2)
+{
+ /*
+ * Exact computation, assuming that the input values can
+ * be represented as pixman_fixed_16_16_t
+ */
+ return x1 * x2 + y1 * y2 + z1 * z2;
+}
+
+static inline double
+fdot (double x1,
+ double y1,
+ double z1,
+ double x2,
+ double y2,
+ double z2)
+{
+ /*
+ * Error can be unbounded in some special cases.
+ * Using clever dot product algorithms (for example compensated
+ * dot product) would improve this but make the code much less
+ * obvious
+ */
+ return x1 * x2 + y1 * y2 + z1 * z2;
+}
+
+static uint32_t
+radial_compute_color (double a,
+ double b,
+ double c,
+ double inva,
+ double dr,
+ double mindr,
+ pixman_gradient_walker_t *walker,
+ pixman_repeat_t repeat)
+{
+ /*
+ * In this function error propagation can lead to bad results:
+ * - det can have an unbounded error (if b*b-a*c is very small),
+ * potentially making it the opposite sign of what it should have been
+ * (thus clearing a pixel that would have been colored or vice-versa)
+ * or propagating the error to sqrtdet;
+ * if det has the wrong sign or b is very small, this can lead to bad
+ * results
+ *
+ * - the algorithm used to compute the solutions of the quadratic
+ * equation is not numerically stable (but saves one division compared
+ * to the numerically stable one);
+ * this can be a problem if a*c is much smaller than b*b
+ *
+ * - the above problems are worse if a is small (as inva becomes bigger)
+ */
+ double det;
+
+ if (a == 0)
+ {
+ return _pixman_gradient_walker_pixel (walker,
+ pixman_fixed_1 / 2 * c / b);
+ }
+
+ det = fdot (b, a, 0, b, -c, 0);
+ if (det >= 0)
+ {
+ double sqrtdet, t0, t1;
+
+ sqrtdet = sqrt (det);
+ t0 = (b + sqrtdet) * inva;
+ t1 = (b - sqrtdet) * inva;
+
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ if (0 <= t0 && t0 <= pixman_fixed_1)
+ return _pixman_gradient_walker_pixel (walker, t0);
+ else if (0 <= t1 && t1 <= pixman_fixed_1)
+ return _pixman_gradient_walker_pixel (walker, t1);
+ }
+ else
+ {
+ if (t0 * dr > mindr)
+ return _pixman_gradient_walker_pixel (walker, t0);
+ else if (t1 * dr > mindr)
+ return _pixman_gradient_walker_pixel (walker, t1);
+ }
+ }
+
+ return 0;
+}
+
static void
radial_gradient_get_scanline_32 (pixman_image_t *image,
int x,
@@ -42,118 +137,85 @@
const uint32_t *mask)
{
/*
+ * Implementation of radial gradients following the PDF specification.
+ * See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference
+ * Manual (PDF 32000-1:2008 at the time of this writing).
+ *
* In the radial gradient problem we are given two circles (c₁,r₁) and
- * (c₂,r₂) that define the gradient itself. Then, for any point p, we
- * must compute the value(s) of t within [0.0, 1.0] representing the
- * circle(s) that would color the point.
- *
- * There are potentially two values of t since the point p can be
- * colored by both sides of the circle, (which happens whenever one
- * circle is not entirely contained within the other).
- *
- * If we solve for a value of t that is outside of [0.0, 1.0] then we
- * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
- * value within [0.0, 1.0].
+ * (c₂,r₂) that define the gradient itself.
*
- * Here is an illustration of the problem:
+ * Mathematically the gradient can be defined as the family of circles
*
- * p₂
- * p •
- * • ╲
- * · ╲r₂
- * p₁ · ╲
- * • θ╲
- * ╲ ╌╌•
- * ╲r₁ · c₂
- * θ╲ ·
- * ╌╌•
- * c₁
+ * ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
*
- * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
- * points p₁ and p₂ on the two circles are collinear with p. Then, the
- * desired value of t is the ratio of the length of p₁p to the length
- * of p₁p₂.
+ * excluding those circles whose radius would be < 0. When a point
+ * belongs to more than one circle, the one with a bigger t is the only
+ * one that contributes to its color. When a point does not belong
+ * to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0).
+ * Further limitations on the range of values for t are imposed when
+ * the gradient is not repeated, namely t must belong to [0,1].
*
- * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
- * We can also write six equations that constrain the problem:
+ * The graphical result is the same as drawing the valid (radius > 0)
+ * circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
+ * is not repeated) using SOURCE operator composition.
*
- * Point p₁ is a distance r₁ from c₁ at an angle of θ:
+ * It looks like a cone pointing towards the viewer if the ending circle
+ * is smaller than the starting one, a cone pointing inside the page if
+ * the starting circle is the smaller one and like a cylinder if they
+ * have the same radius.
*
- * 1. p₁x = c₁x + r₁·cos θ
- * 2. p₁y = c₁y + r₁·sin θ
+ * What we actually do is, given the point whose color we are interested
+ * in, compute the t values for that point, solving for t in:
*
- * Point p₂ is a distance r₂ from c₂ at an angle of θ:
+ * length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂
+ *
+ * Let's rewrite it in a simpler way, by defining some auxiliary
+ * variables:
*
- * 3. p₂x = c₂x + r2·cos θ
- * 4. p₂y = c₂y + r2·sin θ
+ * cd = c₂ - c₁
+ * pd = p - c₁
+ * dr = r₂ - r₁
+ * length(t·cd - pd) = r₁ + t·dr
*
- * Point p lies at a fraction t along the line segment p₁p₂:
+ * which actually means
*
- * 5. px = t·p₂x + (1-t)·p₁x
- * 6. py = t·p₂y + (1-t)·p₁y
+ * hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr
*
- * To solve, first subtitute 1-4 into 5 and 6:
+ * or
*
- * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
- * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
+ * ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr.
*
- * Then solve each for cos θ and sin θ expressed as a function of t:
+ * If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes:
*
- * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
- * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
+ * (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)²
*
- * To simplify this a bit, we define new variables for several of the
- * common terms as shown below:
+ * where we can actually expand the squares and solve for t:
*
- * p₂
- * p •
- * • ╲
- * · ┆ ╲r₂
- * p₁ · ┆ ╲
- * • pdy┆ ╲
- * ╲ ┆ •c₂
- * ╲r₁ ┆ · ┆
- * ╲ ·┆ ┆cdy
- * •╌╌╌╌┴╌╌╌╌╌╌╌┘
- * c₁ pdx cdx
+ * t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² =
+ * = r₁² + 2·r₁·t·dr + t²·dr²
*
- * cdx = (c₂x - c₁x)
- * cdy = (c₂y - c₁y)
- * dr = r₂-r₁
- * pdx = px - c₁x
- * pdy = py - c₁y
+ * (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t +
+ * (pdx² + pdy² - r₁²) = 0
*
- * Note that cdx, cdy, and dr do not depend on point p at all, so can
- * be pre-computed for the entire gradient. The simplifed equations
- * are now:
+ * A = cdx² + cdy² - dr²
+ * B = pdx·cdx + pdy·cdy + r₁·dr
+ * C = pdx² + pdy² - r₁²
+ * At² - 2Bt + C = 0
+ *
+ * The solutions (unless the equation degenerates because of A = 0) are:
*
- * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
- * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
+ * t = (B ± ⎷(B² - A·C)) / A
*
- * Finally, to get a single function of t and eliminate the last
- * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
- * each equation, (we knew a quadratic was coming since it must be
- * possible to obtain two solutions in some cases):
+ * The solution we are going to prefer is the bigger one, unless the
+ * radius associated to it is negative (or it falls outside the valid t
+ * range).
*
- * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
- * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
+ * Additional observations (useful for optimizations):
+ * A does not depend on p
*
- * Then add both together, set the result equal to 1, and express as a
- * standard quadratic equation in t of the form At² + Bt + C = 0
- *
- * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
- *
- * In other words:
- *
- * A = cdx² + cdy² - dr²
- * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
- * C = pdx² + pdy² - r₁²
- *
- * And again, notice that A does not depend on p, so can be
- * precomputed. From here we just use the quadratic formula to solve
- * for t:
- *
- * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A
+ * A < 0 <=> one of the two circles completely contains the other one
+ * <=> for every p, the radiuses associated with the two t solutions
+ * have opposite sign
*/
gradient_t *gradient = (gradient_t *)image;
@@ -161,153 +223,150 @@
radial_gradient_t *radial = (radial_gradient_t *)image;
uint32_t *end = buffer + width;
pixman_gradient_walker_t walker;
- pixman_bool_t affine = TRUE;
- double cx = 1.;
- double cy = 0.;
- double cz = 0.;
- double rx = x + 0.5;
- double ry = y + 0.5;
- double rz = 1.;
+ pixman_vector_t v, unit;
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
_pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
if (source->common.transform)
{
- pixman_vector_t v;
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
if (!pixman_transform_point_3d (source->common.transform, &v))
return;
-
- cx = source->common.transform->matrix[0][0] / 65536.;
- cy = source->common.transform->matrix[1][0] / 65536.;
- cz = source->common.transform->matrix[2][0] / 65536.;
- rx = v.vector[0] / 65536.;
- ry = v.vector[1] / 65536.;
- rz = v.vector[2] / 65536.;
-
- affine =
- source->common.transform->matrix[2][0] == 0 &&
- v.vector[2] == pixman_fixed_1;
+ unit.vector[0] = source->common.transform->matrix[0][0];
+ unit.vector[1] = source->common.transform->matrix[1][0];
+ unit.vector[2] = source->common.transform->matrix[2][0];
+ }
+ else
+ {
+ unit.vector[0] = pixman_fixed_1;
+ unit.vector[1] = 0;
+ unit.vector[2] = 0;
}
- if (affine)
+ if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)
{
- /* When computing t over a scanline, we notice that some expressions
- * are constant so we can compute them just once. Given:
+ /*
+ * Given:
*
- * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A
+ * t = (B ± ⎷(B² - A·C)) / A
*
* where
*
- * A = cdx² + cdy² - dr² [precomputed as radial->A]
- * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
+ * A = cdx² + cdy² - dr²
+ * B = pdx·cdx + pdy·cdy + r₁·dr
* C = pdx² + pdy² - r₁²
+ * det = B² - A·C
*
* Since we have an affine transformation, we know that (pdx, pdy)
* increase linearly with each pixel,
*
- * pdx = pdx₀ + n·cx,
- * pdy = pdy₀ + n·cy,
- *
- * we can then express B in terms of an linear increment along
- * the scanline:
+ * pdx = pdx₀ + n·ux,
+ * pdy = pdy₀ + n·uy,
*
- * B = B₀ + n·cB, with
- * B₀ = -2·(pdx₀·cdx + pdy₀·cdy + r₁·dr) and
- * cB = -2·(cx·cdx + cy·cdy)
- *
- * Thus we can replace the full evaluation of B per-pixel (4 multiplies,
- * 2 additions) with a single addition.
+ * we can then express B, C and det through multiple differentiation.
+ */
+ pixman_fixed_32_32_t b, db, c, dc, ddc;
+
+ /* warning: this computation may overflow */
+ v.vector[0] -= radial->c1.x;
+ v.vector[1] -= radial->c1.y;
+
+ /*
+ * B and C are computed and updated exactly.
+ * If fdot was used instead of dot, in the worst case it would
+ * lose 11 bits of precision in each of the multiplications and
+ * summing up would zero out all the bits that were preserved,
+ * thus making the result 0 instead of the correct one.
+ * This would mean a worst case of unbounded relative error or
+ * about 2^10 absolute error
*/
- double r1 = radial->c1.radius / 65536.;
- double r1sq = r1 * r1;
- double pdx = rx - radial->c1.x / 65536.;
- double pdy = ry - radial->c1.y / 65536.;
- double A = radial->A;
- double invA = -65536. / (2. * A);
- double A4 = -4. * A;
- double B = -2. * (pdx*radial->cdx + pdy*radial->cdy + r1*radial->dr);
- double cB = -2. * (cx*radial->cdx + cy*radial->cdy);
- pixman_bool_t invert = A * radial->dr < 0;
+ b = dot (v.vector[0], v.vector[1], radial->c1.radius,
+ radial->delta.x, radial->delta.y, radial->delta.radius);
+ db = dot (unit.vector[0], unit.vector[1], 0,
+ radial->delta.x, radial->delta.y, 0);
+
+ c = dot (v.vector[0], v.vector[1],
+ -((pixman_fixed_48_16_t) radial->c1.radius),
+ v.vector[0], v.vector[1], radial->c1.radius);
+ dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0],
+ 2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1],
+ 0,
+ unit.vector[0], unit.vector[1], 0);
+ ddc = 2 * dot (unit.vector[0], unit.vector[1], 0,
+ unit.vector[0], unit.vector[1], 0);
while (buffer < end)
{
if (!mask || *mask++)
{
- pixman_fixed_48_16_t t;
- double det = B * B + A4 * (pdx * pdx + pdy * pdy - r1sq);
- if (det <= 0.)
- t = (pixman_fixed_48_16_t) (B * invA);
- else if (invert)
- t = (pixman_fixed_48_16_t) ((B + sqrt (det)) * invA);
- else
- t = (pixman_fixed_48_16_t) ((B - sqrt (det)) * invA);
-
- *buffer = _pixman_gradient_walker_pixel (&walker, t);
+ *buffer = radial_compute_color (radial->a, b, c,
+ radial->inva,
+ radial->delta.radius,
+ radial->mindr,
+ &walker,
+ source->common.repeat);
}
- ++buffer;
- pdx += cx;
- pdy += cy;
- B += cB;
+ b += db;
+ c += dc;
+ dc += ddc;
+ ++buffer;
}
}
else
{
/* projective */
+ /* Warning:
+ * error propagation guarantees are much looser than in the affine case
+ */
while (buffer < end)
{
if (!mask || *mask++)
{
- double pdx, pdy;
- double B, C;
- double det;
- double c1x = radial->c1.x / 65536.0;
- double c1y = radial->c1.y / 65536.0;
- double r1 = radial->c1.radius / 65536.0;
- pixman_fixed_48_16_t t;
- double x, y;
-
- if (rz != 0)
- {
- x = rx / rz;
- y = ry / rz;
- }
- else
+ if (v.vector[2] != 0)
{
- x = y = 0.;
- }
+ double pdx, pdy, invv2, b, c;
- pdx = x - c1x;
- pdy = y - c1y;
+ invv2 = 1. * pixman_fixed_1 / v.vector[2];
- B = -2 * (pdx * radial->cdx +
- pdy * radial->cdy +
- r1 * radial->dr);
- C = (pdx * pdx + pdy * pdy - r1 * r1);
-
- det = (B * B) - (4 * radial->A * C);
- if (det < 0.0)
- det = 0.0;
+ pdx = v.vector[0] * invv2 - radial->c1.x;
+ /* / pixman_fixed_1 */
- if (radial->A * radial->dr < 0)
- t = (pixman_fixed_48_16_t) ((-B - sqrt (det)) / (2.0 * radial->A) * 65536);
+ pdy = v.vector[1] * invv2 - radial->c1.y;
+ /* / pixman_fixed_1 */
+
+ b = fdot (pdx, pdy, radial->c1.radius,
+ radial->delta.x, radial->delta.y,
+ radial->delta.radius);
+ /* / pixman_fixed_1 / pixman_fixed_1 */
+
+ c = fdot (pdx, pdy, -radial->c1.radius,
+ pdx, pdy, radial->c1.radius);
+ /* / pixman_fixed_1 / pixman_fixed_1 */
+
+ *buffer = radial_compute_color (radial->a, b, c,
+ radial->inva,
+ radial->delta.radius,
+ radial->mindr,
+ &walker,
+ source->common.repeat);
+ }
else
- t = (pixman_fixed_48_16_t) ((-B + sqrt (det)) / (2.0 * radial->A) * 65536);
-
- *buffer = _pixman_gradient_walker_pixel (&walker, t);
+ {
+ *buffer = 0;
+ }
}
++buffer;
- rx += cx;
- ry += cy;
- rz += cz;
+ v.vector[0] += unit.vector[0];
+ v.vector[1] += unit.vector[1];
+ v.vector[2] += unit.vector[2];
}
}
}
@@ -351,12 +410,20 @@
radial->c2.x = outer->x;
radial->c2.y = outer->y;
radial->c2.radius = outer_radius;
- radial->cdx = pixman_fixed_to_double (radial->c2.x - radial->c1.x);
- radial->cdy = pixman_fixed_to_double (radial->c2.y - radial->c1.y);
- radial->dr = pixman_fixed_to_double (radial->c2.radius - radial->c1.radius);
- radial->A = (radial->cdx * radial->cdx +
- radial->cdy * radial->cdy -
- radial->dr * radial->dr);
+
+ /* warning: these computations may overflow */
+ radial->delta.x = radial->c2.x - radial->c1.x;
+ radial->delta.y = radial->c2.y - radial->c1.y;
+ radial->delta.radius = radial->c2.radius - radial->c1.radius;
+
+ /* computed exactly, then cast to double -> every bit of the double
+ representation is correct (53 bits) */
+ radial->a = dot (radial->delta.x, radial->delta.y, -radial->delta.radius,
+ radial->delta.x, radial->delta.y, radial->delta.radius);
+ if (radial->a != 0)
+ radial->inva = 1. * pixman_fixed_1 / radial->a;
+
+ radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius;
image->common.property_changed = radial_gradient_property_changed;
--- pixman/pixman-solid-fill.c
+++ pixman/pixman-solid-fill.c
@@ -66,7 +66,7 @@
int width,
int height)
{
- return (image->source.class = SOURCE_IMAGE_CLASS_HORIZONTAL);
+ return SOURCE_IMAGE_CLASS_HORIZONTAL;
}
static void
@@ -109,7 +109,6 @@
img->solid.color_32 = color_to_uint32 (color);
img->solid.color_64 = color_to_uint64 (color);
- img->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
img->common.classify = solid_fill_classify;
img->common.property_changed = solid_fill_property_changed;
--- pixman/pixman-sse2.c
+++ pixman/pixman-sse2.c
@@ -357,34 +357,6 @@
over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
}
-static force_inline void
-cache_prefetch (__m128i* addr)
-{
- _mm_prefetch ((void const*)addr, _MM_HINT_T0);
-}
-
-static force_inline void
-cache_prefetch_next (__m128i* addr)
-{
- _mm_prefetch ((void const *)(addr + 4), _MM_HINT_T0); /* 64 bytes ahead */
-}
-
-/* prefetching NULL is very slow on some systems. don't do that. */
-
-static force_inline void
-maybe_prefetch (__m128i* addr)
-{
- if (addr)
- cache_prefetch (addr);
-}
-
-static force_inline void
-maybe_prefetch_next (__m128i* addr)
-{
- if (addr)
- cache_prefetch_next (addr);
-}
-
/* load 4 pixels from a 16-byte boundary aligned address */
static force_inline __m128i
load_128_aligned (__m128i* src)
@@ -649,11 +621,6 @@
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
/* Align dst on a 16-byte boundary */
while (w && ((unsigned long)pd & 15))
{
@@ -667,18 +634,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
/* I'm loading unaligned because I'm not sure about
* the address alignment.
*/
@@ -740,11 +697,6 @@
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
/* Align dst on a 16-byte boundary */
while (w &&
((unsigned long)pd & 15))
@@ -759,18 +711,8 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
/* I'm loading unaligned because I'm not sure
* about the address alignment.
*/
@@ -842,11 +784,6 @@
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && ((unsigned long) pd & 15))
{
s = combine1 (ps, pm);
@@ -859,18 +796,8 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
@@ -916,11 +843,6 @@
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && ((unsigned long) pd & 15))
{
s = combine1 (ps, pm);
@@ -933,18 +855,8 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
@@ -985,11 +897,6 @@
const uint32_t* pm,
int w)
{
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && ((unsigned long) pd & 15))
{
uint32_t s = combine1 (ps, pm);
@@ -1006,21 +913,11 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1067,11 +964,6 @@
const uint32_t* pm,
int w)
{
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && ((unsigned long) pd & 15))
{
uint32_t s = combine1 (ps, pm);
@@ -1087,21 +979,11 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1167,11 +1049,6 @@
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && ((unsigned long) pd & 15))
{
s = combine1 (ps, pm);
@@ -1184,18 +1061,8 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1264,11 +1131,6 @@
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && ((unsigned long) pd & 15))
{
s = combine1 (ps, pm);
@@ -1281,18 +1143,8 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1365,11 +1217,6 @@
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && ((unsigned long) pd & 15))
{
s = combine1 (ps, pm);
@@ -1382,18 +1229,8 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
xmm_dst = load_128_aligned ((__m128i*) pd);
@@ -1450,11 +1287,6 @@
const uint32_t* ps = src;
const uint32_t* pm = mask;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = combine1 (ps, pm);
@@ -1468,20 +1300,10 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
__m128i s;
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
s = combine4 ((__m128i*)ps, (__m128i*)pm);
save_128_aligned (
@@ -1536,11 +1358,6 @@
uint32_t pack_cmp;
__m128i xmm_src, xmm_dst;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = combine1 (ps, pm);
@@ -1553,18 +1370,8 @@
pm++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- maybe_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- maybe_prefetch_next ((__m128i*)pm);
-
xmm_dst = load_128_aligned ((__m128i*)pd);
xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
@@ -1637,11 +1444,6 @@
__m128i xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -1651,18 +1453,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -1718,11 +1510,6 @@
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -1733,18 +1520,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -1807,11 +1584,6 @@
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -1822,18 +1594,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -1885,11 +1647,6 @@
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -1904,18 +1661,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -1973,11 +1720,6 @@
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -1992,18 +1734,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -2059,11 +1791,6 @@
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -2078,18 +1805,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -2148,11 +1865,6 @@
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -2168,18 +1880,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -2258,11 +1960,6 @@
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -2273,18 +1970,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -2364,11 +2051,6 @@
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -2379,18 +2061,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -2473,11 +2145,6 @@
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -2488,18 +2155,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
@@ -2562,11 +2219,6 @@
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
s = *ps++;
@@ -2580,18 +2232,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)ps);
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)ps);
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
@@ -2971,9 +2613,6 @@
{
dst = dst_line;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
dst_line += dst_stride;
w = width;
@@ -2986,13 +2625,8 @@
w--;
}
- cache_prefetch ((__m128i*)dst);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)dst);
-
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
@@ -3062,9 +2696,6 @@
{
dst = dst_line;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
dst_line += dst_stride;
w = width;
@@ -3079,14 +2710,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
while (w >= 8)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)dst);
-
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_565_128_4x128 (xmm_dst,
@@ -3177,10 +2802,6 @@
dst_line += dst_stride;
mask_line += mask_stride;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
m = *pm++;
@@ -3200,16 +2821,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_mask = load_128_unaligned ((__m128i*)pm);
pack_cmp =
@@ -3316,10 +2929,6 @@
dst_line += dst_stride;
mask_line += mask_stride;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w && (unsigned long)pd & 15)
{
m = *pm++;
@@ -3340,16 +2949,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
-
xmm_mask = load_128_unaligned ((__m128i*)pm);
pack_cmp =
@@ -3447,10 +3048,6 @@
src_line += src_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)src);
-
while (w && (unsigned long)dst & 15)
{
uint32_t s = *src++;
@@ -3467,16 +3064,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)src);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)dst);
- cache_prefetch_next ((__m128i*)src);
-
xmm_src = load_128_unaligned ((__m128i*)src);
xmm_dst = load_128_aligned ((__m128i*)dst);
@@ -3556,25 +3145,16 @@
src_line += src_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
-
while (w && (unsigned long)dst & 15)
{
*dst++ = *src++ | 0xff000000;
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
-
while (w >= 16)
{
__m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)src);
-
xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
@@ -3646,10 +3226,6 @@
src_line += src_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)src);
-
while (w && (unsigned long)dst & 15)
{
uint32_t s = (*src++) | 0xff000000;
@@ -3666,16 +3242,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)src);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)dst);
- cache_prefetch_next ((__m128i*)src);
-
xmm_src = _mm_or_si128 (
load_128_unaligned ((__m128i*)src), mask_ff000000);
xmm_dst = load_128_aligned ((__m128i*)dst);
@@ -3815,10 +3383,6 @@
dst = dst_line;
src = src_line;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
dst_line += dst_stride;
src_line += src_stride;
w = width;
@@ -3834,17 +3398,9 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
/* It's a 8 pixel loop */
while (w >= 8)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)src);
- cache_prefetch_next ((__m128i*)dst);
-
/* I'm loading unaligned because I'm not sure
* about the address alignment.
*/
@@ -3954,10 +3510,6 @@
mask_line += mask_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w && (unsigned long)dst & 15)
{
uint8_t m = *mask++;
@@ -3978,16 +3530,8 @@
dst++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)mask);
- cache_prefetch_next ((__m128i*)dst);
-
m = *((uint32_t*)mask);
if (srca == 0xff && m == 0xffffffff)
@@ -4099,7 +3643,6 @@
return FALSE;
}
- cache_prefetch ((__m128i*)byte_line);
xmm_def = create_mask_2x32_128 (data, data);
while (height--)
@@ -4109,8 +3652,6 @@
byte_line += stride;
w = byte_width;
- cache_prefetch_next ((__m128i*)d);
-
while (w >= 1 && ((unsigned long)d & 1))
{
*(uint8_t *)d = data;
@@ -4133,12 +3674,8 @@
d += 4;
}
- cache_prefetch_next ((__m128i*)d);
-
while (w >= 128)
{
- cache_prefetch (((__m128i*)d) + 12);
-
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
save_128_aligned ((__m128i*)(d + 32), xmm_def);
@@ -4154,8 +3691,6 @@
if (w >= 64)
{
- cache_prefetch (((__m128i*)d) + 8);
-
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
save_128_aligned ((__m128i*)(d + 32), xmm_def);
@@ -4165,8 +3700,6 @@
w -= 64;
}
- cache_prefetch_next ((__m128i*)d);
-
if (w >= 32)
{
save_128_aligned ((__m128i*)(d), xmm_def);
@@ -4184,8 +3717,6 @@
w -= 16;
}
- cache_prefetch_next ((__m128i*)d);
-
while (w >= 4)
{
*(uint32_t *)d = data;
@@ -4265,10 +3796,6 @@
mask_line += mask_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w && (unsigned long)dst & 15)
{
uint8_t m = *mask++;
@@ -4288,16 +3815,8 @@
dst++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)mask);
- cache_prefetch_next ((__m128i*)dst);
-
m = *((uint32_t*)mask);
if (srca == 0xff && m == 0xffffffff)
@@ -4410,10 +3929,6 @@
mask_line += mask_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w && (unsigned long)dst & 15)
{
m = *mask++;
@@ -4434,16 +3949,8 @@
dst++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 8)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)mask);
- cache_prefetch_next ((__m128i*)dst);
-
xmm_dst = load_128_aligned ((__m128i*) dst);
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
@@ -4570,10 +4077,6 @@
src_line += src_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
while (w && (unsigned long)dst & 15)
{
s = *src++;
@@ -4587,16 +4090,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 8)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)src);
- cache_prefetch_next ((__m128i*)dst);
-
/* First round */
xmm_src = load_128_unaligned ((__m128i*)src);
xmm_dst = load_128_aligned ((__m128i*)dst);
@@ -4715,10 +4210,6 @@
src_line += src_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
while (w && (unsigned long)dst & 15)
{
s = *src++;
@@ -4731,16 +4222,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)src);
- cache_prefetch_next ((__m128i*)dst);
-
xmm_src_hi = load_128_unaligned ((__m128i*)src);
opaque = is_opaque (xmm_src_hi);
@@ -4845,10 +4328,6 @@
mask_line += mask_stride;
dst_line += dst_stride;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w && ((unsigned long)dst & 15))
{
m = *(uint32_t *) mask;
@@ -4870,16 +4349,8 @@
mask++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 8)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)mask);
- cache_prefetch_next ((__m128i*)dst);
-
/* First round */
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
@@ -5001,10 +4472,6 @@
mask_line += mask_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w && ((unsigned long)dst & 15))
{
m = (uint32_t) *mask++;
@@ -5018,16 +4485,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 16)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)mask);
- cache_prefetch_next ((__m128i*)dst);
-
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
@@ -5121,9 +4580,6 @@
dst_line += dst_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
while (w && ((unsigned long)dst & 15))
{
d = (uint32_t) *dst;
@@ -5135,14 +4591,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
while (w >= 16)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)dst);
-
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
@@ -5214,10 +4664,6 @@
src_line += src_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
while (w && ((unsigned long)dst & 15))
{
s = (uint32_t) *src++;
@@ -5229,16 +4675,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 16)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)src);
- cache_prefetch_next ((__m128i*)dst);
-
xmm_src = load_128_unaligned ((__m128i*)src);
xmm_dst = load_128_aligned ((__m128i*)dst);
@@ -5321,10 +4759,6 @@
mask_line += mask_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w && ((unsigned long)dst & 15))
{
m = (uint32_t) *mask++;
@@ -5338,16 +4772,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)mask);
- cache_prefetch ((__m128i*)dst);
-
while (w >= 16)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)mask);
- cache_prefetch_next ((__m128i*)dst);
-
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
@@ -5440,9 +4866,6 @@
dst_line += dst_stride;
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
while (w && ((unsigned long)dst & 15))
{
*dst = (uint8_t)_mm_cvtsi64_si32 (
@@ -5454,14 +4877,8 @@
dst++;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
while (w >= 16)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)dst);
-
save_128_aligned (
(__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
@@ -5485,23 +4902,23 @@
}
/* ----------------------------------------------------------------------
- * composite_add_8000_8000
+ * composite_add_8_8
*/
static void
-sse2_composite_add_8000_8000 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+sse2_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
@@ -5519,10 +4936,6 @@
dst = dst_line;
src = src_line;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
-
dst_line += dst_stride;
src_line += src_stride;
w = width;
@@ -5644,9 +5057,6 @@
return FALSE;
}
- cache_prefetch ((__m128i*)src_bytes);
- cache_prefetch ((__m128i*)dst_bytes);
-
while (height--)
{
int w;
@@ -5656,9 +5066,6 @@
dst_bytes += dst_stride;
w = byte_width;
- cache_prefetch_next ((__m128i*)s);
- cache_prefetch_next ((__m128i*)d);
-
while (w >= 2 && ((unsigned long)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
@@ -5676,17 +5083,10 @@
d += 4;
}
- cache_prefetch_next ((__m128i*)s);
- cache_prefetch_next ((__m128i*)d);
-
while (w >= 64)
{
__m128i xmm0, xmm1, xmm2, xmm3;
- /* 128 bytes ahead */
- cache_prefetch (((__m128i*)s) + 8);
- cache_prefetch (((__m128i*)d) + 8);
-
xmm0 = load_128_unaligned ((__m128i*)(s));
xmm1 = load_128_unaligned ((__m128i*)(s + 16));
xmm2 = load_128_unaligned ((__m128i*)(s + 32));
@@ -5702,9 +5102,6 @@
w -= 64;
}
- cache_prefetch_next ((__m128i*)s);
- cache_prefetch_next ((__m128i*)d);
-
while (w >= 16)
{
save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
@@ -5714,9 +5111,6 @@
s += 16;
}
- cache_prefetch_next ((__m128i*)s);
- cache_prefetch_next ((__m128i*)d);
-
while (w >= 4)
{
*(uint32_t *)d = *(uint32_t *)s;
@@ -5809,11 +5203,6 @@
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)mask);
-
while (w && (unsigned long)dst & 15)
{
s = 0xff000000 | *src++;
@@ -5833,18 +5222,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)mask);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)src);
- cache_prefetch_next ((__m128i*)dst);
- cache_prefetch_next ((__m128i*)mask);
-
m = *(uint32_t*) mask;
xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
@@ -5955,11 +5334,6 @@
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i *)src);
- cache_prefetch ((__m128i *)dst);
- cache_prefetch ((__m128i *)mask);
-
while (w && (unsigned long)dst & 15)
{
uint32_t sa;
@@ -5994,18 +5368,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i *)src);
- cache_prefetch ((__m128i *)dst);
- cache_prefetch ((__m128i *)mask);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i *)src);
- cache_prefetch_next ((__m128i *)dst);
- cache_prefetch_next ((__m128i *)mask);
-
m = *(uint32_t *) mask;
if (m)
@@ -6117,9 +5481,6 @@
{
dst = dst_line;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)dst);
-
dst_line += dst_stride;
w = width;
@@ -6135,15 +5496,10 @@
dst++;
}
- cache_prefetch ((__m128i*)dst);
-
while (w >= 4)
{
__m128i tmp_lo, tmp_hi;
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)(dst + 4));
-
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
@@ -6224,11 +5580,6 @@
w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i *)src);
- cache_prefetch ((__m128i *)dst);
- cache_prefetch ((__m128i *)mask);
-
while (w && (unsigned long)dst & 15)
{
uint32_t sa;
@@ -6263,18 +5614,8 @@
w--;
}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i *)src);
- cache_prefetch ((__m128i *)dst);
- cache_prefetch ((__m128i *)mask);
-
while (w >= 4)
{
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i *)src);
- cache_prefetch_next ((__m128i *)dst);
- cache_prefetch_next ((__m128i *)mask);
-
xmm_mask = load_128_unaligned ((__m128i*)mask);
if (!is_transparent (xmm_mask))
@@ -6504,7 +5845,7 @@
/* PIXMAN_OP_ADD */
PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
--- pixman/pixman-version.h
+++ pixman/pixman-version.h
@@ -32,10 +32,10 @@
#endif
#define PIXMAN_VERSION_MAJOR 0
-#define PIXMAN_VERSION_MINOR 19
-#define PIXMAN_VERSION_MICRO 4
+#define PIXMAN_VERSION_MINOR 20
+#define PIXMAN_VERSION_MICRO 0
-#define PIXMAN_VERSION_STRING "0.19.4"
+#define PIXMAN_VERSION_STRING "0.20.0"
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \
--- pixman/pixman.c
+++ pixman/pixman.c
@@ -377,126 +377,6 @@
return TRUE;
}
-static void
-walk_region_internal (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height,
- pixman_bool_t src_repeat,
- pixman_bool_t mask_repeat,
- pixman_region32_t * region,
- pixman_composite_func_t composite_rect)
-{
- int w, h, w_this, h_this;
- int x_msk, y_msk, x_src, y_src, x_dst, y_dst;
- int src_dy = src_y - dest_y;
- int src_dx = src_x - dest_x;
- int mask_dy = mask_y - dest_y;
- int mask_dx = mask_x - dest_x;
- const pixman_box32_t *pbox;
- int n;
-
- pbox = pixman_region32_rectangles (region, &n);
-
- /* Fast path for non-repeating sources */
- if (!src_repeat && !mask_repeat)
- {
- while (n--)
- {
- (*composite_rect) (imp, op,
- src_image, mask_image, dst_image,
- pbox->x1 + src_dx,
- pbox->y1 + src_dy,
- pbox->x1 + mask_dx,
- pbox->y1 + mask_dy,
- pbox->x1,
- pbox->y1,
- pbox->x2 - pbox->x1,
- pbox->y2 - pbox->y1);
-
- pbox++;
- }
-
- return;
- }
-
- while (n--)
- {
- h = pbox->y2 - pbox->y1;
- y_src = pbox->y1 + src_dy;
- y_msk = pbox->y1 + mask_dy;
- y_dst = pbox->y1;
-
- while (h)
- {
- h_this = h;
- w = pbox->x2 - pbox->x1;
- x_src = pbox->x1 + src_dx;
- x_msk = pbox->x1 + mask_dx;
- x_dst = pbox->x1;
-
- if (mask_repeat)
- {
- y_msk = MOD (y_msk, mask_image->bits.height);
- if (h_this > mask_image->bits.height - y_msk)
- h_this = mask_image->bits.height - y_msk;
- }
-
- if (src_repeat)
- {
- y_src = MOD (y_src, src_image->bits.height);
- if (h_this > src_image->bits.height - y_src)
- h_this = src_image->bits.height - y_src;
- }
-
- while (w)
- {
- w_this = w;
-
- if (mask_repeat)
- {
- x_msk = MOD (x_msk, mask_image->bits.width);
- if (w_this > mask_image->bits.width - x_msk)
- w_this = mask_image->bits.width - x_msk;
- }
-
- if (src_repeat)
- {
- x_src = MOD (x_src, src_image->bits.width);
- if (w_this > src_image->bits.width - x_src)
- w_this = src_image->bits.width - x_src;
- }
-
- (*composite_rect) (imp, op,
- src_image, mask_image, dst_image,
- x_src, y_src, x_msk, y_msk, x_dst, y_dst,
- w_this, h_this);
- w -= w_this;
-
- x_src += w_this;
- x_msk += w_this;
- x_dst += w_this;
- }
-
- h -= h_this;
- y_src += h_this;
- y_msk += h_this;
- y_dst += h_this;
- }
-
- pbox++;
- }
-}
-
#define N_CACHED_FAST_PATHS 8
typedef struct
@@ -746,7 +626,7 @@
extents->x2 - x <= image->bits.width &&
extents->y2 - y <= image->bits.height)
{
- *flags |= (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_COVERS_CLIP);
+ *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
return TRUE;
}
@@ -789,7 +669,7 @@
ex.x1 >= 0 && ex.y1 >= 0 &&
ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
{
- *flags |= (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_COVERS_CLIP);
+ *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
}
}
else
@@ -949,14 +829,26 @@
dest_format, dest_flags,
&imp, &func))
{
- walk_region_internal (imp, op,
- src, mask, dest,
- src_x, src_y, mask_x, mask_y,
- dest_x, dest_y,
- width, height,
- (src_flags & FAST_PATH_SIMPLE_REPEAT),
- (mask_flags & FAST_PATH_SIMPLE_REPEAT),
- &region, func);
+ const pixman_box32_t *pbox;
+ int n;
+
+ pbox = pixman_region32_rectangles (&region, &n);
+
+ while (n--)
+ {
+ func (imp, op,
+ src, mask, dest,
+ pbox->x1 + src_x - dest_x,
+ pbox->y1 + src_y - dest_y,
+ pbox->x1 + mask_x - dest_x,
+ pbox->y1 + mask_y - dest_y,
+ pbox->x1,
+ pbox->y1,
+ pbox->x2 - pbox->x1,
+ pbox->y2 - pbox->y1);
+
+ pbox++;
+ }
}
out:
--- test/Makefile.am
+++ test/Makefile.am
@@ -23,7 +23,6 @@
a1_trap_test_LDADD = $(TEST_LDADD)
fetch_test_LDADD = $(TEST_LDADD)
-composite_LDADD = $(TEST_LDADD)
gradient_crash_test_LDADD = $(TEST_LDADD)
trap_crasher_LDADD = $(TEST_LDADD)
oob_test_LDADD = $(TEST_LDADD)
@@ -49,6 +48,9 @@
alpha_loop_LDADD = $(TEST_LDADD)
alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
+composite_LDADD = $(TEST_LDADD)
+composite_SOURCES = composite.c utils.c utils.h
+
# GTK using test programs
if HAVE_GTK
--- test/Makefile.in
+++ test/Makefile.in
@@ -96,8 +96,8 @@
@HAVE_GTK_TRUE@ $(am__objects_1)
clip_test_OBJECTS = $(am_clip_test_OBJECTS)
@HAVE_GTK_TRUE@clip_test_DEPENDENCIES = $(am__DEPENDENCIES_2)
-composite_SOURCES = composite.c
-composite_OBJECTS = composite.$(OBJEXT)
+am_composite_OBJECTS = composite.$(OBJEXT) utils.$(OBJEXT)
+composite_OBJECTS = $(am_composite_OBJECTS)
composite_DEPENDENCIES = $(TEST_LDADD)
am__composite_test_SOURCES_DIST = composite-test.c gtk-utils.c \
gtk-utils.h
@@ -187,10 +187,11 @@
SOURCES = a1-trap-test.c $(affine_test_SOURCES) $(alpha_loop_SOURCES) \
$(alpha_test_SOURCES) $(alphamap_SOURCES) \
$(blitters_test_SOURCES) $(clip_in_SOURCES) \
- $(clip_test_SOURCES) composite.c $(composite_test_SOURCES) \
- $(convolution_test_SOURCES) fetch-test.c gradient-crash-test.c \
- $(gradient_test_SOURCES) $(lowlevel_blt_bench_SOURCES) \
- oob-test.c $(region_test_SOURCES) region-translate-test.c \
+ $(clip_test_SOURCES) $(composite_SOURCES) \
+ $(composite_test_SOURCES) $(convolution_test_SOURCES) \
+ fetch-test.c gradient-crash-test.c $(gradient_test_SOURCES) \
+ $(lowlevel_blt_bench_SOURCES) oob-test.c \
+ $(region_test_SOURCES) region-translate-test.c \
scaling-crash-test.c $(scaling_test_SOURCES) \
$(screen_test_SOURCES) trap-crasher.c $(trap_test_SOURCES) \
window-test.c
@@ -198,7 +199,7 @@
$(alpha_loop_SOURCES) $(am__alpha_test_SOURCES_DIST) \
$(alphamap_SOURCES) $(blitters_test_SOURCES) \
$(am__clip_in_SOURCES_DIST) $(am__clip_test_SOURCES_DIST) \
- composite.c $(am__composite_test_SOURCES_DIST) \
+ $(composite_SOURCES) $(am__composite_test_SOURCES_DIST) \
$(am__convolution_test_SOURCES_DIST) fetch-test.c \
gradient-crash-test.c $(am__gradient_test_SOURCES_DIST) \
$(lowlevel_blt_bench_SOURCES) oob-test.c \
@@ -373,7 +374,6 @@
a1_trap_test_LDADD = $(TEST_LDADD)
fetch_test_LDADD = $(TEST_LDADD)
-composite_LDADD = $(TEST_LDADD)
gradient_crash_test_LDADD = $(TEST_LDADD)
trap_crasher_LDADD = $(TEST_LDADD)
oob_test_LDADD = $(TEST_LDADD)
@@ -392,6 +392,8 @@
alphamap_SOURCES = alphamap.c utils.c utils.h
alpha_loop_LDADD = $(TEST_LDADD)
alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
+composite_LDADD = $(TEST_LDADD)
+composite_SOURCES = composite.c utils.c utils.h
# GTK using test programs
@HAVE_GTK_TRUE@GTK_LDADD = $(TEST_LDADD) $(GTK_LIBS)
--- test/alphamap.c
+++ test/alphamap.c
@@ -45,15 +45,29 @@
return "<unknown - bug in alphamap.c>";
}
+static void
+on_destroy (pixman_image_t *image, void *data)
+{
+ uint32_t *bits = pixman_image_get_data (image);
+
+ fence_free (bits);
+}
+
static pixman_image_t *
make_image (pixman_format_code_t format)
{
uint32_t *bits;
uint8_t bpp = PIXMAN_FORMAT_BPP (format) / 8;
+ pixman_image_t *image;
bits = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * bpp);
- return pixman_image_create_bits (format, WIDTH, HEIGHT, bits, WIDTH * bpp);
+ image = pixman_image_create_bits (format, WIDTH, HEIGHT, bits, WIDTH * bpp);
+
+ if (image && bits)
+ pixman_image_set_destroy_function (image, on_destroy, NULL);
+
+ return image;
}
static pixman_image_t *
@@ -68,6 +82,7 @@
pixman_image_set_alpha_map (image, alpha,
alpha_origin_x, alpha_origin_y);
+ pixman_image_unref (alpha);
}
return image;
@@ -203,6 +218,10 @@
}
}
+ pixman_image_set_alpha_map (src, NULL, 0, 0);
+ pixman_image_set_alpha_map (dst, NULL, 0, 0);
+ pixman_image_set_alpha_map (orig_dst, NULL, 0, 0);
+
pixman_image_unref (src);
pixman_image_unref (dst);
pixman_image_unref (orig_dst);
--- test/blitters-test.c
+++ test/blitters-test.c
@@ -465,6 +465,6 @@
}
return fuzzer_test_main("blitters", 2000000,
- 0x217CF14A,
+ 0x1DB8BDF8,
test_composite, argc, argv);
}
--- test/composite.c
+++ test/composite.c
@@ -1,6 +1,8 @@
/*
* Copyright © 2005 Eric Anholt
* Copyright © 2009 Chris Wilson
+ * Copyright © 2010 Soeren Sandmann
+ * Copyright © 2010 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
@@ -20,15 +22,14 @@
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
-
+#define PIXMAN_USE_INTERNAL_API
#include <pixman.h>
#include <stdio.h>
#include <stdlib.h> /* abort() */
#include <math.h>
#include <config.h>
-
-#define FALSE 0
-#define TRUE !FALSE
+#include <time.h>
+#include "utils.h"
#define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0])))
#define min(a,b) ((a) <= (b) ? (a) : (b))
@@ -50,14 +51,15 @@
const char *name;
};
-static color_t colors[] =
+static const color_t colors[] =
{
- /* these are premultiplied in main() */
{ 1.0, 1.0, 1.0, 1.0 },
+ { 1.0, 1.0, 1.0, 0.0 },
+ { 0.0, 0.0, 0.0, 1.0 },
+ { 0.0, 0.0, 0.0, 0.0 },
{ 1.0, 0.0, 0.0, 1.0 },
{ 0.0, 1.0, 0.0, 1.0 },
{ 0.0, 0.0, 1.0, 1.0 },
- { 0.0, 0.0, 0.0, 1.0 },
{ 0.5, 0.0, 0.0, 0.5 },
};
@@ -82,62 +84,66 @@
out->alpha = _color_double_to_short (color->a);
}
+#define REPEAT 0x01000000
+#define FLAGS 0xff000000
+
+static const int sizes[] =
+{
+ 0,
+ 1,
+ 1 | REPEAT,
+ 10
+};
+
static const format_t formats[] =
{
#define P(x) { PIXMAN_##x, #x }
- P(a8),
- /* 32bpp formats */
+ /* 32 bpp formats */
P(a8r8g8b8),
P(x8r8g8b8),
P(a8b8g8r8),
P(x8b8g8r8),
P(b8g8r8a8),
P(b8g8r8x8),
-
- /* XXX: and here the errors begin! */
-#if 0
P(x2r10g10b10),
- P(a2r10g10b10),
P(x2b10g10r10),
+ P(a2r10g10b10),
P(a2b10g10r10),
- /* 24bpp formats */
+ /* 24 bpp formats */
P(r8g8b8),
P(b8g8r8),
-
- /* 16bpp formats */
P(r5g6b5),
P(b5g6r5),
- P(a1r5g5b5),
+ /* 16 bpp formats */
P(x1r5g5b5),
- P(a1b5g5r5),
P(x1b5g5r5),
- P(a4r4g4b4),
- P(x4r4g4b4),
+ P(a1r5g5b5),
+ P(a1b5g5r5),
P(a4b4g4r4),
P(x4b4g4r4),
+ P(a4r4g4b4),
+ P(x4r4g4b4),
- /* 8bpp formats */
+ /* 8 bpp formats */
P(a8),
P(r3g3b2),
P(b2g3r3),
P(a2r2g2b2),
P(a2b2g2r2),
-
P(x4a4),
- /* 4bpp formats */
+ /* 4 bpp formats */
P(a4),
P(r1g2b1),
P(b1g2r1),
P(a1r1g1b1),
P(a1b1g1r1),
- /* 1bpp formats */
+ /* 1 bpp formats */
P(a1)
-#endif
#undef P
};
@@ -482,8 +488,9 @@
color_correct (pixman_format_code_t format,
color_t *color)
{
-#define round_pix(pix, mask) \
- ((int)((pix) * (mask) + .5) / (double) (mask))
+#define MASK(x) ((1 << (x)) - 1)
+#define round_pix(pix, m) \
+ ((int)((pix) * (MASK(m)) + .5) / (double) (MASK(m)))
if (PIXMAN_FORMAT_R (format) == 0)
{
@@ -504,6 +511,7 @@
color->a = round_pix (color->a, PIXMAN_FORMAT_A (format));
#undef round_pix
+#undef MASK
}
static void
@@ -594,18 +602,15 @@
}
static double
-eval_diff (color_t *expected, color_t *test)
+eval_diff (color_t *expected, color_t *test, pixman_format_code_t format)
{
double rscale, gscale, bscale, ascale;
double rdiff, gdiff, bdiff, adiff;
- /* XXX: Need to be provided mask shifts so we can produce useful error
- * values.
- */
- rscale = 1.0 * (1 << 5);
- gscale = 1.0 * (1 << 6);
- bscale = 1.0 * (1 << 5);
- ascale = 1.0 * 32;
+ rscale = 1.0 * ((1 << PIXMAN_FORMAT_R (format)) - 1);
+ gscale = 1.0 * ((1 << PIXMAN_FORMAT_G (format)) - 1);
+ bscale = 1.0 * ((1 << PIXMAN_FORMAT_B (format)) - 1);
+ ascale = 1.0 * ((1 << PIXMAN_FORMAT_A (format)) - 1);
rdiff = fabs (test->r - expected->r) * rscale;
bdiff = fabs (test->g - expected->g) * gscale;
@@ -699,7 +704,12 @@
&expected, component_alpha);
color_correct (dst->format->format, &expected);
- diff = eval_diff (&expected, &result);
+ diff = eval_diff (&expected, &result, dst->format->format);
+
+ /* FIXME: We should find out what deviation is acceptable. 3.0
+ * is clearly absurd for 2 bit formats for example. On the other
+ * hand currently 1.0 does not work.
+ */
if (diff > 3.0)
{
char buf[40];
@@ -717,7 +727,7 @@
result.r, result.g, result.b, result.a,
*(unsigned long *) pixman_image_get_data (dst->image),
expected.r, expected.g, expected.b, expected.a);
-
+
if (mask != NULL)
{
printf ("src color: %.2f %.2f %.2f %.2f\n"
@@ -751,9 +761,6 @@
return success;
}
-#define REPEAT 0x01000000
-#define FLAGS 0xff000000
-
static void
image_init (image_t *info,
int color,
@@ -766,7 +773,7 @@
compute_pixman_color (info->color, &fill);
info->format = &formats[format];
- info->size = size & ~FLAGS;
+ info->size = sizes[size] & ~FLAGS;
info->repeat = PIXMAN_REPEAT_NONE;
if (info->size)
@@ -800,103 +807,105 @@
pixman_image_unref (info->image);
}
-int
-main (void)
+static int
+random_size (void)
+{
+ return lcg_rand_n (ARRAY_LENGTH (sizes));
+}
+
+static int
+random_color (void)
{
- pixman_bool_t ok, group_ok = TRUE, ca;
- int i, d, m, s;
- int tests_passed = 0, tests_total = 0;
- int sizes[] = { 1, 1 | REPEAT, 10 };
- int num_tests;
+ return lcg_rand_n (ARRAY_LENGTH (colors));
+}
+
+static int
+random_format (void)
+{
+ return lcg_rand_n (ARRAY_LENGTH (formats));
+}
+
+static pixman_bool_t
+run_test (uint32_t seed)
+{
+ image_t src, mask, dst;
+ const operator_t *op;
+ int ca;
+ int ok;
+
+ lcg_srand (seed);
+
+ image_init (&dst, random_color(), random_format(), 1);
+ image_init (&src, random_color(), random_format(), random_size());
+ image_init (&mask, random_color(), random_format(), random_size());
+
+ op = &(operators [lcg_rand_n (ARRAY_LENGTH (operators))]);
+
+ ca = lcg_rand_n (3);
- for (i = 0; i < ARRAY_LENGTH (colors); i++)
+ switch (ca)
{
- colors[i].r *= colors[i].a;
- colors[i].g *= colors[i].a;
- colors[i].b *= colors[i].a;
+ case 0:
+ ok = composite_test (&dst, op, &src, NULL, FALSE);
+ break;
+ case 1:
+ ok = composite_test (&dst, op, &src, &mask, FALSE);
+ break;
+ case 2:
+ ok = composite_test (&dst, op, &src, &mask,
+ mask.size? TRUE : FALSE);
+ break;
+ default:
+ ok = FALSE;
+ break;
}
- num_tests = ARRAY_LENGTH (colors) * ARRAY_LENGTH (formats);
+ image_fini (&src);
+ image_fini (&mask);
+ image_fini (&dst);
- for (d = 0; d < num_tests; d++)
- {
- image_t dst;
+ return ok;
+}
- image_init (
- &dst, d / ARRAY_LENGTH (formats), d % ARRAY_LENGTH (formats), 1);
+int
+main (int argc, char **argv)
+{
+#define N_TESTS (8 * 1024 * 1024)
+ int result = 0;
+ int i;
+ if (argc > 1)
+ {
+ char *end;
+
+ i = strtol (argv[1], &end, 0);
- for (s = -ARRAY_LENGTH (colors);
- s < ARRAY_LENGTH (sizes) * num_tests;
- s++)
+ if (end != argv[1])
{
- image_t src;
-
- if (s < 0)
- {
- image_init (&src, -s - 1, 0, 0);
- }
+ if (!run_test (i))
+ return 1;
else
- {
- image_init (&src,
- s / ARRAY_LENGTH (sizes) / ARRAY_LENGTH (formats),
- s / ARRAY_LENGTH (sizes) % ARRAY_LENGTH (formats),
- sizes[s % ARRAY_LENGTH (sizes)]);
- }
-
- for (m = -ARRAY_LENGTH (colors);
- m < ARRAY_LENGTH (sizes) * num_tests;
- m++)
- {
- image_t mask;
+ return 0;
+ }
+ else
+ {
+ printf ("Usage:\n\n %s <number>\n\n", argv[0]);
+ return -1;
+ }
+ }
- if (m < 0)
- {
- image_init (&mask, -m - 1, 0, 0);
- }
- else
- {
- image_init (
- &mask,
- m / ARRAY_LENGTH (sizes) / ARRAY_LENGTH (formats),
- m / ARRAY_LENGTH (sizes) % ARRAY_LENGTH (formats),
- sizes[m % ARRAY_LENGTH (sizes)]);
- }
-
- for (ca = -1; ca <= 1; ca++)
- {
- for (i = 0; i < ARRAY_LENGTH (operators); i++)
- {
- const operator_t *op = &operators[i];
-
- switch (ca)
- {
- case -1:
- ok = composite_test (&dst, op, &src, NULL, FALSE);
- break;
- case 0:
- ok = composite_test (&dst, op, &src, &mask, FALSE);
- break;
- case 1:
- ok = composite_test (&dst, op, &src, &mask,
- mask.size? TRUE : FALSE);
- break;
- default:
- ok = FALSE; /* Silence GCC */
- break;
- }
- group_ok = group_ok && ok;
- tests_passed += ok;
- tests_total++;
- }
- }
+#ifdef USE_OPENMP
+# pragma omp parallel for default(none) shared(result) shared(argv)
+#endif
+ for (i = 1; i <= N_TESTS; ++i)
+ {
+ if (!result && !run_test (i))
+ {
+ printf ("Test %d failed.\n", i);
- image_fini (&mask);
- }
- image_fini (&src);
+ result = i;
}
- image_fini (&dst);
}
-
- return group_ok == FALSE;
+
+ return result;
}
--- test/lowlevel-blt-bench.c
+++ test/lowlevel-blt-bench.c
@@ -544,7 +544,7 @@
tests_tbl[] =
{
{ "add_8_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 },
- { "add_n_8_8000", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 },
+ { "add_n_8_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 },
{ "add_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
{ "add_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
{ "add_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
@@ -553,7 +553,7 @@
{ "add_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 },
{ "add_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 },
{ "add_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 },
- { "add_n_8000", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
+ { "add_n_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
{ "add_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
{ "add_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
{ "add_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
@@ -562,7 +562,7 @@
{ "add_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
{ "add_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
{ "add_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
- { "add_8000_8000", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
+ { "add_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
{ "add_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
{ "add_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
{ "add_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
--- test/utils.c
+++ test/utils.c
@@ -218,7 +218,12 @@
int n_bytes;
} info_t;
-#if defined(HAVE_MPROTECT) && defined(HAVE_GETPAGESIZE)
+#if defined(HAVE_MPROTECT) && defined(HAVE_GETPAGESIZE) && defined(HAVE_SYS_MMAN_H)
+
+/* This is apparently necessary on at least OS X */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
void *
fence_malloc (uint32_t len)
@@ -238,7 +243,7 @@
addr = mmap (NULL, n_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
- if (addr == (void *)MAP_FAILED)
+ if (addr == MAP_FAILED)
{
printf ("mmap failed on %u %u\n", len, n_bytes);
return NULL;
@@ -254,20 +259,12 @@
((info_t *)initial_page)->trailing = trailing_protected;
((info_t *)initial_page)->n_bytes = n_bytes;
- if (mprotect (leading_protected, N_LEADING_PROTECTED * page_size,
- PROT_NONE) == -1)
- {
- free (addr);
- return NULL;
- }
-
- if (mprotect (trailing_protected, N_TRAILING_PROTECTED * page_size,
- PROT_NONE) == -1)
+ if ((mprotect (leading_protected, N_LEADING_PROTECTED * page_size,
+ PROT_NONE) == -1) ||
+ (mprotect (trailing_protected, N_TRAILING_PROTECTED * page_size,
+ PROT_NONE) == -1))
{
- mprotect (leading_protected, N_LEADING_PROTECTED * page_size,
- PROT_READ | PROT_WRITE);
-
- free (addr);
+ munmap (addr, n_bytes);
return NULL;
}
@@ -282,13 +279,6 @@
uint8_t *leading_protected = payload - N_LEADING_PROTECTED * page_size;
uint8_t *initial_page = leading_protected - page_size;
info_t *info = (info_t *)initial_page;
- uint8_t *trailing_protected = info->trailing;
-
- mprotect (leading_protected, N_LEADING_PROTECTED * page_size,
- PROT_READ | PROT_WRITE);
-
- mprotect (trailing_protected, N_LEADING_PROTECTED * page_size,
- PROT_READ | PROT_WRITE);
munmap (info->addr, info->n_bytes);
}
++++++ pixman.yaml
--- pixman.yaml
+++ pixman.yaml
@@ -1,6 +1,6 @@
Name: pixman
Summary: Pixel manipulation library
-Version: 0.19.4
+Version: 0.20.0
Release: 1
Group: System/Libraries
License: MIT
More information about the MeeGo-commits
mailing list