/* sse-11.c */


/* PR rtl-optimization/21239 */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -msse2" } */
#include <emmintrin.h>
#include "../../gcc.dg/i386-cpuid.h"

extern void abort (void);

/* Copy X doubles from Z to Y, one element per iteration.  Each value
   is read with _mm_load_sd into an __m128d temporary and written back
   out with _mm_store_sd, so the copy goes through the SSE2 scalar
   double load/store intrinsics rather than a plain assignment.

   X is the element count; the loop runs until it has counted down to
   zero, so X == 0 touches nothing.  Y (destination) and Z (source)
   are advanced by one double per iteration and must each refer to at
   least X doubles.  */
void
foo (unsigned int x, double *y, const double *z)
{
  __m128d tmp;
  while (x)
    {
      tmp = _mm_load_sd (z);
      _mm_store_sd (y, tmp);
      /* Count down and step both pointers to the next element.  */
      --x; ++z; ++y;
    }
}

/* Copy 4 * X floats from Z to Y in four consecutive passes of X
   elements each.  In pass K (K = 0..3) every source float is placed
   in element K of an __m128 vector literal whose other three elements
   are zero, and is then read back from that same element K with
   __builtin_ia32_vec_ext_v4sf before being stored to *Y.

   Y and Z are not reset between passes: each pass continues where
   the previous one stopped, so both must refer to at least 4 * X
   floats.  */
void
bar (unsigned int x, float *y, const float *z)
{
  __m128 tmp;
  unsigned int i;
  /* Pass 0: value lives in element 0.  */
  for (i = 0; i < x; ++i)
    {
      tmp = (__m128) { *z, 0, 0, 0 };
      *y = __builtin_ia32_vec_ext_v4sf (tmp, 0);
      ++z; ++y;
    }
  /* Pass 1: value lives in element 1.  */
  for (i = 0; i < x; ++i)
    {
      tmp = (__m128) { 0, *z, 0, 0 };
      *y = __builtin_ia32_vec_ext_v4sf (tmp, 1);
      ++z; ++y;
    }
  /* Pass 2: value lives in element 2.  */
  for (i = 0; i < x; ++i)
    {
      tmp = (__m128) { 0, 0, *z, 0 };
      *y = __builtin_ia32_vec_ext_v4sf (tmp, 2);
      ++z; ++y;
    }
  /* Pass 3: value lives in element 3.  */
  for (i = 0; i < x; ++i)
    {
      tmp = (__m128) { 0, 0, 0, *z };
      *y = __builtin_ia32_vec_ext_v4sf (tmp, 3);
      ++z; ++y;
    }
}

/* Test driver, kept out of line via the noinline attribute.

   Fills four 16-element arrays with distinct constants (double
   destination = 1, double source = 2, float destination = 3, float
   source = 4), copies source to destination with foo (16 doubles)
   and bar (4 passes of 4 floats), and calls abort () if any
   destination element does not hold its source value afterwards.  */
void __attribute__((noinline))
run_tests (void)
{
  double dst_d[16], src_d[16];
  float dst_f[16], src_f[16];
  unsigned int k;

  k = 0;
  while (k < 16)
    {
      dst_d[k] = 1;
      src_d[k] = 2;
      dst_f[k] = 3;
      src_f[k] = 4;
      ++k;
    }

  foo (16, dst_d, src_d);
  bar (4, dst_f, src_f);

  /* Every destination slot must now carry the source constant.  */
  for (k = 0; k < 16; ++k)
    if (dst_d[k] != 2 || dst_f[k] != 4)
      abort ();
}

/* Entry point.  Queries the CPU feature bits with i386_cpuid () and
   runs the checks only when MMX, SSE, SSE2 and CMOV are all reported.
   Returns 0 in both cases: a host lacking any of those features
   counts as a pass, and a failed check terminates through abort ()
   inside run_tests rather than through the return value.  */
int
main (void)
{
  /* All of these bits must be set before run_tests may execute.  */
  const unsigned long required = bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV;
  unsigned long cpu_facilities;

  cpu_facilities = i386_cpuid ();

  if ((cpu_facilities & required) != required)
    /* If host has no vector support, pass.  */
    return 0;

  run_tests ();
  return 0;
}