/* Copyright (C) 2008-2014 Free Software Foundation, Inc.
   This file is part of the UPC runtime library.
   Written by Gary Funck <gary@intrepid.com>
   and Nenad Vukicevic <nenad@intrepid.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */


#ifndef _GCC_UPC_LIB_H_
#define _GCC_UPC_LIB_H_ 1

#if __UPC_PUPC_INST__
#pragma pupc off
#endif /* __UPC_PUPC_INST__ */

#ifdef IN_TARGET_LIBS
#include "config.h"
#include <unistd.h>
/* required, for Posix sched_yield() */
#ifdef _POSIX_PRIORITY_SCHEDULING
#include <sched.h>
#endif
#endif /* IN_TARGET_LIBS */

/* required, for size_t definition */
#include <sys/types.h>

#pragma upc c_code

/* Helper macros for expanding the value of a macro as a string.
   __UPC_XSTR__ expands its argument first and then stringifies it;
   __UPC_STR__ stringifies its argument as written.  */
#define __UPC_STR__(S) #S
#define __UPC_XSTR__(S) __UPC_STR__(S)

/* Storage-class specifier for per-UPC-thread runtime variables:
   '__thread' under the pthreads-tls model, empty otherwise.  */
#ifdef __UPC_PTHREADS_MODEL_TLS__
#define GUPCR_THREAD_LOCAL __thread
#else
#define GUPCR_THREAD_LOCAL
#endif

/* Thread-count configuration.
   __UPC_N_THREADS__ is THREADS for a static-threads compilation and -1
   when the thread count is only known at run time.
   __UPC_THREADS_CONFIG__ is the matching text fragment embedded in the
   program information string.  The stringification helpers defined
   above are reused here rather than being defined a second time.  */
#ifdef __UPC_STATIC_THREADS__
/* defined at compile time by the -fupc-threads-N switch */
#define __UPC_N_THREADS__ THREADS
#define __UPC_THREADS_CONFIG__ " staticthreads=" __UPC_XSTR__(THREADS)
#else
/* defined at run time */
extern const int THREADS;
#define __UPC_N_THREADS__ -1
#define __UPC_THREADS_CONFIG__ "dynamicthreads"
#endif

/* Threads model.  __UPC_USES_PTHREADS__ is defined only for the
   pthreads-tls model; __UPC_THREADS_MODEL__ names the model in the
   program information string.  */
#ifdef __UPC_PTHREADS_MODEL_TLS__
#define __UPC_USES_PTHREADS__ 1
#define __UPC_THREADS_MODEL__ "pthreads-tls"
#else
#define __UPC_THREADS_MODEL__ "process"
#endif

/* Feature macros.  Each macro below is defined to 1 to advertise the
   optional UPC facility named in its comment.  */

/* Support for atomics - UPC Specification 1.3.  */
#define __UPC_ATOMIC__ 1

/* Support for castability - UPC Specification 1.3.  */
#define __UPC_CASTABLE__ 1

/* Support for UPC collectives.  */
#define __UPC_COLLECTIVE__ 1

/* Support for non-blocking transfer operations - UPC Specification 1.3.  */
#define __UPC_NB__ 1

/* Support for GASP - http://gasp.hcs.ufl.edu/.  */
#define __UPC_PUPC__ 1

/* Support for high-performance wall-clock timer - UPC Specification 1.3.  */
#define __UPC_TICK__ 1

/* The typedef below uses the UPC 'shared' qualifier.
   NOTE(review): the surrounding pragmas presumably switch the compiler
   to UPC mode for this one declaration and back to C afterwards --
   confirm against the compiler documentation.  */
#pragma upc upc_code
/* Opaque data type for referencing UPC atomic domains.
   The structure itself is not defined in this part of the header.  */
typedef shared struct upc_atomicdomain_struct upc_atomicdomain_t;
#pragma upc c_code

/* Place configuration information required by the Berkeley
   UPC compilation system into the object file.
   The string has the form
     "$GCCUPCConfig: (<base file>) <threads config> <threads model>$"
   and is placed in a dedicated "upc_pgm_info" section (spelled
   "__DATA,upc_pgm_info" when __MACH__ is set).  The 'used' attribute
   is needed because nothing in the program refers to this static
   object.  */
static const char GCCUPCConfig[]
#if __MACH__
   __attribute__ ((section("__DATA,upc_pgm_info"))) __attribute__ ((used)) =
#else
   __attribute__ ((section("upc_pgm_info"))) __attribute__ ((used)) =
#endif
  "$GCCUPCConfig: (" __BASE_FILE__ ") "
                   __UPC_THREADS_CONFIG__ " " __UPC_THREADS_MODEL__ "$";

/* The stringification helpers were only needed to build the string
   above; drop them so they do not leak into the including program.  */
#undef __UPC_XSTR__
#undef __UPC_STR__

/* The UPC MYTHREAD value.  It is declared with GUPCR_THREAD_LOCAL
   storage, which is '__thread' only under the pthreads-tls model.  */
extern GUPCR_THREAD_LOCAL const int MYTHREAD;

/* Depth count used to implement the semantics of
   nested upc_forall statements.  */
extern GUPCR_THREAD_LOCAL int __upc_forall_depth;

#if !defined(_CRAYT3E)
/* The UPC runtime's main program must run first,
 * we rename the user's main to upc_main().
 * The rename is done by giving 'main' a different assembler name;
 * the __MACH__ variant spells the symbol with a leading underscore. */
#if __MACH__
extern int main () __asm__("_upc_main");
#else
extern int main () __asm__("upc_main");
#endif
#endif

/* Remap calls to exit so that they invoke the UPC runtime's
   implementation of exit instead.  This is an object-like macro, so
   every later use of the identifier 'exit' is rewritten, not only
   function calls.  */
#define exit __upc_exit

/* Implementation of upc_fence.  The identifier expands to a call of
   __upc_fence(), which is not declared in this part of the header.  */
#define upc_fence __upc_fence()

/* upc_shared_ptr_t is predefined as the representation of
   a shared pointer type. */

/* Runtime variables used by optimized code generation
   on some targets. */
/* The base address of the UPC global area */
extern void * const __upc_global;
/* The size of each thread's contribution to the global shared. */
extern const size_t __upc_local_size;
/* The base address of the UPC shared section */
extern char __upc_shared_start[1];
/* A pre-calculated value equal to:
     (__upc_global - __upc_shared_start) which
   is used to map a pointer-to-shared's address field
   into a global memory address. */
extern unsigned long const __upc_global_base;

/* Runtime procedures */

/* Shared pointer to local address conversions.  */
extern void *__cvtaddr (upc_shared_ptr_t);
extern void *__getaddr (upc_shared_ptr_t);
/* Barrier, notify and wait, each identified by 'barrier_id'.  */
extern void __upc_barrier (int barrier_id);
extern void __upc_notify (int barrier_id);
extern void __upc_wait (int barrier_id);
/* The runtime's replacement for exit(); declared as never returning.  */
extern void __upc_exit (int status)
      __attribute__ ((__nothrow__))
      __attribute__ ((__noreturn__));
/* Report a fatal error using a printf-style format; declared as never
   returning, so callers need no code after the call.  */
extern void __upc_fatal (const char *fmt, ...)
      __attribute__ ((__format__ (__printf__, 1, 2)))
      __attribute__ ((__nothrow__))
      __attribute__ ((__noreturn__));

/* Profiled versions of runtime routines.  Each takes the same leading
   arguments as its unprofiled counterpart plus a source file name and
   line number (presumably those of the call site -- confirm against
   the instrumenting compiler).  */
extern void *__cvtaddrg (upc_shared_ptr_t, const char *filename, const int linenum);
extern void *__getaddrg (upc_shared_ptr_t, const char *filename, const int linenum);
extern void __upc_barrierg (int barrier_id, const char *filename, const int linenum);
extern void __upc_notifyg (int barrier_id, const char *filename, const int linenum);
extern void __upc_waitg (int barrier_id, const char *filename, const int linenum);
extern void __upc_exitg (int status, const char *filename, const int linenum)
                        __attribute__ ((__noreturn__));
extern void __upc_funcg (int start, const char *funcname,
                         const char *filename, const int linenum);
extern void __upc_forallg (int start, const char *filename, const int linenum);

/* GUPCR_TARGET64 is 1 when the target is identified as 64-bit by a
   non-zero _LP64, by _MIPS_SZPTR == 64, or by _CRAYT3E being defined;
   it is 0 otherwise.  It is always defined, so it can be tested
   with '#if'.  */
#if (defined(_LP64) && _LP64) \
    || (defined(_MIPS_SZPTR) && (_MIPS_SZPTR == 64)) \
    || (defined(_CRAYT3E))
#define GUPCR_TARGET64 1
#else
#define GUPCR_TARGET64 0
#endif

/* Runtime shared access procedures */
/* Unsigned integer types of fixed width, selected through the GCC
   machine modes QI, HI, SI, DI and TI.  The TI type is only declared
   for 64-bit targets.  */
typedef unsigned int u_intQI_t __attribute__ ((__mode__(__QI__)));
typedef unsigned int u_intHI_t __attribute__ ((__mode__(__HI__)));
typedef unsigned int u_intSI_t __attribute__ ((__mode__(__SI__)));
typedef unsigned int u_intDI_t __attribute__ ((__mode__(__DI__)));
#if GUPCR_TARGET64
typedef unsigned int u_intTI_t __attribute__ ((__mode__(__TI__)));
#endif /* GUPCR_TARGET64 */

#if defined(__UPC_INLINE_LIB__) || defined(IN_TARGET_LIBS)

/* Library routines have access to runtime internals.  */


/* Build configuration settings.
   NOTE(review): this section has the shape of generated configuration
   output -- a setting shown as a commented-out '#undef' is simply not
   defined in this build.  Confirm how the section is produced before
   editing it by hand.  */

/* Define path to preferred addr2line for backtrace */
#define GUPCR_BACKTRACE_ADDR2LINE "addr2line-not-found-in-path-error"

/* Define path to preferred GDB for backtrace */
#define GUPCR_BACKTRACE_GDB "/opt/local/bin/ggdb"

/* Define to preferred signal for UPC backtrace. */
#define GUPCR_BACKTRACE_SIGNAL SIGUSR1

/* Size of get/put bounce buffer */
/* #undef GUPCR_BOUNCE_BUFFER_SIZE */

/* upc_global_exit() timeout in seconds. */
/* #undef GUPCR_GLOBAL_EXIT_TIMEOUT */

/* Define to 1 if UPC runtime checks are supported. */
/* #undef GUPCR_HAVE_CHECKS */

/* Define to 1 if UPC runtime debugging mode is enabled. */
/* #undef GUPCR_HAVE_DEBUG */

/* Define if UPC GUM debug server is supported. */
/* #undef GUPCR_HAVE_GUM_DEBUG */

/* Define to 1 if UPC runtime statistics collection is supported. */
/* #undef GUPCR_HAVE_STATS */

/* Define to 1 if UPC runtime tracing is supported. */
/* #undef GUPCR_HAVE_TRACE */

/* Use SLURM for UPC Portals4 job launcher */
/* #undef GUPCR_JOB_LAUNCHER_SLURM */

/* Use YOD for UPC Portals4 job launcher */
/* #undef GUPCR_JOB_LAUNCHER_YOD */

/* Maximum number of locks held per thread */
#define GUPCR_MAX_LOCKS 1024

/* Maximum number of outstanding remote puts */
/* #undef GUPCR_MAX_OUTSTANDING_PUTS */

/* Target system memory page size. */
/* #undef GUPCR_MEMORY_PAGE_SIZE */

/* Define to 1 if UPC runtime will use node local memory accesses. */
/* #undef GUPCR_NODE_LOCAL_MEM */

/* Define to 1 if UPC node local access uses mmap-ed file. */
/* #undef GUPCR_NODE_LOCAL_MEM_MMAP */

/* Define to 1 if UPC node local access uses Posix shared memory. */
/* #undef GUPCR_NODE_LOCAL_MEM_POSIX */

/* Define to 1 if UPC runtime is based on Portals4. */
/* #undef GUPCR_PORTALS_RUNTIME */

/* Portals4 PTE base index. */
/* #undef GUPCR_PTE_BASE */

/* The required alignment for the UPC struct shared pointer representation. */
/* #undef GUPCR_PTS_ALIGN */

/* Whether UPC pointers-to-shared use the 'packed' representation */
#define GUPCR_PTS_PACKED_REP 1

/* Size of shared pointer's phase field (in bits) */
#define GUPCR_PTS_PHASE_SIZE 20

/* The data type of the 'phase' field in a UPC shared pointer */
/* #undef GUPCR_PTS_PHASE_TYPE */

/* Whether UPC shared pointers use the 'struct' representation */
/* #undef GUPCR_PTS_STRUCT_REP */

/* Size of shared pointer's thread field (in bits) */
#define GUPCR_PTS_THREAD_SIZE 10

/* The data type of the 'thread' field in a UPC shared pointer */
/* #undef GUPCR_PTS_THREAD_TYPE */

/* Whether the 'vaddr' field comes first (ie, [[vaddr,thread,phase]]) */
#define GUPCR_PTS_VADDR_FIRST 1

/* Size of shared pointer's vaddr field (in bits) */
#define GUPCR_PTS_VADDR_SIZE 34

/* The data type of the 'vaddr' field in a UPC shared pointer */
/* #undef GUPCR_PTS_VADDR_TYPE */

/* Maximum number of children at each level of a collective operation tree. */
#define GUPCR_TREE_FANOUT 4

/* Define to 1 if UPC runtime will use Portals4 triggered operations. */
/* #undef GUPCR_USE_PORTALS4_TRIGGERED_OPS */

/* Provide INT_MIN when no earlier header has defined it.  */
#ifndef INT_MIN
/* __INT_MAX__ is predefined by the gcc compiler */
#  define INT_MIN (-__INT_MAX__ - 1)
#endif

/* Helper macros.  These are function-like macros, not functions: an
   argument can be evaluated more than once, so arguments with side
   effects (for example 'i++') must not be passed.  */
/* The smaller of x and y.  */
#define GUPCR_MIN(x,y) (((x) < (y)) ? (x): (y))
/* The larger of x and y.  */
#define GUPCR_MAX(x,y) (((x) > (y)) ? (x): (y))
/* x when x is greater than zero, otherwise -x.  */
#define GUPCR_ABS(x) (((x) > 0) ? (x): -(x))
/* Add r - 1 to x, divide by r and multiply by r again; with integer
   operands this rounds x up to a multiple of r.
   NOTE(review): assumes r > 0 and x >= 0 -- confirm with callers.  */
#define GUPCR_ROUND(x, r) (((x) + (r) - 1)/(r)*(r))
/* Virtual memory page geometry.
   GUPCR_VM_OFFSET_BITS is the number of low-order offset bits that
   address a byte within a page.  GUPCR_VM_MAX_PAGES_PER_THREAD is
   derived from the bits that remain, which are bounded both by the
   per-target maximum (38 or 32 bits) and by GUPCR_PTS_VADDR_SIZE.  */
#if GUPCR_TARGET64
/* On 64-bit machines, use page size of 32M (25 bits) and a max per thread
   offset of 256G (38 bits).  This leaves 13 bits for the per thread
   number of pages.  When GUPCR_PTS_VADDR_SIZE is 38 or less, the page
   count is derived from GUPCR_PTS_VADDR_SIZE instead, which leaves
   fewer than 13 bits.  */
#define GUPCR_VM_OFFSET_BITS 25
#if GUPCR_PTS_VADDR_SIZE > 38
#define GUPCR_VM_MAX_PAGES_PER_THREAD \
	(1 << (38 - GUPCR_VM_OFFSET_BITS))
#else
#define GUPCR_VM_MAX_PAGES_PER_THREAD \
	(1 << (GUPCR_PTS_VADDR_SIZE - GUPCR_VM_OFFSET_BITS))
#endif
#else
/* On 32-bit machines, use page size of 4M (22 bits) and a max per thread
   offset of 4G (32 bits).  This leaves 10 bits for the per thread
   number of pages.  */
#define GUPCR_VM_OFFSET_BITS 22
#if GUPCR_PTS_VADDR_SIZE > 32
#define GUPCR_VM_MAX_PAGES_PER_THREAD \
	(1 << (32 - GUPCR_VM_OFFSET_BITS))
#else
#define GUPCR_VM_MAX_PAGES_PER_THREAD \
	(1 << (GUPCR_PTS_VADDR_SIZE - GUPCR_VM_OFFSET_BITS))
#endif
#endif /* GUPCR_TARGET64 */

/* Derive some VM specific constants. */
/* Mask that extracts a per-thread page number (the page count is a
   power of two).  */
#define GUPCR_VM_PAGE_MASK (GUPCR_VM_MAX_PAGES_PER_THREAD - 1)
/* Page size in bytes.  */
#define GUPCR_VM_PAGE_SIZE (1 << GUPCR_VM_OFFSET_BITS)
/* Mask that extracts the byte offset within a page.  */
#define GUPCR_VM_OFFSET_MASK (GUPCR_VM_PAGE_SIZE - 1)
/* Declare a type sufficiently large to hold a page number.
   We can probably get by with a 'short' here, but it is
   safer to just use a full 'int'.*/
typedef unsigned int upc_page_num_t;

/* Each thread caches a mapping between global page number
   and local mapped address.  The global page number is
   hashed into a global map cache, which is N-way associative,
   where GUPCR_VM_GLOGAl_MAP_SET_SIZE defines the value of N.
   (The macro name is spelled here exactly as it is defined below,
   'GLOGAl' included; it has not been renamed because code outside
   this header may refer to it by that spelling.)  */
#define GUPCR_VM_GLOBAL_MAP_BITS 6
#define GUPCR_VM_GLOBAL_MAP_SIZE (1 << GUPCR_VM_GLOBAL_MAP_BITS)
#define GUPCR_VM_GLOBAL_MAP_MASK (GUPCR_VM_GLOBAL_MAP_SIZE - 1)
#define GUPCR_VM_GLOGAl_MAP_SET_SIZE 4
/* All 1's for the virtual page number in a global map entry (GME)
   indicates that the entry has not yet been mapped. */
#define GUPCR_VM_PAGE_INVALID -1U

/* Virtual memory manager entry points.  __upc_vm_map_addr maps a
   shared pointer to a local address; it is the slow path taken by
   __upc_sptr_to_addr when neither cached page matches.
   NOTE(review): the exact contracts of __upc_vm_alloc and
   __upc_vm_get_cur_page_alloc are not visible in this header.  */
extern void *__upc_vm_map_addr (upc_shared_ptr_t);
extern int __upc_vm_alloc (upc_page_num_t);
extern upc_page_num_t __upc_vm_get_cur_page_alloc (void);

/* Max. heap size
   Set here as 64 gigabytes on a 64-bit implementation
   and 1 gigabyte on other (eg, 32 bit) implementations.
   The choice is made from the width of 'void *'.  KILOBYTE and
   MEGABYTE are not defined in this part of the header; they must be
   in scope wherever this macro is expanded.  */
#define GUPCR_MAX_HEAP_SIZE (((sizeof (void *)*8) == 64) \
                              ? (64L * KILOBYTE * MEGABYTE) \
			      : ( 1L * KILOBYTE * MEGABYTE))

/* Per-thread space initially allocated to UPC user's heap */
#define GUPCR_DEFAULT_PER_THREAD_HEAP_SIZE (16*MEGABYTE)

/* Per-thread maximum stack size that will be added to the OS's
   default stack size, when creating pthreads.  */
#define GUPCR_DEFAULT_PER_THREAD_STACK_SIZE (16*MEGABYTE)

/* The minimum number of bytes to allocate */
#define GUPCR_HEAP_ALLOC_MIN 64

/* Heaps are increased by multiples of this chunk size.
   The chunk size should be an even multiple of the UPC VM page size.
   As defined here it is exactly one VM page.  */
#define GUPCR_HEAP_CHUNK_SIZE (1*GUPCR_VM_PAGE_SIZE)

/* an unlikely barrier id to be used for runtime synchronization */
#define GUPCR_RUNTIME_BARRIER_ID 0xBADF00D

/* a value used to tag each heap allocated item, checked by upc_free */
#define GUPCR_HEAP_ALLOC_TAG 0x0DDF00D

/* Acquire and release the lock that guards the allocator.
   NOTE(review): the scope of the lock is not visible here.  */
extern void __upc_acquire_alloc_lock (void);
extern void __upc_release_alloc_lock (void);

/* Symbolic names for section boundary symbols.  Of the symbols named
   below, only __upc_shared_start is declared in this header.  */

/* The base address of the UPC shared section */
#define GUPCR_SHARED_SECTION_START __upc_shared_start
/* The ending address (plus one) of the UPC shared section */
#define GUPCR_SHARED_SECTION_END __upc_shared_end

/* The base address of the UPC compiled program info. section */
#define GUPCR_PGM_INFO_SECTION_START __upc_pgm_info_start
/* The ending address (plus one) of the UPC compiled program info. section */
#define GUPCR_PGM_INFO_SECTION_END __upc_pgm_info_end

/* The base address of an array of pointers to UPC initialization routines.  */
#define GUPCR_INIT_ARRAY_START __upc_init_array_start
/* The ending address (plus one) of pointers to UPC initialization routines */
#define GUPCR_INIT_ARRAY_END   __upc_init_array_end


/* UPC pointer representation */

/* Exactly one of GUPCR_PTS_STRUCT_REP, GUPCR_PTS_WORD_PAIR_REP and
   GUPCR_PTS_PACKED_REP must be defined.  Each 'defined' operator
   yields 0 or 1, so the sum counts how many are set: zero and more
   than one are both rejected at compile time.  */
#if (defined(GUPCR_PTS_STRUCT_REP) + defined(GUPCR_PTS_WORD_PAIR_REP) \
     + defined(GUPCR_PTS_PACKED_REP)) == 0
# error Unknown PTS representation.
#elif (defined(GUPCR_PTS_STRUCT_REP) + defined(GUPCR_PTS_WORD_PAIR_REP) \
     + defined(GUPCR_PTS_PACKED_REP)) != 1
# error Only one UPC shared pointer representaion setting is permitted.
#endif

/* Shared pointer accessors.  Both representations provide the same
   macro set: GUPCR_PTS_TO_REP, GUPCR_PTS_IS_NULL,
   GUPCR_PTS_SET_NULL_SHARED, GUPCR_PTS_VADDR, GUPCR_PTS_OFFSET,
   GUPCR_PTS_THREAD, GUPCR_PTS_PHASE and the GUPCR_PTS_SET_* and
   GUPCR_PTS_INCR_VADDR modifiers.  The macros evaluate their pointer
   argument more than once, so it must be free of side effects.  */
#ifdef GUPCR_PTS_STRUCT_REP

/* Pick the field types from the configured field widths; only widths
   of 32 and 16 bits override a configured type here.  */
#if GUPCR_PTS_THREAD_SIZE == 32
#undef GUPCR_PTS_THREAD_TYPE
#define GUPCR_PTS_THREAD_TYPE u_intSI_t
#elif GUPCR_PTS_THREAD_SIZE == 16
#undef GUPCR_PTS_THREAD_TYPE
#define GUPCR_PTS_THREAD_TYPE u_intHI_t
#endif
#if GUPCR_PTS_PHASE_SIZE == 32
#undef GUPCR_PTS_PHASE_TYPE
#define GUPCR_PTS_PHASE_TYPE u_intSI_t
#elif GUPCR_PTS_PHASE_SIZE == 16
#undef GUPCR_PTS_PHASE_TYPE
#define GUPCR_PTS_PHASE_TYPE u_intHI_t
#endif

#if !__GCC_UPC__
/* The UPC compiler pre-defines upc_shared_ptr_t to be the
   representation of a shared pointer.  Since most of the
   runtime is written in regular "C", we need to define
   the pointer representation here.  */
typedef struct shared_ptr_struct
  {
#if GUPCR_PTS_VADDR_FIRST
    GUPCR_PTS_VADDR_TYPE  vaddr;
    GUPCR_PTS_THREAD_TYPE thread;
    GUPCR_PTS_PHASE_TYPE  phase;
#else
    GUPCR_PTS_PHASE_TYPE  phase;
    GUPCR_PTS_THREAD_TYPE thread;
    GUPCR_PTS_VADDR_TYPE  vaddr;
#endif
  } upc_shared_ptr_t
#ifdef GUPCR_PTS_ALIGN
  __attribute__ ((aligned (GUPCR_PTS_ALIGN)))
#endif
  ;
typedef upc_shared_ptr_t *upc_shared_ptr_p;
/* upc_dbg_shared_ptr_t is used by debugger to figure out
   shared pointer layout */
typedef upc_shared_ptr_t upc_dbg_shared_ptr_t;
#endif

/* Reinterpret the lvalue V as a upc_shared_ptr_t.  */
#define GUPCR_PTS_TO_REP(V) *((upc_shared_ptr_t *)&(V))
/* A null pointer-to-shared has all three fields equal to zero.  */
#define GUPCR_PTS_IS_NULL(P) (!(P).vaddr && !(P).thread && !(P).phase)
#define GUPCR_PTS_SET_NULL_SHARED(P) \
   {(P).vaddr = 0; (P).thread = 0; (P).phase = 0;}

/* In this representation 'vaddr' holds an address, so both
   GUPCR_PTS_VADDR and GUPCR_PTS_OFFSET yield the distance from the
   start of the shared section, as a size_t.  */
#define GUPCR_PTS_VADDR(P) ((size_t)(P).vaddr - (size_t)GUPCR_SHARED_SECTION_START)
#define GUPCR_PTS_OFFSET(P) ((size_t)(P).vaddr - (size_t)GUPCR_SHARED_SECTION_START)
#define GUPCR_PTS_THREAD(P) (P).thread
#define GUPCR_PTS_PHASE(P) (P).phase

/* Setting the vaddr adds the shared section start back onto V.  */
#define GUPCR_PTS_SET_VADDR(P,V) (P).vaddr = (GUPCR_PTS_VADDR_TYPE)((char *)(V) \
			+ (size_t)GUPCR_SHARED_SECTION_START)
#define GUPCR_PTS_INCR_VADDR(P,V) (P).vaddr += ((size_t)(V))
#define GUPCR_PTS_SET_THREAD(P,V) (P).thread = (size_t)(V)
#define GUPCR_PTS_SET_PHASE(P,V) (P).phase = (size_t)(V)

/* Note: this branch tests the macro's value, not whether it is
   defined, so GUPCR_PTS_PACKED_REP must be defined as non-zero.  */
#elif GUPCR_PTS_PACKED_REP

/* Bit position of each field within the packed word.  With
   GUPCR_PTS_VADDR_FIRST the layout from the most significant end is
   (vaddr, thread, phase); otherwise it is (phase, thread, vaddr).  */
#if GUPCR_PTS_VADDR_FIRST
#define GUPCR_PTS_VADDR_SHIFT	(GUPCR_PTS_THREAD_SHIFT + GUPCR_PTS_THREAD_SIZE)
#define GUPCR_PTS_THREAD_SHIFT	GUPCR_PTS_PHASE_SIZE
#define GUPCR_PTS_PHASE_SHIFT	0
#else
#define GUPCR_PTS_VADDR_SHIFT   0
#define GUPCR_PTS_THREAD_SHIFT  GUPCR_PTS_VADDR_SIZE
#define GUPCR_PTS_PHASE_SHIFT   (GUPCR_PTS_THREAD_SHIFT + GUPCR_PTS_THREAD_SIZE)
#endif
/* Reinterpret the lvalue V as a upc_shared_ptr_t.  */
#define GUPCR_PTS_TO_REP(V) *((upc_shared_ptr_t *)&(V))
/* The packed representation is held in 'unsigned long' on 64-bit
   targets and in 'unsigned long long' elsewhere; GUPCR_ONE is the
   constant 1 of the matching type, used to build the field masks.  */
#if GUPCR_TARGET64
#define GUPCR_ONE 1UL
#define GUPCR_PTS_REP_T unsigned long
#else
#define GUPCR_ONE 1ULL
#define GUPCR_PTS_REP_T unsigned long long
#endif
/* Right-justified (unshifted) masks, one per field.  */
#define GUPCR_PTS_VADDR_MASK	((GUPCR_ONE << GUPCR_PTS_VADDR_SIZE) - GUPCR_ONE)
#define GUPCR_PTS_THREAD_MASK	((GUPCR_ONE << GUPCR_PTS_THREAD_SIZE) - GUPCR_ONE)
#define GUPCR_PTS_PHASE_MASK	((GUPCR_ONE << GUPCR_PTS_PHASE_SIZE) - GUPCR_ONE)

#if !__GCC_UPC__
/* upc_dbg_shared_ptr_t is used by debugger to figure out
   shared pointer layout */
typedef struct shared_ptr_struct
  {
#if GUPCR_PTS_VADDR_FIRST
    unsigned long long vaddr:GUPCR_PTS_VADDR_SIZE;
    unsigned int thread:GUPCR_PTS_THREAD_SIZE;
    unsigned int phase:GUPCR_PTS_PHASE_SIZE;
#else
    unsigned int phase:GUPCR_PTS_PHASE_SIZE;
    unsigned int thread:GUPCR_PTS_THREAD_SIZE;
    unsigned long long vaddr:GUPCR_PTS_VADDR_SIZE;
#endif
  } upc_dbg_shared_ptr_t;

typedef GUPCR_PTS_REP_T upc_shared_ptr_t;
typedef upc_shared_ptr_t *upc_shared_ptr_p;
#endif

/* A null pointer-to-shared is the all-zero word.  */
#define GUPCR_PTS_IS_NULL(P) !(P)
#define GUPCR_PTS_SET_NULL_SHARED(P) { (P) = 0; }

/* access functions are optimized for a representation of the
   form (vaddr,thread,phase) and where the value is unsigned.
   Thus, right shift is logical (not arithmetic), and masking
   is avoided for vaddr, and shifting is avoided for phase.
   Further, the value being inserted must fit into the field.
   It will not be masked.
   Note: as written, the four read accessors below apply both the
   shift and the mask to every field, so they are correct for
   either field order.  */
#define GUPCR_PTS_VADDR(P)  \
  (void *)((size_t)((P)>>GUPCR_PTS_VADDR_SHIFT & GUPCR_PTS_VADDR_MASK))
#define GUPCR_PTS_THREAD(P) ((size_t)((P)>>GUPCR_PTS_THREAD_SHIFT & GUPCR_PTS_THREAD_MASK))
#define GUPCR_PTS_PHASE(P)  ((size_t)((P)>>GUPCR_PTS_PHASE_SHIFT & GUPCR_PTS_PHASE_MASK))
#define GUPCR_PTS_OFFSET(P) ((size_t)((P)>>GUPCR_PTS_VADDR_SHIFT & GUPCR_PTS_VADDR_MASK))

/* Each setter clears the field in P and ORs in the shifted value V;
   V itself is not masked.  These expand to unparenthesized
   assignment expressions and are meant to be used as statements.  */
#define GUPCR_PTS_SET_VADDR(P,V) \
  (P) = ((P) & ~(GUPCR_PTS_VADDR_MASK << GUPCR_PTS_VADDR_SHIFT)) \
         	| ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_VADDR_SHIFT)
#define GUPCR_PTS_SET_THREAD(P,V) (P) = ((P) & ~(GUPCR_PTS_THREAD_MASK << GUPCR_PTS_THREAD_SHIFT)) \
                                     | ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_THREAD_SHIFT)
#define GUPCR_PTS_SET_PHASE(P,V) (P) = ((P) & ~(GUPCR_PTS_PHASE_MASK << GUPCR_PTS_PHASE_SHIFT)) \
                                     | ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_PHASE_SHIFT)
/* Add V to the vaddr field in place; a carry out of the field is
   not detected.  */
#define GUPCR_PTS_INCR_VADDR(P,V) \
  ((P) += ((GUPCR_PTS_REP_T)(V) << GUPCR_PTS_VADDR_SHIFT))
#elif GUPCR_PTS_WORD_PAIR_REP
#error UPC word pair representation is unsupported.
#endif /* GUPCR_PTS_*_REP__ */
/* Maximum number of THREADS supported in this implementation */
/* GUPCR_THREAD_SIZE is the number of bits reserved for a thread
   number, giving GUPCR_THREADS_MAX == 1 << 12.  __upc_sptr_to_addr
   also uses it as the shift that places a page number above the
   thread number in its page lookup key.  */
#define GUPCR_THREAD_SIZE 12
#define GUPCR_THREADS_MAX (1 << GUPCR_THREAD_SIZE)

/* Translate the shared pointer 'p' into a local address.
   To speed things up, the last two unique (page, thread) lookups
   are cached: when the key built from p's page number and thread
   matches __upc_page1_ref or __upc_page2_ref, the address is formed
   from the matching cached base plus the offset within the page.
   Otherwise the lookup is handed to __upc_vm_map_addr.  This routine
   only consults the cache; it does not update it.
   Caller must validate the pointer 'p' (check for NULL, etc.)
   before calling this routine. */
__attribute__((__always_inline__))
static inline
void *
__upc_sptr_to_addr (upc_shared_ptr_t p)
{
  extern GUPCR_THREAD_LOCAL unsigned long __upc_page1_ref, __upc_page2_ref;
  extern GUPCR_THREAD_LOCAL void *__upc_page1_base, *__upc_page2_base;
  const size_t vm_offset = GUPCR_PTS_OFFSET (p);
  /* Byte offset inside the page.  */
  const size_t offset_in_page = vm_offset & GUPCR_VM_OFFSET_MASK;
  /* Per-thread page number.  */
  const upc_page_num_t page_num =
    (vm_offset >> GUPCR_VM_OFFSET_BITS) & GUPCR_VM_PAGE_MASK;
  /* Page number in the high bits, thread number in the low bits.  */
  const unsigned long lookup_key =
    (page_num << GUPCR_THREAD_SIZE) | GUPCR_PTS_THREAD (p);

  if (lookup_key == __upc_page1_ref)
    return (char *) __upc_page1_base + offset_in_page;
  if (lookup_key == __upc_page2_ref)
    return (char *) __upc_page2_base + offset_in_page;
  return __upc_vm_map_addr (p);
}

#ifdef __UPC__
  /* A may_alias variant of upc_shared_ptr_t, so that the cast in
     __upc_map_to_local may reinterpret an object of another type
     without breaking type-based alias analysis.  */
  typedef upc_shared_ptr_t
          __attribute__((__may_alias__)) upc_shared_ptr_alias_t;
  /* Map the lvalue P (its address is taken) to a local address by
     way of __upc_sptr_to_addr.  P is not validated here.  */
  #define __upc_map_to_local(P)(__upc_sptr_to_addr(*(upc_shared_ptr_alias_t *)&(P)))
#endif


#endif /* __UPC_INLINE_LIB__ || IN_TARGET_LIBS */

#ifdef IN_TARGET_LIBS

/* __upc_atomic_cas (PTR, OLD_VAL, NEW_VAL): compare-and-swap.
   When GCC reports a 4- or 8-byte compare-and-swap builtin it maps to
   __sync_bool_compare_and_swap; the result is non-zero when the swap
   was performed.  Otherwise an out-of-line routine is declared whose
   os_atomic_p/os_atomic_t parameter types are not defined in this
   part of the header.  */
#if defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) \
    || defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
  /* Use GCC's builtin implementation, if available.  */
  #define __upc_atomic_cas(PTR, OLD_VAL, NEW_VAL) \
    __sync_bool_compare_and_swap (PTR, OLD_VAL, NEW_VAL)
#else
  extern int __upc_atomic_cas (os_atomic_p, os_atomic_t, os_atomic_t);
#endif

/* __upc_sync_fetch_and_add: atomically add to an integer and yield
   the value it held before the addition.  */
#if defined (HAVE_SYNC_FETCH_AND_ADD_8) \
    || defined (HAVE_SYNC_FETCH_AND_ADD_4)
/* Use the compiler's builtin when the configuration reports it.  */
#define __upc_sync_fetch_and_add(PTR, INC) \
    __sync_fetch_and_add (PTR, INC)
#else
/* Fallback built on __upc_atomic_cas: read the current value and try
   to install value + inc, repeating until the compare-and-swap
   succeeds.  Returns the value that was replaced.  */
__attribute__ ((__always_inline__))
static inline
int
__upc_sync_fetch_and_add (int *addr, int inc)
{
  for (;;)
    {
      const int observed = *addr;
      if (__upc_atomic_cas (addr, observed, observed + inc))
        return observed;
    }
}
#endif

/* Give up control of the cpu for a small time interval.
   Selection order: sginap(0) when __sgi__ is defined, sched_yield()
   when _POSIX_PRIORITY_SCHEDULING is defined, and otherwise a
   usleep() of 1000 microseconds.  Each variant is wrapped in
   do { } while (0) so that it behaves as a single statement.  */
#ifdef __sgi__
#define __upc_yield_cpu() do { sginap(0); } while (0)
#else
# ifdef _POSIX_PRIORITY_SCHEDULING
# define __upc_yield_cpu() do { sched_yield(); } while (0)
# else
# define __upc_yield_cpu() do { usleep(1000L); } while (0)
# endif
#endif

/* Number of cpu's available */
extern int __upc_num_cpus;

/* Max. number of iterations to poll waiting for a
 * spinlock loop condition to be satisfied.
 * With a single cpu the limit is 0, so the waiter yields the cpu
 * after every failed poll instead of spinning.
 */
#define OS_MAX_SPIN_COUNT (__upc_num_cpus > 1 ? 500 : 0)
/* Keep spinning until PREDICATE is true,
 * (this needs to be a macro, to ensure that
 * PREDICATE is re-evaluated on each iteration.
 * After OS_MAX_SPIN_COUNT consecutive failed polls the cpu is
 * yielded and the count starts over.
 * The poll counter has a reserved-prefix name because PREDICATE is
 * evaluated inside the scope that declares it: with a plain name
 * such as 'i', a caller's predicate mentioning its own 'i' would
 * silently read the counter instead.
 * The expansion is a brace-enclosed block, as before. */
#define __upc_spin_until(PREDICATE) \
    { \
      int __upc_spin_count = 0; \
      while (!(PREDICATE)) \
	{ \
	  if (++__upc_spin_count >= OS_MAX_SPIN_COUNT) \
	    { \
	      __upc_yield_cpu (); \
	      __upc_spin_count = 0; \
	    } \
	} \
    }

#endif /* IN_TARGET_LIBS */

#ifdef __UPC_INLINE_LIB__


/*

The following table is excerpted from
"Implementing the UPC memory consistency model for
shared-memory architectures", Dan Bonachea et al.

CPU		Write fence		Read fence
--------------------------------------------------
Power/PowerPC	lwsync			isync
Alpha		wmb			mb
x86		lock; addl $0,0(%%esp)  none reqd.
Athlon/Opteron	mfence			none reqd.
Itanium		mf			none reqd.
SPARC		stbar			none reqd.
MIPS		sync			none reqd.
PA-RISC		SYNC			none reqd. */

/* Full fence: a read fence followed by a write fence.  The expansion
   is a brace-enclosed block.  */
#define GUPCR_FENCE() { GUPCR_READ_FENCE (); GUPCR_WRITE_FENCE (); }

/* Per-CPU fence instructions, following the table above.  Where no
   read fence is required, GUPCR_READ_FENCE is an empty asm with a
   "memory" clobber, i.e. a compiler-only barrier.

   The '__asm__' spelling is used because plain 'asm' is not a keyword
   in the strict ISO modes (-std=c99, -std=c11).  For the same reason
   each CPU is also recognized by its double-underscore macro: the
   unprefixed names (i386, alpha, sparc, mips, hppa) are only
   predefined in the GNU dialects.  */
#if defined (PPC) || defined (__PPC__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("lwsync":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("isync":::"memory")
#elif defined (alpha) || defined (__alpha__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("wmb":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("mb":::"memory")
#elif defined (__x86_64__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("mfence":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("":::"memory")
#elif defined (__ia64__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("mf":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("":::"memory")
#elif defined (i386) || defined (__i386__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("":::"memory")
#elif defined (sparc) || defined (__sparc__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("stbar":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("":::"memory")
#elif defined (mips) || defined (__mips__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("sync":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("":::"memory")
#elif defined (hppa) || defined (__hppa__)
#define GUPCR_WRITE_FENCE() __asm__ __volatile__ ("SYNC":::"memory")
#define GUPCR_READ_FENCE() __asm__ __volatile__ ("":::"memory")
#else
/* No hand-picked instruction for this cpu: fall back to the compiler's
   full memory barrier, which is at least as strong as either fence.  */
#define GUPCR_WRITE_FENCE() __sync_synchronize ()
#define GUPCR_READ_FENCE() __sync_synchronize ()
#endif
/* We need to include <string.h> to define memcpy() */
#include <string.h>

/* Copy 'n' bytes from the shared location 'src' to the shared
   location 'dest'.  A null 'src' or 'dest' is a fatal error.
   The copy is done in pieces so that no single memcpy crosses a VM
   page boundary of either operand; both pointers are translated
   again for every piece.  */
__attribute__((__always_inline__))
static inline
void
__upc_memcpy (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n)
{
  if (GUPCR_PTS_IS_NULL (src))
    __upc_fatal ("Invalid access via null shared pointer");
  if (GUPCR_PTS_IS_NULL (dest))
    __upc_fatal ("Invalid access via null shared pointer");
  while (1)
    {
      char *from = (char *) __upc_sptr_to_addr (src);
      /* Bytes left in the source page, starting at 'src'.  */
      const size_t from_room =
	GUPCR_VM_PAGE_SIZE - (GUPCR_PTS_OFFSET (src) & GUPCR_VM_OFFSET_MASK);
      char *to = (char *) __upc_sptr_to_addr (dest);
      /* Bytes left in the destination page, starting at 'dest'.  */
      const size_t to_room =
	GUPCR_VM_PAGE_SIZE - (GUPCR_PTS_OFFSET (dest) & GUPCR_VM_OFFSET_MASK);
      /* The piece is limited by both pages and by what remains.  */
      size_t chunk = (from_room < to_room) ? from_room : to_room;
      if (n < chunk)
	chunk = n;
      memcpy (to, from, chunk);
      n -= chunk;
      if (n == 0)
	return;
      GUPCR_PTS_INCR_VADDR (src, chunk);
      GUPCR_PTS_INCR_VADDR (dest, chunk);
    }
}

/* Copy 'n' bytes from the shared location 'src' into the local
   buffer 'dest'.  A null 'dest' or a null 'src' is a fatal error.
   The copy is done in pieces that never cross a VM page boundary of
   the shared source, which is translated again for every piece.  */
__attribute__((__always_inline__))
static inline
void
__upc_memget (void *dest, upc_shared_ptr_t src, size_t n)
{
  if (!dest)
    __upc_fatal ("Invalid access via null shared pointer");
  if (GUPCR_PTS_IS_NULL (src))
    __upc_fatal ("Invalid access via null shared pointer");
  while (1)
    {
      const char *from = (const char *) __upc_sptr_to_addr (src);
      /* Bytes left in the source page, starting at 'src'.  */
      const size_t page_room =
	GUPCR_VM_PAGE_SIZE - (GUPCR_PTS_OFFSET (src) & GUPCR_VM_OFFSET_MASK);
      const size_t chunk = (page_room < n) ? page_room : n;
      memcpy (dest, from, chunk);
      n -= chunk;
      if (n == 0)
	return;
      GUPCR_PTS_INCR_VADDR (src, chunk);
      dest = (char *) dest + chunk;
    }
}

/* Copy 'n' bytes from the local buffer 'src' to the shared location
   'dest'.  A null 'src' or a null 'dest' is a fatal error.
   The copy is done in pieces that never cross a VM page boundary of
   the shared destination, which is translated again for every
   piece.  */
__attribute__((__always_inline__))
static inline
void
__upc_memput (upc_shared_ptr_t dest, const void *src, size_t n)
{
  if (!src)
    __upc_fatal ("Invalid access via null shared pointer");
  if (GUPCR_PTS_IS_NULL (dest))
    __upc_fatal ("Invalid access via null shared pointer");
  while (1)
    {
      char *to = (char *) __upc_sptr_to_addr (dest);
      /* Bytes left in the destination page, starting at 'dest'.  */
      const size_t page_room =
	GUPCR_VM_PAGE_SIZE - (GUPCR_PTS_OFFSET (dest) & GUPCR_VM_OFFSET_MASK);
      const size_t chunk = (page_room < n) ? page_room : n;
      memcpy (to, src, chunk);
      n -= chunk;
      if (n == 0)
	return;
      GUPCR_PTS_INCR_VADDR (dest, chunk);
      src = (const char *) src + chunk;
    }
}

/* Fill 'n' bytes at the shared location 'dest' with the byte value
   'c'.  A null 'dest' is a fatal error.  The fill is done in pieces
   that never cross a VM page boundary; 'dest' is translated again
   for every piece.  */
__attribute__((__always_inline__))
static inline
void
__upc_memset (upc_shared_ptr_t dest, int c, size_t n)
{
  if (GUPCR_PTS_IS_NULL (dest))
    __upc_fatal ("Invalid access via null shared pointer");
  while (1)
    {
      char *to = (char *) __upc_sptr_to_addr (dest);
      /* Bytes left in the destination page, starting at 'dest'.  */
      const size_t page_room =
	GUPCR_VM_PAGE_SIZE - (GUPCR_PTS_OFFSET (dest) & GUPCR_VM_OFFSET_MASK);
      const size_t chunk = (page_room < n) ? page_room : n;
      memset (to, c, chunk);
      n -= chunk;
      if (n == 0)
	return;
      GUPCR_PTS_INCR_VADDR (dest, chunk);
    }
}

/* Checked translation of the shared pointer 'p' to a local address.
   A null pointer, or a thread field that is not below THREADS, is
   reported through __upc_fatal; otherwise the result of
   __upc_sptr_to_addr is returned.  */
__attribute__((__always_inline__))
static inline
void *
__upc_access_sptr_to_addr (upc_shared_ptr_t p)
{
  if (GUPCR_PTS_IS_NULL (p))
    {
      __upc_fatal ("Invalid access via null shared pointer");
    }
  if (THREADS <= (int)GUPCR_PTS_THREAD(p))
    {
      __upc_fatal ("Thread number in shared address is out of range");
    }
  return __upc_sptr_to_addr (p);
}

/* Read a u_intQI_t from the shared location 'p'; no memory fence is
   issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
u_intQI_t
__getqi2 (upc_shared_ptr_t p)
{
  return *(const u_intQI_t *) __upc_access_sptr_to_addr (p);
}

/* Read a u_intHI_t from the shared location 'p'; no memory fence is
   issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
u_intHI_t
__gethi2 (upc_shared_ptr_t p)
{
  return *(const u_intHI_t *) __upc_access_sptr_to_addr (p);
}

/* Read a u_intSI_t from the shared location 'p'; no memory fence is
   issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
u_intSI_t
__getsi2 (upc_shared_ptr_t p)
{
  return *(const u_intSI_t *) __upc_access_sptr_to_addr (p);
}

/* Read a u_intDI_t from the shared location 'p'; no memory fence is
   issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
u_intDI_t
__getdi2 (upc_shared_ptr_t p)
{
  return *(const u_intDI_t *) __upc_access_sptr_to_addr (p);
}

#if GUPCR_TARGET64
/* Read a u_intTI_t from the shared location 'p'; no memory fence is
   issued.  Only available on 64-bit targets.  'p' is checked by
   __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
u_intTI_t
__getti2 (upc_shared_ptr_t p)
{
  return *(const u_intTI_t *) __upc_access_sptr_to_addr (p);
}
#endif /* GUPCR_TARGET64 */

/* Read a float from the shared location 'p'; no memory fence is
   issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
float
__getsf2 (upc_shared_ptr_t p)
{
  return *(const float *) __upc_access_sptr_to_addr (p);
}

/* Read a double from the shared location 'p'; no memory fence is
   issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
double
__getdf2 (upc_shared_ptr_t p)
{
  return *(const double *) __upc_access_sptr_to_addr (p);
}

/* Read a long double from the shared location 'p' (the 'tf' entry
   point); no memory fence is issued.  'p' is checked by
   __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
long double
__gettf2 (upc_shared_ptr_t p)
{
  return *(const long double *) __upc_access_sptr_to_addr (p);
}

/* Read a long double from the shared location 'p' (the 'xf' entry
   point); no memory fence is issued.  'p' is checked by
   __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
long double
__getxf2 (upc_shared_ptr_t p)
{
  return *(const long double *) __upc_access_sptr_to_addr (p);
}

/* Block read: copy 'n' bytes from the shared location 'src' into the
   local buffer 'dest'.  Forwards directly to __upc_memget, which
   performs the null checks.  */
__attribute__((__always_inline__))
static inline
void
__getblk3 (void *dest, upc_shared_ptr_t src, size_t n)
{
  __upc_memget (dest, src, n);
}

/* Store the u_intQI_t 'v' at the shared location 'p'; no memory
   fence is issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__putqi2 (upc_shared_ptr_t p, u_intQI_t v)
{
  *(u_intQI_t *) __upc_access_sptr_to_addr (p) = v;
}

/* Store the u_intHI_t 'v' at the shared location 'p'; no memory
   fence is issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__puthi2 (upc_shared_ptr_t p, u_intHI_t v)
{
  *(u_intHI_t *) __upc_access_sptr_to_addr (p) = v;
}

/* Store the u_intSI_t 'v' at the shared location 'p'; no memory
   fence is issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__putsi2 (upc_shared_ptr_t p, u_intSI_t v)
{
  *(u_intSI_t *) __upc_access_sptr_to_addr (p) = v;
}

/* Store the u_intDI_t 'v' at the shared location 'p'; no memory
   fence is issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__putdi2 (upc_shared_ptr_t p, u_intDI_t v)
{
  *(u_intDI_t *) __upc_access_sptr_to_addr (p) = v;
}

#if GUPCR_TARGET64
/* Store the u_intTI_t 'v' at the shared location 'p'; no memory
   fence is issued.  Only available on 64-bit targets.  'p' is
   checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__putti2 (upc_shared_ptr_t p, u_intTI_t v)
{
  *(u_intTI_t *) __upc_access_sptr_to_addr (p) = v;
}
#endif /* GUPCR_TARGET64 */

/* Store the float 'v' at the shared location 'p'; no memory fence
   is issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__putsf2 (upc_shared_ptr_t p, float v)
{
  *(float *) __upc_access_sptr_to_addr (p) = v;
}

/* Store the double 'v' at the shared location 'p'; no memory fence
   is issued.  'p' is checked by __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__putdf2 (upc_shared_ptr_t p, double v)
{
  *(double *) __upc_access_sptr_to_addr (p) = v;
}

/* Store the long double 'v' at the shared location 'p' (the 'tf'
   entry point); no memory fence is issued.  'p' is checked by
   __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__puttf2 (upc_shared_ptr_t p, long double v)
{
  *(long double *) __upc_access_sptr_to_addr (p) = v;
}

/* Store the long double 'v' at the shared location 'p' (the 'xf'
   entry point); no memory fence is issued.  'p' is checked by
   __upc_access_sptr_to_addr.  */
__attribute__((__always_inline__))
static inline
void
__putxf2 (upc_shared_ptr_t p, long double v)
{
  *(long double *) __upc_access_sptr_to_addr (p) = v;
}

/* Block write: copy 'n' bytes from the local buffer 'src' to the
   shared location 'dest'.  Forwards directly to __upc_memput, which
   performs the null checks.  'src' is declared non-const here even
   though __upc_memput only reads through it.  */
__attribute__((__always_inline__))
static inline
void
__putblk3 (upc_shared_ptr_t dest, void *src, size_t n)
{
  __upc_memput (dest, src, n);
}

/* Block copy: copy 'n' bytes from the shared location 'src' to the
   shared location 'dest'.  Forwards directly to __upc_memcpy, which
   performs the null checks.  */
__attribute__((__always_inline__))
static inline
void
__copyblk3 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t n)
{
  __upc_memcpy (dest, src, n);
}

/* Strict memory accesses. */

/* Strict read of a u_intQI_t from the shared location 'p'.
   The pointer is translated first; the load is then preceded by a
   full fence and followed by a read fence.  */
__attribute__((__always_inline__))
static inline
u_intQI_t
__getsqi2 (upc_shared_ptr_t p)
{
  u_intQI_t value;
  const u_intQI_t * const src = (const u_intQI_t *) __upc_access_sptr_to_addr (p);
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

/* Strict read of a u_intHI_t from the shared location 'p'.
   The pointer is translated first; the load is then preceded by a
   full fence and followed by a read fence.  */
__attribute__((__always_inline__))
static inline
u_intHI_t
__getshi2 (upc_shared_ptr_t p)
{
  u_intHI_t value;
  const u_intHI_t * const src = (const u_intHI_t *) __upc_access_sptr_to_addr (p);
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

/* Strict get of a u_intSI_t from shared pointer P.  The address is
   resolved first; the load is then bracketed by GUPCR_FENCE (before)
   and GUPCR_READ_FENCE (after).  Returns the loaded value.  */
__attribute__((__always_inline__))
static inline
u_intSI_t
__getssi2 (upc_shared_ptr_t p)
{
  const u_intSI_t * const src
    = (const u_intSI_t *) __upc_access_sptr_to_addr (p);
  u_intSI_t value;
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

/* Strict get of a u_intDI_t from shared pointer P.  The address is
   resolved first; the load is then bracketed by GUPCR_FENCE (before)
   and GUPCR_READ_FENCE (after).  Returns the loaded value.  */
__attribute__((__always_inline__))
static inline
u_intDI_t
__getsdi2 (upc_shared_ptr_t p)
{
  const u_intDI_t * const src
    = (const u_intDI_t *) __upc_access_sptr_to_addr (p);
  u_intDI_t value;
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

#if GUPCR_TARGET64
/* Strict get of a u_intTI_t from shared pointer P.  The address is
   resolved first; the load is then bracketed by GUPCR_FENCE (before)
   and GUPCR_READ_FENCE (after).  Returns the loaded value.
   Only compiled when GUPCR_TARGET64 is set.  */
__attribute__((__always_inline__))
static inline
u_intTI_t
__getsti2 (upc_shared_ptr_t p)
{
  const u_intTI_t * const src
    = (const u_intTI_t *) __upc_access_sptr_to_addr (p);
  u_intTI_t value;
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}
#endif /* GUPCR_TARGET64 */

/* Strict get of a float from shared pointer P.  The address is
   resolved first; the load is then bracketed by GUPCR_FENCE (before)
   and GUPCR_READ_FENCE (after).  Returns the loaded value.  */
__attribute__((__always_inline__))
static inline
float
__getssf2 (upc_shared_ptr_t p)
{
  const float * const src
    = (const float *) __upc_access_sptr_to_addr (p);
  float value;
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

/* Strict get of a double from shared pointer P.  The address is
   resolved first; the load is then bracketed by GUPCR_FENCE (before)
   and GUPCR_READ_FENCE (after).  Returns the loaded value.  */
__attribute__((__always_inline__))
static inline
double
__getsdf2 (upc_shared_ptr_t p)
{
  const double * const src
    = (const double *) __upc_access_sptr_to_addr (p);
  double value;
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

/* Strict get, "tf" entry point: loads a long double from shared
   pointer P.  The address is resolved first; the load is then
   bracketed by GUPCR_FENCE (before) and GUPCR_READ_FENCE (after).
   Returns the loaded value.  */
__attribute__((__always_inline__))
static inline
long double
__getstf2 (upc_shared_ptr_t p)
{
  const long double * const src
    = (const long double *) __upc_access_sptr_to_addr (p);
  long double value;
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

/* Strict get, "xf" entry point: loads a long double from shared
   pointer P.  The address is resolved first; the load is then
   bracketed by GUPCR_FENCE (before) and GUPCR_READ_FENCE (after).
   Returns the loaded value.  */
__attribute__((__always_inline__))
static inline
long double
__getsxf2 (upc_shared_ptr_t p)
{
  const long double * const src
    = (const long double *) __upc_access_sptr_to_addr (p);
  long double value;
  GUPCR_FENCE ();
  value = *src;
  GUPCR_READ_FENCE ();
  return value;
}

/* Strict block get: performs the relaxed __getblk3 (DEST, SRC, LEN)
   with GUPCR_FENCE issued before it and GUPCR_READ_FENCE after it,
   the same fence pairing the scalar strict gets use.  */
__attribute__((__always_inline__))
static inline
void
__getsblk3 (void *dest, upc_shared_ptr_t src, size_t len)
{
  GUPCR_FENCE ();
  __getblk3 (dest, src, len);
  GUPCR_READ_FENCE ();
}

/* Strict put of a u_intQI_t to shared pointer P.  The address is
   resolved first; the store of V is then bracketed by
   GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putsqi2 (upc_shared_ptr_t p, u_intQI_t v)
{
  u_intQI_t * const dst = (u_intQI_t *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

/* Strict put of a u_intHI_t to shared pointer P.  The address is
   resolved first; the store of V is then bracketed by
   GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putshi2 (upc_shared_ptr_t p, u_intHI_t v)
{
  u_intHI_t * const dst = (u_intHI_t *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

/* Strict put of a u_intSI_t to shared pointer P.  The address is
   resolved first; the store of V is then bracketed by
   GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putssi2 (upc_shared_ptr_t p, u_intSI_t v)
{
  u_intSI_t * const dst = (u_intSI_t *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

/* Strict put of a u_intDI_t to shared pointer P.  The address is
   resolved first; the store of V is then bracketed by
   GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putsdi2 (upc_shared_ptr_t p, u_intDI_t v)
{
  u_intDI_t * const dst = (u_intDI_t *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

#if GUPCR_TARGET64
/* Strict put of a u_intTI_t to shared pointer P.  The address is
   resolved first; the store of V is then bracketed by
   GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).
   Only compiled when GUPCR_TARGET64 is set.  */
__attribute__((__always_inline__))
static inline
void
__putsti2 (upc_shared_ptr_t p, u_intTI_t v)
{
  u_intTI_t * const dst = (u_intTI_t *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}
#endif /* GUPCR_TARGET64 */

/* Strict put of a float to shared pointer P.  The address is
   resolved first; the store of V is then bracketed by
   GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putssf2 (upc_shared_ptr_t p, float v)
{
  float * const dst = (float *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

/* Strict put of a double to shared pointer P.  The address is
   resolved first; the store of V is then bracketed by
   GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putsdf2 (upc_shared_ptr_t p, double v)
{
  double * const dst = (double *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

/* Strict put, "tf" entry point: stores the long double V to shared
   pointer P.  The address is resolved first; the store is then
   bracketed by GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putstf2 (upc_shared_ptr_t p, long double v)
{
  long double * const dst
    = (long double *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

/* Strict put, "xf" entry point: stores the long double V to shared
   pointer P.  The address is resolved first; the store is then
   bracketed by GUPCR_WRITE_FENCE (before) and GUPCR_FENCE (after).  */
__attribute__((__always_inline__))
static inline
void
__putsxf2 (upc_shared_ptr_t p, long double v)
{
  long double * const dst
    = (long double *) __upc_access_sptr_to_addr (p);
  GUPCR_WRITE_FENCE ();
  *dst = v;
  GUPCR_FENCE ();
}

/* Strict block put: performs the relaxed __putblk3 (DEST, SRC, LEN)
   with GUPCR_WRITE_FENCE issued before it and GUPCR_FENCE after it,
   the same fence pairing the scalar strict puts use.  */
__attribute__((__always_inline__))
static inline
void
__putsblk3 (upc_shared_ptr_t dest, void *src, size_t len)
{
  GUPCR_WRITE_FENCE ();
  __putblk3 (dest, src, len);
  GUPCR_FENCE ();
}

/* Strict shared-to-shared block copy: performs the relaxed
   __copyblk3 (DEST, SRC, LEN) with GUPCR_WRITE_FENCE issued before
   it and GUPCR_FENCE after it, i.e. it is fenced like a strict put
   even though it also reads from SRC.  */
__attribute__((__always_inline__))
static inline
void
__copysblk3 (upc_shared_ptr_t dest, upc_shared_ptr_t src, size_t len)
{
  GUPCR_WRITE_FENCE ();
  __copyblk3 (dest, src, len);
  GUPCR_FENCE ();
}

/* Inline form of __upc_fence: expands to a single GUPCR_FENCE ().
   Takes no arguments and returns nothing.  */
__attribute__((__always_inline__))
static inline
void
__upc_fence (void)
{
  GUPCR_FENCE ();
}

#else

/* Relaxed accesses.  External declarations of the get/put/copy
   routines, used in this #else branch in place of the inline
   definitions in the other branch of the conditional.  The letters
   before the trailing digit select the operand type: qi/hi/si/di/ti
   pair with u_intQI_t ... u_intTI_t, sf and df with float and double,
   tf and xf both with long double, and blk with a (pointer, size_t)
   block.  The ti variants are declared only when GUPCR_TARGET64 is
   set.  */

extern u_intQI_t __getqi2 (upc_shared_ptr_t);
extern u_intHI_t __gethi2 (upc_shared_ptr_t);
extern u_intSI_t __getsi2 (upc_shared_ptr_t);
extern u_intDI_t __getdi2 (upc_shared_ptr_t);
#if GUPCR_TARGET64
extern u_intTI_t __getti2 (upc_shared_ptr_t);
#endif
extern float __getsf2 (upc_shared_ptr_t);
extern double __getdf2 (upc_shared_ptr_t);
extern long double __gettf2 (upc_shared_ptr_t);
extern long double __getxf2 (upc_shared_ptr_t);
extern void __getblk3 (void *, upc_shared_ptr_t, size_t);

/* Relaxed puts take the destination shared pointer first and the
   value (or source buffer and size) after it.  */
extern void __putqi2 (upc_shared_ptr_t, u_intQI_t);
extern void __puthi2 (upc_shared_ptr_t, u_intHI_t);
extern void __putsi2 (upc_shared_ptr_t, u_intSI_t);
extern void __putdi2 (upc_shared_ptr_t, u_intDI_t);
#if GUPCR_TARGET64
extern void __putti2 (upc_shared_ptr_t, u_intTI_t);
#endif
extern void __putsf2 (upc_shared_ptr_t, float);
extern void __putdf2 (upc_shared_ptr_t, double);
extern void __puttf2 (upc_shared_ptr_t, long double);
extern void __putxf2 (upc_shared_ptr_t, long double);
extern void __putblk3 (upc_shared_ptr_t, void *, size_t);
extern void __copyblk3 (upc_shared_ptr_t, upc_shared_ptr_t, size_t);

/* Strict accesses.  External declarations of the strict get/put/copy
   routines; each has the same signature as its relaxed counterpart
   and is named by inserting an 's' after the get/put/copy prefix
   (e.g. __getqi2 -> __getsqi2).  The ti variants are declared only
   when GUPCR_TARGET64 is set.  */

extern u_intQI_t __getsqi2 (upc_shared_ptr_t);
extern u_intHI_t __getshi2 (upc_shared_ptr_t);
extern u_intSI_t __getssi2 (upc_shared_ptr_t);
extern u_intDI_t __getsdi2 (upc_shared_ptr_t);
#if GUPCR_TARGET64
extern u_intTI_t __getsti2 (upc_shared_ptr_t);
#endif
extern float __getssf2 (upc_shared_ptr_t);
extern double __getsdf2 (upc_shared_ptr_t);
extern long double __getstf2 (upc_shared_ptr_t);
extern long double __getsxf2 (upc_shared_ptr_t);
extern void __getsblk3 (void *, upc_shared_ptr_t, size_t);

/* Strict puts and the strict shared-to-shared copy.  */
extern void __putsqi2 (upc_shared_ptr_t, u_intQI_t);
extern void __putshi2 (upc_shared_ptr_t, u_intHI_t);
extern void __putssi2 (upc_shared_ptr_t, u_intSI_t);
extern void __putsdi2 (upc_shared_ptr_t, u_intDI_t);
#if GUPCR_TARGET64
extern void __putsti2 (upc_shared_ptr_t, u_intTI_t);
#endif
extern void __putssf2 (upc_shared_ptr_t, float);
extern void __putsdf2 (upc_shared_ptr_t, double);
extern void __putstf2 (upc_shared_ptr_t, long double);
extern void __putsxf2 (upc_shared_ptr_t, long double);
extern void __putsblk3 (upc_shared_ptr_t, void *, size_t);
extern void __copysblk3 (upc_shared_ptr_t, upc_shared_ptr_t, size_t);

/* Relaxed accesses (profiled).  Same operand layout as the relaxed
   routines, with a 'g' inserted in the name and two trailing
   arguments, FILE and LINE, appended (hence the digit in each name
   is two higher).  FILE and LINE are presumably the source location
   of the access -- confirm against the profiling implementation.  */

extern u_intQI_t __getgqi3 (upc_shared_ptr_t, const char *file, int line);
extern u_intHI_t __getghi3 (upc_shared_ptr_t, const char *file, int line);
extern u_intSI_t __getgsi3 (upc_shared_ptr_t, const char *file, int line);
extern u_intDI_t __getgdi3 (upc_shared_ptr_t, const char *file, int line);
#if GUPCR_TARGET64
extern u_intTI_t __getgti3 (upc_shared_ptr_t, const char *file, int line);
#endif
extern float __getgsf3 (upc_shared_ptr_t, const char *file, int line);
extern double __getgdf3 (upc_shared_ptr_t, const char *file, int line);
extern long double __getgtf3 (upc_shared_ptr_t, const char *file, int line);
extern long double __getgxf3 (upc_shared_ptr_t, const char *file, int line);
extern void __getgblk5 (void *, upc_shared_ptr_t, size_t, const char *file,
			int line);

/* Profiled relaxed puts and shared-to-shared copy.  */
extern void __putgqi4 (upc_shared_ptr_t, u_intQI_t, const char *file,
		       int line);
extern void __putghi4 (upc_shared_ptr_t, u_intHI_t, const char *file,
		       int line);
extern void __putgsi4 (upc_shared_ptr_t, u_intSI_t, const char *file,
		       int line);
extern void __putgdi4 (upc_shared_ptr_t, u_intDI_t, const char *file,
		       int line);
#if GUPCR_TARGET64
extern void __putgti4 (upc_shared_ptr_t, u_intTI_t, const char *file,
		       int line);
#endif
extern void __putgsf4 (upc_shared_ptr_t, float, const char *file, int line);
extern void __putgdf4 (upc_shared_ptr_t, double, const char *file, int line);
extern void __putgtf4 (upc_shared_ptr_t, long double, const char *file, int line);
extern void __putgxf4 (upc_shared_ptr_t, long double, const char *file, int line);
extern void __putgblk5 (upc_shared_ptr_t, void *, size_t, const char *file,
			int line);
extern void __copygblk5 (upc_shared_ptr_t, upc_shared_ptr_t, size_t,
			 const char *file, int line);

/* Strict accesses (profiled).  Same operand layout as the strict
   routines, with "sg" in the name and two trailing arguments, FILE
   and LINE, appended.  FILE and LINE are presumably the source
   location of the access -- confirm against the profiling
   implementation.  */

extern u_intQI_t __getsgqi3 (upc_shared_ptr_t, const char *file, int line);
extern u_intHI_t __getsghi3 (upc_shared_ptr_t, const char *file, int line);
extern u_intSI_t __getsgsi3 (upc_shared_ptr_t, const char *file, int line);
extern u_intDI_t __getsgdi3 (upc_shared_ptr_t, const char *file, int line);
#if GUPCR_TARGET64
extern u_intTI_t __getsgti3 (upc_shared_ptr_t, const char *file, int line);
#endif
extern float __getsgsf3 (upc_shared_ptr_t, const char *file, int line);
extern double __getsgdf3 (upc_shared_ptr_t, const char *file, int line);
extern long double __getsgtf3 (upc_shared_ptr_t, const char *file, int line);
extern long double __getsgxf3 (upc_shared_ptr_t, const char *file, int line);
extern void __getsgblk5 (void *, upc_shared_ptr_t, size_t, const char *file,
			 int line);

/* Profiled strict puts and shared-to-shared copy.  */
extern void __putsgqi4 (upc_shared_ptr_t, u_intQI_t, const char *file,
			int line);
extern void __putsghi4 (upc_shared_ptr_t, u_intHI_t, const char *file,
			int line);
extern void __putsgsi4 (upc_shared_ptr_t, u_intSI_t, const char *file,
			int line);
extern void __putsgdi4 (upc_shared_ptr_t, u_intDI_t, const char *file,
			int line);
#if GUPCR_TARGET64
extern void __putsgti4 (upc_shared_ptr_t, u_intTI_t, const char *file,
			int line);
#endif
extern void __putsgsf4 (upc_shared_ptr_t, float, const char *file, int line);
extern void __putsgdf4 (upc_shared_ptr_t, double, const char *file, int line);
extern void __putsgtf4 (upc_shared_ptr_t, long double, const char *file, int line);
extern void __putsgxf4 (upc_shared_ptr_t, long double, const char *file, int line);
extern void __putsgblk5 (upc_shared_ptr_t, void *, size_t, const char *file,
			 int line);
extern void __copysgblk5 (upc_shared_ptr_t, upc_shared_ptr_t, size_t,
			  const char *file, int line);

/* Miscellaneous access related prototypes.  __upc_fence is the
   external counterpart of the inline __upc_fence defined in the
   other branch of this conditional, where it expands to
   GUPCR_FENCE ().  */
extern void __upc_fence (void);


#endif /* __UPC_INLINE_LIB__ */

#if __UPC_PUPC_INST__
#pragma pupc on
#endif /* __UPC_PUPC_INST__ */

#endif /* !_GCC_UPC_LIB_H_ */
