|
| 1 | +#ifndef Py_HASH_H |
| 2 | + |
| 3 | +#define Py_HASH_H |
| 4 | +#ifdef __cplusplus |
| 5 | +extern "C" { |
| 6 | +#endif |
| 7 | + |
| 8 | +/* Helpers for hash functions */ |
| 9 | +#ifndef Py_LIMITED_API |
| 10 | +PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); |
| 11 | +PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*); |
| 12 | +PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); |
| 13 | +#endif |
| 14 | + |
| 15 | +/* Prime multiplier used in string and various other hashes. */ |
| 16 | +#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ |
| 17 | + |
| 18 | +/* Parameters used for the numeric hash implementation. See notes for |
| 19 | + _Py_HashDouble in Objects/object.c. Numeric hashes are based on |
| 20 | + reduction modulo the prime 2**_PyHASH_BITS - 1. */ |
| 21 | + |
| 22 | +#if SIZEOF_VOID_P >= 8 |
| 23 | +# define _PyHASH_BITS 61 |
| 24 | +#else |
| 25 | +# define _PyHASH_BITS 31 |
| 26 | +#endif |
| 27 | + |
| 28 | +#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) |
| 29 | +#define _PyHASH_INF 314159 |
| 30 | +#define _PyHASH_NAN 0 |
| 31 | +#define _PyHASH_IMAG _PyHASH_MULTIPLIER |
| 32 | + |
| 33 | + |
| 34 | +/* hash secret |
| 35 | + * |
| 36 | + * memory layout on 64 bit systems |
| 37 | + * cccccccc cccccccc cccccccc uc -- unsigned char[24] |
| 38 | + * pppppppp ssssssss ........ fnv -- two Py_hash_t |
| 39 | + * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T |
| 40 | + * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t |
| 41 | + * ........ ........ eeeeeeee pyexpat XML hash salt |
| 42 | + * |
| 43 | + * memory layout on 32 bit systems |
| 44 | + * cccccccc cccccccc cccccccc uc |
| 45 | + * ppppssss ........ ........ fnv -- two Py_hash_t |
| 46 | + * k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*) |
| 47 | + * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t |
| 48 | + * ........ ........ eeee.... pyexpat XML hash salt |
| 49 | + * |
| 50 | + * (*) The siphash member may not be available on 32 bit platforms without |
| 51 | + * an unsigned int64 data type. |
| 52 | + */ |
| 53 | +typedef union { |
| 54 | + /* ensure 24 bytes */ |
| 55 | + unsigned char uc[24]; |
| 56 | + /* two Py_hash_t for FNV */ |
| 57 | + struct { |
| 58 | + Py_hash_t prefix; |
| 59 | + Py_hash_t suffix; |
| 60 | + } fnv; |
| 61 | +#ifdef PY_UINT64_T |
| 62 | + /* two uint64 for SipHash24 */ |
| 63 | + struct { |
| 64 | + PY_UINT64_T k0; |
| 65 | + PY_UINT64_T k1; |
| 66 | + } siphash; |
| 67 | +#endif |
| 68 | + /* a different (!) Py_hash_t for small string optimization */ |
| 69 | + struct { |
| 70 | + unsigned char padding[16]; |
| 71 | + Py_hash_t suffix; |
| 72 | + } djbx33a; |
| 73 | + struct { |
| 74 | + unsigned char padding[16]; |
| 75 | + Py_hash_t hashsalt; |
| 76 | + } expat; |
| 77 | +} _Py_HashSecret_t; |
| 78 | +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; |
| 79 | + |
| 80 | +#ifdef Py_DEBUG |
| 81 | +PyAPI_DATA(int) _Py_HashSecret_Initialized; |
| 82 | +#endif |
| 83 | + |
| 84 | + |
| 85 | +/* hash function definition */ |
| 86 | +#ifndef Py_LIMITED_API |
| 87 | +typedef struct { |
| 88 | + Py_hash_t (*const hash)(const void *, Py_ssize_t); |
| 89 | + const char *name; |
| 90 | + const int hash_bits; |
| 91 | + const int seed_bits; |
| 92 | +} PyHash_FuncDef; |
| 93 | + |
| 94 | +PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); |
| 95 | +#endif |
| 96 | + |
| 97 | + |
| 98 | +/* cutoff for small string DJBX33A optimization in range [1, cutoff). |
| 99 | + * |
| 100 | + * About 50% of the strings in a typical Python application are smaller than |
| 101 | + * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks. |
| 102 | + * NEVER use DJBX33A for long strings! |
| 103 | + * |
| 104 | + * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms |
| 105 | + * should use a smaller cutoff because it is easier to create colliding |
| 106 | + * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should |
| 107 | + * provide a decent safety margin. |
| 108 | + */ |
| 109 | +#ifndef Py_HASH_CUTOFF |
| 110 | +# define Py_HASH_CUTOFF 0 |
| 111 | +#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0) |
| 112 | +# error Py_HASH_CUTOFF must in range 0...7. |
| 113 | +#endif /* Py_HASH_CUTOFF */ |
| 114 | + |
| 115 | + |
| 116 | +/* hash algorithm selection |
| 117 | + * |
| 118 | + * The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the |
| 119 | + * configure script. |
| 120 | + * |
| 121 | + * - FNV is available on all platforms and architectures. |
| 122 | + * - SIPHASH24 only works on plaforms that provide PY_UINT64_T and doesn't |
| 123 | + * require aligned memory for integers. |
| 124 | + * - With EXTERNAL embedders can provide an alternative implementation with:: |
| 125 | + * |
| 126 | + * PyHash_FuncDef PyHash_Func = {...}; |
| 127 | + * |
| 128 | + * XXX: Figure out __declspec() for extern PyHash_FuncDef. |
| 129 | + */ |
| 130 | +#define Py_HASH_EXTERNAL 0 |
| 131 | +#define Py_HASH_SIPHASH24 1 |
| 132 | +#define Py_HASH_FNV 2 |
| 133 | + |
| 134 | +#ifndef Py_HASH_ALGORITHM |
| 135 | +# if (defined(PY_UINT64_T) && defined(PY_UINT32_T) \ |
| 136 | + && !defined(HAVE_ALIGNED_REQUIRED)) |
| 137 | +# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24 |
| 138 | +# else |
| 139 | +# define Py_HASH_ALGORITHM Py_HASH_FNV |
| 140 | +# endif /* uint64_t && uint32_t && aligned */ |
| 141 | +#endif /* Py_HASH_ALGORITHM */ |
| 142 | + |
| 143 | +#ifdef __cplusplus |
| 144 | +} |
| 145 | +#endif |
| 146 | + |
| 147 | +#endif /* !Py_HASH_H */ |
0 commit comments