aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2019-02-13 16:10:06 -0300
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2019-02-13 16:10:06 -0300
commit711bab1e4d19b5c9967328315a542d93386b1ac5 (patch)
tree65ee7e238438642e152ff52b24f042eb1491e28a /src/backend
parent754ca99314e9e1debe855b0462869ef6e58b7e7a (diff)
downloadpostgresql-711bab1e4d19b5c9967328315a542d93386b1ac5.tar.gz
postgresql-711bab1e4d19b5c9967328315a542d93386b1ac5.zip
Add basic support for using the POPCNT and SSE4.2s LZCNT opcodes
These opcodes have been around in the AMD world since 2007, and 2008 in the case of intel. They're supported in GCC and Clang via some __builtin macros. The opcodes may be unavailable during runtime, in which case we fall back on a C-based implementation of the code. In order to get the POPCNT instruction we must pass the -mpopcnt option to the compiler. We do this only for the pg_bitutils.c file. David Rowley (with fragments taken from a patch by Thomas Munro) Discussion: https://postgr.es/m/CAKJS1f9WTAGG1tPeJnD18hiQW5gAk59fQ6WK-vfdAKEHyRg2RA@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/heap/visibilitymap.c73
-rw-r--r--src/backend/lib/bloomfilter.c15
-rw-r--r--src/backend/nodes/bitmapset.c131
3 files changed, 51 insertions, 168 deletions
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 931ae81fd6d..9657cd0a632 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -89,12 +89,12 @@
#include "access/visibilitymap.h"
#include "access/xlog.h"
#include "miscadmin.h"
+#include "port/pg_bitutils.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr.h"
#include "utils/inval.h"
-
/*#define TRACE_VISIBILITYMAP */
/*
@@ -115,43 +115,9 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
-/* tables for fast counting of set bits for visible and frozen */
-static const uint8 number_of_ones_for_visible[256] = {
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
- 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
- 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
-};
-static const uint8 number_of_ones_for_frozen[256] = {
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
- 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
-};
+/* Masks for bit counting bits in the visibility map. */
+#define VISIBLE_MASK64 0x5555555555555555 /* The lower bit of each bit pair */
+#define FROZEN_MASK64 0xaaaaaaaaaaaaaaaa /* The upper bit of each bit pair */
/* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
@@ -408,18 +374,16 @@ void
visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
{
BlockNumber mapBlock;
+ BlockNumber nvisible = 0;
+ BlockNumber nfrozen = 0;
/* all_visible must be specified */
Assert(all_visible);
- *all_visible = 0;
- if (all_frozen)
- *all_frozen = 0;
-
for (mapBlock = 0;; mapBlock++)
{
Buffer mapBuffer;
- unsigned char *map;
+ uint64 *map;
int i;
/*
@@ -436,17 +400,30 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro
* immediately stale anyway if anyone is concurrently setting or
* clearing bits, and we only really need an approximate value.
*/
- map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer));
+ map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
- for (i = 0; i < MAPSIZE; i++)
+ StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0,
+ "unsupported MAPSIZE");
+ if (all_frozen == NULL)
+ {
+ for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
+ nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
+ }
+ else
{
- *all_visible += number_of_ones_for_visible[map[i]];
- if (all_frozen)
- *all_frozen += number_of_ones_for_frozen[map[i]];
+ for (i = 0; i < MAPSIZE / sizeof(uint64); i++)
+ {
+ nvisible += pg_popcount64(map[i] & VISIBLE_MASK64);
+ nfrozen += pg_popcount64(map[i] & FROZEN_MASK64);
+ }
}
ReleaseBuffer(mapBuffer);
}
+
+ *all_visible = nvisible;
+ if (all_frozen)
+ *all_frozen = nfrozen;
}
/*
diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c
index 1e907cabc6e..e2c1276f219 100644
--- a/src/backend/lib/bloomfilter.c
+++ b/src/backend/lib/bloomfilter.c
@@ -37,6 +37,7 @@
#include "access/hash.h"
#include "lib/bloomfilter.h"
+#include "port/pg_bitutils.h"
#define MAX_HASH_FUNCS 10
@@ -187,19 +188,7 @@ double
bloom_prop_bits_set(bloom_filter *filter)
{
int bitset_bytes = filter->m / BITS_PER_BYTE;
- uint64 bits_set = 0;
- int i;
-
- for (i = 0; i < bitset_bytes; i++)
- {
- unsigned char byte = filter->bitset[i];
-
- while (byte)
- {
- bits_set++;
- byte &= (byte - 1);
- }
- }
+ uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
return bits_set / (double) filter->m;
}
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 62cd00903c0..d0380abf3e4 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -22,6 +22,7 @@
#include "access/hash.h"
#include "nodes/pg_list.h"
+#include "port/pg_bitutils.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
@@ -51,79 +52,23 @@
#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x))
+/* Set the bitwise macro version we must use based on the bitmapword size */
+#if BITS_PER_BITMAPWORD == 32
-/*
- * Lookup tables to avoid need for bit-by-bit groveling
- *
- * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit
- * in a nonzero byte value x. The entry for x=0 is never used.
- *
- * leftmost_one_pos[x] gives the bit number (0-7) of the leftmost one bit in a
- * nonzero byte value x. The entry for x=0 is never used.
- *
- * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x.
- *
- * We could make these tables larger and reduce the number of iterations
- * in the functions that use them, but bytewise shifts and masks are
- * especially fast on many machines, so working a byte at a time seems best.
- */
+#define bmw_popcount(w) pg_popcount32(w)
+#define bmw_rightmost_one(w) pg_rightmost_one32(w)
+#define bmw_leftmost_one(w) pg_leftmost_one32(w)
+
+#elif BITS_PER_BITMAPWORD == 64
+
+#define bmw_popcount(w) pg_popcount64(w)
+#define bmw_rightmost_one(w) pg_rightmost_one64(w)
+#define bmw_leftmost_one(w) pg_leftmost_one64(w)
+
+#else
+#error "invalid BITS_PER_BITMAPWORD"
+#endif
-static const uint8 rightmost_one_pos[256] = {
- 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-
-static const uint8 leftmost_one_pos[256] = {
- 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
-};
-
-static const uint8 number_of_ones[256] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-};
/*
@@ -607,12 +552,7 @@ bms_singleton_member(const Bitmapset *a)
if (result >= 0 || HAS_MULTIPLE_ONES(w))
elog(ERROR, "bitmapset has multiple members");
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
}
}
if (result < 0)
@@ -650,12 +590,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member)
if (result >= 0 || HAS_MULTIPLE_ONES(w))
return false;
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
}
}
if (result < 0)
@@ -681,12 +616,9 @@ bms_num_members(const Bitmapset *a)
{
bitmapword w = a->words[wordnum];
- /* we assume here that bitmapword is an unsigned type */
- while (w != 0)
- {
- result += number_of_ones[w & 255];
- w >>= 8;
- }
+ /* No need to count the bits in a zero word */
+ if (w != 0)
+ result += bmw_popcount(w);
}
return result;
}
@@ -1041,12 +973,7 @@ bms_first_member(Bitmapset *a)
a->words[wordnum] &= ~w;
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
return result;
}
}
@@ -1096,12 +1023,7 @@ bms_next_member(const Bitmapset *a, int prevbit)
int result;
result = wordnum * BITS_PER_BITMAPWORD;
- while ((w & 255) == 0)
- {
- w >>= 8;
- result += 8;
- }
- result += rightmost_one_pos[w & 255];
+ result += bmw_rightmost_one(w);
return result;
}
@@ -1168,14 +1090,9 @@ bms_prev_member(const Bitmapset *a, int prevbit)
if (w != 0)
{
int result;
- int shift = BITS_PER_BITMAPWORD - 8;
result = wordnum * BITS_PER_BITMAPWORD;
-
- while ((w >> shift) == 0)
- shift -= 8;
-
- result += shift + leftmost_one_pos[(w >> shift) & 255];
+ result += bmw_leftmost_one(w);
return result;
}