4 files changed, 154 insertions, 89 deletions
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
index d969a5b9c23..c36d8ded40a 100644
--- a/src/backend/utils/adt/varbit.c
+++ b/src/backend/utils/adt/varbit.c
@@ -3,6 +3,21 @@
  * varbit.c
  *	  Functions for the SQL datatypes BIT() and BIT VARYING().
  *
+ * The data structure contains the following elements:
+ *   header  -- length of the whole data structure (incl header)
+ *              in bytes (as with all varying length datatypes)
+ *   data section -- private data section for the bits data structures
+ *     bitlength -- length of the bit string in bits
+ *     bitdata   -- bit string, most significant byte first
+ *
+ * The length of the bitdata vector should always be exactly as many
+ * bytes as are needed for the given bitlength.  If the bitlength is
+ * not a multiple of 8, the extra low-order padding bits of the last
+ * byte must be zeroes.
+ *
+ * attypmod is defined as the length of the bit string in bits, or for
+ * varying bits the maximum length.
+ *
  * Code originally contributed by Adriaan Joubert.
  *
  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
@@ -27,6 +42,40 @@
 
 #define HEXDIG(z)	 ((z)<10 ? ((z)+'0') : ((z)-10+'A'))
 
+/* Mask off any bits that should be zero in the last byte of a bitstring */
+#define VARBIT_PAD(vb) \
+	do { \
+		int32	pad_ = VARBITPAD(vb); \
+		Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+		if (pad_ > 0) \
+			*(VARBITS(vb) + VARBITBYTES(vb) - 1) &= BITMASK << pad_; \
+	} while (0)
+
+/*
+ * Many functions work byte-by-byte, so they have a pointer handy to the
+ * last-plus-one byte, which saves a cycle or two.
+ */
+#define VARBIT_PAD_LAST(vb, ptr) \
+	do { \
+		int32	pad_ = VARBITPAD(vb); \
+		Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+		if (pad_ > 0) \
+			*((ptr) - 1) &= BITMASK << pad_; \
+	} while (0)
+
+/* Assert proper padding of a bitstring */
+#ifdef USE_ASSERT_CHECKING
+#define VARBIT_CORRECTLY_PADDED(vb) \
+	do { \
+		int32	pad_ = VARBITPAD(vb); \
+		Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+		Assert(pad_ == 0 || \
+			   (*(VARBITS(vb) + VARBITBYTES(vb) - 1) & ~(BITMASK << pad_)) == 0); \
+	} while (0)
+#else
+#define VARBIT_CORRECTLY_PADDED(vb) ((void) 0)
+#endif
+
 static VarBit *bit_catenate(VarBit *arg1, VarBit *arg2);
 static VarBit *bitsubstring(VarBit *arg, int32 s, int32 l,
 							bool length_not_specified);
@@ -87,24 +136,6 @@ anybit_typmodout(int32 typmod)
 }
 
 
-/*----------
- *	attypmod -- contains the length of the bit string in bits, or for
- *			   varying bits the maximum length.
- *
- *	The data structure contains the following elements:
- *	  header  -- length of the whole data structure (incl header)
- *				 in bytes. (as with all varying length datatypes)
- *	  data section -- private data section for the bits data structures
- *		bitlength -- length of the bit string in bits
- *		bitdata   -- bit string, most significant byte first
- *
- *	The length of the bitdata vector should always be exactly as many
- *	bytes as are needed for the given bitlength.  If the bitlength is
- *	not a multiple of 8, the extra low-order padding bits of the last
- *	byte must be zeroes.
- *----------
- */
-
 /*
  * bit_in -
  *	  converts a char string to the internal representation of a bitstring.
@@ -264,6 +295,9 @@ bit_out(PG_FUNCTION_ARGS)
 				len,
 				bitlen;
 
+	/* Assertion to help catch any bit functions that don't pad correctly */
+	VARBIT_CORRECTLY_PADDED(s);
+
 	bitlen = VARBITLEN(s);
 	len = (bitlen + 3) / 4;
 	result = (char *) palloc(len + 2);
@@ -304,8 +338,6 @@ bit_recv(PG_FUNCTION_ARGS)
 	VarBit	   *result;
 	int			len,
 				bitlen;
-	int			ipad;
-	bits8		mask;
 
 	bitlen = pq_getmsgint(buf, sizeof(int32));
 	if (bitlen < 0 || bitlen > VARBITMAXLEN)
@@ -330,13 +362,8 @@ bit_recv(PG_FUNCTION_ARGS)
 
 	pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
 
-	/* Make sure last byte is zero-padded if needed */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	/* Make sure last byte is correctly zero-padded */
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -367,8 +394,6 @@ bit(PG_FUNCTION_ARGS)
 	bool		isExplicit = PG_GETARG_BOOL(2);
 	VarBit	   *result;
 	int			rlen;
-	int			ipad;
-	bits8		mask;
 
 	/* No work if typmod is invalid or supplied data matches it already */
 	if (len <= 0 || len > VARBITMAXLEN || len == VARBITLEN(arg))
@@ -394,12 +419,7 @@ bit(PG_FUNCTION_ARGS)
 	 * if source data was shorter than target length (we assume the last byte
 	 * of the source data was itself correctly zero-padded).
 	 */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -574,6 +594,9 @@ varbit_out(PG_FUNCTION_ARGS)
 				k,
 				len;
 
+	/* Assertion to help catch any bit functions that don't pad correctly */
+	VARBIT_CORRECTLY_PADDED(s);
+
 	len = VARBITLEN(s);
 	result = (char *) palloc(len + 1);
 	sp = VARBITS(s);
@@ -620,8 +643,6 @@ varbit_recv(PG_FUNCTION_ARGS)
 	VarBit	   *result;
 	int			len,
 				bitlen;
-	int			ipad;
-	bits8		mask;
 
 	bitlen = pq_getmsgint(buf, sizeof(int32));
 	if (bitlen < 0 || bitlen > VARBITMAXLEN)
@@ -646,13 +667,8 @@ varbit_recv(PG_FUNCTION_ARGS)
 
 	pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
 
-	/* Make sure last byte is zero-padded if needed */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	/* Make sure last byte is correctly zero-padded */
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -729,8 +745,6 @@ varbit(PG_FUNCTION_ARGS)
 	bool		isExplicit = PG_GETARG_BOOL(2);
 	VarBit	   *result;
 	int			rlen;
-	int			ipad;
-	bits8		mask;
 
 	/* No work if typmod is invalid or supplied data matches it already */
 	if (len <= 0 || len >= VARBITLEN(arg))
@@ -749,13 +763,8 @@ varbit(PG_FUNCTION_ARGS)
 
 	memcpy(VARBITS(result), VARBITS(arg), VARBITBYTES(result));
 
-	/* Make sure last byte is zero-padded if needed */
-	ipad = VARBITPAD(result);
-	if (ipad > 0)
-	{
-		mask = BITMASK << ipad;
-		*(VARBITS(result) + VARBITBYTES(result) - 1) &= mask;
-	}
+	/* Make sure last byte is correctly zero-padded */
+	VARBIT_PAD(result);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1013,6 +1022,8 @@ bit_catenate(VarBit *arg1, VarBit *arg2)
 		}
 	}
 
+	/* The pad bits should be already zero at this point */
+
 	return result;
 }
 
@@ -1046,14 +1057,12 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified)
 	int			bitlen,
 				rbitlen,
 				len,
-				ipad = 0,
 				ishift,
 				i;
 	int			e,
 				s1,
 				e1;
-	bits8		mask,
-			   *r,
+	bits8	   *r,
 			   *ps;
 
 	bitlen = VARBITLEN(arg);
@@ -1118,13 +1127,9 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified)
 				r++;
 			}
 		}
-		/* Do we need to pad at the end? */
-		ipad = VARBITPAD(result);
-		if (ipad > 0)
-		{
-			mask = BITMASK << ipad;
-			*(VARBITS(result) + len - 1) &= mask;
-		}
+
+		/* Make sure last byte is correctly zero-padded */
+		VARBIT_PAD(result);
 	}
 
 	return result;
@@ -1246,7 +1251,7 @@ bit_and(PG_FUNCTION_ARGS)
 	for (i = 0; i < VARBITBYTES(arg1); i++)
 		*r++ = *p1++ & *p2++;
 
-	/* Padding is not needed as & of 0 pad is 0 */
+	/* Padding is not needed as & of 0 pads is 0 */
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1268,7 +1273,6 @@ bit_or(PG_FUNCTION_ARGS)
 	bits8	   *p1,
 			   *p2,
 			   *r;
-	bits8		mask;
 
 	bitlen1 = VARBITLEN(arg1);
 	bitlen2 = VARBITLEN(arg2);
@@ -1287,13 +1291,7 @@ bit_or(PG_FUNCTION_ARGS)
 	for (i = 0; i < VARBITBYTES(arg1); i++)
 		*r++ = *p1++ | *p2++;
 
-	/* Pad the result */
-	mask = BITMASK << VARBITPAD(result);
-	if (mask)
-	{
-		r--;
-		*r &= mask;
-	}
+	/* Padding is not needed as | of 0 pads is 0 */
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1315,7 +1313,6 @@ bitxor(PG_FUNCTION_ARGS)
 	bits8	   *p1,
 			   *p2,
 			   *r;
-	bits8		mask;
 
 	bitlen1 = VARBITLEN(arg1);
 	bitlen2 = VARBITLEN(arg2);
@@ -1335,13 +1332,7 @@ bitxor(PG_FUNCTION_ARGS)
 	for (i = 0; i < VARBITBYTES(arg1); i++)
 		*r++ = *p1++ ^ *p2++;
 
-	/* Pad the result */
-	mask = BITMASK << VARBITPAD(result);
-	if (mask)
-	{
-		r--;
-		*r &= mask;
-	}
+	/* Padding is not needed as ^ of 0 pads is 0 */
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1357,7 +1348,6 @@ bitnot(PG_FUNCTION_ARGS)
 	VarBit	   *result;
 	bits8	   *p,
 			   *r;
-	bits8		mask;
 
 	result = (VarBit *) palloc(VARSIZE(arg));
 	SET_VARSIZE(result, VARSIZE(arg));
@@ -1368,13 +1358,8 @@ bitnot(PG_FUNCTION_ARGS)
 	for (; p < VARBITEND(arg); p++)
 		*r++ = ~*p;
 
-	/* Pad the result */
-	mask = BITMASK << VARBITPAD(result);
-	if (mask)
-	{
-		r--;
-		*r &= mask;
-	}
+	/* Must zero-pad the result, because extra bits are surely 1's here */
+	VARBIT_PAD_LAST(result, r);
 
 	PG_RETURN_VARBIT_P(result);
 }
@@ -1441,6 +1426,8 @@ bitshiftleft(PG_FUNCTION_ARGS)
 			*r = 0;
 	}
 
+	/* The pad bits should be already zero at this point */
+
 	PG_RETURN_VARBIT_P(result);
 }
 
@@ -1507,6 +1494,8 @@ bitshiftright(PG_FUNCTION_ARGS)
 			if ((++r) < VARBITEND(result))
 				*r = (*p << (BITS_PER_BYTE - ishift)) & BITMASK;
 		}
+		/* We may have shifted 1's into the pad bits, so fix that */
+		VARBIT_PAD_LAST(result, r);
 	}
 
 	PG_RETURN_VARBIT_P(result);
diff --git a/src/include/utils/varbit.h b/src/include/utils/varbit.h
index 641731c4177..e947de55be4 100644
--- a/src/include/utils/varbit.h
+++ b/src/include/utils/varbit.h
@@ -21,6 +21,11 @@
 
 /*
  * Modeled on struct varlena from postgres.h, but data type is bits8.
+ *
+ * Caution: if bit_len is not a multiple of BITS_PER_BYTE, the low-order
+ * bits of the last byte of bit_dat[] are unused and MUST be zeroes.
+ * (This allows bit_cmp() to not bother masking the last byte.)
+ * Also, there should not be any excess bytes counted in the header length.
  */
 typedef struct
 {
diff --git a/src/test/regress/expected/bit.out b/src/test/regress/expected/bit.out
index ef8aaea3eff..255b39b07e2 100644
--- a/src/test/regress/expected/bit.out
+++ b/src/test/regress/expected/bit.out
@@ -477,6 +477,50 @@ SELECT POSITION(B'1101' IN b),
         0 |        0 | 0000000000000001
 (16 rows)
 
+SELECT b, b >> 1 AS bsr, b << 1 AS bsl
+       FROM BIT_SHIFT_TABLE ;
+        b         |       bsr        |       bsl        
+------------------+------------------+------------------
+ 1101100000000000 | 0110110000000000 | 1011000000000000
+ 0110110000000000 | 0011011000000000 | 1101100000000000
+ 0011011000000000 | 0001101100000000 | 0110110000000000
+ 0001101100000000 | 0000110110000000 | 0011011000000000
+ 0000110110000000 | 0000011011000000 | 0001101100000000
+ 0000011011000000 | 0000001101100000 | 0000110110000000
+ 0000001101100000 | 0000000110110000 | 0000011011000000
+ 0000000110110000 | 0000000011011000 | 0000001101100000
+ 0000000011011000 | 0000000001101100 | 0000000110110000
+ 0000000001101100 | 0000000000110110 | 0000000011011000
+ 0000000000110110 | 0000000000011011 | 0000000001101100
+ 0000000000011011 | 0000000000001101 | 0000000000110110
+ 0000000000001101 | 0000000000000110 | 0000000000011010
+ 0000000000000110 | 0000000000000011 | 0000000000001100
+ 0000000000000011 | 0000000000000001 | 0000000000000110
+ 0000000000000001 | 0000000000000000 | 0000000000000010
+(16 rows)
+
+SELECT b::bit(15), b::bit(15) >> 1 AS bsr, b::bit(15) << 1 AS bsl
+       FROM BIT_SHIFT_TABLE ;
+        b        |       bsr       |       bsl       
+-----------------+-----------------+-----------------
+ 110110000000000 | 011011000000000 | 101100000000000
+ 011011000000000 | 001101100000000 | 110110000000000
+ 001101100000000 | 000110110000000 | 011011000000000
+ 000110110000000 | 000011011000000 | 001101100000000
+ 000011011000000 | 000001101100000 | 000110110000000
+ 000001101100000 | 000000110110000 | 000011011000000
+ 000000110110000 | 000000011011000 | 000001101100000
+ 000000011011000 | 000000001101100 | 000000110110000
+ 000000001101100 | 000000000110110 | 000000011011000
+ 000000000110110 | 000000000011011 | 000000001101100
+ 000000000011011 | 000000000001101 | 000000000110110
+ 000000000001101 | 000000000000110 | 000000000011010
+ 000000000000110 | 000000000000011 | 000000000001100
+ 000000000000011 | 000000000000001 | 000000000000110
+ 000000000000001 | 000000000000000 | 000000000000010
+ 000000000000000 | 000000000000000 | 000000000000000
+(16 rows)
+
 CREATE TABLE VARBIT_SHIFT_TABLE(v BIT VARYING(20));
 INSERT INTO VARBIT_SHIFT_TABLE VALUES (B'11011');
 INSERT INTO VARBIT_SHIFT_TABLE SELECT CAST(v || B'0' AS BIT VARYING(6)) >>1 FROM VARBIT_SHIFT_TABLE;
@@ -507,6 +551,28 @@ SELECT POSITION(B'1101' IN v),
        16 |       16 | 00000000000000011011
 (16 rows)
 
+SELECT v, v >> 1 AS vsr, v << 1 AS vsl
+       FROM VARBIT_SHIFT_TABLE ;
+          v           |         vsr          |         vsl          
+----------------------+----------------------+----------------------
+ 11011                | 01101                | 10110
+ 011011               | 001101               | 110110
+ 0011011              | 0001101              | 0110110
+ 00011011             | 00001101             | 00110110
+ 000011011            | 000001101            | 000110110
+ 0000011011           | 0000001101           | 0000110110
+ 00000011011          | 00000001101          | 00000110110
+ 000000011011         | 000000001101         | 000000110110
+ 0000000011011        | 0000000001101        | 0000000110110
+ 00000000011011       | 00000000001101       | 00000000110110
+ 000000000011011      | 000000000001101      | 000000000110110
+ 0000000000011011     | 0000000000001101     | 0000000000110110
+ 00000000000011011    | 00000000000001101    | 00000000000110110
+ 000000000000011011   | 000000000000001101   | 000000000000110110
+ 0000000000000011011  | 0000000000000001101  | 0000000000000110110
+ 00000000000000011011 | 00000000000000001101 | 00000000000000110110
+(16 rows)
+
 DROP TABLE BIT_SHIFT_TABLE;
 DROP TABLE VARBIT_SHIFT_TABLE;
 -- Get/Set bit
diff --git a/src/test/regress/sql/bit.sql b/src/test/regress/sql/bit.sql
index a73da8cb8f6..f47b8699207 100644
--- a/src/test/regress/sql/bit.sql
+++ b/src/test/regress/sql/bit.sql
@@ -168,6 +168,10 @@ SELECT POSITION(B'1101' IN b),
        POSITION(B'11011' IN b),
        b
        FROM BIT_SHIFT_TABLE ;
+SELECT b, b >> 1 AS bsr, b << 1 AS bsl
+       FROM BIT_SHIFT_TABLE ;
+SELECT b::bit(15), b::bit(15) >> 1 AS bsr, b::bit(15) << 1 AS bsl
+       FROM BIT_SHIFT_TABLE ;
 
 
 CREATE TABLE VARBIT_SHIFT_TABLE(v BIT VARYING(20));
@@ -180,7 +184,8 @@ SELECT POSITION(B'1101' IN v),
        POSITION(B'11011' IN v),
        v
        FROM VARBIT_SHIFT_TABLE ;
-
+SELECT v, v >> 1 AS vsr, v << 1 AS vsl
+       FROM VARBIT_SHIFT_TABLE ;
 
 DROP TABLE BIT_SHIFT_TABLE;
 DROP TABLE VARBIT_SHIFT_TABLE;