about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-08-26 16:17:42 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-08-26 16:17:42 -0400
commit: 40271811cb9c4906041afc21a3b2c2f31f534fd8 (patch)
tree: 2074fe743129f181558ba356329c5b69437c0941
parent: 928311a463d480ca566e2905a369ac6aa0c3e210 (diff)
download: postgresql-40271811cb9c4906041afc21a3b2c2f31f534fd8.tar.gz
download: postgresql-40271811cb9c4906041afc21a3b2c2f31f534fd8.zip
Improve comments describing tsvector data structure.
-rw-r--r-- src/include/tsearch/ts_type.h | 52
1 files changed, 27 insertions, 25 deletions
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 6a33f851a32..d80a17ba3dc 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -18,6 +18,23 @@
/*
* TSVector type.
+ *
+ * Structure of tsvector datatype:
+ * 1) standard varlena header
+ * 2) int4 size - number of lexemes (WordEntry array entries)
+ * 3) Array of WordEntry - one per lexeme; must be sorted according to
+ * tsCompareString() (ie, memcmp of lexeme strings).
+ * WordEntry->pos gives the number of bytes from end of WordEntry
+ * array to start of lexeme's string, which is of length len.
+ * 4) Per-lexeme data storage:
+ * lexeme string (not null-terminated)
+ * if haspos is true:
+ * padding byte if necessary to make the position data 2-byte aligned
+ * uint16 number of positions that follow
+ * WordEntryPos[] positions
+ *
+ * The positions for each lexeme must be sorted.
+ *
* Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
*/
@@ -46,7 +63,7 @@ typedef uint16 WordEntryPos;
typedef struct
{
uint16 npos;
- WordEntryPos pos[1]; /* var length */
+ WordEntryPos pos[1]; /* variable length */
} WordEntryPosVector;
@@ -60,40 +77,25 @@ typedef struct
#define MAXNUMPOS (256)
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
-/*
- * Structure of tsvector datatype:
- * 1) standard varlena header
- * 2) int4 size - number of lexemes or WordEntry array, which is the same
- * 3) Array of WordEntry - sorted array, comparison based on word's length
- * and strncmp(). WordEntry->pos points number of
- * bytes from end of WordEntry array to start of
- * corresponding lexeme.
- * 4) Lexeme's storage:
- * lexeme (without null-terminator)
- * if haspos is true:
- * padding byte if necessary to make the number of positions 2-byte aligned
- * uint16 number of positions that follow.
- * uint16[] positions
- *
- * The positions must be sorted.
- */
-
+/* This struct represents a complete tsvector datum */
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 size;
- WordEntry entries[1]; /* var size */
- /* lexemes follow */
+ WordEntry entries[1]; /* variable length */
+ /* lexemes follow the entries[] array */
} TSVectorData;
typedef TSVectorData *TSVector;
#define DATAHDRSIZE (offsetof(TSVectorData, entries))
-#define CALCDATASIZE(x, lenstr) (DATAHDRSIZE + (x) * sizeof(WordEntry) + (lenstr) )
+#define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
+
+/* pointer to start of a tsvector's WordEntry array */
#define ARRPTR(x) ( (x)->entries )
-/* returns a pointer to the beginning of lexemes */
-#define STRPTR(x) ( (char *) &(x)->entries[x->size] )
+/* pointer to start of a tsvector's lexeme storage */
+#define STRPTR(x) ( (char *) &(x)->entries[(x)->size] )
#define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
@@ -231,7 +233,7 @@ typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int4 size; /* number of QueryItems */
- char data[1];
+ char data[1]; /* data starts here */
} TSQueryData;
typedef TSQueryData *TSQuery;