aboutsummaryrefslogtreecommitdiff
path: root/contrib/test_parser/test_parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/test_parser/test_parser.c')
-rw-r--r--contrib/test_parser/test_parser.c130
1 files changed, 130 insertions, 0 deletions
diff --git a/contrib/test_parser/test_parser.c b/contrib/test_parser/test_parser.c
new file mode 100644
index 00000000000..728bf4098fe
--- /dev/null
+++ b/contrib/test_parser/test_parser.c
@@ -0,0 +1,130 @@
+/*-------------------------------------------------------------------------
+ *
+ * test_parser.c
+ * Simple example of a text search parser
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+
+/*
+ * types
+ */
+
+/* self-defined type */
+typedef struct {
+ char * buffer; /* text to parse */
+ int len; /* length of the text in buffer */
+ int pos; /* position of the parser */
+} ParserState;
+
+/* copy-paste from wparser.h of tsearch2 */
+typedef struct {
+ int lexid;
+ char *alias;
+ char *descr;
+} LexDescr;
+
+/*
+ * prototypes
+ */
+PG_FUNCTION_INFO_V1(testprs_start);
+Datum testprs_start(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_getlexeme);
+Datum testprs_getlexeme(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_end);
+Datum testprs_end(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_lextype);
+Datum testprs_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * functions
+ */
+Datum testprs_start(PG_FUNCTION_ARGS)
+{
+ ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));
+ pst->buffer = (char *) PG_GETARG_POINTER(0);
+ pst->len = PG_GETARG_INT32(1);
+ pst->pos = 0;
+
+ PG_RETURN_POINTER(pst);
+}
+
+Datum testprs_getlexeme(PG_FUNCTION_ARGS)
+{
+ ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
+ char **t = (char **) PG_GETARG_POINTER(1);
+ int *tlen = (int *) PG_GETARG_POINTER(2);
+ int type;
+
+ *tlen = pst->pos;
+ *t = pst->buffer + pst->pos;
+
+ if ((pst->buffer)[pst->pos] == ' ')
+ {
+ /* blank type */
+ type = 12;
+ /* go to the next non-white-space character */
+ while ((pst->buffer)[pst->pos] == ' ' &&
+ pst->pos < pst->len)
+ (pst->pos)++;
+ } else {
+ /* word type */
+ type = 3;
+ /* go to the next white-space character */
+ while ((pst->buffer)[pst->pos] != ' ' &&
+ pst->pos < pst->len)
+ (pst->pos)++;
+ }
+
+ *tlen = pst->pos - *tlen;
+
+ /* we are finished if (*tlen == 0) */
+ if (*tlen == 0)
+ type=0;
+
+ PG_RETURN_INT32(type);
+}
+
+Datum testprs_end(PG_FUNCTION_ARGS)
+{
+ ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
+ pfree(pst);
+ PG_RETURN_VOID();
+}
+
+Datum testprs_lextype(PG_FUNCTION_ARGS)
+{
+ /*
+ * Remarks:
+ * - we have to return the blanks for headline reason
+ * - we use the same lexids like Teodor in the default
+ * word parser; in this way we can reuse the headline
+ * function of the default word parser.
+ */
+ LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1));
+
+ /* there are only two types in this parser */
+ descr[0].lexid = 3;
+ descr[0].alias = pstrdup("word");
+ descr[0].descr = pstrdup("Word");
+ descr[1].lexid = 12;
+ descr[1].alias = pstrdup("blank");
+ descr[1].descr = pstrdup("Space symbols");
+ descr[2].lexid = 0;
+
+ PG_RETURN_POINTER(descr);
+}