diff options
Diffstat (limited to 'contrib/fuzzystrmatch/fuzzystrmatch.h')
-rw-r--r-- | contrib/fuzzystrmatch/fuzzystrmatch.h | 161 |
1 files changed, 161 insertions, 0 deletions
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.h b/contrib/fuzzystrmatch/fuzzystrmatch.h new file mode 100644 index 00000000000..5ed9c3746e3 --- /dev/null +++ b/contrib/fuzzystrmatch/fuzzystrmatch.h @@ -0,0 +1,161 @@ +/* + * fuzzystrmatch.h + * + * Functions for "fuzzy" comparison of strings + * + * Copyright (c) Joseph Conway <joseph.conway@home.com>, 2001; + * + * levenshtein() + * ------------- + * Written based on a description of the algorithm by Michael Gilleland + * found at http://www.merriampark.com/ld.htm + * Also looked at levenshtein.c in the PHP 4.0.6 distribution for + * inspiration. + * + * metaphone() + * ----------- + * Modified for PostgreSQL by Joe Conway. + * Based on CPAN's "Text-Metaphone-1.96" by Michael G Schwern <schwern@pobox.com> + * Code slightly modified for use as PostgreSQL function (palloc, elog, etc). + * Metaphone was originally created by Lawrence Philips and presented in article + * in "Computer Language" December 1990 issue. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without a written agreement + * is hereby granted, provided that the above copyright notice and this + * paragraph and the following two paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + */ + +#ifndef FUZZYSTRMATCH_H +#define FUZZYSTRMATCH_H + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "postgres.h" +#include "fmgr.h" +#include "utils/builtins.h" + + +#define MAX_LEVENSHTEIN_STRLEN 255 +#define MAX_METAPHONE_STRLEN 255 + +typedef struct dynmatrix +{ + int value; +} dynmat; + + +/* + * External declarations + */ +extern Datum levenshtein(PG_FUNCTION_ARGS); +extern Datum metaphone(PG_FUNCTION_ARGS); + +/* + * Internal declarations + */ + + + +#define STRLEN(p) strlen(p) +#define CHAREQ(p1, p2) (*(p1) == *(p2)) +#define NextChar(p) ((p)++) + + +/* + * Original code by Michael G Schwern starts here. + * Code slightly modified for use as PostgreSQL + * function (combined *.h into here). + *------------------------------------------------------------------*/ + +/************************************************************************** + metaphone -- Breaks english phrases down into their phonemes. + + Input + word -- An english word to be phonized + max_phonemes -- How many phonemes to calculate. If 0, then it + will phonize the entire phrase. + phoned_word -- The final phonized word. (We'll allocate the + memory.) + Output + error -- A simple error flag, returns TRUE or FALSE + + NOTES: ALL non-alpha characters are ignored, this includes whitespace, + although non-alpha characters will break up phonemes. +****************************************************************************/ + + +/************************************************************************** + my constants -- constants I like + + Probably redundant. + +***************************************************************************/ + +#define META_ERROR FALSE +#define META_SUCCESS TRUE +#define META_FAILURE FALSE + + +/* I add modifications to the traditional metaphone algorithm that you + might find in books. Define this if you want metaphone to behave + traditionally */ +#undef USE_TRADITIONAL_METAPHONE + +/* Special encodings */ +#define SH 'X' +#define TH '0' + +char Lookahead(char * word, int how_far); +int _metaphone ( + /* IN */ + char * word, + int max_phonemes, + /* OUT */ + char ** phoned_word +); + +/* Metachar.h ... little bits about characters for metaphone */ + + +/*-- Character encoding array & accessing macros --*/ +/* Stolen directly out of the book... */ +char _codes[26] = { + 1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0 +/* a b c d e f g h i j k l m n o p q r s t u v w x y z */ +}; + + +#define ENCODE(c) (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0) + +#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */ + +/* These letters are passed through unchanged */ +#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */ + +/* These form dipthongs when preceding H */ +#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */ + +/* These make C and G soft */ +#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */ + +/* These prevent GH from becoming F */ +#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */ + + +#endif /* FUZZYSTRMATCH_H */ |