diff options
Diffstat (limited to 'src/backend')
103 files changed, 47587 insertions, 45 deletions
diff --git a/src/backend/Makefile b/src/backend/Makefile index d2caa7036ce..9d2dbe62fa5 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -2,9 +2,10 @@ # # Makefile for the postgres backend # -# Copyright (c) 1994, Regents of the University of California +# Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California # -# $PostgreSQL: pgsql/src/backend/Makefile,v 1.123 2007/07/24 09:00:27 mha Exp $ +# $PostgreSQL: pgsql/src/backend/Makefile,v 1.124 2007/08/21 01:11:12 tgl Exp $ # #------------------------------------------------------------------------- @@ -15,7 +16,7 @@ include $(top_builddir)/src/Makefile.global DIRS = access bootstrap catalog parser commands executor lib libpq \ main nodes optimizer port postmaster regex rewrite \ - storage tcop utils $(top_builddir)/src/timezone + storage tcop tsearch utils $(top_builddir)/src/timezone SUBSYSOBJS = $(DIRS:%=%/SUBSYS.o) @@ -166,6 +167,7 @@ ifeq ($(MAKE_DLL), true) endif endif $(MAKE) -C catalog install-data + $(MAKE) -C tsearch install-data $(INSTALL_DATA) $(srcdir)/libpq/pg_hba.conf.sample '$(DESTDIR)$(datadir)/pg_hba.conf.sample' $(INSTALL_DATA) $(srcdir)/libpq/pg_ident.conf.sample '$(DESTDIR)$(datadir)/pg_ident.conf.sample' $(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample' @@ -220,6 +222,7 @@ ifeq ($(MAKE_DLL), true) endif endif $(MAKE) -C catalog uninstall-data + $(MAKE) -C tsearch uninstall-data rm -f '$(DESTDIR)$(datadir)/pg_hba.conf.sample' \ '$(DESTDIR)$(datadir)/pg_ident.conf.sample' \ '$(DESTDIR)$(datadir)/postgresql.conf.sample' \ diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c index a46def63c50..d608bedb605 100644 --- a/src/backend/access/gin/ginarrayproc.c +++ b/src/backend/access/gin/ginarrayproc.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.9 2007/01/31 15:09:45 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginarrayproc.c,v 1.10 2007/08/21 01:11:12 tgl Exp $ *------------------------------------------------------------------------- */ #include "postgres.h" @@ -62,7 +62,7 @@ ginarrayextract(PG_FUNCTION_ARGS) if ( *nentries == 0 && PG_NARGS() == 3 ) { - switch( PG_GETARG_UINT16(2) ) + switch( PG_GETARG_UINT16(2) ) /* StrategyNumber */ { case GinOverlapStrategy: *nentries = -1; /* nobody can be found */ @@ -80,6 +80,15 @@ ginarrayextract(PG_FUNCTION_ARGS) } Datum +ginqueryarrayextract(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ginarrayextract, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum ginarrayconsistent(PG_FUNCTION_ARGS) { bool *check = (bool *) PG_GETARG_POINTER(0); diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 3503385c2ac..6e87b2f6010 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -2,7 +2,7 @@ # # Makefile for backend/catalog # -# $PostgreSQL: pgsql/src/backend/catalog/Makefile,v 1.64 2007/04/02 03:49:37 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/catalog/Makefile,v 1.65 2007/08/21 01:11:13 tgl Exp $ # #------------------------------------------------------------------------- @@ -35,6 +35,8 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \ pg_database.h pg_tablespace.h pg_pltemplate.h \ pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \ + pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \ + pg_ts_parser.h pg_ts_template.h \ toasting.h indexing.h \ ) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 292a737099e..96d238c7242 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/aclchk.c,v 1.139 2007/04/20 02:37:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/aclchk.c,v 1.140 2007/08/21 01:11:13 tgl Exp $ * * NOTES * See acl.h. @@ -34,6 +34,8 @@ #include "catalog/pg_proc.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" #include "commands/dbcommands.h" #include "miscadmin.h" #include "parser/parse_func.h" @@ -1416,7 +1418,11 @@ static const char *const no_priv_msg[MAX_ACL_KIND] = /* ACL_KIND_CONVERSION */ gettext_noop("permission denied for conversion %s"), /* ACL_KIND_TABLESPACE */ - gettext_noop("permission denied for tablespace %s") + gettext_noop("permission denied for tablespace %s"), + /* ACL_KIND_TSDICTIONARY */ + gettext_noop("permission denied for text search dictionary %s"), + /* ACL_KIND_TSCONFIGURATION */ + gettext_noop("permission denied for text search configuration %s") }; static const char *const not_owner_msg[MAX_ACL_KIND] = @@ -1444,7 +1450,11 @@ static const char *const not_owner_msg[MAX_ACL_KIND] = /* ACL_KIND_CONVERSION */ gettext_noop("must be owner of conversion %s"), /* ACL_KIND_TABLESPACE */ - gettext_noop("must be owner of tablespace %s") + gettext_noop("must be owner of tablespace %s"), + /* ACL_KIND_TSDICTIONARY */ + gettext_noop("must be owner of text search dictionary %s"), + /* ACL_KIND_TSCONFIGURATION */ + gettext_noop("must be owner of text search configuration %s") }; @@ -2298,6 +2308,65 @@ pg_opfamily_ownercheck(Oid opf_oid, Oid roleid) } /* + * Ownership check for a text search dictionary (specified by OID). + */ +bool +pg_ts_dict_ownercheck(Oid dict_oid, Oid roleid) +{ + HeapTuple tuple; + Oid ownerId; + + /* Superusers bypass all permission checking. */ + if (superuser_arg(roleid)) + return true; + + tuple = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(dict_oid), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search dictionary with OID %u does not exist", + dict_oid))); + + ownerId = ((Form_pg_ts_dict) GETSTRUCT(tuple))->dictowner; + + ReleaseSysCache(tuple); + + return has_privs_of_role(roleid, ownerId); +} + +/* + * Ownership check for a text search configuration (specified by OID). + */ +bool +pg_ts_config_ownercheck(Oid cfg_oid, Oid roleid) +{ + HeapTuple tuple; + Oid ownerId; + + /* Superusers bypass all permission checking. */ + if (superuser_arg(roleid)) + return true; + + tuple = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(cfg_oid), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration with OID %u does not exist", + cfg_oid))); + + ownerId = ((Form_pg_ts_config) GETSTRUCT(tuple))->cfgowner; + + ReleaseSysCache(tuple); + + return has_privs_of_role(roleid, ownerId); +} + + +/* * Ownership check for a database (specified by OID). */ bool diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 6b58af65228..51bb4ba17f4 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.66 2007/06/05 21:31:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.67 2007/08/21 01:11:13 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,6 +40,10 @@ #include "catalog/pg_rewrite.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" #include "catalog/pg_type.h" #include "commands/comment.h" #include "commands/dbcommands.h" @@ -97,6 +101,10 @@ static const Oid object_classes[MAX_OCLASS] = { RewriteRelationId, /* OCLASS_REWRITE */ TriggerRelationId, /* OCLASS_TRIGGER */ NamespaceRelationId, /* OCLASS_SCHEMA */ + TSParserRelationId, /* OCLASS_TSPARSER */ + TSDictionaryRelationId, /* OCLASS_TSDICT */ + TSTemplateRelationId, /* OCLASS_TSTEMPLATE */ + TSConfigRelationId, /* OCLASS_TSCONFIG */ AuthIdRelationId, /* OCLASS_ROLE */ DatabaseRelationId, /* OCLASS_DATABASE */ TableSpaceRelationId /* OCLASS_TBLSPACE */ @@ -988,6 +996,22 @@ doDeletion(const ObjectAddress *object) RemoveSchemaById(object->objectId); break; + case OCLASS_TSPARSER: + RemoveTSParserById(object->objectId); + break; + + case OCLASS_TSDICT: + RemoveTSDictionaryById(object->objectId); + break; + + case OCLASS_TSTEMPLATE: + RemoveTSTemplateById(object->objectId); + break; + + case OCLASS_TSCONFIG: + RemoveTSConfigurationById(object->objectId); + break; + /* OCLASS_ROLE, OCLASS_DATABASE, OCLASS_TBLSPACE not handled */ default: @@ -1201,8 +1225,8 @@ find_expr_references_walker(Node *node, /* * If it's a regclass or similar literal referring to an existing * object, add a reference to that object. (Currently, only the - * regclass case has any likely use, but we may as well handle all the - * OID-alias datatypes consistently.) + * regclass and regconfig cases have any likely use, but we may as + * well handle all the OID-alias datatypes consistently.) */ if (!con->constisnull) { @@ -1242,6 +1266,22 @@ find_expr_references_walker(Node *node, add_object_address(OCLASS_TYPE, objoid, 0, context->addrs); break; + case REGCONFIGOID: + objoid = DatumGetObjectId(con->constvalue); + if (SearchSysCacheExists(TSCONFIGOID, + ObjectIdGetDatum(objoid), + 0, 0, 0)) + add_object_address(OCLASS_TSCONFIG, objoid, 0, + context->addrs); + break; + case REGDICTIONARYOID: + objoid = DatumGetObjectId(con->constvalue); + if (SearchSysCacheExists(TSDICTOID, + ObjectIdGetDatum(objoid), + 0, 0, 0)) + add_object_address(OCLASS_TSDICT, objoid, 0, + context->addrs); + break; } } return false; @@ -1606,6 +1646,21 @@ object_address_present(const ObjectAddress *object, } /* + * Record multiple dependencies from an ObjectAddresses array, after first + * removing any duplicates. + */ +void +record_object_address_dependencies(const ObjectAddress *depender, + ObjectAddresses *referenced, + DependencyType behavior) +{ + eliminate_duplicate_dependencies(referenced); + recordMultipleDependencies(depender, + referenced->refs, referenced->numrefs, + behavior); +} + +/* * Clean up when done with an ObjectAddresses array. */ void @@ -1690,6 +1745,22 @@ getObjectClass(const ObjectAddress *object) Assert(object->objectSubId == 0); return OCLASS_SCHEMA; + case TSParserRelationId: + Assert(object->objectSubId == 0); + return OCLASS_TSPARSER; + + case TSDictionaryRelationId: + Assert(object->objectSubId == 0); + return OCLASS_TSDICT; + + case TSTemplateRelationId: + Assert(object->objectSubId == 0); + return OCLASS_TSTEMPLATE; + + case TSConfigRelationId: + Assert(object->objectSubId == 0); + return OCLASS_TSCONFIG; + case AuthIdRelationId: Assert(object->objectSubId == 0); return OCLASS_ROLE; @@ -2080,6 +2151,70 @@ getObjectDescription(const ObjectAddress *object) break; } + case OCLASS_TSPARSER: + { + HeapTuple tup; + + tup = SearchSysCache(TSPARSEROID, + ObjectIdGetDatum(object->objectId), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search parser %u", + object->objectId); + appendStringInfo(&buffer, _("text search parser %s"), + NameStr(((Form_pg_ts_parser) GETSTRUCT(tup))->prsname)); + ReleaseSysCache(tup); + break; + } + + case OCLASS_TSDICT: + { + HeapTuple tup; + + tup = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(object->objectId), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + object->objectId); + appendStringInfo(&buffer, _("text search dictionary %s"), + NameStr(((Form_pg_ts_dict) GETSTRUCT(tup))->dictname)); + ReleaseSysCache(tup); + break; + } + + case OCLASS_TSTEMPLATE: + { + HeapTuple tup; + + tup = SearchSysCache(TSTEMPLATEOID, + ObjectIdGetDatum(object->objectId), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search template %u", + object->objectId); + appendStringInfo(&buffer, _("text search template %s"), + NameStr(((Form_pg_ts_template) GETSTRUCT(tup))->tmplname)); + ReleaseSysCache(tup); + break; + } + + case OCLASS_TSCONFIG: + { + HeapTuple tup; + + tup = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(object->objectId), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search configuration %u", + object->objectId); + appendStringInfo(&buffer, _("text search configuration %s"), + NameStr(((Form_pg_ts_config) GETSTRUCT(tup))->cfgname)); + ReleaseSysCache(tup); + break; + } + case OCLASS_ROLE: { appendStringInfo(&buffer, _("role %s"), diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 2f8753bd2e0..84220bd4ce0 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -13,7 +13,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.97 2007/07/25 22:16:18 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.98 2007/08/21 01:11:13 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,10 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" #include "catalog/pg_type.h" #include "commands/dbcommands.h" #include "miscadmin.h" @@ -189,6 +193,10 @@ Datum pg_function_is_visible(PG_FUNCTION_ARGS); Datum pg_operator_is_visible(PG_FUNCTION_ARGS); Datum pg_opclass_is_visible(PG_FUNCTION_ARGS); Datum pg_conversion_is_visible(PG_FUNCTION_ARGS); +Datum pg_ts_parser_is_visible(PG_FUNCTION_ARGS); +Datum pg_ts_dict_is_visible(PG_FUNCTION_ARGS); +Datum pg_ts_template_is_visible(PG_FUNCTION_ARGS); +Datum pg_ts_config_is_visible(PG_FUNCTION_ARGS); Datum pg_my_temp_schema(PG_FUNCTION_ARGS); Datum pg_is_other_temp_schema(PG_FUNCTION_ARGS); @@ -1315,6 +1323,521 @@ ConversionIsVisible(Oid conid) } /* + * TSParserGetPrsid - find a TS parser by possibly qualified name + * + * If not found, returns InvalidOid if failOK, else throws error + */ +Oid +TSParserGetPrsid(List *names, bool failOK) +{ + char *schemaname; + char *parser_name; + Oid namespaceId; + Oid prsoid = InvalidOid; + ListCell *l; + + /* deconstruct the name list */ + DeconstructQualifiedName(names, &schemaname, &parser_name); + + if (schemaname) + { + /* use exact schema given */ + namespaceId = LookupExplicitNamespace(schemaname); + prsoid = GetSysCacheOid(TSPARSERNAMENSP, + PointerGetDatum(parser_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + } + else + { + /* search for it in search path */ + recomputeNamespacePath(); + + foreach(l, activeSearchPath) + { + namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + prsoid = GetSysCacheOid(TSPARSERNAMENSP, + PointerGetDatum(parser_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + if (OidIsValid(prsoid)) + break; + } + } + + if (!OidIsValid(prsoid) && !failOK) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search parser \"%s\" does not exist", + NameListToString(names)))); + + return prsoid; +} + +/* + * TSParserIsVisible + * Determine whether a parser (identified by OID) is visible in the + * current search path. Visible means "would be found by searching + * for the unqualified parser name". + */ +bool +TSParserIsVisible(Oid prsId) +{ + HeapTuple tup; + Form_pg_ts_parser form; + Oid namespace; + bool visible; + + tup = SearchSysCache(TSPARSEROID, + ObjectIdGetDatum(prsId), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search parser %u", prsId); + form = (Form_pg_ts_parser) GETSTRUCT(tup); + + recomputeNamespacePath(); + + /* + * Quick check: if it ain't in the path at all, it ain't visible. Items in + * the system namespace are surely in the path and so we needn't even do + * list_member_oid() for them. + */ + namespace = form->prsnamespace; + if (namespace != PG_CATALOG_NAMESPACE && + !list_member_oid(activeSearchPath, namespace)) + visible = false; + else + { + /* + * If it is in the path, it might still not be visible; it could be + * hidden by another parser of the same name earlier in the path. So we + * must do a slow check for conflicting parsers. + */ + char *name = NameStr(form->prsname); + ListCell *l; + + visible = false; + foreach(l, activeSearchPath) + { + Oid namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + if (namespaceId == namespace) + { + /* Found it first in path */ + visible = true; + break; + } + if (SearchSysCacheExists(TSPARSERNAMENSP, + PointerGetDatum(name), + ObjectIdGetDatum(namespaceId), + 0, 0)) + { + /* Found something else first in path */ + break; + } + } + } + + ReleaseSysCache(tup); + + return visible; +} + +/* + * TSDictionaryGetDictid - find a TS dictionary by possibly qualified name + * + * If not found, returns InvalidOid if failOK, else throws error + */ +Oid +TSDictionaryGetDictid(List *names, bool failOK) +{ + char *schemaname; + char *dict_name; + Oid namespaceId; + Oid dictoid = InvalidOid; + ListCell *l; + + /* deconstruct the name list */ + DeconstructQualifiedName(names, &schemaname, &dict_name); + + if (schemaname) + { + /* use exact schema given */ + namespaceId = LookupExplicitNamespace(schemaname); + dictoid = GetSysCacheOid(TSDICTNAMENSP, + PointerGetDatum(dict_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + } + else + { + /* search for it in search path */ + recomputeNamespacePath(); + + foreach(l, activeSearchPath) + { + namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + dictoid = GetSysCacheOid(TSDICTNAMENSP, + PointerGetDatum(dict_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + if (OidIsValid(dictoid)) + break; + } + } + + if (!OidIsValid(dictoid) && !failOK) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search dictionary \"%s\" does not exist", + NameListToString(names)))); + + return dictoid; +} + +/* + * TSDictionaryIsVisible + * Determine whether a dictionary (identified by OID) is visible in the + * current search path. Visible means "would be found by searching + * for the unqualified dictionary name". + */ +bool +TSDictionaryIsVisible(Oid dictId) +{ + HeapTuple tup; + Form_pg_ts_dict form; + Oid namespace; + bool visible; + + tup = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(dictId), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictId); + form = (Form_pg_ts_dict) GETSTRUCT(tup); + + recomputeNamespacePath(); + + /* + * Quick check: if it ain't in the path at all, it ain't visible. Items in + * the system namespace are surely in the path and so we needn't even do + * list_member_oid() for them. + */ + namespace = form->dictnamespace; + if (namespace != PG_CATALOG_NAMESPACE && + !list_member_oid(activeSearchPath, namespace)) + visible = false; + else + { + /* + * If it is in the path, it might still not be visible; it could be + * hidden by another dictionary of the same name earlier in the + * path. So we must do a slow check for conflicting dictionaries. + */ + char *name = NameStr(form->dictname); + ListCell *l; + + visible = false; + foreach(l, activeSearchPath) + { + Oid namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + if (namespaceId == namespace) + { + /* Found it first in path */ + visible = true; + break; + } + if (SearchSysCacheExists(TSDICTNAMENSP, + PointerGetDatum(name), + ObjectIdGetDatum(namespaceId), + 0, 0)) + { + /* Found something else first in path */ + break; + } + } + } + + ReleaseSysCache(tup); + + return visible; +} + +/* + * TSTemplateGetTmplid - find a TS template by possibly qualified name + * + * If not found, returns InvalidOid if failOK, else throws error + */ +Oid +TSTemplateGetTmplid(List *names, bool failOK) +{ + char *schemaname; + char *template_name; + Oid namespaceId; + Oid tmploid = InvalidOid; + ListCell *l; + + /* deconstruct the name list */ + DeconstructQualifiedName(names, &schemaname, &template_name); + + if (schemaname) + { + /* use exact schema given */ + namespaceId = LookupExplicitNamespace(schemaname); + tmploid = GetSysCacheOid(TSTEMPLATENAMENSP, + PointerGetDatum(template_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + } + else + { + /* search for it in search path */ + recomputeNamespacePath(); + + foreach(l, activeSearchPath) + { + namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + tmploid = GetSysCacheOid(TSTEMPLATENAMENSP, + PointerGetDatum(template_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + if (OidIsValid(tmploid)) + break; + } + } + + if (!OidIsValid(tmploid) && !failOK) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search template \"%s\" does not exist", + NameListToString(names)))); + + return tmploid; +} + +/* + * TSTemplateIsVisible + * Determine whether a template (identified by OID) is visible in the + * current search path. Visible means "would be found by searching + * for the unqualified template name". + */ +bool +TSTemplateIsVisible(Oid tmplId) +{ + HeapTuple tup; + Form_pg_ts_template form; + Oid namespace; + bool visible; + + tup = SearchSysCache(TSTEMPLATEOID, + ObjectIdGetDatum(tmplId), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search template %u", tmplId); + form = (Form_pg_ts_template) GETSTRUCT(tup); + + recomputeNamespacePath(); + + /* + * Quick check: if it ain't in the path at all, it ain't visible. Items in + * the system namespace are surely in the path and so we needn't even do + * list_member_oid() for them. + */ + namespace = form->tmplnamespace; + if (namespace != PG_CATALOG_NAMESPACE && + !list_member_oid(activeSearchPath, namespace)) + visible = false; + else + { + /* + * If it is in the path, it might still not be visible; it could be + * hidden by another template of the same name earlier in the path. + * So we must do a slow check for conflicting templates. + */ + char *name = NameStr(form->tmplname); + ListCell *l; + + visible = false; + foreach(l, activeSearchPath) + { + Oid namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + if (namespaceId == namespace) + { + /* Found it first in path */ + visible = true; + break; + } + if (SearchSysCacheExists(TSTEMPLATENAMENSP, + PointerGetDatum(name), + ObjectIdGetDatum(namespaceId), + 0, 0)) + { + /* Found something else first in path */ + break; + } + } + } + + ReleaseSysCache(tup); + + return visible; +} + +/* + * TSConfigGetCfgid - find a TS config by possibly qualified name + * + * If not found, returns InvalidOid if failOK, else throws error + */ +Oid +TSConfigGetCfgid(List *names, bool failOK) +{ + char *schemaname; + char *config_name; + Oid namespaceId; + Oid cfgoid = InvalidOid; + ListCell *l; + + /* deconstruct the name list */ + DeconstructQualifiedName(names, &schemaname, &config_name); + + if (schemaname) + { + /* use exact schema given */ + namespaceId = LookupExplicitNamespace(schemaname); + cfgoid = GetSysCacheOid(TSCONFIGNAMENSP, + PointerGetDatum(config_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + } + else + { + /* search for it in search path */ + recomputeNamespacePath(); + + foreach(l, activeSearchPath) + { + namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + cfgoid = GetSysCacheOid(TSCONFIGNAMENSP, + PointerGetDatum(config_name), + ObjectIdGetDatum(namespaceId), + 0, 0); + if (OidIsValid(cfgoid)) + break; + } + } + + if (!OidIsValid(cfgoid) && !failOK) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration \"%s\" does not exist", + NameListToString(names)))); + + return cfgoid; +} + +/* + * TSConfigIsVisible + * Determine whether a text search configuration (identified by OID) + * is visible in the current search path. Visible means "would be found + * by searching for the unqualified text search configuration name". + */ +bool +TSConfigIsVisible(Oid cfgid) +{ + HeapTuple tup; + Form_pg_ts_config form; + Oid namespace; + bool visible; + + tup = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(cfgid), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgid); + form = (Form_pg_ts_config) GETSTRUCT(tup); + + recomputeNamespacePath(); + + /* + * Quick check: if it ain't in the path at all, it ain't visible. Items in + * the system namespace are surely in the path and so we needn't even do + * list_member_oid() for them. + */ + namespace = form->cfgnamespace; + if (namespace != PG_CATALOG_NAMESPACE && + !list_member_oid(activeSearchPath, namespace)) + visible = false; + else + { + /* + * If it is in the path, it might still not be visible; it could be + * hidden by another configuration of the same name earlier in the + * path. So we must do a slow check for conflicting configurations. + */ + char *name = NameStr(form->cfgname); + ListCell *l; + + visible = false; + foreach(l, activeSearchPath) + { + Oid namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + + if (namespaceId == namespace) + { + /* Found it first in path */ + visible = true; + break; + } + if (SearchSysCacheExists(TSCONFIGNAMENSP, + PointerGetDatum(name), + ObjectIdGetDatum(namespaceId), + 0, 0)) + { + /* Found something else first in path */ + break; + } + } + } + + ReleaseSysCache(tup); + + return visible; +} + + +/* * DeconstructQualifiedName * Given a possibly-qualified name expressed as a list of String nodes, * extract the schema name and object name. @@ -2516,6 +3039,38 @@ pg_conversion_is_visible(PG_FUNCTION_ARGS) } Datum +pg_ts_parser_is_visible(PG_FUNCTION_ARGS) +{ + Oid oid = PG_GETARG_OID(0); + + PG_RETURN_BOOL(TSParserIsVisible(oid)); +} + +Datum +pg_ts_dict_is_visible(PG_FUNCTION_ARGS) +{ + Oid oid = PG_GETARG_OID(0); + + PG_RETURN_BOOL(TSDictionaryIsVisible(oid)); +} + +Datum +pg_ts_template_is_visible(PG_FUNCTION_ARGS) +{ + Oid oid = PG_GETARG_OID(0); + + PG_RETURN_BOOL(TSTemplateIsVisible(oid)); +} + +Datum +pg_ts_config_is_visible(PG_FUNCTION_ARGS) +{ + Oid oid = PG_GETARG_OID(0); + + PG_RETURN_BOOL(TSConfigIsVisible(oid)); +} + +Datum pg_my_temp_schema(PG_FUNCTION_ARGS) { PG_RETURN_OID(myTempNamespace); diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index c091c855708..756809b8aef 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -3,7 +3,7 @@ * * Copyright (c) 1996-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.39 2007/07/25 22:16:18 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.40 2007/08/21 01:11:13 tgl Exp $ */ CREATE VIEW pg_roles AS @@ -382,3 +382,74 @@ CREATE VIEW pg_stat_bgwriter AS pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean; + +-- Tsearch debug function. Defined here because it'd be pretty unwieldy +-- to put it into pg_proc.h + +CREATE TYPE ts_debug AS ( + "Alias" text, + "Description" text, + "Token" text, + "Dictionaries" regdictionary[], + "Lexized token" text +); + +COMMENT ON TYPE ts_debug IS 'returned type from ts_debug() function'; + +CREATE FUNCTION ts_debug(regconfig, text) +RETURNS SETOF ts_debug AS +$$ +SELECT + ( + SELECT + tt.alias + FROM + pg_catalog.ts_token_type( + (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ) + ) AS tt + WHERE + tt.tokid = parse.tokid + ) AS "Alias", + ( + SELECT + tt.description + FROM + pg_catalog.ts_token_type( + (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ) + ) AS tt + WHERE + tt.tokid = parse.tokid + ) AS "Description", + parse.token AS "Token", + ARRAY ( SELECT m.mapdict::pg_catalog.regdictionary + FROM pg_catalog.pg_ts_config_map AS m + WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid + ORDER BY m.mapcfg, m.maptokentype, m.mapseqno ) + AS "Dictionaries", + ( + SELECT + dl.mapdict::pg_catalog.regdictionary || ': ' || dl.lex::pg_catalog.text + FROM + ( SELECT mapdict, pg_catalog.ts_lexize(mapdict, parse.token) AS lex + FROM pg_catalog.pg_ts_config_map AS m + WHERE m.mapcfg = $1 AND m.maptokentype = parse.tokid + ORDER BY m.mapcfg, m.maptokentype, m.mapseqno ) dl + WHERE dl.lex IS NOT NULL + LIMIT 1 + ) AS "Lexized token" +FROM pg_catalog.ts_parse( + (SELECT cfgparser FROM pg_catalog.pg_ts_config WHERE oid = $1 ), $2 + ) AS parse; +$$ +LANGUAGE SQL RETURNS NULL ON NULL INPUT; + +COMMENT ON FUNCTION ts_debug(regconfig,text) IS 'debug function for text search configuration'; + +CREATE FUNCTION ts_debug(text) +RETURNS SETOF ts_debug AS +$$ + SELECT * FROM pg_catalog.ts_debug( pg_catalog.get_current_ts_config(), $1 ); +$$ +LANGUAGE SQL RETURNS NULL ON NULL INPUT; + +COMMENT ON FUNCTION ts_debug(text) IS 'debug function for current text search configuration'; diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index 4b25ae6489e..e47b8f91fac 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -4,7 +4,7 @@ # Makefile for backend/commands # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/commands/Makefile,v 1.36 2007/04/26 16:13:09 neilc Exp $ +# $PostgreSQL: pgsql/src/backend/commands/Makefile,v 1.37 2007/08/21 01:11:14 tgl Exp $ # #------------------------------------------------------------------------- @@ -18,7 +18,8 @@ OBJS = aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \ indexcmds.o lockcmds.o operatorcmds.o opclasscmds.o \ portalcmds.o prepare.o proclang.o \ schemacmds.o sequence.o tablecmds.o tablespace.o trigger.o \ - typecmds.o user.o vacuum.o vacuumlazy.o variable.o view.o + tsearchcmds.o typecmds.o user.o vacuum.o vacuumlazy.o \ + variable.o view.o all: SUBSYS.o diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index 7cd347b69f9..65c2bbdb0f2 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/alter.c,v 1.24 2007/07/03 01:30:36 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/alter.c,v 1.25 2007/08/21 01:11:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -138,6 +138,22 @@ ExecRenameStmt(RenameStmt *stmt) break; } + case OBJECT_TSPARSER: + RenameTSParser(stmt->object, stmt->newname); + break; + + case OBJECT_TSDICTIONARY: + RenameTSDictionary(stmt->object, stmt->newname); + break; + + case OBJECT_TSTEMPLATE: + RenameTSTemplate(stmt->object, stmt->newname); + break; + + case OBJECT_TSCONFIGURATION: + RenameTSConfiguration(stmt->object, stmt->newname); + break; + default: elog(ERROR, "unrecognized rename stmt type: %d", (int) stmt->renameType); @@ -240,6 +256,14 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt) AlterTypeOwner(stmt->object, newowner); break; + case OBJECT_TSDICTIONARY: + AlterTSDictionaryOwner(stmt->object, newowner); + break; + + case OBJECT_TSCONFIGURATION: + AlterTSConfigurationOwner(stmt->object, newowner); + break; + default: elog(ERROR, "unrecognized AlterOwnerStmt type: %d", (int) stmt->objectType); diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c index 344099ebdf6..89158251aa4 100644 --- a/src/backend/commands/comment.c +++ b/src/backend/commands/comment.c @@ -7,7 +7,7 @@ * Copyright (c) 1996-2007, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/comment.c,v 1.96 2007/02/01 19:10:25 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/comment.c,v 1.97 2007/08/21 01:11:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,6 +34,10 @@ #include "catalog/pg_shdescription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" #include "catalog/pg_type.h" #include "commands/comment.h" #include "commands/dbcommands.h" @@ -78,6 +82,10 @@ static void CommentLargeObject(List *qualname, char *comment); static void CommentCast(List *qualname, List *arguments, char *comment); static void CommentTablespace(List *qualname, char *comment); static void CommentRole(List *qualname, char *comment); +static void CommentTSParser(List *qualname, char *comment); +static void CommentTSDictionary(List *qualname, char *comment); +static void CommentTSTemplate(List *qualname, char *comment); +static void CommentTSConfiguration(List *qualname, char *comment); /* @@ -151,6 +159,18 @@ CommentObject(CommentStmt *stmt) case OBJECT_ROLE: CommentRole(stmt->objname, stmt->comment); break; + case OBJECT_TSPARSER: + CommentTSParser(stmt->objname, stmt->comment); + break; + case OBJECT_TSDICTIONARY: + CommentTSDictionary(stmt->objname, stmt->comment); + break; + case OBJECT_TSTEMPLATE: + CommentTSTemplate(stmt->objname, stmt->comment); + break; + case OBJECT_TSCONFIGURATION: + CommentTSConfiguration(stmt->objname, stmt->comment); + break; default: elog(ERROR, "unrecognized object type: %d", (int) stmt->objtype); @@ -1462,3 +1482,61 @@ CommentCast(List *qualname, List *arguments, char *comment) /* Call CreateComments() to create/drop the comments */ CreateComments(castOid, CastRelationId, 0, comment); } + +static void +CommentTSParser(List *qualname, char *comment) +{ + Oid prsId; + + prsId = TSParserGetPrsid(qualname, false); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to comment on text search parser"))); + + CreateComments(prsId, TSParserRelationId, 0, comment); +} + +static void +CommentTSDictionary(List *qualname, char *comment) +{ + Oid dictId; + + dictId = TSDictionaryGetDictid(qualname, false); + + if (!pg_ts_dict_ownercheck(dictId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSDICTIONARY, + NameListToString(qualname)); + + CreateComments(dictId, TSDictionaryRelationId, 0, comment); +} + +static void +CommentTSTemplate(List *qualname, char *comment) +{ + Oid tmplId; + + tmplId = TSTemplateGetTmplid(qualname, false); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to comment on text search template"))); + + CreateComments(tmplId, TSTemplateRelationId, 0, comment); +} + +static void +CommentTSConfiguration(List *qualname, char *comment) +{ + Oid cfgId; + + cfgId = TSConfigGetCfgid(qualname, false); + + if (!pg_ts_config_ownercheck(cfgId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSCONFIGURATION, + NameListToString(qualname)); + + CreateComments(cfgId, TSConfigRelationId, 0, comment); +} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 07e56620428..006076b01f4 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.230 2007/07/17 05:02:00 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.231 2007/08/21 01:11:14 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -5079,8 +5079,13 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel, case OCLASS_LANGUAGE: case OCLASS_OPERATOR: case OCLASS_OPCLASS: + case OCLASS_OPFAMILY: case OCLASS_TRIGGER: case OCLASS_SCHEMA: + case OCLASS_TSPARSER: + case OCLASS_TSDICT: + case OCLASS_TSTEMPLATE: + case OCLASS_TSCONFIG: /* * We don't expect any of these sorts of objects to depend on diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c new file mode 100644 index 00000000000..7092da132d8 --- /dev/null +++ b/src/backend/commands/tsearchcmds.c @@ -0,0 +1,1948 @@ +/*------------------------------------------------------------------------- + * + * tsearchcmds.c + * + * Routines for tsearch manipulation commands + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/commands/tsearchcmds.c,v 1.1 2007/08/21 01:11:15 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "miscadmin.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/xact.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "parser/parse_func.h" +#include "tsearch/ts_cache.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + + +static HeapTuple UpdateTSConfiguration(AlterTSConfigurationStmt *stmt, + HeapTuple tup); +static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, + HeapTuple tup); +static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt, + HeapTuple tup); + + +/* --------------------- TS Parser commands ------------------------ */ + +/* + * lookup a parser support function and return its OID (as a Datum) + * + * attnum is the pg_ts_parser column the function will go into + */ +static Datum +get_ts_parser_func(DefElem *defel, int attnum) +{ + List *funcName = defGetQualifiedName(defel); + Oid typeId[3]; + Oid retTypeId; + int nargs; + Oid procOid; + + retTypeId = INTERNALOID; /* correct for most */ + typeId[0] = INTERNALOID; + switch (attnum) + { + case Anum_pg_ts_parser_prsstart: + nargs = 2; + typeId[1] = INT4OID; + break; + case Anum_pg_ts_parser_prstoken: + nargs = 3; + typeId[1] = INTERNALOID; + typeId[2] = INTERNALOID; + break; + case Anum_pg_ts_parser_prsend: + nargs = 1; + retTypeId = VOIDOID; + break; + case Anum_pg_ts_parser_prsheadline: + nargs = 3; + typeId[1] = TEXTOID; + typeId[2] = TSQUERYOID; + break; + case Anum_pg_ts_parser_prslextype: + nargs = 1; + break; + default: + /* should not be here */ + elog(ERROR, "unknown attribute for text search parser: %d", attnum); + nargs = 0; /* keep compiler quiet */ + } + + procOid = LookupFuncName(funcName, nargs, typeId, false); + if (get_func_rettype(procOid) != retTypeId) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("function %s should return type %s", + func_signature_string(funcName, nargs, typeId), + format_type_be(retTypeId)))); + + return ObjectIdGetDatum(procOid); +} + +/* + * make pg_depend entries for a new pg_ts_parser entry + */ +static void +makeParserDependencies(HeapTuple tuple) +{ + Form_pg_ts_parser prs = (Form_pg_ts_parser) GETSTRUCT(tuple); + ObjectAddress myself, + referenced; + + myself.classId = TSParserRelationId; + myself.objectId = HeapTupleGetOid(tuple); + myself.objectSubId = 0; + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = prs->prsnamespace; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependencies on functions */ + referenced.classId = ProcedureRelationId; + referenced.objectSubId = 0; + + referenced.objectId = prs->prsstart; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + referenced.objectId = prs->prstoken; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + referenced.objectId = prs->prsend; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + referenced.objectId = prs->prslextype; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + if (OidIsValid(prs->prsheadline)) + { + referenced.objectId = prs->prsheadline; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } +} + +/* + * CREATE TEXT SEARCH PARSER + */ +void +DefineTSParser(List *names, List *parameters) +{ + char *prsname; + ListCell *pl; + Relation prsRel; + HeapTuple tup; + Datum values[Natts_pg_ts_parser]; + char nulls[Natts_pg_ts_parser]; + NameData pname; + Oid prsOid; + Oid namespaceoid; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create text search parsers"))); + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &prsname); + + /* initialize tuple fields with name/namespace */ + memset(values, 0, sizeof(values)); + memset(nulls, ' ', sizeof(nulls)); + + namestrcpy(&pname, prsname); + values[Anum_pg_ts_parser_prsname - 1] = NameGetDatum(&pname); + values[Anum_pg_ts_parser_prsnamespace - 1] = ObjectIdGetDatum(namespaceoid); + + /* + * loop over the definition list and extract the information we need. + */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (pg_strcasecmp(defel->defname, "start") == 0) + { + values[Anum_pg_ts_parser_prsstart - 1] = + get_ts_parser_func(defel, Anum_pg_ts_parser_prsstart); + } + else if (pg_strcasecmp(defel->defname, "gettoken") == 0) + { + values[Anum_pg_ts_parser_prstoken - 1] = + get_ts_parser_func(defel, Anum_pg_ts_parser_prstoken); + } + else if (pg_strcasecmp(defel->defname, "end") == 0) + { + values[Anum_pg_ts_parser_prsend - 1] = + get_ts_parser_func(defel, Anum_pg_ts_parser_prsend); + } + else if (pg_strcasecmp(defel->defname, "headline") == 0) + { + values[Anum_pg_ts_parser_prsheadline - 1] = + get_ts_parser_func(defel, Anum_pg_ts_parser_prsheadline); + } + else if (pg_strcasecmp(defel->defname, "lextypes") == 0) + { + values[Anum_pg_ts_parser_prslextype - 1] = + get_ts_parser_func(defel, Anum_pg_ts_parser_prslextype); + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search parser parameter \"%s\" not recognized", + defel->defname))); + } + + /* + * Validation + */ + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsstart - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search parser start method is required"))); + + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prstoken - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search parser gettoken method is required"))); + + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prsend - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search parser end method is required"))); + + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_parser_prslextype - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search parser lextypes method is required"))); + + /* + * Looks good, insert + */ + prsRel = heap_open(TSParserRelationId, RowExclusiveLock); + + tup = heap_formtuple(prsRel->rd_att, values, nulls); + + prsOid = simple_heap_insert(prsRel, tup); + + CatalogUpdateIndexes(prsRel, tup); + + makeParserDependencies(tup); + + heap_freetuple(tup); + + heap_close(prsRel, RowExclusiveLock); +} + +/* + * DROP TEXT SEARCH PARSER + */ +void +RemoveTSParser(List *names, DropBehavior behavior, bool missing_ok) +{ + Oid prsOid; + ObjectAddress object; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to drop text search parsers"))); + + prsOid = TSParserGetPrsid(names, true); + if (!OidIsValid(prsOid)) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search parser \"%s\" does not exist", + NameListToString(names)))); + } + else + { + ereport(NOTICE, + (errmsg("text search parser \"%s\" does not exist, skipping", + NameListToString(names)))); + } + return; + } + + object.classId = TSParserRelationId; + object.objectId = prsOid; + object.objectSubId = 0; + + performDeletion(&object, behavior); +} + +/* + * Guts of TS parser deletion. + */ +void +RemoveTSParserById(Oid prsId) +{ + Relation relation; + HeapTuple tup; + + relation = heap_open(TSParserRelationId, RowExclusiveLock); + + tup = SearchSysCache(TSPARSEROID, + ObjectIdGetDatum(prsId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search parser %u", prsId); + + simple_heap_delete(relation, &tup->t_self); + + ReleaseSysCache(tup); + + heap_close(relation, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH PARSER RENAME + */ +void +RenameTSParser(List *oldname, const char *newname) +{ + HeapTuple tup; + Relation rel; + Oid prsId; + Oid namespaceOid; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to rename text search parsers"))); + + rel = heap_open(TSParserRelationId, RowExclusiveLock); + + prsId = TSParserGetPrsid(oldname, false); + + tup = SearchSysCacheCopy(TSPARSEROID, + ObjectIdGetDatum(prsId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search parser %u", prsId); + + namespaceOid = ((Form_pg_ts_parser) GETSTRUCT(tup))->prsnamespace; + + if (SearchSysCacheExists(TSPARSERNAMENSP, + PointerGetDatum(newname), + ObjectIdGetDatum(namespaceOid), + 0, 0)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("text search parser \"%s\" already exists", + newname))); + + namestrcpy(&(((Form_pg_ts_parser) GETSTRUCT(tup))->prsname), newname); + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + heap_close(rel, NoLock); + heap_freetuple(tup); +} + +/* ---------------------- TS Dictionary commands -----------------------*/ + +/* + * make pg_depend entries for a new pg_ts_dict entry + */ +static void +makeDictionaryDependencies(HeapTuple tuple) +{ + Form_pg_ts_dict dict = (Form_pg_ts_dict) GETSTRUCT(tuple); + ObjectAddress myself, + referenced; + + myself.classId = TSDictionaryRelationId; + myself.objectId = HeapTupleGetOid(tuple); + myself.objectSubId = 0; + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = dict->dictnamespace; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependency on owner */ + recordDependencyOnOwner(myself.classId, myself.objectId, dict->dictowner); + + /* dependency on template */ + referenced.classId = TSTemplateRelationId; + referenced.objectId = dict->dicttemplate; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); +} + +/* + * CREATE TEXT SEARCH DICTIONARY + */ +void +DefineTSDictionary(List *names, List *parameters) +{ + ListCell *pl; + Relation dictRel; + HeapTuple tup; + Datum values[Natts_pg_ts_dict]; + char nulls[Natts_pg_ts_dict]; + NameData dname; + int i; + Oid dictOid; + Oid namespaceoid; + AclResult aclresult; + char *dictname; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &dictname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_NAMESPACE, + get_namespace_name(namespaceoid)); + + for (i = 0; i < Natts_pg_ts_dict; i++) + { + nulls[i] = ' '; + values[i] = ObjectIdGetDatum(InvalidOid); + } + + namestrcpy(&dname, dictname); + values[Anum_pg_ts_dict_dictname - 1] = NameGetDatum(&dname); + values[Anum_pg_ts_dict_dictnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_ts_dict_dictowner - 1] = ObjectIdGetDatum(GetUserId()); + nulls[Anum_pg_ts_dict_dictinitoption - 1] = 'n'; + + /* + * loop over the definition list and extract the information we need. + */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (pg_strcasecmp(defel->defname, "template") == 0) + { + Oid templId; + + templId = TSTemplateGetTmplid(defGetQualifiedName(defel), false); + + values[Anum_pg_ts_dict_dicttemplate - 1] = ObjectIdGetDatum(templId); + nulls[Anum_pg_ts_dict_dicttemplate - 1] = ' '; + } + else if (pg_strcasecmp(defel->defname, "option") == 0) + { + char *opt = defGetString(defel); + + if (pg_strcasecmp(opt, "null") != 0) + { + values[Anum_pg_ts_dict_dictinitoption - 1] = + DirectFunctionCall1(textin, CStringGetDatum(opt)); + nulls[Anum_pg_ts_dict_dictinitoption - 1] = ' '; + } + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search dictionary parameter \"%s\" not recognized", + defel->defname))); + } + + /* + * Validation + */ + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_dict_dicttemplate - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search template is required"))); + + /* + * Looks good, insert + */ + + dictRel = heap_open(TSDictionaryRelationId, RowExclusiveLock); + + tup = heap_formtuple(dictRel->rd_att, values, nulls); + + dictOid = simple_heap_insert(dictRel, tup); + + CatalogUpdateIndexes(dictRel, tup); + + makeDictionaryDependencies(tup); + + heap_freetuple(tup); + + heap_close(dictRel, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH DICTIONARY RENAME + */ +void +RenameTSDictionary(List *oldname, const char *newname) +{ + HeapTuple tup; + Relation rel; + Oid dictId; + Oid namespaceOid; + AclResult aclresult; + + rel = heap_open(TSDictionaryRelationId, RowExclusiveLock); + + dictId = TSDictionaryGetDictid(oldname, false); + + tup = SearchSysCacheCopy(TSDICTOID, + ObjectIdGetDatum(dictId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictId); + + namespaceOid = ((Form_pg_ts_dict) GETSTRUCT(tup))->dictnamespace; + + if (SearchSysCacheExists(TSDICTNAMENSP, + PointerGetDatum(newname), + ObjectIdGetDatum(namespaceOid), + 0, 0)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("text search dictionary \"%s\" already exists", + newname))); + + /* must be owner */ + if (!pg_ts_dict_ownercheck(dictId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSDICTIONARY, + NameListToString(oldname)); + + /* must have CREATE privilege on namespace */ + aclresult = pg_namespace_aclcheck(namespaceOid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_NAMESPACE, + get_namespace_name(namespaceOid)); + + namestrcpy(&(((Form_pg_ts_dict) GETSTRUCT(tup))->dictname), newname); + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + heap_close(rel, NoLock); + heap_freetuple(tup); +} + +/* + * DROP TEXT SEARCH DICTIONARY + */ +void +RemoveTSDictionary(List *names, DropBehavior behavior, bool missing_ok) +{ + Oid dictOid; + ObjectAddress object; + HeapTuple tup; + Oid namespaceId; + + dictOid = TSDictionaryGetDictid(names, true); + if (!OidIsValid(dictOid)) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search dictionary \"%s\" does not exist", + NameListToString(names)))); + } + else + { + ereport(NOTICE, + (errmsg("text search dictionary \"%s\" does not exist, skipping", + NameListToString(names)))); + } + return; + } + + tup = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(dictOid), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictOid); + + /* Permission check: must own dictionary or its namespace */ + namespaceId = ((Form_pg_ts_dict) GETSTRUCT(tup))->dictnamespace; + if (!pg_ts_dict_ownercheck(dictOid, GetUserId()) && + !pg_namespace_ownercheck(namespaceId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSDICTIONARY, + NameListToString(names)); + + ReleaseSysCache(tup); + + object.classId = TSDictionaryRelationId; + object.objectId = dictOid; + object.objectSubId = 0; + + performDeletion(&object, behavior); +} + +/* + * Guts of TS dictionary deletion. + */ +void +RemoveTSDictionaryById(Oid dictId) +{ + Relation relation; + HeapTuple tup; + + relation = heap_open(TSDictionaryRelationId, RowExclusiveLock); + + tup = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(dictId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictId); + + simple_heap_delete(relation, &tup->t_self); + + ReleaseSysCache(tup); + + heap_close(relation, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH DICTIONARY + */ +void +AlterTSDictionary(AlterTSDictionaryStmt * stmt) +{ + HeapTuple tup, + newtup; + Relation rel; + Oid dictId; + ListCell *pl; + Datum repl_val[Natts_pg_ts_dict]; + char repl_null[Natts_pg_ts_dict]; + char repl_repl[Natts_pg_ts_dict]; + + dictId = TSDictionaryGetDictid(stmt->dictname, false); + + rel = heap_open(TSDictionaryRelationId, RowExclusiveLock); + + tup = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(dictId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictId); + + /* must be owner */ + if (!pg_ts_dict_ownercheck(dictId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSDICTIONARY, + NameListToString(stmt->dictname)); + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, ' ', sizeof(repl_null)); + memset(repl_repl, ' ', sizeof(repl_repl)); + + /* + * NOTE: because we only support altering the option, not the template, + * there is no need to update dependencies. + */ + foreach(pl, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (pg_strcasecmp(defel->defname, "option") == 0) + { + char *opt = defGetString(defel); + + if (pg_strcasecmp(opt, "null") == 0) + { + repl_null[Anum_pg_ts_dict_dictinitoption - 1] = 'n'; + } + else + { + repl_val[Anum_pg_ts_dict_dictinitoption - 1] = + DirectFunctionCall1(textin, CStringGetDatum(opt)); + repl_null[Anum_pg_ts_dict_dictinitoption - 1] = ' '; + } + repl_repl[Anum_pg_ts_dict_dictinitoption - 1] = 'r'; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search dictionary parameter \"%s\" not recognized", + defel->defname))); + } + + newtup = heap_modifytuple(tup, RelationGetDescr(rel), + repl_val, repl_null, repl_repl); + + simple_heap_update(rel, &newtup->t_self, newtup); + + CatalogUpdateIndexes(rel, newtup); + + heap_freetuple(newtup); + ReleaseSysCache(tup); + + heap_close(rel, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH DICTIONARY OWNER + */ +void +AlterTSDictionaryOwner(List *name, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + Oid dictId; + Oid namespaceOid; + AclResult aclresult; + Form_pg_ts_dict form; + + rel = heap_open(TSDictionaryRelationId, RowExclusiveLock); + + dictId = TSDictionaryGetDictid(name, false); + + tup = SearchSysCacheCopy(TSDICTOID, + ObjectIdGetDatum(dictId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictId); + + form = (Form_pg_ts_dict) GETSTRUCT(tup); + namespaceOid = form->dictnamespace; + + if (form->dictowner != newOwnerId) + { + /* Superusers can always do it */ + if (!superuser()) + { + /* must be owner */ + if (!pg_ts_dict_ownercheck(dictId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSDICTIONARY, + NameListToString(name)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have CREATE privilege on namespace */ + aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId, ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_NAMESPACE, + get_namespace_name(namespaceOid)); + } + + form->dictowner = newOwnerId; + + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(TSDictionaryRelationId, HeapTupleGetOid(tup), + newOwnerId); + } + + heap_close(rel, NoLock); + heap_freetuple(tup); +} + +/* ---------------------- TS Template commands -----------------------*/ + +/* + * lookup a template support function and return its OID (as a Datum) + * + * attnum is the pg_ts_template column the function will go into + */ +static Datum +get_ts_template_func(DefElem *defel, int attnum) +{ + List *funcName = defGetQualifiedName(defel); + Oid typeId[4]; + Oid retTypeId; + int nargs; + Oid procOid; + + retTypeId = INTERNALOID; + typeId[0] = INTERNALOID; + typeId[1] = INTERNALOID; + typeId[2] = INTERNALOID; + typeId[3] = INTERNALOID; + switch (attnum) + { + case Anum_pg_ts_template_tmplinit: + nargs = 1; + break; + case Anum_pg_ts_template_tmpllexize: + nargs = 4; + break; + default: + /* should not be here */ + elog(ERROR, "unknown attribute for text search template: %d", + attnum); + nargs = 0; /* keep compiler quiet */ + } + + procOid = LookupFuncName(funcName, nargs, typeId, false); + if (get_func_rettype(procOid) != retTypeId) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("function %s should return type %s", + func_signature_string(funcName, nargs, typeId), + format_type_be(retTypeId)))); + + return ObjectIdGetDatum(procOid); +} + +/* + * make pg_depend entries for a new pg_ts_template entry + */ +static void +makeTSTemplateDependencies(HeapTuple tuple) +{ + Form_pg_ts_template tmpl = (Form_pg_ts_template) GETSTRUCT(tuple); + ObjectAddress myself, + referenced; + + myself.classId = TSTemplateRelationId; + myself.objectId = HeapTupleGetOid(tuple); + myself.objectSubId = 0; + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = tmpl->tmplnamespace; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + /* dependencies on functions */ + referenced.classId = ProcedureRelationId; + referenced.objectSubId = 0; + + referenced.objectId = tmpl->tmpllexize; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + + if (OidIsValid(tmpl->tmplinit)) + { + referenced.objectId = tmpl->tmplinit; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } +} + +/* + * CREATE TEXT SEARCH TEMPLATE + */ +void +DefineTSTemplate(List *names, List *parameters) +{ + ListCell *pl; + Relation tmplRel; + HeapTuple tup; + Datum values[Natts_pg_ts_template]; + char nulls[Natts_pg_ts_template]; + NameData dname; + int i; + Oid dictOid; + Oid namespaceoid; + char *tmplname; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to create text search templates"))); + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &tmplname); + + for (i = 0; i < Natts_pg_ts_template; i++) + { + nulls[i] = ' '; + values[i] = ObjectIdGetDatum(InvalidOid); + } + + namestrcpy(&dname, tmplname); + values[Anum_pg_ts_template_tmplname - 1] = NameGetDatum(&dname); + values[Anum_pg_ts_template_tmplnamespace - 1] = ObjectIdGetDatum(namespaceoid); + + /* + * loop over the definition list and extract the information we need. + */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (pg_strcasecmp(defel->defname, "init") == 0) + { + values[Anum_pg_ts_template_tmplinit - 1] = + get_ts_template_func(defel, Anum_pg_ts_template_tmplinit); + nulls[Anum_pg_ts_template_tmplinit - 1] = ' '; + } + else if (pg_strcasecmp(defel->defname, "lexize") == 0) + { + values[Anum_pg_ts_template_tmpllexize - 1] = + get_ts_template_func(defel, Anum_pg_ts_template_tmpllexize); + nulls[Anum_pg_ts_template_tmpllexize - 1] = ' '; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search template parameter \"%s\" not recognized", + defel->defname))); + } + + /* + * Validation + */ + if (!OidIsValid(DatumGetObjectId(values[Anum_pg_ts_template_tmpllexize - 1]))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search template lexize method is required"))); + + /* + * Looks good, insert + */ + + tmplRel = heap_open(TSTemplateRelationId, RowExclusiveLock); + + tup = heap_formtuple(tmplRel->rd_att, values, nulls); + + dictOid = simple_heap_insert(tmplRel, tup); + + CatalogUpdateIndexes(tmplRel, tup); + + makeTSTemplateDependencies(tup); + + heap_freetuple(tup); + + heap_close(tmplRel, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH TEMPLATE RENAME + */ +void +RenameTSTemplate(List *oldname, const char *newname) +{ + HeapTuple tup; + Relation rel; + Oid tmplId; + Oid namespaceOid; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to rename text search templates"))); + + rel = heap_open(TSTemplateRelationId, RowExclusiveLock); + + tmplId = TSTemplateGetTmplid(oldname, false); + + tup = SearchSysCacheCopy(TSTEMPLATEOID, + ObjectIdGetDatum(tmplId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search template %u", + tmplId); + + namespaceOid = ((Form_pg_ts_template) GETSTRUCT(tup))->tmplnamespace; + + if (SearchSysCacheExists(TSTEMPLATENAMENSP, + PointerGetDatum(newname), + ObjectIdGetDatum(namespaceOid), + 0, 0)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("text search template \"%s\" already exists", + newname))); + + namestrcpy(&(((Form_pg_ts_template) GETSTRUCT(tup))->tmplname), newname); + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + heap_close(rel, NoLock); + heap_freetuple(tup); +} + +/* + * DROP TEXT SEARCH TEMPLATE + */ +void +RemoveTSTemplate(List *names, DropBehavior behavior, bool missing_ok) +{ + Oid tmplOid; + ObjectAddress object; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to drop text search templates"))); + + tmplOid = TSTemplateGetTmplid(names, true); + if (!OidIsValid(tmplOid)) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search template \"%s\" does not exist", + NameListToString(names)))); + } + else + { + ereport(NOTICE, + (errmsg("text search template \"%s\" does not exist, skipping", + NameListToString(names)))); + } + return; + } + + object.classId = TSTemplateRelationId; + object.objectId = tmplOid; + object.objectSubId = 0; + + performDeletion(&object, behavior); +} + +/* + * Guts of TS template deletion. + */ +void +RemoveTSTemplateById(Oid tmplId) +{ + Relation relation; + HeapTuple tup; + + relation = heap_open(TSTemplateRelationId, RowExclusiveLock); + + tup = SearchSysCache(TSTEMPLATEOID, + ObjectIdGetDatum(tmplId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search template %u", + tmplId); + + simple_heap_delete(relation, &tup->t_self); + + ReleaseSysCache(tup); + + heap_close(relation, RowExclusiveLock); +} + +/* ---------------------- TS Configuration commands -----------------------*/ + +/* + * Finds syscache tuple of configuration. + * Returns NULL if no such cfg. + */ +static HeapTuple +GetTSConfigTuple(List *names) +{ + HeapTuple tup; + Oid cfgId; + + cfgId = TSConfigGetCfgid(names, true); + if (!OidIsValid(cfgId)) + return NULL; + + tup = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(cfgId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + + return tup; +} + +/* + * make pg_depend entries for a new or updated pg_ts_config entry + * + * Pass opened pg_ts_config_map relation if there might be any config map + * entries for the config. + */ +static void +makeConfigurationDependencies(HeapTuple tuple, bool removeOld, + Relation mapRel) +{ + Form_pg_ts_config cfg = (Form_pg_ts_config) GETSTRUCT(tuple); + ObjectAddresses *addrs; + ObjectAddress myself, + referenced; + + myself.classId = TSConfigRelationId; + myself.objectId = HeapTupleGetOid(tuple); + myself.objectSubId = 0; + + /* for ALTER case, first flush old dependencies */ + if (removeOld) + { + deleteDependencyRecordsFor(myself.classId, myself.objectId); + deleteSharedDependencyRecordsFor(myself.classId, myself.objectId); + } + + /* + * We use an ObjectAddresses list to remove possible duplicate + * dependencies from the config map info. The pg_ts_config items + * shouldn't be duplicates, but might as well fold them all into one call. + */ + addrs = new_object_addresses(); + + /* dependency on namespace */ + referenced.classId = NamespaceRelationId; + referenced.objectId = cfg->cfgnamespace; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + + /* dependency on owner */ + recordDependencyOnOwner(myself.classId, myself.objectId, cfg->cfgowner); + + /* dependency on parser */ + referenced.classId = TSParserRelationId; + referenced.objectId = cfg->cfgparser; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + + /* dependencies on dictionaries listed in config map */ + if (mapRel) + { + ScanKeyData skey; + SysScanDesc scan; + HeapTuple maptup; + + /* CCI to ensure we can see effects of caller's changes */ + CommandCounterIncrement(); + + ScanKeyInit(&skey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(myself.objectId)); + + scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, + SnapshotNow, 1, &skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + + referenced.classId = TSDictionaryRelationId; + referenced.objectId = cfgmap->mapdict; + referenced.objectSubId = 0; + add_exact_object_address(&referenced, addrs); + } + + systable_endscan(scan); + } + + /* Record 'em (this includes duplicate elimination) */ + record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL); + + free_object_addresses(addrs); +} + +/* + * CREATE TEXT SEARCH CONFIGURATION + */ +void +DefineTSConfiguration(List *names, List *parameters) +{ + Relation cfgRel; + Relation mapRel = NULL; + HeapTuple tup; + Datum values[Natts_pg_ts_config]; + char nulls[Natts_pg_ts_config]; + AclResult aclresult; + Oid namespaceoid; + char *cfgname; + NameData cname; + List *templateName = NIL; + Oid templateOid = InvalidOid; + Oid prsOid = InvalidOid; + bool with_map = false; + Oid cfgOid; + ListCell *pl; + + /* Convert list of names to a name and namespace */ + namespaceoid = QualifiedNameGetCreationNamespace(names, &cfgname); + + /* Check we have creation rights in target namespace */ + aclresult = pg_namespace_aclcheck(namespaceoid, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_NAMESPACE, + get_namespace_name(namespaceoid)); + + /* + * loop over the definition list and extract the information we need. + */ + foreach(pl, parameters) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (pg_strcasecmp(defel->defname, "parser") == 0) + prsOid = TSParserGetPrsid(defGetQualifiedName(defel), false); + else if (pg_strcasecmp(defel->defname, "template") == 0) + templateName = defGetQualifiedName(defel); + else if (pg_strcasecmp(defel->defname, "map") == 0) + with_map = defGetBoolean(defel); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search configuration parameter \"%s\" not recognized", + defel->defname))); + } + + /* + * Look up template if given. XXX the "template" is an existing config + * that we copy, not a pg_ts_template entry. This seems confusing. + * Maybe should use "source" or some other word? + */ + if (templateName) + { + Form_pg_ts_config cfg; + + templateOid = TSConfigGetCfgid(templateName, false); + + tup = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(templateOid), + 0, 0, 0); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search configuration %u", + templateOid); + + cfg = (Form_pg_ts_config) GETSTRUCT(tup); + + /* Use template's parser if no other was specified */ + if (!OidIsValid(prsOid)) + prsOid = cfg->cfgparser; + + ReleaseSysCache(tup); + } + + /* + * Validation + */ + if (!OidIsValid(prsOid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("text search parser is required"))); + + /* + * Looks good, build tuple and insert + */ + memset(values, 0, sizeof(values)); + memset(nulls, ' ', sizeof(nulls)); + + namestrcpy(&cname, cfgname); + values[Anum_pg_ts_config_cfgname - 1] = NameGetDatum(&cname); + values[Anum_pg_ts_config_cfgnamespace - 1] = ObjectIdGetDatum(namespaceoid); + values[Anum_pg_ts_config_cfgowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_ts_config_cfgparser - 1] = ObjectIdGetDatum(prsOid); + + cfgRel = heap_open(TSConfigRelationId, RowExclusiveLock); + + tup = heap_formtuple(cfgRel->rd_att, values, nulls); + + cfgOid = simple_heap_insert(cfgRel, tup); + + CatalogUpdateIndexes(cfgRel, tup); + + if (OidIsValid(templateOid) && with_map) + { + /* + * Copy token-dicts map from template + */ + ScanKeyData skey; + SysScanDesc scan; + HeapTuple maptup; + + mapRel = heap_open(TSConfigMapRelationId, RowExclusiveLock); + + ScanKeyInit(&skey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(templateOid)); + + scan = systable_beginscan(mapRel, TSConfigMapIndexId, true, + SnapshotNow, 1, &skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + HeapTuple newmaptup; + Datum mapvalues[Natts_pg_ts_config_map]; + char mapnulls[Natts_pg_ts_config_map]; + + memset(mapvalues, 0, sizeof(mapvalues)); + memset(mapnulls, ' ', sizeof(mapnulls)); + + mapvalues[Anum_pg_ts_config_map_mapcfg - 1] = cfgOid; + mapvalues[Anum_pg_ts_config_map_maptokentype - 1] = cfgmap->maptokentype; + mapvalues[Anum_pg_ts_config_map_mapseqno - 1] = cfgmap->mapseqno; + mapvalues[Anum_pg_ts_config_map_mapdict - 1] = cfgmap->mapdict; + + newmaptup = heap_formtuple(mapRel->rd_att, mapvalues, mapnulls); + + simple_heap_insert(mapRel, newmaptup); + + CatalogUpdateIndexes(mapRel, newmaptup); + + heap_freetuple(newmaptup); + } + + systable_endscan(scan); + } + + makeConfigurationDependencies(tup, false, mapRel); + + heap_freetuple(tup); + + if (mapRel) + heap_close(mapRel, RowExclusiveLock); + heap_close(cfgRel, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH CONFIGURATION RENAME + */ +void +RenameTSConfiguration(List *oldname, const char *newname) +{ + HeapTuple tup; + Relation rel; + Oid cfgId; + AclResult aclresult; + Oid namespaceOid; + + rel = heap_open(TSConfigRelationId, RowExclusiveLock); + + cfgId = TSConfigGetCfgid(oldname, false); + + tup = SearchSysCacheCopy(TSCONFIGOID, + ObjectIdGetDatum(cfgId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + + namespaceOid = ((Form_pg_ts_config) GETSTRUCT(tup))->cfgnamespace; + + if (SearchSysCacheExists(TSCONFIGNAMENSP, + PointerGetDatum(newname), + ObjectIdGetDatum(namespaceOid), + 0, 0)) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("text search configuration \"%s\" already exists", + newname))); + + /* must be owner */ + if (!pg_ts_config_ownercheck(cfgId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSCONFIGURATION, + NameListToString(oldname)); + + /* must have CREATE privilege on namespace */ + aclresult = pg_namespace_aclcheck(namespaceOid, GetUserId(), ACL_CREATE); + aclcheck_error(aclresult, ACL_KIND_NAMESPACE, + get_namespace_name(namespaceOid)); + + namestrcpy(&(((Form_pg_ts_config) GETSTRUCT(tup))->cfgname), newname); + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + heap_close(rel, NoLock); + heap_freetuple(tup); +} + +/* + * DROP TEXT SEARCH CONFIGURATION + */ +void +RemoveTSConfiguration(List *names, DropBehavior behavior, bool missing_ok) +{ + Oid cfgOid; + Oid namespaceId; + ObjectAddress object; + HeapTuple tup; + + tup = GetTSConfigTuple(names); + + if (!HeapTupleIsValid(tup)) + { + if (!missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration \"%s\" does not exist", + NameListToString(names)))); + } + else + { + ereport(NOTICE, + (errmsg("text search configuration \"%s\" does not exist, skipping", + NameListToString(names)))); + } + return; + } + + /* Permission check: must own configuration or its namespace */ + cfgOid = HeapTupleGetOid(tup); + namespaceId = ((Form_pg_ts_config) GETSTRUCT(tup))->cfgnamespace; + if (!pg_ts_config_ownercheck(cfgOid, GetUserId()) && + !pg_namespace_ownercheck(namespaceId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSCONFIGURATION, + NameListToString(names)); + + ReleaseSysCache(tup); + + object.classId = TSConfigRelationId; + object.objectId = cfgOid; + object.objectSubId = 0; + + performDeletion(&object, behavior); +} + +/* + * Guts of TS configuration deletion. + */ +void +RemoveTSConfigurationById(Oid cfgId) +{ + Relation relCfg, + relMap; + HeapTuple tup; + ScanKeyData skey; + SysScanDesc scan; + + /* Remove the pg_ts_config entry */ + relCfg = heap_open(TSConfigRelationId, RowExclusiveLock); + + tup = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(cfgId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + cfgId); + + simple_heap_delete(relCfg, &tup->t_self); + + ReleaseSysCache(tup); + + heap_close(relCfg, RowExclusiveLock); + + /* Remove any pg_ts_config_map entries */ + relMap = heap_open(TSConfigMapRelationId, RowExclusiveLock); + + ScanKeyInit(&skey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + SnapshotNow, 1, &skey); + + while (HeapTupleIsValid((tup = systable_getnext(scan)))) + { + simple_heap_delete(relMap, &tup->t_self); + } + + systable_endscan(scan); + + heap_close(relMap, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH CONFIGURATION OWNER + */ +void +AlterTSConfigurationOwner(List *name, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + Oid cfgId; + AclResult aclresult; + Oid namespaceOid; + Form_pg_ts_config form; + + rel = heap_open(TSConfigRelationId, RowExclusiveLock); + + cfgId = TSConfigGetCfgid(name, false); + + tup = SearchSysCacheCopy(TSCONFIGOID, + ObjectIdGetDatum(cfgId), + 0, 0, 0); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + + form = (Form_pg_ts_config) GETSTRUCT(tup); + namespaceOid = form->cfgnamespace; + + if (form->cfgowner != newOwnerId) + { + /* Superusers can always do it */ + if (!superuser()) + { + /* must be owner */ + if (!pg_ts_config_ownercheck(cfgId, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSCONFIGURATION, + NameListToString(name)); + + /* Must be able to become new owner */ + check_is_member_of_role(GetUserId(), newOwnerId); + + /* New owner must have CREATE privilege on namespace */ + aclresult = pg_namespace_aclcheck(namespaceOid, newOwnerId, ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_NAMESPACE, + get_namespace_name(namespaceOid)); + } + + form->cfgowner = newOwnerId; + + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(TSConfigRelationId, HeapTupleGetOid(tup), + newOwnerId); + } + + heap_close(rel, NoLock); + heap_freetuple(tup); +} + +/* + * ALTER TEXT SEARCH CONFIGURATION - main entry point + */ +void +AlterTSConfiguration(AlterTSConfigurationStmt *stmt) +{ + HeapTuple tup; + HeapTuple newtup; + Relation mapRel; + + /* Find the configuration */ + tup = GetTSConfigTuple(stmt->cfgname); + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("text search configuration \"%s\" does not exist", + NameListToString(stmt->cfgname)))); + + /* must be owner */ + if (!pg_ts_config_ownercheck(HeapTupleGetOid(tup), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TSCONFIGURATION, + NameListToString(stmt->cfgname)); + + /* Update fields of config tuple? */ + if (stmt->options) + newtup = UpdateTSConfiguration(stmt, tup); + else + newtup = tup; + + /* Add or drop mappings? */ + if (stmt->dicts) + MakeConfigurationMapping(stmt, newtup); + else if (stmt->tokentype) + DropConfigurationMapping(stmt, newtup); + + /* + * Even if we aren't changing mappings, there could already be some, + * so makeConfigurationDependencies always has to look. + */ + mapRel = heap_open(TSConfigMapRelationId, AccessShareLock); + + /* Update dependencies */ + makeConfigurationDependencies(newtup, true, mapRel); + + heap_close(mapRel, AccessShareLock); + + ReleaseSysCache(tup); +} + +/* + * ALTER TEXT SEARCH CONFIGURATION - update fields of pg_ts_config tuple + */ +static HeapTuple +UpdateTSConfiguration(AlterTSConfigurationStmt *stmt, HeapTuple tup) +{ + Relation cfgRel; + ListCell *pl; + Datum repl_val[Natts_pg_ts_config]; + char repl_null[Natts_pg_ts_config]; + char repl_repl[Natts_pg_ts_config]; + HeapTuple newtup; + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, ' ', sizeof(repl_null)); + memset(repl_repl, ' ', sizeof(repl_repl)); + + cfgRel = heap_open(TSConfigRelationId, RowExclusiveLock); + + foreach(pl, stmt->options) + { + DefElem *defel = (DefElem *) lfirst(pl); + + if (pg_strcasecmp(defel->defname, "parser") == 0) + { + Oid newPrs; + + newPrs = TSParserGetPrsid(defGetQualifiedName(defel), false); + repl_val[Anum_pg_ts_config_cfgparser - 1] = ObjectIdGetDatum(newPrs); + repl_repl[Anum_pg_ts_config_cfgparser - 1] = 'r'; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("text search configuration parameter \"%s\" not recognized", + defel->defname))); + } + + newtup = heap_modifytuple(tup, RelationGetDescr(cfgRel), + repl_val, repl_null, repl_repl); + + simple_heap_update(cfgRel, &newtup->t_self, newtup); + + CatalogUpdateIndexes(cfgRel, newtup); + + heap_close(cfgRel, RowExclusiveLock); + + return newtup; +} + +/*------------------- TS Configuration mapping stuff ----------------*/ + +/* + * Translate a list of token type names to an array of token type numbers + */ +static int * +getTokenTypes(Oid prsId, List *tokennames) +{ + TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId); + LexDescr *list; + int *res, + i, + ntoken; + ListCell *tn; + + ntoken = list_length(tokennames); + if (ntoken == 0) + return NULL; + res = (int *) palloc(sizeof(int) * ntoken); + + if (!OidIsValid(prs->lextypeOid)) + elog(ERROR, "method lextype isn't defined for text search parser %u", + prsId); + + /* OidFunctionCall0 is absent */ + list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid, + (Datum) 0)); + + i = 0; + foreach(tn, tokennames) + { + Value *val = (Value *) lfirst(tn); + bool found = false; + int j; + + j = 0; + while (list && list[j].lexid) + { + /* XXX should we use pg_strcasecmp here? */ + if (strcmp(strVal(val), list[j].alias) == 0) + { + res[i] = list[j].lexid; + found = true; + break; + } + j++; + } + if (!found) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("token type \"%s\" does not exist", + strVal(val)))); + i++; + } + + return res; +} + +/* + * ALTER TEXT SEARCH CONFIGURATION ADD/ALTER MAPPING + */ +static void +MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, HeapTuple tup) +{ + Oid cfgId = HeapTupleGetOid(tup); + Relation relMap; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple maptup; + int i; + int j; + Oid prsId; + int *tokens, + ntoken; + Oid *dictIds; + int ndict; + ListCell *c; + + prsId = ((Form_pg_ts_config) GETSTRUCT(tup))->cfgparser; + + tokens = getTokenTypes(prsId, stmt->tokentype); + ntoken = list_length(stmt->tokentype); + + relMap = heap_open(TSConfigMapRelationId, RowExclusiveLock); + + if (stmt->override) + { + /* + * delete maps for tokens if they exist and command was ALTER + */ + for (i = 0; i < ntoken; i++) + { + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + ScanKeyInit(&skey[1], + Anum_pg_ts_config_map_maptokentype, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(tokens[i])); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + SnapshotNow, 2, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + simple_heap_delete(relMap, &maptup->t_self); + } + + systable_endscan(scan); + } + } + + /* + * Convert list of dictionary names to array of dict OIDs + */ + ndict = list_length(stmt->dicts); + dictIds = (Oid *) palloc(sizeof(Oid) * ndict); + i = 0; + foreach(c, stmt->dicts) + { + List *names = (List *) lfirst(c); + + dictIds[i] = TSDictionaryGetDictid(names, false); + i++; + } + + if (stmt->replace) + { + /* + * Replace a specific dictionary in existing entries + */ + Oid dictOld = dictIds[0], + dictNew = dictIds[1]; + + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + SnapshotNow, 1, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + + /* + * check if it's one of target token types + */ + if (tokens) + { + bool tokmatch = false; + + for (j = 0; j < ntoken; j++) + { + if (cfgmap->maptokentype == tokens[j]) + { + tokmatch = true; + break; + } + } + if (!tokmatch) + continue; + } + + /* + * replace dictionary if match + */ + if (cfgmap->mapdict == dictOld) + { + Datum repl_val[Natts_pg_ts_config_map]; + char repl_null[Natts_pg_ts_config_map]; + char repl_repl[Natts_pg_ts_config_map]; + HeapTuple newtup; + + memset(repl_val, 0, sizeof(repl_val)); + memset(repl_null, ' ', sizeof(repl_null)); + memset(repl_repl, ' ', sizeof(repl_repl)); + + repl_val[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictNew); + repl_repl[Anum_pg_ts_config_map_mapdict - 1] = 'r'; + + newtup = heap_modifytuple(maptup, + RelationGetDescr(relMap), + repl_val, repl_null, repl_repl); + simple_heap_update(relMap, &newtup->t_self, newtup); + + CatalogUpdateIndexes(relMap, newtup); + } + } + + systable_endscan(scan); + } + else + { + /* + * Insertion of new entries + */ + for (i = 0; i < ntoken; i++) + { + for (j = 0; j < ndict; j++) + { + Datum values[Natts_pg_ts_config_map]; + char nulls[Natts_pg_ts_config_map]; + + memset(nulls, ' ', sizeof(nulls)); + values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId); + values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]); + values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1); + values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]); + + tup = heap_formtuple(relMap->rd_att, values, nulls); + simple_heap_insert(relMap, tup); + CatalogUpdateIndexes(relMap, tup); + + heap_freetuple(tup); + } + } + } + + heap_close(relMap, RowExclusiveLock); +} + +/* + * ALTER TEXT SEARCH CONFIGURATION DROP MAPPING + */ +static void +DropConfigurationMapping(AlterTSConfigurationStmt *stmt, HeapTuple tup) +{ + Oid cfgId = HeapTupleGetOid(tup); + Relation relMap; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple maptup; + int i; + Oid prsId; + int *tokens, + ntoken; + ListCell *c; + + prsId = ((Form_pg_ts_config) GETSTRUCT(tup))->cfgparser; + + tokens = getTokenTypes(prsId, stmt->tokentype); + ntoken = list_length(stmt->tokentype); + + relMap = heap_open(TSConfigMapRelationId, RowExclusiveLock); + + i = 0; + foreach(c, stmt->tokentype) + { + Value *val = (Value *) lfirst(c); + bool found = false; + + ScanKeyInit(&skey[0], + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + ScanKeyInit(&skey[1], + Anum_pg_ts_config_map_maptokentype, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(tokens[i])); + + scan = systable_beginscan(relMap, TSConfigMapIndexId, true, + SnapshotNow, 2, skey); + + while (HeapTupleIsValid((maptup = systable_getnext(scan)))) + { + simple_heap_delete(relMap, &maptup->t_self); + found = true; + } + + systable_endscan(scan); + + if (!found) + { + if (!stmt->missing_ok) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("mapping for token type \"%s\" does not exist", + strVal(val)))); + } + else + { + ereport(NOTICE, + (errmsg("mapping for token type \"%s\" does not exist, skipping", + strVal(val)))); + } + } + + i++; + } + + heap_close(relMap, RowExclusiveLock); +} diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 324f89a2566..ed3d55c9d7b 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.597 2007/07/03 01:30:36 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.598 2007/08/21 01:11:15 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -173,6 +173,7 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args) ViewStmt CheckPointStmt CreateConversionStmt DeallocateStmt PrepareStmt ExecuteStmt DropOwnedStmt ReassignOwnedStmt + AlterTSConfigurationStmt AlterTSDictionaryStmt %type <node> select_no_parens select_with_parens select_clause simple_select values_clause @@ -375,14 +376,14 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args) CACHE CALLED CASCADE CASCADED CASE CAST CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT - COMMITTED CONCURRENTLY CONNECTION CONSTRAINT CONSTRAINTS + COMMITTED CONCURRENTLY CONFIGURATION CONNECTION CONSTRAINT CONSTRAINTS CONTENT_P CONVERSION_P CONVERT COPY COST CREATE CREATEDB CREATEROLE CREATEUSER CROSS CSV CURRENT_P CURRENT_DATE CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS - DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS - DESC DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP + DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DESC + DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT @@ -407,7 +408,7 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args) LIKE LIMIT LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P LOGIN_P - MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE + MAPPING MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB NOCREATEROLE NOCREATEUSER NOINHERIT NOLOGIN_P NONE NOSUPERUSER @@ -416,7 +417,7 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args) OBJECT_P OF OFF OFFSET OIDS OLD ON ONLY OPERATOR OPTION OR ORDER OUT_P OUTER_P OVERLAPS OVERLAY OWNED OWNER - PARTIAL PASSWORD PLACING PLANS POSITION + PARSER PARTIAL PASSWORD PLACING PLANS POSITION PRECISION PRESERVE PREPARE PREPARED PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE @@ -426,13 +427,13 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args) REPEATABLE REPLACE REPLICA RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROW ROWS RULE - SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE + SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE SHOW SIMILAR SIMPLE SMALLINT SOME STABLE STANDALONE_P START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING SUPERUSER_P SYMMETRIC SYSID SYSTEM_P - TABLE TABLESPACE TEMP TEMPLATE TEMPORARY THEN TIME TIMESTAMP + TABLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT THEN TIME TIMESTAMP TO TRAILING TRANSACTION TREAT TRIGGER TRIM TRUE_P TRUNCATE TRUSTED TYPE_P @@ -537,6 +538,8 @@ stmt : | AlterTableStmt | AlterRoleSetStmt | AlterRoleStmt + | AlterTSConfigurationStmt + | AlterTSDictionaryStmt | AlterUserSetStmt | AlterUserStmt | AnalyzeStmt @@ -2972,6 +2975,42 @@ DefineStmt: n->vals = $7; $$ = (Node *)n; } + | CREATE TEXT SEARCH PARSER any_name definition + { + DefineStmt *n = makeNode(DefineStmt); + n->kind = OBJECT_TSPARSER; + n->args = NIL; + n->defnames = $5; + n->definition = $6; + $$ = (Node *)n; + } + | CREATE TEXT SEARCH DICTIONARY any_name definition + { + DefineStmt *n = makeNode(DefineStmt); + n->kind = OBJECT_TSDICTIONARY; + n->args = NIL; + n->defnames = $5; + n->definition = $6; + $$ = (Node *)n; + } + | CREATE TEXT SEARCH TEMPLATE any_name definition + { + DefineStmt *n = makeNode(DefineStmt); + n->kind = OBJECT_TSTEMPLATE; + n->args = NIL; + n->defnames = $5; + n->definition = $6; + $$ = (Node *)n; + } + | CREATE TEXT SEARCH CONFIGURATION any_name definition + { + DefineStmt *n = makeNode(DefineStmt); + n->kind = OBJECT_TSCONFIGURATION; + n->args = NIL; + n->defnames = $5; + n->definition = $6; + $$ = (Node *)n; + } ; definition: '(' def_list ')' { $$ = $2; } @@ -3281,6 +3320,10 @@ drop_type: TABLE { $$ = OBJECT_TABLE; } | DOMAIN_P { $$ = OBJECT_DOMAIN; } | CONVERSION_P { $$ = OBJECT_CONVERSION; } | SCHEMA { $$ = OBJECT_SCHEMA; } + | TEXT SEARCH PARSER { $$ = OBJECT_TSPARSER; } + | TEXT SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; } + | TEXT SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; } + | TEXT SEARCH CONFIGURATION { $$ = OBJECT_TSCONFIGURATION; } ; any_name_list: @@ -3323,7 +3366,10 @@ TruncateStmt: * * COMMENT ON [ [ DATABASE | DOMAIN | INDEX | SEQUENCE | TABLE | TYPE | VIEW | * CONVERSION | LANGUAGE | OPERATOR CLASS | LARGE OBJECT | - * CAST | COLUMN | SCHEMA | TABLESPACE | ROLE ] <objname> | + * CAST | COLUMN | SCHEMA | TABLESPACE | ROLE | + * TEXT SEARCH PARSER | TEXT SEARCH DICTIONARY | + * TEXT SEARCH TEMPLATE | + * TEXT SEARCH CONFIGURATION ] <objname> | * AGGREGATE <aggname> (arg1, ...) | * FUNCTION <funcname> (arg1, arg2, ...) | * OPERATOR <op> (leftoperand_typ, rightoperand_typ) | @@ -3454,6 +3500,38 @@ CommentStmt: n->comment = $7; $$ = (Node *) n; } + | COMMENT ON TEXT SEARCH PARSER any_name IS comment_text + { + CommentStmt *n = makeNode(CommentStmt); + n->objtype = OBJECT_TSPARSER; + n->objname = $6; + n->comment = $8; + $$ = (Node *) n; + } + | COMMENT ON TEXT SEARCH DICTIONARY any_name IS comment_text + { + CommentStmt *n = makeNode(CommentStmt); + n->objtype = OBJECT_TSDICTIONARY; + n->objname = $6; + n->comment = $8; + $$ = (Node *) n; + } + | COMMENT ON TEXT SEARCH TEMPLATE any_name IS comment_text + { + CommentStmt *n = makeNode(CommentStmt); + n->objtype = OBJECT_TSTEMPLATE; + n->objname = $6; + n->comment = $8; + $$ = (Node *) n; + } + | COMMENT ON TEXT SEARCH CONFIGURATION any_name IS comment_text + { + CommentStmt *n = makeNode(CommentStmt); + n->objtype = OBJECT_TSCONFIGURATION; + n->objname = $6; + n->comment = $8; + $$ = (Node *) n; + } ; comment_type: @@ -4615,6 +4693,38 @@ RenameStmt: ALTER AGGREGATE func_name aggr_args RENAME TO name n->newname = $6; $$ = (Node *)n; } + | ALTER TEXT SEARCH PARSER any_name RENAME TO name + { + RenameStmt *n = makeNode(RenameStmt); + n->renameType = OBJECT_TSPARSER; + n->object = $5; + n->newname = $8; + $$ = (Node *)n; + } + | ALTER TEXT SEARCH DICTIONARY any_name RENAME TO name + { + RenameStmt *n = makeNode(RenameStmt); + n->renameType = OBJECT_TSDICTIONARY; + n->object = $5; + n->newname = $8; + $$ = (Node *)n; + } + | ALTER TEXT SEARCH TEMPLATE any_name RENAME TO name + { + RenameStmt *n = makeNode(RenameStmt); + n->renameType = OBJECT_TSTEMPLATE; + n->object = $5; + n->newname = $8; + $$ = (Node *)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name RENAME TO name + { + RenameStmt *n = makeNode(RenameStmt); + n->renameType = OBJECT_TSCONFIGURATION; + n->object = $5; + n->newname = $8; + $$ = (Node *)n; + } ; opt_column: COLUMN { $$ = COLUMN; } @@ -4787,6 +4897,22 @@ AlterOwnerStmt: ALTER AGGREGATE func_name aggr_args OWNER TO RoleId n->newowner = $6; $$ = (Node *)n; } + | ALTER TEXT SEARCH DICTIONARY any_name OWNER TO RoleId + { + AlterOwnerStmt *n = makeNode(AlterOwnerStmt); + n->objectType = OBJECT_TSDICTIONARY; + n->object = $5; + n->newowner = $8; + $$ = (Node *)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name OWNER TO RoleId + { + AlterOwnerStmt *n = makeNode(AlterOwnerStmt); + n->objectType = OBJECT_TSCONFIGURATION; + n->object = $5; + n->newowner = $8; + $$ = (Node *)n; + } ; @@ -5382,6 +5508,89 @@ opt_as: AS {} /***************************************************************************** * + * Manipulate a text search dictionary or configuration + * + *****************************************************************************/ + +AlterTSDictionaryStmt: + ALTER TEXT SEARCH DICTIONARY any_name definition + { + AlterTSDictionaryStmt *n = makeNode(AlterTSDictionaryStmt); + n->dictname = $5; + n->options = $6; + $$ = (Node *)n; + } + ; + +AlterTSConfigurationStmt: + ALTER TEXT SEARCH CONFIGURATION any_name definition + { + AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); + n->cfgname = $5; + n->options = $6; + $$ = (Node *)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name ADD_P MAPPING FOR name_list WITH any_name_list + { + AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); + n->cfgname = $5; + n->tokentype = $9; + n->dicts = $11; + n->override = false; + n->replace = false; + $$ = (Node*)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list WITH any_name_list + { + AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); + n->cfgname = $5; + n->tokentype = $9; + n->dicts = $11; + n->override = true; + n->replace = false; + $$ = (Node*)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name ALTER MAPPING REPLACE any_name WITH any_name + { + AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); + n->cfgname = $5; + n->tokentype = NIL; + n->dicts = list_make2($9,$11); + n->override = false; + n->replace = true; + $$ = (Node*)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name ALTER MAPPING FOR name_list REPLACE any_name WITH any_name + { + AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); + n->cfgname = $5; + n->tokentype = $9; + n->dicts = list_make2($11,$13); + n->override = false; + n->replace = true; + $$ = (Node*)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name DROP MAPPING FOR name_list + { + AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); + n->cfgname = $5; + n->tokentype = $9; + n->missing_ok = false; + $$ = (Node*)n; + } + | ALTER TEXT SEARCH CONFIGURATION any_name DROP MAPPING IF_P EXISTS FOR name_list + { + AlterTSConfigurationStmt *n = makeNode(AlterTSConfigurationStmt); + n->cfgname = $5; + n->tokentype = $11; + n->missing_ok = true; + $$ = (Node*)n; + } + ; + + +/***************************************************************************** + * * Manipulate a conversion * * CREATE [DEFAULT] CONVERSION <conversion_name> @@ -8853,6 +9062,7 @@ unreserved_keyword: | COMMIT | COMMITTED | CONCURRENTLY + | CONFIGURATION | CONNECTION | CONSTRAINTS | CONTENT_P @@ -8876,6 +9086,7 @@ unreserved_keyword: | DELETE_P | DELIMITER | DELIMITERS + | DICTIONARY | DISABLE_P | DISCARD | DOCUMENT_P @@ -8933,6 +9144,7 @@ unreserved_keyword: | LOCATION | LOCK_P | LOGIN_P + | MAPPING | MATCH | MAXVALUE | MINUTE_P @@ -8961,6 +9173,7 @@ unreserved_keyword: | OPTION | OWNED | OWNER + | PARSER | PARTIAL | PASSWORD | PLANS @@ -8994,6 +9207,7 @@ unreserved_keyword: | SAVEPOINT | SCHEMA | SCROLL + | SEARCH | SECOND_P | SECURITY | SEQUENCE @@ -9020,6 +9234,7 @@ unreserved_keyword: | TEMP | TEMPLATE | TEMPORARY + | TEXT | TRANSACTION | TRIGGER | TRUNCATE diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index 5b62a9c7c98..1e97d854465 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.189 2007/06/18 21:40:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.190 2007/08/21 01:11:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -87,6 +87,7 @@ static const ScanKeyword ScanKeywords[] = { {"commit", COMMIT, UNRESERVED_KEYWORD}, {"committed", COMMITTED, UNRESERVED_KEYWORD}, {"concurrently", CONCURRENTLY, UNRESERVED_KEYWORD}, + {"configuration", CONFIGURATION, UNRESERVED_KEYWORD}, {"connection", CONNECTION, UNRESERVED_KEYWORD}, {"constraint", CONSTRAINT, RESERVED_KEYWORD}, {"constraints", CONSTRAINTS, UNRESERVED_KEYWORD}, @@ -124,6 +125,7 @@ static const ScanKeyword ScanKeywords[] = { {"delimiter", DELIMITER, UNRESERVED_KEYWORD}, {"delimiters", DELIMITERS, UNRESERVED_KEYWORD}, {"desc", DESC, RESERVED_KEYWORD}, + {"dictionary", DICTIONARY, UNRESERVED_KEYWORD}, {"disable", DISABLE_P, UNRESERVED_KEYWORD}, {"discard", DISCARD, UNRESERVED_KEYWORD}, {"distinct", DISTINCT, RESERVED_KEYWORD}, @@ -219,6 +221,7 @@ static const ScanKeyword ScanKeywords[] = { {"location", LOCATION, UNRESERVED_KEYWORD}, {"lock", LOCK_P, UNRESERVED_KEYWORD}, {"login", LOGIN_P, UNRESERVED_KEYWORD}, + {"mapping", MAPPING, UNRESERVED_KEYWORD}, {"match", MATCH, UNRESERVED_KEYWORD}, {"maxvalue", MAXVALUE, UNRESERVED_KEYWORD}, {"minute", MINUTE_P, UNRESERVED_KEYWORD}, @@ -268,6 +271,7 @@ static const ScanKeyword ScanKeywords[] = { {"overlay", OVERLAY, COL_NAME_KEYWORD}, {"owned", OWNED, UNRESERVED_KEYWORD}, {"owner", OWNER, UNRESERVED_KEYWORD}, + {"parser", PARSER, UNRESERVED_KEYWORD}, {"partial", PARTIAL, UNRESERVED_KEYWORD}, {"password", PASSWORD, UNRESERVED_KEYWORD}, {"placing", PLACING, RESERVED_KEYWORD}, @@ -310,6 +314,7 @@ static const ScanKeyword ScanKeywords[] = { {"savepoint", SAVEPOINT, UNRESERVED_KEYWORD}, {"schema", SCHEMA, UNRESERVED_KEYWORD}, {"scroll", SCROLL, UNRESERVED_KEYWORD}, + {"search", SEARCH, UNRESERVED_KEYWORD}, {"second", SECOND_P, UNRESERVED_KEYWORD}, {"security", SECURITY, UNRESERVED_KEYWORD}, {"select", SELECT, RESERVED_KEYWORD}, @@ -345,6 +350,7 @@ static const ScanKeyword ScanKeywords[] = { {"temp", TEMP, UNRESERVED_KEYWORD}, {"template", TEMPLATE, UNRESERVED_KEYWORD}, {"temporary", TEMPORARY, UNRESERVED_KEYWORD}, + {"text", TEXT, UNRESERVED_KEYWORD}, {"then", THEN, RESERVED_KEYWORD}, {"time", TIME, COL_NAME_KEYWORD}, {"timestamp", TIMESTAMP, COL_NAME_KEYWORD}, diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c index abc05685811..6fb7bd9b59f 100644 --- a/src/backend/parser/parse_coerce.c +++ b/src/backend/parser/parse_coerce.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.155 2007/06/06 23:00:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.156 2007/08/21 01:11:15 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1554,6 +1554,8 @@ TypeCategory(Oid inType) case (REGOPERATOROID): case (REGCLASSOID): case (REGTYPEOID): + case (REGCONFIGOID): + case (REGDICTIONARYOID): case (INT2OID): case (INT4OID): case (INT8OID): @@ -1672,7 +1674,9 @@ IsPreferredType(CATEGORY category, Oid type) type == REGOPEROID || type == REGOPERATOROID || type == REGCLASSOID || - type == REGTYPEOID) + type == REGTYPEOID || + type == REGCONFIGOID || + type == REGDICTIONARYOID) preftype = OIDOID; else preftype = FLOAT8OID; diff --git a/src/backend/snowball/Makefile b/src/backend/snowball/Makefile new file mode 100644 index 00000000000..f1d54eb0998 --- /dev/null +++ b/src/backend/snowball/Makefile @@ -0,0 +1,144 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/backend/snowball +# +# $PostgreSQL: pgsql/src/backend/snowball/Makefile,v 1.1 2007/08/21 01:11:15 tgl Exp $ +# +#------------------------------------------------------------------------- + +subdir = src/backend/snowball +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := -I$(top_srcdir)/src/include/snowball \ + -I$(top_srcdir)/src/include/snowball/libstemmer $(CPPFLAGS) + +OBJS= dict_snowball.o api.o utilities.o \ + stem_ISO_8859_1_danish.o \ + stem_ISO_8859_1_dutch.o \ + stem_ISO_8859_1_english.o \ + stem_ISO_8859_1_finnish.o \ + stem_ISO_8859_1_french.o \ + stem_ISO_8859_1_german.o \ + stem_ISO_8859_1_hungarian.o \ + stem_ISO_8859_1_italian.o \ + stem_ISO_8859_1_norwegian.o \ + stem_ISO_8859_1_porter.o \ + stem_ISO_8859_1_portuguese.o \ + stem_ISO_8859_1_spanish.o \ + stem_ISO_8859_1_swedish.o \ + stem_ISO_8859_2_romanian.o \ + stem_KOI8_R_russian.o \ + stem_UTF_8_danish.o \ + stem_UTF_8_dutch.o \ + stem_UTF_8_english.o \ + stem_UTF_8_finnish.o \ + stem_UTF_8_french.o \ + stem_UTF_8_german.o \ + stem_UTF_8_hungarian.o \ + stem_UTF_8_italian.o \ + stem_UTF_8_norwegian.o \ + stem_UTF_8_porter.o \ + stem_UTF_8_portuguese.o \ + stem_UTF_8_romanian.o \ + stem_UTF_8_russian.o \ + stem_UTF_8_spanish.o \ + stem_UTF_8_swedish.o \ + stem_UTF_8_turkish.o + +# second column is name of latin dictionary, if different +LANGUAGES= \ + danish danish \ + dutch dutch \ + english english \ + finnish finnish \ + french french \ + german german \ + hungarian hungarian \ + italian italian \ + norwegian norwegian \ + portuguese portuguese \ + romanian romanian \ + russian english \ + spanish spanish \ + swedish swedish \ + turkish turkish \ + + +SQLSCRIPT= snowball_create.sql +DICTDIR=tsearch_data + +ifdef VPATH +override VPATH := $(srcdir)/libstemmer:$(VPATH) +else +VPATH = $(srcdir)/libstemmer +endif + +SHLIB_LINK := $(BE_DLLLIBS) + +NAME := dict_snowball +SO_MAJOR_VERSION := 0 +SO_MINOR_VERSION := 0 +rpath = + +all: all-shared-lib $(SQLSCRIPT) + +include $(top_srcdir)/src/Makefile.shlib + +$(SQLSCRIPT): Makefile snowball_func.sql.in snowball.sql.in +ifeq ($(enable_shared), yes) + echo '-- Language-specific snowball dictionaries' > $@ + cat $(srcdir)/snowball_func.sql.in >> $@ + @set $(LANGUAGES) ; \ + while [ "$$#" -gt 0 ] ; \ + do \ + lang=$$1; shift; \ + if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \ + stop=", StopWords=$${lang}" ; \ + else \ + stop=""; \ + fi; \ + nonlatdictname=$$lang; \ + latdictname=$$1; shift; \ + cat $(srcdir)/snowball.sql.in | \ + sed -e "s#_DICTNAME_#$$lang#g" | \ + sed -e "s#_CFGNAME_#$$lang#g" | \ + sed -e "s#_LATDICTNAME_#$$latdictname#g" | \ + sed -e "s#_NONLATDICTNAME_#$$nonlatdictname#g" | \ + sed -e "s#_STOPWORDS_#$$stop#g" ; \ + done >> $@ +else + echo "-- No language-specific snowball dictionaries, for lack of shared library support" > $@ +endif + +install: all installdirs +ifeq ($(enable_shared), yes) + $(INSTALL_SHLIB) $(shlib) '$(DESTDIR)$(pkglibdir)/$(NAME)$(DLSUFFIX)' +endif + $(INSTALL_DATA) $(SQLSCRIPT) '$(DESTDIR)$(datadir)' + @set $(LANGUAGES) ; \ + while [ "$$#" -gt 0 ] ; \ + do \ + lang=$$1; shift; shift; \ + if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \ + $(INSTALL_DATA) $(srcdir)/stopwords/$${lang}.stop '$(DESTDIR)$(datadir)/$(DICTDIR)' ; \ + fi \ + done + +installdirs: + $(mkinstalldirs) '$(DESTDIR)$(pkglibdir)' '$(DESTDIR)$(datadir)' '$(DESTDIR)$(datadir)/$(DICTDIR)' + +uninstall: + rm -f '$(DESTDIR)$(pkglibdir)/$(NAME)$(DLSUFFIX)' + rm -f '$(DESTDIR)$(datadir)/$(SQLSCRIPT)' + @set $(LANGUAGES) ; \ + while [ "$$#" -gt 0 ] ; \ + do \ + lang=$$1; shift; shift; \ + if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \ + rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'$${lang}.stop ; \ + fi \ + done + +clean distclean maintainer-clean: clean-lib + rm -f $(OBJS) $(SQLSCRIPT) diff --git a/src/backend/snowball/README b/src/backend/snowball/README new file mode 100644 index 00000000000..099925ad8f0 --- /dev/null +++ b/src/backend/snowball/README @@ -0,0 +1,47 @@ +Snowball-based stemming +----------------------- + +This module uses the word stemming code developed by the Snowball project, +http://snowball.tartarus.org/ +which is released by them under a BSD-style license. + +The files under src/backend/snowball/libstemmer/ and +src/include/snowball/libstemmer/ are taken directly from their libstemmer_c +distribution, with only some minor adjustments of file inclusions. Note +that most of these files are in fact derived files, not master source. +The master sources are in the Snowball language, and are available along +with the Snowball-to-C compiler from the Snowball project. We choose to +include the derived files in the PostgreSQL distribution because most +installations will not have the Snowball compiler available. + +To update the PostgreSQL sources from a new Snowball libstemmer_c +distribution: + +1. Copy the *.c files in libstemmer_c/src_c/ to src/backend/snowball/libstemmer +with replacement of "../runtime/header.h" by "header.h", for example + +for f in libstemmer_c/src_c/*.c +do + sed 's|\.\./runtime/header\.h|header.h|' $f >libstemmer/`basename $f` +done + +(Alternatively, if you rebuild the stemmer files from the master Snowball +sources, just omit "-r ../runtime" from the Snowball compiler switches.) + +2. Copy the *.c files in libstemmer_c/runtime/ to +src/backend/snowball/libstemmer, and edit them to remove direct inclusions +of system headers such as <stdio.h> --- they should only include "header.h". +(This removal avoids portability problems on some platforms where <stdio.h> +is sensitive to largefile compilation options.) + +3. Copy the *.h files in libstemmer_c/src_c/ and libstemmer_c/runtime/ +to src/include/snowball/libstemmer. At this writing the header files +do not require any changes. + +4. Check whether any stemmer modules have been added or removed. If so, edit +the OBJS list in Makefile, the list of #include's in dict_snowball.c, and the +stemmer_modules[] table in dict_snowball.c. + +5. The various stopword files in stopwords/ must be downloaded +individually from pages on the snowball.tartarus.org website. +Be careful that these files must be stored in UTF-8 encoding. diff --git a/src/backend/snowball/dict_snowball.c b/src/backend/snowball/dict_snowball.c new file mode 100644 index 00000000000..f0bc2feedec --- /dev/null +++ b/src/backend/snowball/dict_snowball.c @@ -0,0 +1,326 @@ +/*------------------------------------------------------------------------- + * + * dict_snowball.c + * Snowball dictionary + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.1 2007/08/21 01:11:16 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "fmgr.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + +/* Some platforms define MAXINT and/or MININT, causing conflicts */ +#ifdef MAXINT +#undef MAXINT +#endif +#ifdef MININT +#undef MININT +#endif + +/* Now we can include the original Snowball header.h */ +#include "snowball/libstemmer/header.h" +#include "snowball/libstemmer/stem_ISO_8859_1_danish.h" +#include "snowball/libstemmer/stem_ISO_8859_1_dutch.h" +#include "snowball/libstemmer/stem_ISO_8859_1_english.h" +#include "snowball/libstemmer/stem_ISO_8859_1_finnish.h" +#include "snowball/libstemmer/stem_ISO_8859_1_french.h" +#include "snowball/libstemmer/stem_ISO_8859_1_german.h" +#include "snowball/libstemmer/stem_ISO_8859_1_hungarian.h" +#include "snowball/libstemmer/stem_ISO_8859_1_italian.h" +#include "snowball/libstemmer/stem_ISO_8859_1_norwegian.h" +#include "snowball/libstemmer/stem_ISO_8859_1_porter.h" +#include "snowball/libstemmer/stem_ISO_8859_1_portuguese.h" +#include "snowball/libstemmer/stem_ISO_8859_1_spanish.h" +#include "snowball/libstemmer/stem_ISO_8859_1_swedish.h" +#include "snowball/libstemmer/stem_ISO_8859_2_romanian.h" +#include "snowball/libstemmer/stem_KOI8_R_russian.h" +#include "snowball/libstemmer/stem_UTF_8_danish.h" +#include "snowball/libstemmer/stem_UTF_8_dutch.h" +#include "snowball/libstemmer/stem_UTF_8_english.h" +#include "snowball/libstemmer/stem_UTF_8_finnish.h" +#include "snowball/libstemmer/stem_UTF_8_french.h" +#include "snowball/libstemmer/stem_UTF_8_german.h" +#include "snowball/libstemmer/stem_UTF_8_hungarian.h" +#include "snowball/libstemmer/stem_UTF_8_italian.h" +#include "snowball/libstemmer/stem_UTF_8_norwegian.h" +#include "snowball/libstemmer/stem_UTF_8_porter.h" +#include "snowball/libstemmer/stem_UTF_8_portuguese.h" +#include "snowball/libstemmer/stem_UTF_8_romanian.h" +#include "snowball/libstemmer/stem_UTF_8_russian.h" +#include "snowball/libstemmer/stem_UTF_8_spanish.h" +#include "snowball/libstemmer/stem_UTF_8_swedish.h" +#include "snowball/libstemmer/stem_UTF_8_turkish.h" + + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(dsnowball_init); +Datum dsnowball_init(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(dsnowball_lexize); +Datum dsnowball_lexize(PG_FUNCTION_ARGS); + +/* List of supported modules */ +typedef struct stemmer_module +{ + const char *name; + pg_enc enc; + struct SN_env *(*create) (void); + void (*close) (struct SN_env *); + int (*stem) (struct SN_env *); +} stemmer_module; + +static const stemmer_module stemmer_modules[] = +{ + /* + * Stemmers list from Snowball distribution + */ + {"danish", PG_LATIN1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, + {"dutch", PG_LATIN1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, + {"english", PG_LATIN1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, + {"finnish", PG_LATIN1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, + {"french", PG_LATIN1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, + {"german", PG_LATIN1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, + {"hungarian", PG_LATIN1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, + {"italian", PG_LATIN1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, + {"norwegian", PG_LATIN1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, + {"porter", PG_LATIN1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem}, + {"portuguese", PG_LATIN1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, + {"spanish", PG_LATIN1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, + {"swedish", PG_LATIN1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, + {"romanian", PG_LATIN2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, + {"russian", PG_KOI8R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, + {"danish", PG_UTF8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, + {"dutch", PG_UTF8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"english", PG_UTF8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, + {"finnish", PG_UTF8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, + {"french", PG_UTF8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"german", PG_UTF8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"hungarian", PG_UTF8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, + {"italian", PG_UTF8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, + {"norwegian", PG_UTF8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, + {"porter", PG_UTF8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, + {"portuguese", PG_UTF8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, + {"romanian", PG_UTF8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"russian", PG_UTF8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, + {"spanish", PG_UTF8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"swedish", PG_UTF8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, + {"turkish", PG_UTF8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, + + /* + * Stemmer with PG_SQL_ASCII encoding should be valid for any server + * encoding + */ + {"english", PG_SQL_ASCII, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, + + {NULL, 0, NULL, NULL, NULL} /* list end marker */ +}; + + +typedef struct DictSnowball +{ + struct SN_env *z; + StopList stoplist; + bool needrecode; /* needs recoding before/after call stem */ + int (*stem) (struct SN_env * z); + + /* + * snowball saves alloced memory between calls, so we should run it in our + * private memory context. Note, init function is executed in long lived + * context, so we just remember CurrentMemoryContext + */ + MemoryContext dictCtx; +} DictSnowball; + + +static void +locate_stem_module(DictSnowball * d, char *lang) +{ + const stemmer_module *m; + + /* + * First, try to find exact match of stemmer module. Stemmer with + * PG_SQL_ASCII encoding is treated as working with any server encoding + */ + for (m = stemmer_modules; m->name; m++) + { + if ((m->enc == PG_SQL_ASCII || m->enc == GetDatabaseEncoding()) && + pg_strcasecmp(m->name, lang) == 0) + { + d->stem = m->stem; + d->z = m->create(); + d->needrecode = false; + return; + } + } + + /* + * Second, try to find stemmer for needed language for UTF8 encoding. + */ + for (m = stemmer_modules; m->name; m++) + { + if (m->enc == PG_UTF8 && pg_strcasecmp(m->name, lang) == 0) + { + d->stem = m->stem; + d->z = m->create(); + d->needrecode = true; + return; + } + } + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("no Snowball stemmer available for language \"%s\" and encoding \"%s\"", + lang, GetDatabaseEncodingName()))); +} + +Datum +dsnowball_init(PG_FUNCTION_ARGS) +{ + text *in; + DictSnowball *d; + Map *cfg, + *pcfg; + bool stoploaded = false; + + /* init functions must defend against NULLs for themselves */ + if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("NULL config not allowed for Snowball"))); + in = PG_GETARG_TEXT_P(0); + + d = (DictSnowball *) palloc0(sizeof(DictSnowball)); + d->stoplist.wordop = recode_and_lowerstr; + + parse_keyvalpairs(in, &cfg); + pcfg = cfg; + PG_FREE_IF_COPY(in, 0); + + while (pcfg && pcfg->key) + { + if (pg_strcasecmp("StopWords", pcfg->key) == 0) + { + if (stoploaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple StopWords parameters"))); + readstoplist(pcfg->value, &d->stoplist); + sortstoplist(&d->stoplist); + stoploaded = true; + } + else if (pg_strcasecmp("Language", pcfg->key) == 0) + { + if (d->stem) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple Language parameters"))); + locate_stem_module(d, pcfg->value); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized Snowball parameter: \"%s\"", + pcfg->key))); + } + + pfree(pcfg->key); + pfree(pcfg->value); + pcfg++; + } + pfree(cfg); + + if (!d->stem) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing Language parameter"))); + + d->dictCtx = CurrentMemoryContext; + + PG_RETURN_POINTER(d); +} + +Datum +dsnowball_lexize(PG_FUNCTION_ARGS) +{ + DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt = lowerstr_with_len(in, len); + TSLexeme *res = palloc0(sizeof(TSLexeme) * 2); + + if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) + { + pfree(txt); + } + else + { + MemoryContext saveCtx; + + /* + * recode to utf8 if stemmer is utf8 and doesn't match server encoding + */ + if (d->needrecode) + { + char *recoded; + + recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt, + strlen(txt), + GetDatabaseEncoding(), + PG_UTF8); + if (recoded == NULL) + elog(ERROR, "encoding conversion failed"); + + if (recoded != txt) + { + pfree(txt); + txt = recoded; + } + } + + /* see comment about d->dictCtx */ + saveCtx = MemoryContextSwitchTo(d->dictCtx); + SN_set_current(d->z, strlen(txt), (symbol *) txt); + d->stem(d->z); + MemoryContextSwitchTo(saveCtx); + + if (d->z->p && d->z->l) + { + txt = repalloc(txt, d->z->l + 1); + memcpy(txt, d->z->p, d->z->l); + txt[d->z->l] = '\0'; + } + + /* back recode if needed */ + if (d->needrecode) + { + char *recoded; + + recoded = (char *) pg_do_encoding_conversion((unsigned char *) txt, + strlen(txt), + PG_UTF8, + GetDatabaseEncoding()); + if (recoded == NULL) + elog(ERROR, "encoding conversion failed"); + + if (recoded != txt) + { + pfree(txt); + txt = recoded; + } + } + + res->lexeme = txt; + } + + PG_RETURN_POINTER(res); +} diff --git a/src/backend/snowball/libstemmer/api.c b/src/backend/snowball/libstemmer/api.c new file mode 100644 index 00000000000..530b427a466 --- /dev/null +++ b/src/backend/snowball/libstemmer/api.c @@ -0,0 +1,64 @@ +#include "header.h" + +extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size) +{ + struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); + if (z == NULL) return NULL; + z->p = create_s(); + if (z->p == NULL) goto error; + if (S_size) + { + int i; + z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); + if (z->S == NULL) goto error; + + for (i = 0; i < S_size; i++) + { + z->S[i] = create_s(); + if (z->S[i] == NULL) goto error; + } + } + + if (I_size) + { + z->I = (int *) calloc(I_size, sizeof(int)); + if (z->I == NULL) goto error; + } + + if (B_size) + { + z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char)); + if (z->B == NULL) goto error; + } + + return z; +error: + SN_close_env(z, S_size); + return NULL; +} + +extern void SN_close_env(struct SN_env * z, int S_size) +{ + if (z == NULL) return; + if (S_size) + { + int i; + for (i = 0; i < S_size; i++) + { + lose_s(z->S[i]); + } + free(z->S); + } + free(z->I); + free(z->B); + if (z->p) lose_s(z->p); + free(z); +} + +extern int SN_set_current(struct SN_env * z, int size, const symbol * s) +{ + int err = replace_s(z, 0, z->l, size, s, NULL); + z->c = 0; + return err; +} + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_danish.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_danish.c new file mode 100644 index 00000000000..36a9f99276f --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_danish.c @@ -0,0 +1,337 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int danish_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_undouble(struct SN_env * z); +static int r_other_suffix(struct SN_env * z); +static int r_consonant_pair(struct SN_env * z); +static int r_main_suffix(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * danish_ISO_8859_1_create_env(void); +extern void danish_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[3] = { 'h', 'e', 'd' }; +static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; +static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; +static const symbol s_0_3[1] = { 'e' }; +static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; +static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; +static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; +static const symbol s_0_7[3] = { 'e', 'n', 'e' }; +static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; +static const symbol s_0_9[3] = { 'e', 'r', 'e' }; +static const symbol s_0_10[2] = { 'e', 'n' }; +static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; +static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; +static const symbol s_0_13[2] = { 'e', 'r' }; +static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; +static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; +static const symbol s_0_16[1] = { 's' }; +static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; +static const symbol s_0_18[2] = { 'e', 's' }; +static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; +static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; +static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; +static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; +static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; +static const symbol s_0_24[3] = { 'e', 'n', 's' }; +static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; +static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; +static const symbol s_0_27[3] = { 'e', 'r', 's' }; +static const symbol s_0_28[3] = { 'e', 't', 's' }; +static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; +static const symbol s_0_30[2] = { 'e', 't' }; +static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; + +static const struct among a_0[32] = +{ +/* 0 */ { 3, s_0_0, -1, 1, 0}, +/* 1 */ { 5, s_0_1, 0, 1, 0}, +/* 2 */ { 4, s_0_2, -1, 1, 0}, +/* 3 */ { 1, s_0_3, -1, 1, 0}, +/* 4 */ { 5, s_0_4, 3, 1, 0}, +/* 5 */ { 4, s_0_5, 3, 1, 0}, +/* 6 */ { 6, s_0_6, 5, 1, 0}, +/* 7 */ { 3, s_0_7, 3, 1, 0}, +/* 8 */ { 4, s_0_8, 3, 1, 0}, +/* 9 */ { 3, s_0_9, 3, 1, 0}, +/* 10 */ { 2, s_0_10, -1, 1, 0}, +/* 11 */ { 5, s_0_11, 10, 1, 0}, +/* 12 */ { 4, s_0_12, 10, 1, 0}, +/* 13 */ { 2, s_0_13, -1, 1, 0}, +/* 14 */ { 5, s_0_14, 13, 1, 0}, +/* 15 */ { 4, s_0_15, 13, 1, 0}, +/* 16 */ { 1, s_0_16, -1, 2, 0}, +/* 17 */ { 4, s_0_17, 16, 1, 0}, +/* 18 */ { 2, s_0_18, 16, 1, 0}, +/* 19 */ { 5, s_0_19, 18, 1, 0}, +/* 20 */ { 7, s_0_20, 19, 1, 0}, +/* 21 */ { 4, s_0_21, 18, 1, 0}, +/* 22 */ { 5, s_0_22, 18, 1, 0}, +/* 23 */ { 4, s_0_23, 18, 1, 0}, +/* 24 */ { 3, s_0_24, 16, 1, 0}, +/* 25 */ { 6, s_0_25, 24, 1, 0}, +/* 26 */ { 5, s_0_26, 24, 1, 0}, +/* 27 */ { 3, s_0_27, 16, 1, 0}, +/* 28 */ { 3, s_0_28, 16, 1, 0}, +/* 29 */ { 5, s_0_29, 28, 1, 0}, +/* 30 */ { 2, s_0_30, -1, 1, 0}, +/* 31 */ { 4, s_0_31, 30, 1, 0} +}; + +static const symbol s_1_0[2] = { 'g', 'd' }; +static const symbol s_1_1[2] = { 'd', 't' }; +static const symbol s_1_2[2] = { 'g', 't' }; +static const symbol s_1_3[2] = { 'k', 't' }; + +static const struct among a_1[4] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0}, +/* 2 */ { 2, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0} +}; + +static const symbol s_2_0[2] = { 'i', 'g' }; +static const symbol s_2_1[3] = { 'l', 'i', 'g' }; +static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; +static const symbol s_2_3[3] = { 'e', 'l', 's' }; +static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' }; + +static const struct among a_2[5] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 3, s_2_1, 0, 1, 0}, +/* 2 */ { 4, s_2_2, 1, 1, 0}, +/* 3 */ { 3, s_2_3, -1, 1, 0}, +/* 4 */ { 4, s_2_4, -1, 2, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + +static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; + +static const symbol s_0[] = { 's', 't' }; +static const symbol s_1[] = { 'i', 'g' }; +static const symbol s_2[] = { 'l', 0xF8, 's' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c_test = z->c; /* test, line 33 */ + { int ret = z->c + 3; + if (0 > ret || ret > z->l) return 0; + z->c = ret; /* hop, line 33 */ + } + z->I[1] = z->c; /* setmark x, line 33 */ + z->c = c_test; + } + if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ + { /* gopast */ /* non v, line 34 */ + int ret = in_grouping(z, g_v, 97, 248, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 34 */ + /* try, line 35 */ + if (!(z->I[0] < z->I[1])) goto lab0; + z->I[0] = z->I[1]; +lab0: + return 1; +} + +static int r_main_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 41 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 41 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 41 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 41 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 48 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; + { int ret = slice_del(z); /* delete, line 50 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_consonant_pair(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 55 */ + { int mlimit; /* setlimit, line 56 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 56 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 56 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ + z->bra = z->c; /* ], line 56 */ + z->lb = mlimit; + } + z->c = z->l - m_test; + } + if (z->c <= z->lb) return 0; + z->c--; /* next, line 62 */ + z->bra = z->c; /* ], line 62 */ + { int ret = slice_del(z); /* delete, line 62 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_other_suffix(struct SN_env * z) { + int among_var; + { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ + z->ket = z->c; /* [, line 66 */ + if (!(eq_s_b(z, 2, s_0))) goto lab0; + z->bra = z->c; /* ], line 66 */ + if (!(eq_s_b(z, 2, s_1))) goto lab0; + { int ret = slice_del(z); /* delete, line 66 */ + if (ret < 0) return ret; + } + lab0: + z->c = z->l - m1; + } + { int mlimit; /* setlimit, line 67 */ + int m2 = z->l - z->c; (void)m2; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 67 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m2; + z->ket = z->c; /* [, line 67 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 67 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 70 */ + if (ret < 0) return ret; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m3; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_undouble(struct SN_env * z) { + { int mlimit; /* setlimit, line 76 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 76 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 76 */ + if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 76 */ + z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ + if (z->S[0] == 0) return -1; /* -> ch, line 76 */ + z->lb = mlimit; + } + if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ + { int ret = slice_del(z); /* delete, line 78 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int danish_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 84 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 84 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 85 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ + { int ret = r_main_suffix(z); + if (ret == 0) goto lab1; /* call main_suffix, line 86 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ + { int ret = r_other_suffix(z); + if (ret == 0) goto lab3; /* call other_suffix, line 88 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ + { int ret = r_undouble(z); + if (ret == 0) goto lab4; /* call undouble, line 89 */ + if (ret < 0) return ret; + } + lab4: + z->c = z->l - m5; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * danish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 0); } + +extern void danish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_dutch.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_dutch.c new file mode 100644 index 00000000000..e5ba288b1f5 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_dutch.c @@ -0,0 +1,624 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int dutch_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_standard_suffix(struct SN_env * z); +static int r_undouble(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_en_ending(struct SN_env * z); +static int r_e_ending(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * dutch_ISO_8859_1_create_env(void); +extern void dutch_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[1] = { 0xE1 }; +static const symbol s_0_2[1] = { 0xE4 }; +static const symbol s_0_3[1] = { 0xE9 }; +static const symbol s_0_4[1] = { 0xEB }; +static const symbol s_0_5[1] = { 0xED }; +static const symbol s_0_6[1] = { 0xEF }; +static const symbol s_0_7[1] = { 0xF3 }; +static const symbol s_0_8[1] = { 0xF6 }; +static const symbol s_0_9[1] = { 0xFA }; +static const symbol s_0_10[1] = { 0xFC }; + +static const struct among a_0[11] = +{ +/* 0 */ { 0, 0, -1, 6, 0}, +/* 1 */ { 1, s_0_1, 0, 1, 0}, +/* 2 */ { 1, s_0_2, 0, 1, 0}, +/* 3 */ { 1, s_0_3, 0, 2, 0}, +/* 4 */ { 1, s_0_4, 0, 2, 0}, +/* 5 */ { 1, s_0_5, 0, 3, 0}, +/* 6 */ { 1, s_0_6, 0, 3, 0}, +/* 7 */ { 1, s_0_7, 0, 4, 0}, +/* 8 */ { 1, s_0_8, 0, 4, 0}, +/* 9 */ { 1, s_0_9, 0, 5, 0}, +/* 10 */ { 1, s_0_10, 0, 5, 0} +}; + +static const symbol s_1_1[1] = { 'I' }; +static const symbol s_1_2[1] = { 'Y' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 1, s_1_1, 0, 2, 0}, +/* 2 */ { 1, s_1_2, 0, 1, 0} +}; + +static const symbol s_2_0[2] = { 'd', 'd' }; +static const symbol s_2_1[2] = { 'k', 'k' }; +static const symbol s_2_2[2] = { 't', 't' }; + +static const struct among a_2[3] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 2, s_2_2, -1, -1, 0} +}; + +static const symbol s_3_0[3] = { 'e', 'n', 'e' }; +static const symbol s_3_1[2] = { 's', 'e' }; +static const symbol s_3_2[2] = { 'e', 'n' }; +static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' }; +static const symbol s_3_4[1] = { 's' }; + +static const struct among a_3[5] = +{ +/* 0 */ { 3, s_3_0, -1, 2, 0}, +/* 1 */ { 2, s_3_1, -1, 3, 0}, +/* 2 */ { 2, s_3_2, -1, 2, 0}, +/* 3 */ { 5, s_3_3, 2, 1, 0}, +/* 4 */ { 1, s_3_4, -1, 3, 0} +}; + +static const symbol s_4_0[3] = { 'e', 'n', 'd' }; +static const symbol s_4_1[2] = { 'i', 'g' }; +static const symbol s_4_2[3] = { 'i', 'n', 'g' }; +static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' }; +static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' }; +static const symbol s_4_5[3] = { 'b', 'a', 'r' }; + +static const struct among a_4[6] = +{ +/* 0 */ { 3, s_4_0, -1, 1, 0}, +/* 1 */ { 2, s_4_1, -1, 2, 0}, +/* 2 */ { 3, s_4_2, -1, 1, 0}, +/* 3 */ { 4, s_4_3, -1, 3, 0}, +/* 4 */ { 4, s_4_4, -1, 4, 0}, +/* 5 */ { 3, s_4_5, -1, 5, 0} +}; + +static const symbol s_5_0[2] = { 'a', 'a' }; +static const symbol s_5_1[2] = { 'e', 'e' }; +static const symbol s_5_2[2] = { 'o', 'o' }; +static const symbol s_5_3[2] = { 'u', 'u' }; + +static const struct among a_5[4] = +{ +/* 0 */ { 2, s_5_0, -1, -1, 0}, +/* 1 */ { 2, s_5_1, -1, -1, 0}, +/* 2 */ { 2, s_5_2, -1, -1, 0}, +/* 3 */ { 2, s_5_3, -1, -1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const symbol s_0[] = { 'a' }; +static const symbol s_1[] = { 'e' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'o' }; +static const symbol s_4[] = { 'u' }; +static const symbol s_5[] = { 'y' }; +static const symbol s_6[] = { 'Y' }; +static const symbol s_7[] = { 'i' }; +static const symbol s_8[] = { 'I' }; +static const symbol s_9[] = { 'y' }; +static const symbol s_10[] = { 'Y' }; +static const symbol s_11[] = { 'y' }; +static const symbol s_12[] = { 'i' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'g', 'e', 'm' }; +static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; +static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; +static const symbol s_17[] = { 'c' }; +static const symbol s_18[] = { 'e', 'n' }; +static const symbol s_19[] = { 'i', 'g' }; +static const symbol s_20[] = { 'e' }; +static const symbol s_21[] = { 'e' }; + +static int r_prelude(struct SN_env * z) { + int among_var; + { int c_test = z->c; /* test, line 42 */ + while(1) { /* repeat, line 42 */ + int c1 = z->c; + z->bra = z->c; /* [, line 43 */ + if (z->c >= z->l || z->p[z->c + 0] >> 5 != 7 || !((340306450 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 6; else + among_var = find_among(z, a_0, 11); /* substring, line 43 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 43 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */ + if (ret < 0) return ret; + } + break; + case 6: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 54 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + z->c = c_test; + } + { int c_keep = z->c; /* try, line 57 */ + z->bra = z->c; /* [, line 57 */ + if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } + z->ket = z->c; /* ], line 57 */ + { int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */ + if (ret < 0) return ret; + } + lab1: + ; + } + while(1) { /* repeat, line 58 */ + int c2 = z->c; + while(1) { /* goto, line 58 */ + int c3 = z->c; + if (in_grouping(z, g_v, 97, 232, 0)) goto lab3; + z->bra = z->c; /* [, line 59 */ + { int c4 = z->c; /* or, line 59 */ + if (!(eq_s(z, 1, s_7))) goto lab5; + z->ket = z->c; /* ], line 59 */ + if (in_grouping(z, g_v, 97, 232, 0)) goto lab5; + { int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = c4; + if (!(eq_s(z, 1, s_9))) goto lab3; + z->ket = z->c; /* ], line 60 */ + { int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */ + if (ret < 0) return ret; + } + } + lab4: + z->c = c3; + break; + lab3: + z->c = c3; + if (z->c >= z->l) goto lab2; + z->c++; /* goto, line 58 */ + } + continue; + lab2: + z->c = c2; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { /* gopast */ /* grouping v, line 69 */ + int ret = out_grouping(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 69 */ + int ret = in_grouping(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 69 */ + /* try, line 70 */ + if (!(z->I[0] < 3)) goto lab0; + z->I[0] = 3; +lab0: + { /* gopast */ /* grouping v, line 71 */ + int ret = out_grouping(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 71 */ + int ret = in_grouping(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 71 */ + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 75 */ + int c1 = z->c; + z->bra = z->c; /* [, line 77 */ + if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else + among_var = find_among(z, a_1, 3); /* substring, line 77 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 77 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 80 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_undouble(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 91 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */ + z->c = z->l - m_test; + } + z->ket = z->c; /* [, line 91 */ + if (z->c <= z->lb) return 0; + z->c--; /* next, line 91 */ + z->bra = z->c; /* ], line 91 */ + { int ret = slice_del(z); /* delete, line 91 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_e_ending(struct SN_env * z) { + z->B[0] = 0; /* unset e_found, line 95 */ + z->ket = z->c; /* [, line 96 */ + if (!(eq_s_b(z, 1, s_13))) return 0; + z->bra = z->c; /* ], line 96 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 96 */ + if (ret < 0) return ret; + } + { int m_test = z->l - z->c; /* test, line 96 */ + if (out_grouping_b(z, g_v, 97, 232, 0)) return 0; + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 96 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set e_found, line 97 */ + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 98 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_en_ending(struct SN_env * z) { + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 102 */ + if (ret < 0) return ret; + } + { int m1 = z->l - z->c; (void)m1; /* and, line 102 */ + if (out_grouping_b(z, g_v, 97, 232, 0)) return 0; + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ + if (!(eq_s_b(z, 3, s_14))) goto lab0; + return 0; + lab0: + z->c = z->l - m2; + } + } + { int ret = slice_del(z); /* delete, line 102 */ + if (ret < 0) return ret; + } + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 103 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + { int m1 = z->l - z->c; (void)m1; /* do, line 107 */ + z->ket = z->c; /* [, line 108 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; + among_var = find_among_b(z, a_3, 5); /* substring, line 108 */ + if (!(among_var)) goto lab0; + z->bra = z->c; /* ], line 108 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = r_R1(z); + if (ret == 0) goto lab0; /* call R1, line 110 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_en_ending(z); + if (ret == 0) goto lab0; /* call en_ending, line 113 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = r_R1(z); + if (ret == 0) goto lab0; /* call R1, line 116 */ + if (ret < 0) return ret; + } + if (out_grouping_b(z, g_v_j, 97, 232, 0)) goto lab0; + { int ret = slice_del(z); /* delete, line 116 */ + if (ret < 0) return ret; + } + break; + } + lab0: + z->c = z->l - m1; + } + { int m2 = z->l - z->c; (void)m2; /* do, line 120 */ + { int ret = r_e_ending(z); + if (ret == 0) goto lab1; /* call e_ending, line 120 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 122 */ + z->ket = z->c; /* [, line 122 */ + if (!(eq_s_b(z, 4, s_16))) goto lab2; + z->bra = z->c; /* ], line 122 */ + { int ret = r_R2(z); + if (ret == 0) goto lab2; /* call R2, line 122 */ + if (ret < 0) return ret; + } + { int m4 = z->l - z->c; (void)m4; /* not, line 122 */ + if (!(eq_s_b(z, 1, s_17))) goto lab3; + goto lab2; + lab3: + z->c = z->l - m4; + } + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 123 */ + if (!(eq_s_b(z, 2, s_18))) goto lab2; + z->bra = z->c; /* ], line 123 */ + { int ret = r_en_ending(z); + if (ret == 0) goto lab2; /* call en_ending, line 123 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 126 */ + z->ket = z->c; /* [, line 127 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; + among_var = find_among_b(z, a_4, 6); /* substring, line 127 */ + if (!(among_var)) goto lab4; + z->bra = z->c; /* ], line 127 */ + switch(among_var) { + case 0: goto lab4; + case 1: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 129 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 129 */ + if (ret < 0) return ret; + } + { int m6 = z->l - z->c; (void)m6; /* or, line 130 */ + z->ket = z->c; /* [, line 130 */ + if (!(eq_s_b(z, 2, s_19))) goto lab6; + z->bra = z->c; /* ], line 130 */ + { int ret = r_R2(z); + if (ret == 0) goto lab6; /* call R2, line 130 */ + if (ret < 0) return ret; + } + { int m7 = z->l - z->c; (void)m7; /* not, line 130 */ + if (!(eq_s_b(z, 1, s_20))) goto lab7; + goto lab6; + lab7: + z->c = z->l - m7; + } + { int ret = slice_del(z); /* delete, line 130 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_undouble(z); + if (ret == 0) goto lab4; /* call undouble, line 130 */ + if (ret < 0) return ret; + } + } + lab5: + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 133 */ + if (ret < 0) return ret; + } + { int m8 = z->l - z->c; (void)m8; /* not, line 133 */ + if (!(eq_s_b(z, 1, s_21))) goto lab8; + goto lab4; + lab8: + z->c = z->l - m8; + } + { int ret = slice_del(z); /* delete, line 133 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 136 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 136 */ + if (ret < 0) return ret; + } + { int ret = r_e_ending(z); + if (ret == 0) goto lab4; /* call e_ending, line 136 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 139 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 139 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 142 */ + if (ret < 0) return ret; + } + if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */ + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + break; + } + lab4: + z->c = z->l - m5; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 146 */ + if (out_grouping_b(z, g_v_I, 73, 232, 0)) goto lab9; + { int m_test = z->l - z->c; /* test, line 148 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; + if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */ + if (out_grouping_b(z, g_v, 97, 232, 0)) goto lab9; + z->c = z->l - m_test; + } + z->ket = z->c; /* [, line 152 */ + if (z->c <= z->lb) goto lab9; + z->c--; /* next, line 152 */ + z->bra = z->c; /* ], line 152 */ + { int ret = slice_del(z); /* delete, line 152 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m9; + } + return 1; +} + +extern int dutch_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 159 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 159 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 160 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 160 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 161 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 162 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab2; /* call standard_suffix, line 162 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + z->c = z->lb; + { int c4 = z->c; /* do, line 163 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab3; /* call postlude, line 163 */ + if (ret < 0) return ret; + } + lab3: + z->c = c4; + } + return 1; +} + +extern struct SN_env * dutch_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } + +extern void dutch_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_english.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_english.c new file mode 100644 index 00000000000..141c45dc275 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_english.c @@ -0,0 +1,1117 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int english_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_exception2(struct SN_env * z); +static int r_exception1(struct SN_env * z); +static int r_Step_5(struct SN_env * z); +static int r_Step_4(struct SN_env * z); +static int r_Step_3(struct SN_env * z); +static int r_Step_2(struct SN_env * z); +static int r_Step_1c(struct SN_env * z); +static int r_Step_1b(struct SN_env * z); +static int r_Step_1a(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_shortv(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * english_ISO_8859_1_create_env(void); +extern void english_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[5] = { 'a', 'r', 's', 'e', 'n' }; +static const symbol s_0_1[6] = { 'c', 'o', 'm', 'm', 'u', 'n' }; +static const symbol s_0_2[5] = { 'g', 'e', 'n', 'e', 'r' }; + +static const struct among a_0[3] = +{ +/* 0 */ { 5, s_0_0, -1, -1, 0}, +/* 1 */ { 6, s_0_1, -1, -1, 0}, +/* 2 */ { 5, s_0_2, -1, -1, 0} +}; + +static const symbol s_1_0[1] = { '\'' }; +static const symbol s_1_1[3] = { '\'', 's', '\'' }; +static const symbol s_1_2[2] = { '\'', 's' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 1, s_1_0, -1, 1, 0}, +/* 1 */ { 3, s_1_1, 0, 1, 0}, +/* 2 */ { 2, s_1_2, -1, 1, 0} +}; + +static const symbol s_2_0[3] = { 'i', 'e', 'd' }; +static const symbol s_2_1[1] = { 's' }; +static const symbol s_2_2[3] = { 'i', 'e', 's' }; +static const symbol s_2_3[4] = { 's', 's', 'e', 's' }; +static const symbol s_2_4[2] = { 's', 's' }; +static const symbol s_2_5[2] = { 'u', 's' }; + +static const struct among a_2[6] = +{ +/* 0 */ { 3, s_2_0, -1, 2, 0}, +/* 1 */ { 1, s_2_1, -1, 3, 0}, +/* 2 */ { 3, s_2_2, 1, 2, 0}, +/* 3 */ { 4, s_2_3, 1, 1, 0}, +/* 4 */ { 2, s_2_4, 1, -1, 0}, +/* 5 */ { 2, s_2_5, 1, -1, 0} +}; + +static const symbol s_3_1[2] = { 'b', 'b' }; +static const symbol s_3_2[2] = { 'd', 'd' }; +static const symbol s_3_3[2] = { 'f', 'f' }; +static const symbol s_3_4[2] = { 'g', 'g' }; +static const symbol s_3_5[2] = { 'b', 'l' }; +static const symbol s_3_6[2] = { 'm', 'm' }; +static const symbol s_3_7[2] = { 'n', 'n' }; +static const symbol s_3_8[2] = { 'p', 'p' }; +static const symbol s_3_9[2] = { 'r', 'r' }; +static const symbol s_3_10[2] = { 'a', 't' }; +static const symbol s_3_11[2] = { 't', 't' }; +static const symbol s_3_12[2] = { 'i', 'z' }; + +static const struct among a_3[13] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_3_1, 0, 2, 0}, +/* 2 */ { 2, s_3_2, 0, 2, 0}, +/* 3 */ { 2, s_3_3, 0, 2, 0}, +/* 4 */ { 2, s_3_4, 0, 2, 0}, +/* 5 */ { 2, s_3_5, 0, 1, 0}, +/* 6 */ { 2, s_3_6, 0, 2, 0}, +/* 7 */ { 2, s_3_7, 0, 2, 0}, +/* 8 */ { 2, s_3_8, 0, 2, 0}, +/* 9 */ { 2, s_3_9, 0, 2, 0}, +/* 10 */ { 2, s_3_10, 0, 1, 0}, +/* 11 */ { 2, s_3_11, 0, 2, 0}, +/* 12 */ { 2, s_3_12, 0, 1, 0} +}; + +static const symbol s_4_0[2] = { 'e', 'd' }; +static const symbol s_4_1[3] = { 'e', 'e', 'd' }; +static const symbol s_4_2[3] = { 'i', 'n', 'g' }; +static const symbol s_4_3[4] = { 'e', 'd', 'l', 'y' }; +static const symbol s_4_4[5] = { 'e', 'e', 'd', 'l', 'y' }; +static const symbol s_4_5[5] = { 'i', 'n', 'g', 'l', 'y' }; + +static const struct among a_4[6] = +{ +/* 0 */ { 2, s_4_0, -1, 2, 0}, +/* 1 */ { 3, s_4_1, 0, 1, 0}, +/* 2 */ { 3, s_4_2, -1, 2, 0}, +/* 3 */ { 4, s_4_3, -1, 2, 0}, +/* 4 */ { 5, s_4_4, 3, 1, 0}, +/* 5 */ { 5, s_4_5, -1, 2, 0} +}; + +static const symbol s_5_0[4] = { 'a', 'n', 'c', 'i' }; +static const symbol s_5_1[4] = { 'e', 'n', 'c', 'i' }; +static const symbol s_5_2[3] = { 'o', 'g', 'i' }; +static const symbol s_5_3[2] = { 'l', 'i' }; +static const symbol s_5_4[3] = { 'b', 'l', 'i' }; +static const symbol s_5_5[4] = { 'a', 'b', 'l', 'i' }; +static const symbol s_5_6[4] = { 'a', 'l', 'l', 'i' }; +static const symbol s_5_7[5] = { 'f', 'u', 'l', 'l', 'i' }; +static const symbol s_5_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; +static const symbol s_5_9[5] = { 'o', 'u', 's', 'l', 'i' }; +static const symbol s_5_10[5] = { 'e', 'n', 't', 'l', 'i' }; +static const symbol s_5_11[5] = { 'a', 'l', 'i', 't', 'i' }; +static const symbol s_5_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; +static const symbol s_5_13[5] = { 'i', 'v', 'i', 't', 'i' }; +static const symbol s_5_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_5_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_5_16[5] = { 'a', 'l', 'i', 's', 'm' }; +static const symbol s_5_17[5] = { 'a', 't', 'i', 'o', 'n' }; +static const symbol s_5_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; +static const symbol s_5_19[4] = { 'i', 'z', 'e', 'r' }; +static const symbol s_5_20[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_5_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; +static const symbol s_5_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; +static const symbol s_5_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; + +static const struct among a_5[24] = +{ +/* 0 */ { 4, s_5_0, -1, 3, 0}, +/* 1 */ { 4, s_5_1, -1, 2, 0}, +/* 2 */ { 3, s_5_2, -1, 13, 0}, +/* 3 */ { 2, s_5_3, -1, 16, 0}, +/* 4 */ { 3, s_5_4, 3, 12, 0}, +/* 5 */ { 4, s_5_5, 4, 4, 0}, +/* 6 */ { 4, s_5_6, 3, 8, 0}, +/* 7 */ { 5, s_5_7, 3, 14, 0}, +/* 8 */ { 6, s_5_8, 3, 15, 0}, +/* 9 */ { 5, s_5_9, 3, 10, 0}, +/* 10 */ { 5, s_5_10, 3, 5, 0}, +/* 11 */ { 5, s_5_11, -1, 8, 0}, +/* 12 */ { 6, s_5_12, -1, 12, 0}, +/* 13 */ { 5, s_5_13, -1, 11, 0}, +/* 14 */ { 6, s_5_14, -1, 1, 0}, +/* 15 */ { 7, s_5_15, 14, 7, 0}, +/* 16 */ { 5, s_5_16, -1, 8, 0}, +/* 17 */ { 5, s_5_17, -1, 7, 0}, +/* 18 */ { 7, s_5_18, 17, 6, 0}, +/* 19 */ { 4, s_5_19, -1, 6, 0}, +/* 20 */ { 4, s_5_20, -1, 7, 0}, +/* 21 */ { 7, s_5_21, -1, 11, 0}, +/* 22 */ { 7, s_5_22, -1, 9, 0}, +/* 23 */ { 7, s_5_23, -1, 10, 0} +}; + +static const symbol s_6_0[5] = { 'i', 'c', 'a', 't', 'e' }; +static const symbol s_6_1[5] = { 'a', 't', 'i', 'v', 'e' }; +static const symbol s_6_2[5] = { 'a', 'l', 'i', 'z', 'e' }; +static const symbol s_6_3[5] = { 'i', 'c', 'i', 't', 'i' }; +static const symbol s_6_4[4] = { 'i', 'c', 'a', 'l' }; +static const symbol s_6_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_6_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_6_7[3] = { 'f', 'u', 'l' }; +static const symbol s_6_8[4] = { 'n', 'e', 's', 's' }; + +static const struct among a_6[9] = +{ +/* 0 */ { 5, s_6_0, -1, 4, 0}, +/* 1 */ { 5, s_6_1, -1, 6, 0}, +/* 2 */ { 5, s_6_2, -1, 3, 0}, +/* 3 */ { 5, s_6_3, -1, 4, 0}, +/* 4 */ { 4, s_6_4, -1, 4, 0}, +/* 5 */ { 6, s_6_5, -1, 1, 0}, +/* 6 */ { 7, s_6_6, 5, 2, 0}, +/* 7 */ { 3, s_6_7, -1, 5, 0}, +/* 8 */ { 4, s_6_8, -1, 5, 0} +}; + +static const symbol s_7_0[2] = { 'i', 'c' }; +static const symbol s_7_1[4] = { 'a', 'n', 'c', 'e' }; +static const symbol s_7_2[4] = { 'e', 'n', 'c', 'e' }; +static const symbol s_7_3[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_7_4[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_7_5[3] = { 'a', 't', 'e' }; +static const symbol s_7_6[3] = { 'i', 'v', 'e' }; +static const symbol s_7_7[3] = { 'i', 'z', 'e' }; +static const symbol s_7_8[3] = { 'i', 't', 'i' }; +static const symbol s_7_9[2] = { 'a', 'l' }; +static const symbol s_7_10[3] = { 'i', 's', 'm' }; +static const symbol s_7_11[3] = { 'i', 'o', 'n' }; +static const symbol s_7_12[2] = { 'e', 'r' }; +static const symbol s_7_13[3] = { 'o', 'u', 's' }; +static const symbol s_7_14[3] = { 'a', 'n', 't' }; +static const symbol s_7_15[3] = { 'e', 'n', 't' }; +static const symbol s_7_16[4] = { 'm', 'e', 'n', 't' }; +static const symbol s_7_17[5] = { 'e', 'm', 'e', 'n', 't' }; + +static const struct among a_7[18] = +{ +/* 0 */ { 2, s_7_0, -1, 1, 0}, +/* 1 */ { 4, s_7_1, -1, 1, 0}, +/* 2 */ { 4, s_7_2, -1, 1, 0}, +/* 3 */ { 4, s_7_3, -1, 1, 0}, +/* 4 */ { 4, s_7_4, -1, 1, 0}, +/* 5 */ { 3, s_7_5, -1, 1, 0}, +/* 6 */ { 3, s_7_6, -1, 1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 3, s_7_8, -1, 1, 0}, +/* 9 */ { 2, s_7_9, -1, 1, 0}, +/* 10 */ { 3, s_7_10, -1, 1, 0}, +/* 11 */ { 3, s_7_11, -1, 2, 0}, +/* 12 */ { 2, s_7_12, -1, 1, 0}, +/* 13 */ { 3, s_7_13, -1, 1, 0}, +/* 14 */ { 3, s_7_14, -1, 1, 0}, +/* 15 */ { 3, s_7_15, -1, 1, 0}, +/* 16 */ { 4, s_7_16, 15, 1, 0}, +/* 17 */ { 5, s_7_17, 16, 1, 0} +}; + +static const symbol s_8_0[1] = { 'e' }; +static const symbol s_8_1[1] = { 'l' }; + +static const struct among a_8[2] = +{ +/* 0 */ { 1, s_8_0, -1, 1, 0}, +/* 1 */ { 1, s_8_1, -1, 2, 0} +}; + +static const symbol s_9_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; +static const symbol s_9_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; +static const symbol s_9_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; +static const symbol s_9_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; +static const symbol s_9_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; +static const symbol s_9_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; +static const symbol s_9_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; +static const symbol s_9_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; + +static const struct among a_9[8] = +{ +/* 0 */ { 7, s_9_0, -1, -1, 0}, +/* 1 */ { 7, s_9_1, -1, -1, 0}, +/* 2 */ { 6, s_9_2, -1, -1, 0}, +/* 3 */ { 7, s_9_3, -1, -1, 0}, +/* 4 */ { 6, s_9_4, -1, -1, 0}, +/* 5 */ { 7, s_9_5, -1, -1, 0}, +/* 6 */ { 7, s_9_6, -1, -1, 0}, +/* 7 */ { 6, s_9_7, -1, -1, 0} +}; + +static const symbol s_10_0[5] = { 'a', 'n', 'd', 'e', 's' }; +static const symbol s_10_1[5] = { 'a', 't', 'l', 'a', 's' }; +static const symbol s_10_2[4] = { 'b', 'i', 'a', 's' }; +static const symbol s_10_3[6] = { 'c', 'o', 's', 'm', 'o', 's' }; +static const symbol s_10_4[5] = { 'd', 'y', 'i', 'n', 'g' }; +static const symbol s_10_5[5] = { 'e', 'a', 'r', 'l', 'y' }; +static const symbol s_10_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; +static const symbol s_10_7[4] = { 'h', 'o', 'w', 'e' }; +static const symbol s_10_8[4] = { 'i', 'd', 'l', 'y' }; +static const symbol s_10_9[5] = { 'l', 'y', 'i', 'n', 'g' }; +static const symbol s_10_10[4] = { 'n', 'e', 'w', 's' }; +static const symbol s_10_11[4] = { 'o', 'n', 'l', 'y' }; +static const symbol s_10_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; +static const symbol s_10_13[5] = { 's', 'k', 'i', 'e', 's' }; +static const symbol s_10_14[4] = { 's', 'k', 'i', 's' }; +static const symbol s_10_15[3] = { 's', 'k', 'y' }; +static const symbol s_10_16[5] = { 't', 'y', 'i', 'n', 'g' }; +static const symbol s_10_17[4] = { 'u', 'g', 'l', 'y' }; + +static const struct among a_10[18] = +{ +/* 0 */ { 5, s_10_0, -1, -1, 0}, +/* 1 */ { 5, s_10_1, -1, -1, 0}, +/* 2 */ { 4, s_10_2, -1, -1, 0}, +/* 3 */ { 6, s_10_3, -1, -1, 0}, +/* 4 */ { 5, s_10_4, -1, 3, 0}, +/* 5 */ { 5, s_10_5, -1, 9, 0}, +/* 6 */ { 6, s_10_6, -1, 7, 0}, +/* 7 */ { 4, s_10_7, -1, -1, 0}, +/* 8 */ { 4, s_10_8, -1, 6, 0}, +/* 9 */ { 5, s_10_9, -1, 4, 0}, +/* 10 */ { 4, s_10_10, -1, -1, 0}, +/* 11 */ { 4, s_10_11, -1, 10, 0}, +/* 12 */ { 6, s_10_12, -1, 11, 0}, +/* 13 */ { 5, s_10_13, -1, 2, 0}, +/* 14 */ { 4, s_10_14, -1, 1, 0}, +/* 15 */ { 3, s_10_15, -1, -1, 0}, +/* 16 */ { 5, s_10_16, -1, 5, 0}, +/* 17 */ { 4, s_10_17, -1, 8, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1 }; + +static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; + +static const unsigned char g_valid_LI[] = { 55, 141, 2 }; + +static const symbol s_0[] = { '\'' }; +static const symbol s_1[] = { 'y' }; +static const symbol s_2[] = { 'Y' }; +static const symbol s_3[] = { 'y' }; +static const symbol s_4[] = { 'Y' }; +static const symbol s_5[] = { 's', 's' }; +static const symbol s_6[] = { 'i' }; +static const symbol s_7[] = { 'i', 'e' }; +static const symbol s_8[] = { 'e', 'e' }; +static const symbol s_9[] = { 'e' }; +static const symbol s_10[] = { 'e' }; +static const symbol s_11[] = { 'y' }; +static const symbol s_12[] = { 'Y' }; +static const symbol s_13[] = { 'i' }; +static const symbol s_14[] = { 't', 'i', 'o', 'n' }; +static const symbol s_15[] = { 'e', 'n', 'c', 'e' }; +static const symbol s_16[] = { 'a', 'n', 'c', 'e' }; +static const symbol s_17[] = { 'a', 'b', 'l', 'e' }; +static const symbol s_18[] = { 'e', 'n', 't' }; +static const symbol s_19[] = { 'i', 'z', 'e' }; +static const symbol s_20[] = { 'a', 't', 'e' }; +static const symbol s_21[] = { 'a', 'l' }; +static const symbol s_22[] = { 'f', 'u', 'l' }; +static const symbol s_23[] = { 'o', 'u', 's' }; +static const symbol s_24[] = { 'i', 'v', 'e' }; +static const symbol s_25[] = { 'b', 'l', 'e' }; +static const symbol s_26[] = { 'l' }; +static const symbol s_27[] = { 'o', 'g' }; +static const symbol s_28[] = { 'f', 'u', 'l' }; +static const symbol s_29[] = { 'l', 'e', 's', 's' }; +static const symbol s_30[] = { 't', 'i', 'o', 'n' }; +static const symbol s_31[] = { 'a', 't', 'e' }; +static const symbol s_32[] = { 'a', 'l' }; +static const symbol s_33[] = { 'i', 'c' }; +static const symbol s_34[] = { 's' }; +static const symbol s_35[] = { 't' }; +static const symbol s_36[] = { 'l' }; +static const symbol s_37[] = { 's', 'k', 'i' }; +static const symbol s_38[] = { 's', 'k', 'y' }; +static const symbol s_39[] = { 'd', 'i', 'e' }; +static const symbol s_40[] = { 'l', 'i', 'e' }; +static const symbol s_41[] = { 't', 'i', 'e' }; +static const symbol s_42[] = { 'i', 'd', 'l' }; +static const symbol s_43[] = { 'g', 'e', 'n', 't', 'l' }; +static const symbol s_44[] = { 'u', 'g', 'l', 'i' }; +static const symbol s_45[] = { 'e', 'a', 'r', 'l', 'i' }; +static const symbol s_46[] = { 'o', 'n', 'l', 'i' }; +static const symbol s_47[] = { 's', 'i', 'n', 'g', 'l' }; +static const symbol s_48[] = { 'Y' }; +static const symbol s_49[] = { 'y' }; + +static int r_prelude(struct SN_env * z) { + z->B[0] = 0; /* unset Y_found, line 26 */ + { int c1 = z->c; /* do, line 27 */ + z->bra = z->c; /* [, line 27 */ + if (!(eq_s(z, 1, s_0))) goto lab0; + z->ket = z->c; /* ], line 27 */ + { int ret = slice_del(z); /* delete, line 27 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 28 */ + z->bra = z->c; /* [, line 28 */ + if (!(eq_s(z, 1, s_1))) goto lab1; + z->ket = z->c; /* ], line 28 */ + { int ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 28 */ + lab1: + z->c = c2; + } + { int c3 = z->c; /* do, line 29 */ + while(1) { /* repeat, line 29 */ + int c4 = z->c; + while(1) { /* goto, line 29 */ + int c5 = z->c; + if (in_grouping(z, g_v, 97, 121, 0)) goto lab4; + z->bra = z->c; /* [, line 29 */ + if (!(eq_s(z, 1, s_3))) goto lab4; + z->ket = z->c; /* ], line 29 */ + z->c = c5; + break; + lab4: + z->c = c5; + if (z->c >= z->l) goto lab3; + z->c++; /* goto, line 29 */ + } + { int ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 29 */ + continue; + lab3: + z->c = c4; + break; + } + z->c = c3; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c1 = z->c; /* do, line 35 */ + { int c2 = z->c; /* or, line 41 */ + if (z->c + 4 >= z->l || z->p[z->c + 4] >> 5 != 3 || !((2375680 >> (z->p[z->c + 4] & 0x1f)) & 1)) goto lab2; + if (!(find_among(z, a_0, 3))) goto lab2; /* among, line 36 */ + goto lab1; + lab2: + z->c = c2; + { /* gopast */ /* grouping v, line 41 */ + int ret = out_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 41 */ + int ret = in_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + } + lab1: + z->I[0] = z->c; /* setmark p1, line 42 */ + { /* gopast */ /* grouping v, line 43 */ + int ret = out_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 43 */ + int ret = in_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 43 */ + lab0: + z->c = c1; + } + return 1; +} + +static int r_shortv(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 51 */ + if (out_grouping_b(z, g_v_WXY, 89, 121, 0)) goto lab1; + if (in_grouping_b(z, g_v, 97, 121, 0)) goto lab1; + if (out_grouping_b(z, g_v, 97, 121, 0)) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; + if (in_grouping_b(z, g_v, 97, 121, 0)) return 0; + if (z->c > z->lb) return 0; /* atlimit, line 52 */ + } +lab0: + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_Step_1a(struct SN_env * z) { + int among_var; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 59 */ + z->ket = z->c; /* [, line 60 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 39 && z->p[z->c - 1] != 115)) { z->c = z->l - m_keep; goto lab0; } + among_var = find_among_b(z, a_1, 3); /* substring, line 60 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 60 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab0; } + case 1: + { int ret = slice_del(z); /* delete, line 62 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + z->ket = z->c; /* [, line 65 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 115)) return 0; + among_var = find_among_b(z, a_2, 6); /* substring, line 65 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 65 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 2, s_5); /* <-, line 66 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 68 */ + { int ret = z->c - 2; + if (z->lb > ret || ret > z->l) goto lab2; + z->c = ret; /* hop, line 68 */ + } + { int ret = slice_from_s(z, 1, s_6); /* <-, line 68 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = z->l - m1; + { int ret = slice_from_s(z, 2, s_7); /* <-, line 68 */ + if (ret < 0) return ret; + } + } + lab1: + break; + case 3: + if (z->c <= z->lb) return 0; + z->c--; /* next, line 69 */ + { /* gopast */ /* grouping v, line 69 */ + int ret = out_grouping_b(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + { int ret = slice_del(z); /* delete, line 69 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_1b(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 75 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33554576 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_4, 6); /* substring, line 75 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 75 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 77 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 2, s_8); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m_test = z->l - z->c; /* test, line 80 */ + { /* gopast */ /* grouping v, line 80 */ + int ret = out_grouping_b(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 80 */ + if (ret < 0) return ret; + } + { int m_test = z->l - z->c; /* test, line 81 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else + among_var = find_among_b(z, a_3, 13); /* substring, line 81 */ + if (!(among_var)) return 0; + z->c = z->l - m_test; + } + switch(among_var) { + case 0: return 0; + case 1: + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 83 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + case 2: + z->ket = z->c; /* [, line 86 */ + if (z->c <= z->lb) return 0; + z->c--; /* next, line 86 */ + z->bra = z->c; /* ], line 86 */ + { int ret = slice_del(z); /* delete, line 86 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c != z->I[0]) return 0; /* atmark, line 87 */ + { int m_test = z->l - z->c; /* test, line 87 */ + { int ret = r_shortv(z); + if (ret == 0) return 0; /* call shortv, line 87 */ + if (ret < 0) return ret; + } + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 87 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + } + break; + } + return 1; +} + +static int r_Step_1c(struct SN_env * z) { + z->ket = z->c; /* [, line 94 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 94 */ + if (!(eq_s_b(z, 1, s_11))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_12))) return 0; + } +lab0: + z->bra = z->c; /* ], line 94 */ + if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; + { int m2 = z->l - z->c; (void)m2; /* not, line 95 */ + if (z->c > z->lb) goto lab2; /* atlimit, line 95 */ + return 0; + lab2: + z->c = z->l - m2; + } + { int ret = slice_from_s(z, 1, s_13); /* <-, line 96 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_Step_2(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 100 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 24); /* substring, line 100 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 100 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 100 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_14); /* <-, line 101 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_15); /* <-, line 102 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 4, s_16); /* <-, line 103 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 4, s_17); /* <-, line 104 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_18); /* <-, line 105 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 3, s_19); /* <-, line 107 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 3, s_20); /* <-, line 109 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 2, s_21); /* <-, line 111 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_from_s(z, 3, s_22); /* <-, line 112 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 3, s_23); /* <-, line 114 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 3, s_24); /* <-, line 116 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_from_s(z, 3, s_25); /* <-, line 118 */ + if (ret < 0) return ret; + } + break; + case 13: + if (!(eq_s_b(z, 1, s_26))) return 0; + { int ret = slice_from_s(z, 2, s_27); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_from_s(z, 3, s_28); /* <-, line 120 */ + if (ret < 0) return ret; + } + break; + case 15: + { int ret = slice_from_s(z, 4, s_29); /* <-, line 121 */ + if (ret < 0) return ret; + } + break; + case 16: + if (in_grouping_b(z, g_valid_LI, 99, 116, 0)) return 0; + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_3(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 127 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_6, 9); /* substring, line 127 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 127 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 127 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_30); /* <-, line 128 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_31); /* <-, line 129 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_32); /* <-, line 130 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 2, s_33); /* <-, line 132 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 136 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 136 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_4(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 141 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1864232 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_7, 18); /* substring, line 141 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 141 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 141 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 144 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ + if (!(eq_s_b(z, 1, s_34))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_35))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 145 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_5(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 150 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) return 0; + among_var = find_among_b(z, a_8, 2); /* substring, line 150 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 150 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 151 */ + { int ret = r_R2(z); + if (ret == 0) goto lab1; /* call R2, line 151 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 151 */ + if (ret < 0) return ret; + } + { int m2 = z->l - z->c; (void)m2; /* not, line 151 */ + { int ret = r_shortv(z); + if (ret == 0) goto lab2; /* call shortv, line 151 */ + if (ret < 0) return ret; + } + return 0; + lab2: + z->c = z->l - m2; + } + } + lab0: + { int ret = slice_del(z); /* delete, line 151 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 152 */ + if (ret < 0) return ret; + } + if (!(eq_s_b(z, 1, s_36))) return 0; + { int ret = slice_del(z); /* delete, line 152 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_exception2(struct SN_env * z) { + z->ket = z->c; /* [, line 158 */ + if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; + if (!(find_among_b(z, a_9, 8))) return 0; /* substring, line 158 */ + z->bra = z->c; /* ], line 158 */ + if (z->c > z->lb) return 0; /* atlimit, line 158 */ + return 1; +} + +static int r_exception1(struct SN_env * z) { + int among_var; + z->bra = z->c; /* [, line 170 */ + if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((42750482 >> (z->p[z->c + 2] & 0x1f)) & 1)) return 0; + among_var = find_among(z, a_10, 18); /* substring, line 170 */ + if (!(among_var)) return 0; + z->ket = z->c; /* ], line 170 */ + if (z->c < z->l) return 0; /* atlimit, line 170 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 3, s_37); /* <-, line 174 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_38); /* <-, line 175 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 3, s_39); /* <-, line 176 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 3, s_40); /* <-, line 177 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_41); /* <-, line 178 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 3, s_42); /* <-, line 182 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 5, s_43); /* <-, line 183 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 4, s_44); /* <-, line 184 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_from_s(z, 5, s_45); /* <-, line 185 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 4, s_46); /* <-, line 186 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 5, s_47); /* <-, line 187 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + if (!(z->B[0])) return 0; /* Boolean test Y_found, line 203 */ + while(1) { /* repeat, line 203 */ + int c1 = z->c; + while(1) { /* goto, line 203 */ + int c2 = z->c; + z->bra = z->c; /* [, line 203 */ + if (!(eq_s(z, 1, s_48))) goto lab1; + z->ket = z->c; /* ], line 203 */ + z->c = c2; + break; + lab1: + z->c = c2; + if (z->c >= z->l) goto lab0; + z->c++; /* goto, line 203 */ + } + { int ret = slice_from_s(z, 1, s_49); /* <-, line 203 */ + if (ret < 0) return ret; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +extern int english_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* or, line 207 */ + { int ret = r_exception1(z); + if (ret == 0) goto lab1; /* call exception1, line 207 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = c1; + { int c2 = z->c; /* not, line 208 */ + { int ret = z->c + 3; + if (0 > ret || ret > z->l) goto lab3; + z->c = ret; /* hop, line 208 */ + } + goto lab2; + lab3: + z->c = c2; + } + goto lab0; + lab2: + z->c = c1; + { int c3 = z->c; /* do, line 209 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab4; /* call prelude, line 209 */ + if (ret < 0) return ret; + } + lab4: + z->c = c3; + } + { int c4 = z->c; /* do, line 210 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab5; /* call mark_regions, line 210 */ + if (ret < 0) return ret; + } + lab5: + z->c = c4; + } + z->lb = z->c; z->c = z->l; /* backwards, line 211 */ + + { int m5 = z->l - z->c; (void)m5; /* do, line 213 */ + { int ret = r_Step_1a(z); + if (ret == 0) goto lab6; /* call Step_1a, line 213 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m5; + } + { int m6 = z->l - z->c; (void)m6; /* or, line 215 */ + { int ret = r_exception2(z); + if (ret == 0) goto lab8; /* call exception2, line 215 */ + if (ret < 0) return ret; + } + goto lab7; + lab8: + z->c = z->l - m6; + { int m7 = z->l - z->c; (void)m7; /* do, line 217 */ + { int ret = r_Step_1b(z); + if (ret == 0) goto lab9; /* call Step_1b, line 217 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m7; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 218 */ + { int ret = r_Step_1c(z); + if (ret == 0) goto lab10; /* call Step_1c, line 218 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 220 */ + { int ret = r_Step_2(z); + if (ret == 0) goto lab11; /* call Step_2, line 220 */ + if (ret < 0) return ret; + } + lab11: + z->c = z->l - m9; + } + { int m10 = z->l - z->c; (void)m10; /* do, line 221 */ + { int ret = r_Step_3(z); + if (ret == 0) goto lab12; /* call Step_3, line 221 */ + if (ret < 0) return ret; + } + lab12: + z->c = z->l - m10; + } + { int m11 = z->l - z->c; (void)m11; /* do, line 222 */ + { int ret = r_Step_4(z); + if (ret == 0) goto lab13; /* call Step_4, line 222 */ + if (ret < 0) return ret; + } + lab13: + z->c = z->l - m11; + } + { int m12 = z->l - z->c; (void)m12; /* do, line 224 */ + { int ret = r_Step_5(z); + if (ret == 0) goto lab14; /* call Step_5, line 224 */ + if (ret < 0) return ret; + } + lab14: + z->c = z->l - m12; + } + } + lab7: + z->c = z->lb; + { int c13 = z->c; /* do, line 227 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab15; /* call postlude, line 227 */ + if (ret < 0) return ret; + } + lab15: + z->c = c13; + } + } +lab0: + return 1; +} + +extern struct SN_env * english_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } + +extern void english_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_finnish.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_finnish.c new file mode 100644 index 00000000000..9621771d282 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_finnish.c @@ -0,0 +1,762 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int finnish_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_tidy(struct SN_env * z); +static int r_other_endings(struct SN_env * z); +static int r_t_plural(struct SN_env * z); +static int r_i_plural(struct SN_env * z); +static int r_case_ending(struct SN_env * z); +static int r_VI(struct SN_env * z); +static int r_LONG(struct SN_env * z); +static int r_possessive(struct SN_env * z); +static int r_particle_etc(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * finnish_ISO_8859_1_create_env(void); +extern void finnish_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[2] = { 'p', 'a' }; +static const symbol s_0_1[3] = { 's', 't', 'i' }; +static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' }; +static const symbol s_0_3[3] = { 'h', 'a', 'n' }; +static const symbol s_0_4[3] = { 'k', 'i', 'n' }; +static const symbol s_0_5[3] = { 'h', 0xE4, 'n' }; +static const symbol s_0_6[4] = { 'k', 0xE4, 0xE4, 'n' }; +static const symbol s_0_7[2] = { 'k', 'o' }; +static const symbol s_0_8[2] = { 'p', 0xE4 }; +static const symbol s_0_9[2] = { 'k', 0xF6 }; + +static const struct among a_0[10] = +{ +/* 0 */ { 2, s_0_0, -1, 1, 0}, +/* 1 */ { 3, s_0_1, -1, 2, 0}, +/* 2 */ { 4, s_0_2, -1, 1, 0}, +/* 3 */ { 3, s_0_3, -1, 1, 0}, +/* 4 */ { 3, s_0_4, -1, 1, 0}, +/* 5 */ { 3, s_0_5, -1, 1, 0}, +/* 6 */ { 4, s_0_6, -1, 1, 0}, +/* 7 */ { 2, s_0_7, -1, 1, 0}, +/* 8 */ { 2, s_0_8, -1, 1, 0}, +/* 9 */ { 2, s_0_9, -1, 1, 0} +}; + +static const symbol s_1_0[3] = { 'l', 'l', 'a' }; +static const symbol s_1_1[2] = { 'n', 'a' }; +static const symbol s_1_2[3] = { 's', 's', 'a' }; +static const symbol s_1_3[2] = { 't', 'a' }; +static const symbol s_1_4[3] = { 'l', 't', 'a' }; +static const symbol s_1_5[3] = { 's', 't', 'a' }; + +static const struct among a_1[6] = +{ +/* 0 */ { 3, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0}, +/* 2 */ { 3, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0}, +/* 4 */ { 3, s_1_4, 3, -1, 0}, +/* 5 */ { 3, s_1_5, 3, -1, 0} +}; + +static const symbol s_2_0[3] = { 'l', 'l', 0xE4 }; +static const symbol s_2_1[2] = { 'n', 0xE4 }; +static const symbol s_2_2[3] = { 's', 's', 0xE4 }; +static const symbol s_2_3[2] = { 't', 0xE4 }; +static const symbol s_2_4[3] = { 'l', 't', 0xE4 }; +static const symbol s_2_5[3] = { 's', 't', 0xE4 }; + +static const struct among a_2[6] = +{ +/* 0 */ { 3, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 3, s_2_2, -1, -1, 0}, +/* 3 */ { 2, s_2_3, -1, -1, 0}, +/* 4 */ { 3, s_2_4, 3, -1, 0}, +/* 5 */ { 3, s_2_5, 3, -1, 0} +}; + +static const symbol s_3_0[3] = { 'l', 'l', 'e' }; +static const symbol s_3_1[3] = { 'i', 'n', 'e' }; + +static const struct among a_3[2] = +{ +/* 0 */ { 3, s_3_0, -1, -1, 0}, +/* 1 */ { 3, s_3_1, -1, -1, 0} +}; + +static const symbol s_4_0[3] = { 'n', 's', 'a' }; +static const symbol s_4_1[3] = { 'm', 'm', 'e' }; +static const symbol s_4_2[3] = { 'n', 'n', 'e' }; +static const symbol s_4_3[2] = { 'n', 'i' }; +static const symbol s_4_4[2] = { 's', 'i' }; +static const symbol s_4_5[2] = { 'a', 'n' }; +static const symbol s_4_6[2] = { 'e', 'n' }; +static const symbol s_4_7[2] = { 0xE4, 'n' }; +static const symbol s_4_8[3] = { 'n', 's', 0xE4 }; + +static const struct among a_4[9] = +{ +/* 0 */ { 3, s_4_0, -1, 3, 0}, +/* 1 */ { 3, s_4_1, -1, 3, 0}, +/* 2 */ { 3, s_4_2, -1, 3, 0}, +/* 3 */ { 2, s_4_3, -1, 2, 0}, +/* 4 */ { 2, s_4_4, -1, 1, 0}, +/* 5 */ { 2, s_4_5, -1, 4, 0}, +/* 6 */ { 2, s_4_6, -1, 6, 0}, +/* 7 */ { 2, s_4_7, -1, 5, 0}, +/* 8 */ { 3, s_4_8, -1, 3, 0} +}; + +static const symbol s_5_0[2] = { 'a', 'a' }; +static const symbol s_5_1[2] = { 'e', 'e' }; +static const symbol s_5_2[2] = { 'i', 'i' }; +static const symbol s_5_3[2] = { 'o', 'o' }; +static const symbol s_5_4[2] = { 'u', 'u' }; +static const symbol s_5_5[2] = { 0xE4, 0xE4 }; +static const symbol s_5_6[2] = { 0xF6, 0xF6 }; + +static const struct among a_5[7] = +{ +/* 0 */ { 2, s_5_0, -1, -1, 0}, +/* 1 */ { 2, s_5_1, -1, -1, 0}, +/* 2 */ { 2, s_5_2, -1, -1, 0}, +/* 3 */ { 2, s_5_3, -1, -1, 0}, +/* 4 */ { 2, s_5_4, -1, -1, 0}, +/* 5 */ { 2, s_5_5, -1, -1, 0}, +/* 6 */ { 2, s_5_6, -1, -1, 0} +}; + +static const symbol s_6_0[1] = { 'a' }; +static const symbol s_6_1[3] = { 'l', 'l', 'a' }; +static const symbol s_6_2[2] = { 'n', 'a' }; +static const symbol s_6_3[3] = { 's', 's', 'a' }; +static const symbol s_6_4[2] = { 't', 'a' }; +static const symbol s_6_5[3] = { 'l', 't', 'a' }; +static const symbol s_6_6[3] = { 's', 't', 'a' }; +static const symbol s_6_7[3] = { 't', 't', 'a' }; +static const symbol s_6_8[3] = { 'l', 'l', 'e' }; +static const symbol s_6_9[3] = { 'i', 'n', 'e' }; +static const symbol s_6_10[3] = { 'k', 's', 'i' }; +static const symbol s_6_11[1] = { 'n' }; +static const symbol s_6_12[3] = { 'h', 'a', 'n' }; +static const symbol s_6_13[3] = { 'd', 'e', 'n' }; +static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' }; +static const symbol s_6_15[3] = { 'h', 'e', 'n' }; +static const symbol s_6_16[4] = { 't', 't', 'e', 'n' }; +static const symbol s_6_17[3] = { 'h', 'i', 'n' }; +static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' }; +static const symbol s_6_19[3] = { 'h', 'o', 'n' }; +static const symbol s_6_20[3] = { 'h', 0xE4, 'n' }; +static const symbol s_6_21[3] = { 'h', 0xF6, 'n' }; +static const symbol s_6_22[1] = { 0xE4 }; +static const symbol s_6_23[3] = { 'l', 'l', 0xE4 }; +static const symbol s_6_24[2] = { 'n', 0xE4 }; +static const symbol s_6_25[3] = { 's', 's', 0xE4 }; +static const symbol s_6_26[2] = { 't', 0xE4 }; +static const symbol s_6_27[3] = { 'l', 't', 0xE4 }; +static const symbol s_6_28[3] = { 's', 't', 0xE4 }; +static const symbol s_6_29[3] = { 't', 't', 0xE4 }; + +static const struct among a_6[30] = +{ +/* 0 */ { 1, s_6_0, -1, 8, 0}, +/* 1 */ { 3, s_6_1, 0, -1, 0}, +/* 2 */ { 2, s_6_2, 0, -1, 0}, +/* 3 */ { 3, s_6_3, 0, -1, 0}, +/* 4 */ { 2, s_6_4, 0, -1, 0}, +/* 5 */ { 3, s_6_5, 4, -1, 0}, +/* 6 */ { 3, s_6_6, 4, -1, 0}, +/* 7 */ { 3, s_6_7, 4, 9, 0}, +/* 8 */ { 3, s_6_8, -1, -1, 0}, +/* 9 */ { 3, s_6_9, -1, -1, 0}, +/* 10 */ { 3, s_6_10, -1, -1, 0}, +/* 11 */ { 1, s_6_11, -1, 7, 0}, +/* 12 */ { 3, s_6_12, 11, 1, 0}, +/* 13 */ { 3, s_6_13, 11, -1, r_VI}, +/* 14 */ { 4, s_6_14, 11, -1, r_LONG}, +/* 15 */ { 3, s_6_15, 11, 2, 0}, +/* 16 */ { 4, s_6_16, 11, -1, r_VI}, +/* 17 */ { 3, s_6_17, 11, 3, 0}, +/* 18 */ { 4, s_6_18, 11, -1, r_VI}, +/* 19 */ { 3, s_6_19, 11, 4, 0}, +/* 20 */ { 3, s_6_20, 11, 5, 0}, +/* 21 */ { 3, s_6_21, 11, 6, 0}, +/* 22 */ { 1, s_6_22, -1, 8, 0}, +/* 23 */ { 3, s_6_23, 22, -1, 0}, +/* 24 */ { 2, s_6_24, 22, -1, 0}, +/* 25 */ { 3, s_6_25, 22, -1, 0}, +/* 26 */ { 2, s_6_26, 22, -1, 0}, +/* 27 */ { 3, s_6_27, 26, -1, 0}, +/* 28 */ { 3, s_6_28, 26, -1, 0}, +/* 29 */ { 3, s_6_29, 26, 9, 0} +}; + +static const symbol s_7_0[3] = { 'e', 'j', 'a' }; +static const symbol s_7_1[3] = { 'm', 'm', 'a' }; +static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' }; +static const symbol s_7_3[3] = { 'm', 'p', 'a' }; +static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' }; +static const symbol s_7_5[3] = { 'm', 'm', 'i' }; +static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' }; +static const symbol s_7_7[3] = { 'm', 'p', 'i' }; +static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' }; +static const symbol s_7_9[3] = { 'e', 'j', 0xE4 }; +static const symbol s_7_10[3] = { 'm', 'm', 0xE4 }; +static const symbol s_7_11[4] = { 'i', 'm', 'm', 0xE4 }; +static const symbol s_7_12[3] = { 'm', 'p', 0xE4 }; +static const symbol s_7_13[4] = { 'i', 'm', 'p', 0xE4 }; + +static const struct among a_7[14] = +{ +/* 0 */ { 3, s_7_0, -1, -1, 0}, +/* 1 */ { 3, s_7_1, -1, 1, 0}, +/* 2 */ { 4, s_7_2, 1, -1, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 4, s_7_4, 3, -1, 0}, +/* 5 */ { 3, s_7_5, -1, 1, 0}, +/* 6 */ { 4, s_7_6, 5, -1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 4, s_7_8, 7, -1, 0}, +/* 9 */ { 3, s_7_9, -1, -1, 0}, +/* 10 */ { 3, s_7_10, -1, 1, 0}, +/* 11 */ { 4, s_7_11, 10, -1, 0}, +/* 12 */ { 3, s_7_12, -1, 1, 0}, +/* 13 */ { 4, s_7_13, 12, -1, 0} +}; + +static const symbol s_8_0[1] = { 'i' }; +static const symbol s_8_1[1] = { 'j' }; + +static const struct among a_8[2] = +{ +/* 0 */ { 1, s_8_0, -1, -1, 0}, +/* 1 */ { 1, s_8_1, -1, -1, 0} +}; + +static const symbol s_9_0[3] = { 'm', 'm', 'a' }; +static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' }; + +static const struct among a_9[2] = +{ +/* 0 */ { 3, s_9_0, -1, 1, 0}, +/* 1 */ { 4, s_9_1, 0, -1, 0} +}; + +static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; + +static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + +static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + +static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + +static const symbol s_0[] = { 'k' }; +static const symbol s_1[] = { 'k', 's', 'e' }; +static const symbol s_2[] = { 'k', 's', 'i' }; +static const symbol s_3[] = { 'i' }; +static const symbol s_4[] = { 'a' }; +static const symbol s_5[] = { 'e' }; +static const symbol s_6[] = { 'i' }; +static const symbol s_7[] = { 'o' }; +static const symbol s_8[] = { 0xE4 }; +static const symbol s_9[] = { 0xF6 }; +static const symbol s_10[] = { 'i', 'e' }; +static const symbol s_11[] = { 'e' }; +static const symbol s_12[] = { 'p', 'o' }; +static const symbol s_13[] = { 't' }; +static const symbol s_14[] = { 'p', 'o' }; +static const symbol s_15[] = { 'j' }; +static const symbol s_16[] = { 'o' }; +static const symbol s_17[] = { 'u' }; +static const symbol s_18[] = { 'o' }; +static const symbol s_19[] = { 'j' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 46 */ + { /* gopast */ /* non V1, line 46 */ + int ret = in_grouping(z, g_V1, 97, 246, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 46 */ + if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 47 */ + { /* gopast */ /* non V1, line 47 */ + int ret = in_grouping(z, g_V1, 97, 246, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 47 */ + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_particle_etc(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 55 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 55 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 55 */ + among_var = find_among_b(z, a_0, 10); /* substring, line 55 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 55 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + if (in_grouping_b(z, g_particle_end, 97, 246, 0)) return 0; + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 64 */ + if (ret < 0) return ret; + } + break; + } + { int ret = slice_del(z); /* delete, line 66 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_possessive(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 69 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 69 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 69 */ + among_var = find_among_b(z, a_4, 9); /* substring, line 69 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 69 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m2 = z->l - z->c; (void)m2; /* not, line 72 */ + if (!(eq_s_b(z, 1, s_0))) goto lab0; + return 0; + lab0: + z->c = z->l - m2; + } + { int ret = slice_del(z); /* delete, line 72 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 74 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 74 */ + if (!(eq_s_b(z, 3, s_1))) return 0; + z->bra = z->c; /* ], line 74 */ + { int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 78 */ + if (ret < 0) return ret; + } + break; + case 4: + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0; + if (!(find_among_b(z, a_1, 6))) return 0; /* among, line 81 */ + { int ret = slice_del(z); /* delete, line 81 */ + if (ret < 0) return ret; + } + break; + case 5: + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 228) return 0; + if (!(find_among_b(z, a_2, 6))) return 0; /* among, line 83 */ + { int ret = slice_del(z); /* delete, line 84 */ + if (ret < 0) return ret; + } + break; + case 6: + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0; + if (!(find_among_b(z, a_3, 2))) return 0; /* among, line 86 */ + { int ret = slice_del(z); /* delete, line 86 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_LONG(struct SN_env * z) { + if (!(find_among_b(z, a_5, 7))) return 0; /* among, line 91 */ + return 1; +} + +static int r_VI(struct SN_env * z) { + if (!(eq_s_b(z, 1, s_3))) return 0; + if (in_grouping_b(z, g_V2, 97, 246, 0)) return 0; + return 1; +} + +static int r_case_ending(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 96 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 96 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 96 */ + among_var = find_among_b(z, a_6, 30); /* substring, line 96 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 96 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + if (!(eq_s_b(z, 1, s_4))) return 0; + break; + case 2: + if (!(eq_s_b(z, 1, s_5))) return 0; + break; + case 3: + if (!(eq_s_b(z, 1, s_6))) return 0; + break; + case 4: + if (!(eq_s_b(z, 1, s_7))) return 0; + break; + case 5: + if (!(eq_s_b(z, 1, s_8))) return 0; + break; + case 6: + if (!(eq_s_b(z, 1, s_9))) return 0; + break; + case 7: + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ + { int m2 = z->l - z->c; (void)m2; /* and, line 113 */ + { int m3 = z->l - z->c; (void)m3; /* or, line 112 */ + { int ret = r_LONG(z); + if (ret == 0) goto lab2; /* call LONG, line 111 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = z->l - m3; + if (!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; } + } + lab1: + z->c = z->l - m2; + if (z->c <= z->lb) { z->c = z->l - m_keep; goto lab0; } + z->c--; /* next, line 113 */ + } + z->bra = z->c; /* ], line 113 */ + lab0: + ; + } + break; + case 8: + if (in_grouping_b(z, g_V1, 97, 246, 0)) return 0; + if (out_grouping_b(z, g_V1, 97, 246, 0)) return 0; + break; + case 9: + if (!(eq_s_b(z, 1, s_11))) return 0; + break; + } + { int ret = slice_del(z); /* delete, line 138 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set ending_removed, line 139 */ + return 1; +} + +static int r_other_endings(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 142 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[1]) return 0; + z->c = z->I[1]; /* tomark, line 142 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 142 */ + among_var = find_among_b(z, a_7, 14); /* substring, line 142 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 142 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ + if (!(eq_s_b(z, 2, s_12))) goto lab0; + return 0; + lab0: + z->c = z->l - m2; + } + break; + } + { int ret = slice_del(z); /* delete, line 151 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_i_plural(struct SN_env * z) { + { int mlimit; /* setlimit, line 154 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 154 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 154 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 106)) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_8, 2))) { z->lb = mlimit; return 0; } /* substring, line 154 */ + z->bra = z->c; /* ], line 154 */ + z->lb = mlimit; + } + { int ret = slice_del(z); /* delete, line 158 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_t_plural(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 161 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 161 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 162 */ + if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 162 */ + { int m_test = z->l - z->c; /* test, line 162 */ + if (in_grouping_b(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 163 */ + if (ret < 0) return ret; + } + z->lb = mlimit; + } + { int mlimit; /* setlimit, line 165 */ + int m2 = z->l - z->c; (void)m2; + if (z->c < z->I[1]) return 0; + z->c = z->I[1]; /* tomark, line 165 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m2; + z->ket = z->c; /* [, line 165 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_9, 2); /* substring, line 165 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 165 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m3 = z->l - z->c; (void)m3; /* not, line 167 */ + if (!(eq_s_b(z, 2, s_14))) goto lab0; + return 0; + lab0: + z->c = z->l - m3; + } + break; + } + { int ret = slice_del(z); /* delete, line 170 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_tidy(struct SN_env * z) { + { int mlimit; /* setlimit, line 173 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 173 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* do, line 174 */ + { int m3 = z->l - z->c; (void)m3; /* and, line 174 */ + { int ret = r_LONG(z); + if (ret == 0) goto lab0; /* call LONG, line 174 */ + if (ret < 0) return ret; + } + z->c = z->l - m3; + z->ket = z->c; /* [, line 174 */ + if (z->c <= z->lb) goto lab0; + z->c--; /* next, line 174 */ + z->bra = z->c; /* ], line 174 */ + { int ret = slice_del(z); /* delete, line 174 */ + if (ret < 0) return ret; + } + } + lab0: + z->c = z->l - m2; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 175 */ + z->ket = z->c; /* [, line 175 */ + if (in_grouping_b(z, g_AEI, 97, 228, 0)) goto lab1; + z->bra = z->c; /* ], line 175 */ + if (out_grouping_b(z, g_V1, 97, 246, 0)) goto lab1; + { int ret = slice_del(z); /* delete, line 175 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 176 */ + z->ket = z->c; /* [, line 176 */ + if (!(eq_s_b(z, 1, s_15))) goto lab2; + z->bra = z->c; /* ], line 176 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 176 */ + if (!(eq_s_b(z, 1, s_16))) goto lab4; + goto lab3; + lab4: + z->c = z->l - m6; + if (!(eq_s_b(z, 1, s_17))) goto lab2; + } + lab3: + { int ret = slice_del(z); /* delete, line 176 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m5; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 177 */ + z->ket = z->c; /* [, line 177 */ + if (!(eq_s_b(z, 1, s_18))) goto lab5; + z->bra = z->c; /* ], line 177 */ + if (!(eq_s_b(z, 1, s_19))) goto lab5; + { int ret = slice_del(z); /* delete, line 177 */ + if (ret < 0) return ret; + } + lab5: + z->c = z->l - m7; + } + z->lb = mlimit; + } + if (in_grouping_b(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */ + z->ket = z->c; /* [, line 179 */ + if (z->c <= z->lb) return 0; + z->c--; /* next, line 179 */ + z->bra = z->c; /* ], line 179 */ + z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */ + if (z->S[0] == 0) return -1; /* -> x, line 179 */ + if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */ + { int ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int finnish_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 185 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 185 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->B[0] = 0; /* unset ending_removed, line 186 */ + z->lb = z->c; z->c = z->l; /* backwards, line 187 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 188 */ + { int ret = r_particle_etc(z); + if (ret == 0) goto lab1; /* call particle_etc, line 188 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 189 */ + { int ret = r_possessive(z); + if (ret == 0) goto lab2; /* call possessive, line 189 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 190 */ + { int ret = r_case_ending(z); + if (ret == 0) goto lab3; /* call case_ending, line 190 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 191 */ + { int ret = r_other_endings(z); + if (ret == 0) goto lab4; /* call other_endings, line 191 */ + if (ret < 0) return ret; + } + lab4: + z->c = z->l - m5; + } + { int m6 = z->l - z->c; (void)m6; /* or, line 192 */ + if (!(z->B[0])) goto lab6; /* Boolean test ending_removed, line 192 */ + { int m7 = z->l - z->c; (void)m7; /* do, line 192 */ + { int ret = r_i_plural(z); + if (ret == 0) goto lab7; /* call i_plural, line 192 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m7; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int m8 = z->l - z->c; (void)m8; /* do, line 192 */ + { int ret = r_t_plural(z); + if (ret == 0) goto lab8; /* call t_plural, line 192 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m8; + } + } +lab5: + { int m9 = z->l - z->c; (void)m9; /* do, line 193 */ + { int ret = r_tidy(z); + if (ret == 0) goto lab9; /* call tidy, line 193 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m9; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * finnish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 1); } + +extern void finnish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_french.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_french.c new file mode 100644 index 00000000000..fc79c0a24dc --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_french.c @@ -0,0 +1,1246 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int french_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_un_accent(struct SN_env * z); +static int r_un_double(struct SN_env * z); +static int r_residual_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_i_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * french_ISO_8859_1_create_env(void); +extern void french_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[3] = { 'c', 'o', 'l' }; +static const symbol s_0_1[3] = { 'p', 'a', 'r' }; +static const symbol s_0_2[3] = { 't', 'a', 'p' }; + +static const struct among a_0[3] = +{ +/* 0 */ { 3, s_0_0, -1, -1, 0}, +/* 1 */ { 3, s_0_1, -1, -1, 0}, +/* 2 */ { 3, s_0_2, -1, -1, 0} +}; + +static const symbol s_1_1[1] = { 'I' }; +static const symbol s_1_2[1] = { 'U' }; +static const symbol s_1_3[1] = { 'Y' }; + +static const struct among a_1[4] = +{ +/* 0 */ { 0, 0, -1, 4, 0}, +/* 1 */ { 1, s_1_1, 0, 1, 0}, +/* 2 */ { 1, s_1_2, 0, 2, 0}, +/* 3 */ { 1, s_1_3, 0, 3, 0} +}; + +static const symbol s_2_0[3] = { 'i', 'q', 'U' }; +static const symbol s_2_1[3] = { 'a', 'b', 'l' }; +static const symbol s_2_2[3] = { 'I', 0xE8, 'r' }; +static const symbol s_2_3[3] = { 'i', 0xE8, 'r' }; +static const symbol s_2_4[3] = { 'e', 'u', 's' }; +static const symbol s_2_5[2] = { 'i', 'v' }; + +static const struct among a_2[6] = +{ +/* 0 */ { 3, s_2_0, -1, 3, 0}, +/* 1 */ { 3, s_2_1, -1, 3, 0}, +/* 2 */ { 3, s_2_2, -1, 4, 0}, +/* 3 */ { 3, s_2_3, -1, 4, 0}, +/* 4 */ { 3, s_2_4, -1, 2, 0}, +/* 5 */ { 2, s_2_5, -1, 1, 0} +}; + +static const symbol s_3_0[2] = { 'i', 'c' }; +static const symbol s_3_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_3_2[2] = { 'i', 'v' }; + +static const struct among a_3[3] = +{ +/* 0 */ { 2, s_3_0, -1, 2, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0}, +/* 2 */ { 2, s_3_2, -1, 3, 0} +}; + +static const symbol s_4_0[4] = { 'i', 'q', 'U', 'e' }; +static const symbol s_4_1[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; +static const symbol s_4_2[4] = { 'a', 'n', 'c', 'e' }; +static const symbol s_4_3[4] = { 'e', 'n', 'c', 'e' }; +static const symbol s_4_4[5] = { 'l', 'o', 'g', 'i', 'e' }; +static const symbol s_4_5[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_4_6[4] = { 'i', 's', 'm', 'e' }; +static const symbol s_4_7[4] = { 'e', 'u', 's', 'e' }; +static const symbol s_4_8[4] = { 'i', 's', 't', 'e' }; +static const symbol s_4_9[3] = { 'i', 'v', 'e' }; +static const symbol s_4_10[2] = { 'i', 'f' }; +static const symbol s_4_11[5] = { 'u', 's', 'i', 'o', 'n' }; +static const symbol s_4_12[5] = { 'a', 't', 'i', 'o', 'n' }; +static const symbol s_4_13[5] = { 'u', 't', 'i', 'o', 'n' }; +static const symbol s_4_14[5] = { 'a', 't', 'e', 'u', 'r' }; +static const symbol s_4_15[5] = { 'i', 'q', 'U', 'e', 's' }; +static const symbol s_4_16[7] = { 'a', 't', 'r', 'i', 'c', 'e', 's' }; +static const symbol s_4_17[5] = { 'a', 'n', 'c', 'e', 's' }; +static const symbol s_4_18[5] = { 'e', 'n', 'c', 'e', 's' }; +static const symbol s_4_19[6] = { 'l', 'o', 'g', 'i', 'e', 's' }; +static const symbol s_4_20[5] = { 'a', 'b', 'l', 'e', 's' }; +static const symbol s_4_21[5] = { 'i', 's', 'm', 'e', 's' }; +static const symbol s_4_22[5] = { 'e', 'u', 's', 'e', 's' }; +static const symbol s_4_23[5] = { 'i', 's', 't', 'e', 's' }; +static const symbol s_4_24[4] = { 'i', 'v', 'e', 's' }; +static const symbol s_4_25[3] = { 'i', 'f', 's' }; +static const symbol s_4_26[6] = { 'u', 's', 'i', 'o', 'n', 's' }; +static const symbol s_4_27[6] = { 'a', 't', 'i', 'o', 'n', 's' }; +static const symbol s_4_28[6] = { 'u', 't', 'i', 'o', 'n', 's' }; +static const symbol s_4_29[6] = { 'a', 't', 'e', 'u', 'r', 's' }; +static const symbol s_4_30[5] = { 'm', 'e', 'n', 't', 's' }; +static const symbol s_4_31[6] = { 'e', 'm', 'e', 'n', 't', 's' }; +static const symbol s_4_32[9] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't', 's' }; +static const symbol s_4_33[4] = { 'i', 't', 0xE9, 's' }; +static const symbol s_4_34[4] = { 'm', 'e', 'n', 't' }; +static const symbol s_4_35[5] = { 'e', 'm', 'e', 'n', 't' }; +static const symbol s_4_36[8] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't' }; +static const symbol s_4_37[6] = { 'a', 'm', 'm', 'e', 'n', 't' }; +static const symbol s_4_38[6] = { 'e', 'm', 'm', 'e', 'n', 't' }; +static const symbol s_4_39[3] = { 'a', 'u', 'x' }; +static const symbol s_4_40[4] = { 'e', 'a', 'u', 'x' }; +static const symbol s_4_41[3] = { 'e', 'u', 'x' }; +static const symbol s_4_42[3] = { 'i', 't', 0xE9 }; + +static const struct among a_4[43] = +{ +/* 0 */ { 4, s_4_0, -1, 1, 0}, +/* 1 */ { 6, s_4_1, -1, 2, 0}, +/* 2 */ { 4, s_4_2, -1, 1, 0}, +/* 3 */ { 4, s_4_3, -1, 5, 0}, +/* 4 */ { 5, s_4_4, -1, 3, 0}, +/* 5 */ { 4, s_4_5, -1, 1, 0}, +/* 6 */ { 4, s_4_6, -1, 1, 0}, +/* 7 */ { 4, s_4_7, -1, 11, 0}, +/* 8 */ { 4, s_4_8, -1, 1, 0}, +/* 9 */ { 3, s_4_9, -1, 8, 0}, +/* 10 */ { 2, s_4_10, -1, 8, 0}, +/* 11 */ { 5, s_4_11, -1, 4, 0}, +/* 12 */ { 5, s_4_12, -1, 2, 0}, +/* 13 */ { 5, s_4_13, -1, 4, 0}, +/* 14 */ { 5, s_4_14, -1, 2, 0}, +/* 15 */ { 5, s_4_15, -1, 1, 0}, +/* 16 */ { 7, s_4_16, -1, 2, 0}, +/* 17 */ { 5, s_4_17, -1, 1, 0}, +/* 18 */ { 5, s_4_18, -1, 5, 0}, +/* 19 */ { 6, s_4_19, -1, 3, 0}, +/* 20 */ { 5, s_4_20, -1, 1, 0}, +/* 21 */ { 5, s_4_21, -1, 1, 0}, +/* 22 */ { 5, s_4_22, -1, 11, 0}, +/* 23 */ { 5, s_4_23, -1, 1, 0}, +/* 24 */ { 4, s_4_24, -1, 8, 0}, +/* 25 */ { 3, s_4_25, -1, 8, 0}, +/* 26 */ { 6, s_4_26, -1, 4, 0}, +/* 27 */ { 6, s_4_27, -1, 2, 0}, +/* 28 */ { 6, s_4_28, -1, 4, 0}, +/* 29 */ { 6, s_4_29, -1, 2, 0}, +/* 30 */ { 5, s_4_30, -1, 15, 0}, +/* 31 */ { 6, s_4_31, 30, 6, 0}, +/* 32 */ { 9, s_4_32, 31, 12, 0}, +/* 33 */ { 4, s_4_33, -1, 7, 0}, +/* 34 */ { 4, s_4_34, -1, 15, 0}, +/* 35 */ { 5, s_4_35, 34, 6, 0}, +/* 36 */ { 8, s_4_36, 35, 12, 0}, +/* 37 */ { 6, s_4_37, 34, 13, 0}, +/* 38 */ { 6, s_4_38, 34, 14, 0}, +/* 39 */ { 3, s_4_39, -1, 10, 0}, +/* 40 */ { 4, s_4_40, 39, 9, 0}, +/* 41 */ { 3, s_4_41, -1, 1, 0}, +/* 42 */ { 3, s_4_42, -1, 7, 0} +}; + +static const symbol s_5_0[3] = { 'i', 'r', 'a' }; +static const symbol s_5_1[2] = { 'i', 'e' }; +static const symbol s_5_2[4] = { 'i', 's', 's', 'e' }; +static const symbol s_5_3[7] = { 'i', 's', 's', 'a', 'n', 't', 'e' }; +static const symbol s_5_4[1] = { 'i' }; +static const symbol s_5_5[4] = { 'i', 'r', 'a', 'i' }; +static const symbol s_5_6[2] = { 'i', 'r' }; +static const symbol s_5_7[4] = { 'i', 'r', 'a', 's' }; +static const symbol s_5_8[3] = { 'i', 'e', 's' }; +static const symbol s_5_9[4] = { 0xEE, 'm', 'e', 's' }; +static const symbol s_5_10[5] = { 'i', 's', 's', 'e', 's' }; +static const symbol s_5_11[8] = { 'i', 's', 's', 'a', 'n', 't', 'e', 's' }; +static const symbol s_5_12[4] = { 0xEE, 't', 'e', 's' }; +static const symbol s_5_13[2] = { 'i', 's' }; +static const symbol s_5_14[5] = { 'i', 'r', 'a', 'i', 's' }; +static const symbol s_5_15[6] = { 'i', 's', 's', 'a', 'i', 's' }; +static const symbol s_5_16[6] = { 'i', 'r', 'i', 'o', 'n', 's' }; +static const symbol s_5_17[7] = { 'i', 's', 's', 'i', 'o', 'n', 's' }; +static const symbol s_5_18[5] = { 'i', 'r', 'o', 'n', 's' }; +static const symbol s_5_19[6] = { 'i', 's', 's', 'o', 'n', 's' }; +static const symbol s_5_20[7] = { 'i', 's', 's', 'a', 'n', 't', 's' }; +static const symbol s_5_21[2] = { 'i', 't' }; +static const symbol s_5_22[5] = { 'i', 'r', 'a', 'i', 't' }; +static const symbol s_5_23[6] = { 'i', 's', 's', 'a', 'i', 't' }; +static const symbol s_5_24[6] = { 'i', 's', 's', 'a', 'n', 't' }; +static const symbol s_5_25[7] = { 'i', 'r', 'a', 'I', 'e', 'n', 't' }; +static const symbol s_5_26[8] = { 'i', 's', 's', 'a', 'I', 'e', 'n', 't' }; +static const symbol s_5_27[5] = { 'i', 'r', 'e', 'n', 't' }; +static const symbol s_5_28[6] = { 'i', 's', 's', 'e', 'n', 't' }; +static const symbol s_5_29[5] = { 'i', 'r', 'o', 'n', 't' }; +static const symbol s_5_30[2] = { 0xEE, 't' }; +static const symbol s_5_31[5] = { 'i', 'r', 'i', 'e', 'z' }; +static const symbol s_5_32[6] = { 'i', 's', 's', 'i', 'e', 'z' }; +static const symbol s_5_33[4] = { 'i', 'r', 'e', 'z' }; +static const symbol s_5_34[5] = { 'i', 's', 's', 'e', 'z' }; + +static const struct among a_5[35] = +{ +/* 0 */ { 3, s_5_0, -1, 1, 0}, +/* 1 */ { 2, s_5_1, -1, 1, 0}, +/* 2 */ { 4, s_5_2, -1, 1, 0}, +/* 3 */ { 7, s_5_3, -1, 1, 0}, +/* 4 */ { 1, s_5_4, -1, 1, 0}, +/* 5 */ { 4, s_5_5, 4, 1, 0}, +/* 6 */ { 2, s_5_6, -1, 1, 0}, +/* 7 */ { 4, s_5_7, -1, 1, 0}, +/* 8 */ { 3, s_5_8, -1, 1, 0}, +/* 9 */ { 4, s_5_9, -1, 1, 0}, +/* 10 */ { 5, s_5_10, -1, 1, 0}, +/* 11 */ { 8, s_5_11, -1, 1, 0}, +/* 12 */ { 4, s_5_12, -1, 1, 0}, +/* 13 */ { 2, s_5_13, -1, 1, 0}, +/* 14 */ { 5, s_5_14, 13, 1, 0}, +/* 15 */ { 6, s_5_15, 13, 1, 0}, +/* 16 */ { 6, s_5_16, -1, 1, 0}, +/* 17 */ { 7, s_5_17, -1, 1, 0}, +/* 18 */ { 5, s_5_18, -1, 1, 0}, +/* 19 */ { 6, s_5_19, -1, 1, 0}, +/* 20 */ { 7, s_5_20, -1, 1, 0}, +/* 21 */ { 2, s_5_21, -1, 1, 0}, +/* 22 */ { 5, s_5_22, 21, 1, 0}, +/* 23 */ { 6, s_5_23, 21, 1, 0}, +/* 24 */ { 6, s_5_24, -1, 1, 0}, +/* 25 */ { 7, s_5_25, -1, 1, 0}, +/* 26 */ { 8, s_5_26, -1, 1, 0}, +/* 27 */ { 5, s_5_27, -1, 1, 0}, +/* 28 */ { 6, s_5_28, -1, 1, 0}, +/* 29 */ { 5, s_5_29, -1, 1, 0}, +/* 30 */ { 2, s_5_30, -1, 1, 0}, +/* 31 */ { 5, s_5_31, -1, 1, 0}, +/* 32 */ { 6, s_5_32, -1, 1, 0}, +/* 33 */ { 4, s_5_33, -1, 1, 0}, +/* 34 */ { 5, s_5_34, -1, 1, 0} +}; + +static const symbol s_6_0[1] = { 'a' }; +static const symbol s_6_1[3] = { 'e', 'r', 'a' }; +static const symbol s_6_2[4] = { 'a', 's', 's', 'e' }; +static const symbol s_6_3[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_6_4[2] = { 0xE9, 'e' }; +static const symbol s_6_5[2] = { 'a', 'i' }; +static const symbol s_6_6[4] = { 'e', 'r', 'a', 'i' }; +static const symbol s_6_7[2] = { 'e', 'r' }; +static const symbol s_6_8[2] = { 'a', 's' }; +static const symbol s_6_9[4] = { 'e', 'r', 'a', 's' }; +static const symbol s_6_10[4] = { 0xE2, 'm', 'e', 's' }; +static const symbol s_6_11[5] = { 'a', 's', 's', 'e', 's' }; +static const symbol s_6_12[5] = { 'a', 'n', 't', 'e', 's' }; +static const symbol s_6_13[4] = { 0xE2, 't', 'e', 's' }; +static const symbol s_6_14[3] = { 0xE9, 'e', 's' }; +static const symbol s_6_15[3] = { 'a', 'i', 's' }; +static const symbol s_6_16[5] = { 'e', 'r', 'a', 'i', 's' }; +static const symbol s_6_17[4] = { 'i', 'o', 'n', 's' }; +static const symbol s_6_18[6] = { 'e', 'r', 'i', 'o', 'n', 's' }; +static const symbol s_6_19[7] = { 'a', 's', 's', 'i', 'o', 'n', 's' }; +static const symbol s_6_20[5] = { 'e', 'r', 'o', 'n', 's' }; +static const symbol s_6_21[4] = { 'a', 'n', 't', 's' }; +static const symbol s_6_22[2] = { 0xE9, 's' }; +static const symbol s_6_23[3] = { 'a', 'i', 't' }; +static const symbol s_6_24[5] = { 'e', 'r', 'a', 'i', 't' }; +static const symbol s_6_25[3] = { 'a', 'n', 't' }; +static const symbol s_6_26[5] = { 'a', 'I', 'e', 'n', 't' }; +static const symbol s_6_27[7] = { 'e', 'r', 'a', 'I', 'e', 'n', 't' }; +static const symbol s_6_28[5] = { 0xE8, 'r', 'e', 'n', 't' }; +static const symbol s_6_29[6] = { 'a', 's', 's', 'e', 'n', 't' }; +static const symbol s_6_30[5] = { 'e', 'r', 'o', 'n', 't' }; +static const symbol s_6_31[2] = { 0xE2, 't' }; +static const symbol s_6_32[2] = { 'e', 'z' }; +static const symbol s_6_33[3] = { 'i', 'e', 'z' }; +static const symbol s_6_34[5] = { 'e', 'r', 'i', 'e', 'z' }; +static const symbol s_6_35[6] = { 'a', 's', 's', 'i', 'e', 'z' }; +static const symbol s_6_36[4] = { 'e', 'r', 'e', 'z' }; +static const symbol s_6_37[1] = { 0xE9 }; + +static const struct among a_6[38] = +{ +/* 0 */ { 1, s_6_0, -1, 3, 0}, +/* 1 */ { 3, s_6_1, 0, 2, 0}, +/* 2 */ { 4, s_6_2, -1, 3, 0}, +/* 3 */ { 4, s_6_3, -1, 3, 0}, +/* 4 */ { 2, s_6_4, -1, 2, 0}, +/* 5 */ { 2, s_6_5, -1, 3, 0}, +/* 6 */ { 4, s_6_6, 5, 2, 0}, +/* 7 */ { 2, s_6_7, -1, 2, 0}, +/* 8 */ { 2, s_6_8, -1, 3, 0}, +/* 9 */ { 4, s_6_9, 8, 2, 0}, +/* 10 */ { 4, s_6_10, -1, 3, 0}, +/* 11 */ { 5, s_6_11, -1, 3, 0}, +/* 12 */ { 5, s_6_12, -1, 3, 0}, +/* 13 */ { 4, s_6_13, -1, 3, 0}, +/* 14 */ { 3, s_6_14, -1, 2, 0}, +/* 15 */ { 3, s_6_15, -1, 3, 0}, +/* 16 */ { 5, s_6_16, 15, 2, 0}, +/* 17 */ { 4, s_6_17, -1, 1, 0}, +/* 18 */ { 6, s_6_18, 17, 2, 0}, +/* 19 */ { 7, s_6_19, 17, 3, 0}, +/* 20 */ { 5, s_6_20, -1, 2, 0}, +/* 21 */ { 4, s_6_21, -1, 3, 0}, +/* 22 */ { 2, s_6_22, -1, 2, 0}, +/* 23 */ { 3, s_6_23, -1, 3, 0}, +/* 24 */ { 5, s_6_24, 23, 2, 0}, +/* 25 */ { 3, s_6_25, -1, 3, 0}, +/* 26 */ { 5, s_6_26, -1, 3, 0}, +/* 27 */ { 7, s_6_27, 26, 2, 0}, +/* 28 */ { 5, s_6_28, -1, 2, 0}, +/* 29 */ { 6, s_6_29, -1, 3, 0}, +/* 30 */ { 5, s_6_30, -1, 2, 0}, +/* 31 */ { 2, s_6_31, -1, 3, 0}, +/* 32 */ { 2, s_6_32, -1, 2, 0}, +/* 33 */ { 3, s_6_33, 32, 2, 0}, +/* 34 */ { 5, s_6_34, 33, 2, 0}, +/* 35 */ { 6, s_6_35, 33, 3, 0}, +/* 36 */ { 4, s_6_36, 32, 2, 0}, +/* 37 */ { 1, s_6_37, -1, 2, 0} +}; + +static const symbol s_7_0[1] = { 'e' }; +static const symbol s_7_1[4] = { 'I', 0xE8, 'r', 'e' }; +static const symbol s_7_2[4] = { 'i', 0xE8, 'r', 'e' }; +static const symbol s_7_3[3] = { 'i', 'o', 'n' }; +static const symbol s_7_4[3] = { 'I', 'e', 'r' }; +static const symbol s_7_5[3] = { 'i', 'e', 'r' }; +static const symbol s_7_6[1] = { 0xEB }; + +static const struct among a_7[7] = +{ +/* 0 */ { 1, s_7_0, -1, 3, 0}, +/* 1 */ { 4, s_7_1, 0, 2, 0}, +/* 2 */ { 4, s_7_2, 0, 2, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 3, s_7_4, -1, 2, 0}, +/* 5 */ { 3, s_7_5, -1, 2, 0}, +/* 6 */ { 1, s_7_6, -1, 4, 0} +}; + +static const symbol s_8_0[3] = { 'e', 'l', 'l' }; +static const symbol s_8_1[4] = { 'e', 'i', 'l', 'l' }; +static const symbol s_8_2[3] = { 'e', 'n', 'n' }; +static const symbol s_8_3[3] = { 'o', 'n', 'n' }; +static const symbol s_8_4[3] = { 'e', 't', 't' }; + +static const struct among a_8[5] = +{ +/* 0 */ { 3, s_8_0, -1, -1, 0}, +/* 1 */ { 4, s_8_1, -1, -1, 0}, +/* 2 */ { 3, s_8_2, -1, -1, 0}, +/* 3 */ { 3, s_8_3, -1, -1, 0}, +/* 4 */ { 3, s_8_4, -1, -1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; + +static const unsigned char g_keep_with_s[] = { 1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const symbol s_0[] = { 'u' }; +static const symbol s_1[] = { 'U' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'I' }; +static const symbol s_4[] = { 'y' }; +static const symbol s_5[] = { 'Y' }; +static const symbol s_6[] = { 'y' }; +static const symbol s_7[] = { 'Y' }; +static const symbol s_8[] = { 'q' }; +static const symbol s_9[] = { 'u' }; +static const symbol s_10[] = { 'U' }; +static const symbol s_11[] = { 'i' }; +static const symbol s_12[] = { 'u' }; +static const symbol s_13[] = { 'y' }; +static const symbol s_14[] = { 'i', 'c' }; +static const symbol s_15[] = { 'i', 'q', 'U' }; +static const symbol s_16[] = { 'l', 'o', 'g' }; +static const symbol s_17[] = { 'u' }; +static const symbol s_18[] = { 'e', 'n', 't' }; +static const symbol s_19[] = { 'a', 't' }; +static const symbol s_20[] = { 'e', 'u', 'x' }; +static const symbol s_21[] = { 'i' }; +static const symbol s_22[] = { 'a', 'b', 'l' }; +static const symbol s_23[] = { 'i', 'q', 'U' }; +static const symbol s_24[] = { 'a', 't' }; +static const symbol s_25[] = { 'i', 'c' }; +static const symbol s_26[] = { 'i', 'q', 'U' }; +static const symbol s_27[] = { 'e', 'a', 'u' }; +static const symbol s_28[] = { 'a', 'l' }; +static const symbol s_29[] = { 'e', 'u', 'x' }; +static const symbol s_30[] = { 'a', 'n', 't' }; +static const symbol s_31[] = { 'e', 'n', 't' }; +static const symbol s_32[] = { 'e' }; +static const symbol s_33[] = { 's' }; +static const symbol s_34[] = { 's' }; +static const symbol s_35[] = { 't' }; +static const symbol s_36[] = { 'i' }; +static const symbol s_37[] = { 'g', 'u' }; +static const symbol s_38[] = { 0xE9 }; +static const symbol s_39[] = { 0xE8 }; +static const symbol s_40[] = { 'e' }; +static const symbol s_41[] = { 'Y' }; +static const symbol s_42[] = { 'i' }; +static const symbol s_43[] = { 0xE7 }; +static const symbol s_44[] = { 'c' }; + +static int r_prelude(struct SN_env * z) { + while(1) { /* repeat, line 38 */ + int c1 = z->c; + while(1) { /* goto, line 38 */ + int c2 = z->c; + { int c3 = z->c; /* or, line 44 */ + if (in_grouping(z, g_v, 97, 251, 0)) goto lab3; + z->bra = z->c; /* [, line 40 */ + { int c4 = z->c; /* or, line 40 */ + if (!(eq_s(z, 1, s_0))) goto lab5; + z->ket = z->c; /* ], line 40 */ + if (in_grouping(z, g_v, 97, 251, 0)) goto lab5; + { int ret = slice_from_s(z, 1, s_1); /* <-, line 40 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = c4; + if (!(eq_s(z, 1, s_2))) goto lab6; + z->ket = z->c; /* ], line 41 */ + if (in_grouping(z, g_v, 97, 251, 0)) goto lab6; + { int ret = slice_from_s(z, 1, s_3); /* <-, line 41 */ + if (ret < 0) return ret; + } + goto lab4; + lab6: + z->c = c4; + if (!(eq_s(z, 1, s_4))) goto lab3; + z->ket = z->c; /* ], line 42 */ + { int ret = slice_from_s(z, 1, s_5); /* <-, line 42 */ + if (ret < 0) return ret; + } + } + lab4: + goto lab2; + lab3: + z->c = c3; + z->bra = z->c; /* [, line 45 */ + if (!(eq_s(z, 1, s_6))) goto lab7; + z->ket = z->c; /* ], line 45 */ + if (in_grouping(z, g_v, 97, 251, 0)) goto lab7; + { int ret = slice_from_s(z, 1, s_7); /* <-, line 45 */ + if (ret < 0) return ret; + } + goto lab2; + lab7: + z->c = c3; + if (!(eq_s(z, 1, s_8))) goto lab1; + z->bra = z->c; /* [, line 47 */ + if (!(eq_s(z, 1, s_9))) goto lab1; + z->ket = z->c; /* ], line 47 */ + { int ret = slice_from_s(z, 1, s_10); /* <-, line 47 */ + if (ret < 0) return ret; + } + } + lab2: + z->c = c2; + break; + lab1: + z->c = c2; + if (z->c >= z->l) goto lab0; + z->c++; /* goto, line 38 */ + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 56 */ + { int c2 = z->c; /* or, line 58 */ + if (in_grouping(z, g_v, 97, 251, 0)) goto lab2; + if (in_grouping(z, g_v, 97, 251, 0)) goto lab2; + if (z->c >= z->l) goto lab2; + z->c++; /* next, line 57 */ + goto lab1; + lab2: + z->c = c2; + if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((331776 >> (z->p[z->c + 2] & 0x1f)) & 1)) goto lab3; + if (!(find_among(z, a_0, 3))) goto lab3; /* among, line 59 */ + goto lab1; + lab3: + z->c = c2; + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 66 */ + { /* gopast */ /* grouping v, line 66 */ + int ret = out_grouping(z, g_v, 97, 251, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + } + lab1: + z->I[0] = z->c; /* setmark pV, line 67 */ + lab0: + z->c = c1; + } + { int c3 = z->c; /* do, line 69 */ + { /* gopast */ /* grouping v, line 70 */ + int ret = out_grouping(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 70 */ + int ret = in_grouping(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 70 */ + { /* gopast */ /* grouping v, line 71 */ + int ret = out_grouping(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 71 */ + int ret = in_grouping(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 71 */ + lab4: + z->c = c3; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 75 */ + int c1 = z->c; + z->bra = z->c; /* [, line 77 */ + if (z->c >= z->l || z->p[z->c + 0] >> 5 != 2 || !((35652096 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 4; else + among_var = find_among(z, a_1, 4); /* substring, line 77 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 77 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_13); /* <-, line 80 */ + if (ret < 0) return ret; + } + break; + case 4: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 81 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 92 */ + among_var = find_among_b(z, a_4, 43); /* substring, line 92 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 92 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 96 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 96 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 99 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 99 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 100 */ + z->ket = z->c; /* [, line 100 */ + if (!(eq_s_b(z, 2, s_14))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 100 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 100 */ + { int ret = r_R2(z); + if (ret == 0) goto lab2; /* call R2, line 100 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 100 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = z->l - m1; + { int ret = slice_from_s(z, 3, s_15); /* <-, line 100 */ + if (ret < 0) return ret; + } + } + lab1: + lab0: + ; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 104 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_16); /* <-, line 104 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 107 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_17); /* <-, line 107 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 110 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_18); /* <-, line 110 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 114 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 114 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 115 */ + z->ket = z->c; /* [, line 116 */ + among_var = find_among_b(z, a_2, 6); /* substring, line 116 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 116 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab3; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 117 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 117 */ + if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 117 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 117 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m2 = z->l - z->c; (void)m2; /* or, line 118 */ + { int ret = r_R2(z); + if (ret == 0) goto lab5; /* call R2, line 118 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 118 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = z->l - m2; + { int ret = r_R1(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R1, line 118 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_20); /* <-, line 118 */ + if (ret < 0) return ret; + } + } + lab4: + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 120 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 120 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call RV, line 122 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_21); /* <-, line 122 */ + if (ret < 0) return ret; + } + break; + } + lab3: + ; + } + break; + case 7: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 129 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 129 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 130 */ + z->ket = z->c; /* [, line 131 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab6; } + among_var = find_among_b(z, a_3, 3); /* substring, line 131 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab6; } + z->bra = z->c; /* ], line 131 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab6; } + case 1: + { int m3 = z->l - z->c; (void)m3; /* or, line 132 */ + { int ret = r_R2(z); + if (ret == 0) goto lab8; /* call R2, line 132 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 132 */ + if (ret < 0) return ret; + } + goto lab7; + lab8: + z->c = z->l - m3; + { int ret = slice_from_s(z, 3, s_22); /* <-, line 132 */ + if (ret < 0) return ret; + } + } + lab7: + break; + case 2: + { int m4 = z->l - z->c; (void)m4; /* or, line 133 */ + { int ret = r_R2(z); + if (ret == 0) goto lab10; /* call R2, line 133 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 133 */ + if (ret < 0) return ret; + } + goto lab9; + lab10: + z->c = z->l - m4; + { int ret = slice_from_s(z, 3, s_23); /* <-, line 133 */ + if (ret < 0) return ret; + } + } + lab9: + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R2, line 134 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + break; + } + lab6: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 141 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 141 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 142 */ + z->ket = z->c; /* [, line 142 */ + if (!(eq_s_b(z, 2, s_24))) { z->c = z->l - m_keep; goto lab11; } + z->bra = z->c; /* ], line 142 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab11; } /* call R2, line 142 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 142 */ + if (!(eq_s_b(z, 2, s_25))) { z->c = z->l - m_keep; goto lab11; } + z->bra = z->c; /* ], line 142 */ + { int m5 = z->l - z->c; (void)m5; /* or, line 142 */ + { int ret = r_R2(z); + if (ret == 0) goto lab13; /* call R2, line 142 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + goto lab12; + lab13: + z->c = z->l - m5; + { int ret = slice_from_s(z, 3, s_26); /* <-, line 142 */ + if (ret < 0) return ret; + } + } + lab12: + lab11: + ; + } + break; + case 9: + { int ret = slice_from_s(z, 3, s_27); /* <-, line 144 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 145 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 2, s_28); /* <-, line 145 */ + if (ret < 0) return ret; + } + break; + case 11: + { int m6 = z->l - z->c; (void)m6; /* or, line 147 */ + { int ret = r_R2(z); + if (ret == 0) goto lab15; /* call R2, line 147 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 147 */ + if (ret < 0) return ret; + } + goto lab14; + lab15: + z->c = z->l - m6; + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 147 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_29); /* <-, line 147 */ + if (ret < 0) return ret; + } + } + lab14: + break; + case 12: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 150 */ + if (ret < 0) return ret; + } + if (out_grouping_b(z, g_v, 97, 251, 0)) return 0; + { int ret = slice_del(z); /* delete, line 150 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 155 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_30); /* <-, line 155 */ + if (ret < 0) return ret; + } + return 0; /* fail, line 155 */ + break; + case 14: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 156 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_31); /* <-, line 156 */ + if (ret < 0) return ret; + } + return 0; /* fail, line 156 */ + break; + case 15: + { int m_test = z->l - z->c; /* test, line 158 */ + if (in_grouping_b(z, g_v, 97, 251, 0)) return 0; + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 158 */ + if (ret < 0) return ret; + } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 158 */ + if (ret < 0) return ret; + } + return 0; /* fail, line 158 */ + break; + } + return 1; +} + +static int r_i_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 163 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 163 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 164 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68944418 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_5, 35); /* substring, line 164 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 164 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + if (out_grouping_b(z, g_v, 97, 251, 0)) { z->lb = mlimit; return 0; } + { int ret = slice_del(z); /* delete, line 170 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 174 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 174 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 175 */ + among_var = find_among_b(z, a_6, 38); /* substring, line 175 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 175 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 177 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 177 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 185 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 190 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 191 */ + z->ket = z->c; /* [, line 191 */ + if (!(eq_s_b(z, 1, s_32))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 191 */ + { int ret = slice_del(z); /* delete, line 191 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_residual_suffix(struct SN_env * z) { + int among_var; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 199 */ + z->ket = z->c; /* [, line 199 */ + if (!(eq_s_b(z, 1, s_33))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 199 */ + { int m_test = z->l - z->c; /* test, line 199 */ + if (out_grouping_b(z, g_keep_with_s, 97, 232, 0)) { z->c = z->l - m_keep; goto lab0; } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 199 */ + if (ret < 0) return ret; + } + lab0: + ; + } + { int mlimit; /* setlimit, line 200 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 200 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 201 */ + among_var = find_among_b(z, a_7, 7); /* substring, line 201 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 201 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 202 */ + if (ret < 0) return ret; + } + { int m2 = z->l - z->c; (void)m2; /* or, line 202 */ + if (!(eq_s_b(z, 1, s_34))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m2; + if (!(eq_s_b(z, 1, s_35))) { z->lb = mlimit; return 0; } + } + lab1: + { int ret = slice_del(z); /* delete, line 202 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_36); /* <-, line 204 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 205 */ + if (ret < 0) return ret; + } + break; + case 4: + if (!(eq_s_b(z, 2, s_37))) { z->lb = mlimit; return 0; } + { int ret = slice_del(z); /* delete, line 206 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_un_double(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 212 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1069056 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_8, 5))) return 0; /* among, line 212 */ + z->c = z->l - m_test; + } + z->ket = z->c; /* [, line 212 */ + if (z->c <= z->lb) return 0; + z->c--; /* next, line 212 */ + z->bra = z->c; /* ], line 212 */ + { int ret = slice_del(z); /* delete, line 212 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_un_accent(struct SN_env * z) { + { int i = 1; + while(1) { /* atleast, line 216 */ + if (out_grouping_b(z, g_v, 97, 251, 0)) goto lab0; + i--; + continue; + lab0: + break; + } + if (i > 0) return 0; + } + z->ket = z->c; /* [, line 217 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 217 */ + if (!(eq_s_b(z, 1, s_38))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_39))) return 0; + } +lab1: + z->bra = z->c; /* ], line 217 */ + { int ret = slice_from_s(z, 1, s_40); /* <-, line 217 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int french_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 223 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 223 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 224 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 224 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 225 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 227 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 237 */ + { int m5 = z->l - z->c; (void)m5; /* and, line 233 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 229 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab6; /* call standard_suffix, line 229 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_i_verb_suffix(z); + if (ret == 0) goto lab7; /* call i_verb_suffix, line 230 */ + if (ret < 0) return ret; + } + goto lab5; + lab7: + z->c = z->l - m6; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ + if (ret < 0) return ret; + } + } + lab5: + z->c = z->l - m5; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 234 */ + z->ket = z->c; /* [, line 234 */ + { int m7 = z->l - z->c; (void)m7; /* or, line 234 */ + if (!(eq_s_b(z, 1, s_41))) goto lab10; + z->bra = z->c; /* ], line 234 */ + { int ret = slice_from_s(z, 1, s_42); /* <-, line 234 */ + if (ret < 0) return ret; + } + goto lab9; + lab10: + z->c = z->l - m7; + if (!(eq_s_b(z, 1, s_43))) { z->c = z->l - m_keep; goto lab8; } + z->bra = z->c; /* ], line 235 */ + { int ret = slice_from_s(z, 1, s_44); /* <-, line 235 */ + if (ret < 0) return ret; + } + } + lab9: + lab8: + ; + } + } + goto lab3; + lab4: + z->c = z->l - m4; + { int ret = r_residual_suffix(z); + if (ret == 0) goto lab2; /* call residual_suffix, line 238 */ + if (ret < 0) return ret; + } + } + lab3: + lab2: + z->c = z->l - m3; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 243 */ + { int ret = r_un_double(z); + if (ret == 0) goto lab11; /* call un_double, line 243 */ + if (ret < 0) return ret; + } + lab11: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 244 */ + { int ret = r_un_accent(z); + if (ret == 0) goto lab12; /* call un_accent, line 244 */ + if (ret < 0) return ret; + } + lab12: + z->c = z->l - m9; + } + z->c = z->lb; + { int c10 = z->c; /* do, line 246 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab13; /* call postlude, line 246 */ + if (ret < 0) return ret; + } + lab13: + z->c = c10; + } + return 1; +} + +extern struct SN_env * french_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void french_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_german.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_german.c new file mode 100644 index 00000000000..13c24230923 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_german.c @@ -0,0 +1,503 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int german_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_standard_suffix(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * german_ISO_8859_1_create_env(void); +extern void german_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[1] = { 'U' }; +static const symbol s_0_2[1] = { 'Y' }; +static const symbol s_0_3[1] = { 0xE4 }; +static const symbol s_0_4[1] = { 0xF6 }; +static const symbol s_0_5[1] = { 0xFC }; + +static const struct among a_0[6] = +{ +/* 0 */ { 0, 0, -1, 6, 0}, +/* 1 */ { 1, s_0_1, 0, 2, 0}, +/* 2 */ { 1, s_0_2, 0, 1, 0}, +/* 3 */ { 1, s_0_3, 0, 3, 0}, +/* 4 */ { 1, s_0_4, 0, 4, 0}, +/* 5 */ { 1, s_0_5, 0, 5, 0} +}; + +static const symbol s_1_0[1] = { 'e' }; +static const symbol s_1_1[2] = { 'e', 'm' }; +static const symbol s_1_2[2] = { 'e', 'n' }; +static const symbol s_1_3[3] = { 'e', 'r', 'n' }; +static const symbol s_1_4[2] = { 'e', 'r' }; +static const symbol s_1_5[1] = { 's' }; +static const symbol s_1_6[2] = { 'e', 's' }; + +static const struct among a_1[7] = +{ +/* 0 */ { 1, s_1_0, -1, 1, 0}, +/* 1 */ { 2, s_1_1, -1, 1, 0}, +/* 2 */ { 2, s_1_2, -1, 1, 0}, +/* 3 */ { 3, s_1_3, -1, 1, 0}, +/* 4 */ { 2, s_1_4, -1, 1, 0}, +/* 5 */ { 1, s_1_5, -1, 2, 0}, +/* 6 */ { 2, s_1_6, 5, 1, 0} +}; + +static const symbol s_2_0[2] = { 'e', 'n' }; +static const symbol s_2_1[2] = { 'e', 'r' }; +static const symbol s_2_2[2] = { 's', 't' }; +static const symbol s_2_3[3] = { 'e', 's', 't' }; + +static const struct among a_2[4] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 2, s_2_1, -1, 1, 0}, +/* 2 */ { 2, s_2_2, -1, 2, 0}, +/* 3 */ { 3, s_2_3, 2, 1, 0} +}; + +static const symbol s_3_0[2] = { 'i', 'g' }; +static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; + +static const struct among a_3[2] = +{ +/* 0 */ { 2, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0} +}; + +static const symbol s_4_0[3] = { 'e', 'n', 'd' }; +static const symbol s_4_1[2] = { 'i', 'g' }; +static const symbol s_4_2[3] = { 'u', 'n', 'g' }; +static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; +static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; +static const symbol s_4_5[2] = { 'i', 'k' }; +static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; +static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; + +static const struct among a_4[8] = +{ +/* 0 */ { 3, s_4_0, -1, 1, 0}, +/* 1 */ { 2, s_4_1, -1, 2, 0}, +/* 2 */ { 3, s_4_2, -1, 1, 0}, +/* 3 */ { 4, s_4_3, -1, 3, 0}, +/* 4 */ { 4, s_4_4, -1, 2, 0}, +/* 5 */ { 2, s_4_5, -1, 2, 0}, +/* 6 */ { 4, s_4_6, -1, 3, 0}, +/* 7 */ { 4, s_4_7, -1, 4, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; + +static const unsigned char g_s_ending[] = { 117, 30, 5 }; + +static const unsigned char g_st_ending[] = { 117, 30, 4 }; + +static const symbol s_0[] = { 0xDF }; +static const symbol s_1[] = { 's', 's' }; +static const symbol s_2[] = { 'u' }; +static const symbol s_3[] = { 'U' }; +static const symbol s_4[] = { 'y' }; +static const symbol s_5[] = { 'Y' }; +static const symbol s_6[] = { 'y' }; +static const symbol s_7[] = { 'u' }; +static const symbol s_8[] = { 'a' }; +static const symbol s_9[] = { 'o' }; +static const symbol s_10[] = { 'u' }; +static const symbol s_11[] = { 'i', 'g' }; +static const symbol s_12[] = { 'e' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'e', 'r' }; +static const symbol s_15[] = { 'e', 'n' }; + +static int r_prelude(struct SN_env * z) { + { int c_test = z->c; /* test, line 30 */ + while(1) { /* repeat, line 30 */ + int c1 = z->c; + { int c2 = z->c; /* or, line 33 */ + z->bra = z->c; /* [, line 32 */ + if (!(eq_s(z, 1, s_0))) goto lab2; + z->ket = z->c; /* ], line 32 */ + { int ret = slice_from_s(z, 2, s_1); /* <-, line 32 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = c2; + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 33 */ + } + lab1: + continue; + lab0: + z->c = c1; + break; + } + z->c = c_test; + } + while(1) { /* repeat, line 36 */ + int c3 = z->c; + while(1) { /* goto, line 36 */ + int c4 = z->c; + if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; + z->bra = z->c; /* [, line 37 */ + { int c5 = z->c; /* or, line 37 */ + if (!(eq_s(z, 1, s_2))) goto lab6; + z->ket = z->c; /* ], line 37 */ + if (in_grouping(z, g_v, 97, 252, 0)) goto lab6; + { int ret = slice_from_s(z, 1, s_3); /* <-, line 37 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = c5; + if (!(eq_s(z, 1, s_4))) goto lab4; + z->ket = z->c; /* ], line 38 */ + if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; + { int ret = slice_from_s(z, 1, s_5); /* <-, line 38 */ + if (ret < 0) return ret; + } + } + lab5: + z->c = c4; + break; + lab4: + z->c = c4; + if (z->c >= z->l) goto lab3; + z->c++; /* goto, line 36 */ + } + continue; + lab3: + z->c = c3; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c_test = z->c; /* test, line 47 */ + { int ret = z->c + 3; + if (0 > ret || ret > z->l) return 0; + z->c = ret; /* hop, line 47 */ + } + z->I[2] = z->c; /* setmark x, line 47 */ + z->c = c_test; + } + { /* gopast */ /* grouping v, line 49 */ + int ret = out_grouping(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 49 */ + int ret = in_grouping(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 49 */ + /* try, line 50 */ + if (!(z->I[0] < z->I[2])) goto lab0; + z->I[0] = z->I[2]; +lab0: + { /* gopast */ /* grouping v, line 51 */ + int ret = out_grouping(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 51 */ + int ret = in_grouping(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 51 */ + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 55 */ + int c1 = z->c; + z->bra = z->c; /* [, line 57 */ + among_var = find_among(z, a_0, 6); /* substring, line 57 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 57 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_6); /* <-, line 58 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_7); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_8); /* <-, line 60 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_9); /* <-, line 61 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_10); /* <-, line 62 */ + if (ret < 0) return ret; + } + break; + case 6: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 63 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + { int m1 = z->l - z->c; (void)m1; /* do, line 74 */ + z->ket = z->c; /* [, line 75 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; + among_var = find_among_b(z, a_1, 7); /* substring, line 75 */ + if (!(among_var)) goto lab0; + z->bra = z->c; /* ], line 75 */ + { int ret = r_R1(z); + if (ret == 0) goto lab0; /* call R1, line 75 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_del(z); /* delete, line 77 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b(z, g_s_ending, 98, 116, 0)) goto lab0; + { int ret = slice_del(z); /* delete, line 80 */ + if (ret < 0) return ret; + } + break; + } + lab0: + z->c = z->l - m1; + } + { int m2 = z->l - z->c; (void)m2; /* do, line 84 */ + z->ket = z->c; /* [, line 85 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1; + among_var = find_among_b(z, a_2, 4); /* substring, line 85 */ + if (!(among_var)) goto lab1; + z->bra = z->c; /* ], line 85 */ + { int ret = r_R1(z); + if (ret == 0) goto lab1; /* call R1, line 85 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: goto lab1; + case 1: + { int ret = slice_del(z); /* delete, line 87 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab1; + { int ret = z->c - 3; + if (z->lb > ret || ret > z->l) goto lab1; + z->c = ret; /* hop, line 90 */ + } + { int ret = slice_del(z); /* delete, line 90 */ + if (ret < 0) return ret; + } + break; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 94 */ + z->ket = z->c; /* [, line 95 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; + among_var = find_among_b(z, a_4, 8); /* substring, line 95 */ + if (!(among_var)) goto lab2; + z->bra = z->c; /* ], line 95 */ + { int ret = r_R2(z); + if (ret == 0) goto lab2; /* call R2, line 95 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: goto lab2; + case 1: + { int ret = slice_del(z); /* delete, line 97 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 98 */ + z->ket = z->c; /* [, line 98 */ + if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 98 */ + { int m4 = z->l - z->c; (void)m4; /* not, line 98 */ + if (!(eq_s_b(z, 1, s_12))) goto lab4; + { z->c = z->l - m_keep; goto lab3; } + lab4: + z->c = z->l - m4; + } + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 98 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 98 */ + if (ret < 0) return ret; + } + lab3: + ; + } + break; + case 2: + { int m5 = z->l - z->c; (void)m5; /* not, line 101 */ + if (!(eq_s_b(z, 1, s_13))) goto lab5; + goto lab2; + lab5: + z->c = z->l - m5; + } + { int ret = slice_del(z); /* delete, line 101 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 104 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 105 */ + z->ket = z->c; /* [, line 106 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 106 */ + if (!(eq_s_b(z, 2, s_14))) goto lab8; + goto lab7; + lab8: + z->c = z->l - m6; + if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab6; } + } + lab7: + z->bra = z->c; /* ], line 106 */ + { int ret = r_R1(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R1, line 106 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 106 */ + if (ret < 0) return ret; + } + lab6: + ; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 110 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ + z->ket = z->c; /* [, line 112 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab9; } + among_var = find_among_b(z, a_3, 2); /* substring, line 112 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab9; } + z->bra = z->c; /* ], line 112 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab9; } /* call R2, line 112 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab9; } + case 1: + { int ret = slice_del(z); /* delete, line 114 */ + if (ret < 0) return ret; + } + break; + } + lab9: + ; + } + break; + } + lab2: + z->c = z->l - m3; + } + return 1; +} + +extern int german_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 125 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 125 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 126 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 126 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 127 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 128 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab2; /* call standard_suffix, line 128 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + z->c = z->lb; + { int c4 = z->c; /* do, line 129 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab3; /* call postlude, line 129 */ + if (ret < 0) return ret; + } + lab3: + z->c = c4; + } + return 1; +} + +extern struct SN_env * german_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void german_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_hungarian.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_hungarian.c new file mode 100644 index 00000000000..ff4b23e0605 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_hungarian.c @@ -0,0 +1,1230 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int hungarian_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_double(struct SN_env * z); +static int r_undouble(struct SN_env * z); +static int r_factive(struct SN_env * z); +static int r_instrum(struct SN_env * z); +static int r_plur_owner(struct SN_env * z); +static int r_sing_owner(struct SN_env * z); +static int r_owned(struct SN_env * z); +static int r_plural(struct SN_env * z); +static int r_case_other(struct SN_env * z); +static int r_case_special(struct SN_env * z); +static int r_case(struct SN_env * z); +static int r_v_ending(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * hungarian_ISO_8859_1_create_env(void); +extern void hungarian_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[2] = { 'c', 's' }; +static const symbol s_0_1[3] = { 'd', 'z', 's' }; +static const symbol s_0_2[2] = { 'g', 'y' }; +static const symbol s_0_3[2] = { 'l', 'y' }; +static const symbol s_0_4[2] = { 'n', 'y' }; +static const symbol s_0_5[2] = { 's', 'z' }; +static const symbol s_0_6[2] = { 't', 'y' }; +static const symbol s_0_7[2] = { 'z', 's' }; + +static const struct among a_0[8] = +{ +/* 0 */ { 2, s_0_0, -1, -1, 0}, +/* 1 */ { 3, s_0_1, -1, -1, 0}, +/* 2 */ { 2, s_0_2, -1, -1, 0}, +/* 3 */ { 2, s_0_3, -1, -1, 0}, +/* 4 */ { 2, s_0_4, -1, -1, 0}, +/* 5 */ { 2, s_0_5, -1, -1, 0}, +/* 6 */ { 2, s_0_6, -1, -1, 0}, +/* 7 */ { 2, s_0_7, -1, -1, 0} +}; + +static const symbol s_1_0[1] = { 0xE1 }; +static const symbol s_1_1[1] = { 0xE9 }; + +static const struct among a_1[2] = +{ +/* 0 */ { 1, s_1_0, -1, 1, 0}, +/* 1 */ { 1, s_1_1, -1, 2, 0} +}; + +static const symbol s_2_0[2] = { 'b', 'b' }; +static const symbol s_2_1[2] = { 'c', 'c' }; +static const symbol s_2_2[2] = { 'd', 'd' }; +static const symbol s_2_3[2] = { 'f', 'f' }; +static const symbol s_2_4[2] = { 'g', 'g' }; +static const symbol s_2_5[2] = { 'j', 'j' }; +static const symbol s_2_6[2] = { 'k', 'k' }; +static const symbol s_2_7[2] = { 'l', 'l' }; +static const symbol s_2_8[2] = { 'm', 'm' }; +static const symbol s_2_9[2] = { 'n', 'n' }; +static const symbol s_2_10[2] = { 'p', 'p' }; +static const symbol s_2_11[2] = { 'r', 'r' }; +static const symbol s_2_12[3] = { 'c', 'c', 's' }; +static const symbol s_2_13[2] = { 's', 's' }; +static const symbol s_2_14[3] = { 'z', 'z', 's' }; +static const symbol s_2_15[2] = { 't', 't' }; +static const symbol s_2_16[2] = { 'v', 'v' }; +static const symbol s_2_17[3] = { 'g', 'g', 'y' }; +static const symbol s_2_18[3] = { 'l', 'l', 'y' }; +static const symbol s_2_19[3] = { 'n', 'n', 'y' }; +static const symbol s_2_20[3] = { 't', 't', 'y' }; +static const symbol s_2_21[3] = { 's', 's', 'z' }; +static const symbol s_2_22[2] = { 'z', 'z' }; + +static const struct among a_2[23] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 2, s_2_2, -1, -1, 0}, +/* 3 */ { 2, s_2_3, -1, -1, 0}, +/* 4 */ { 2, s_2_4, -1, -1, 0}, +/* 5 */ { 2, s_2_5, -1, -1, 0}, +/* 6 */ { 2, s_2_6, -1, -1, 0}, +/* 7 */ { 2, s_2_7, -1, -1, 0}, +/* 8 */ { 2, s_2_8, -1, -1, 0}, +/* 9 */ { 2, s_2_9, -1, -1, 0}, +/* 10 */ { 2, s_2_10, -1, -1, 0}, +/* 11 */ { 2, s_2_11, -1, -1, 0}, +/* 12 */ { 3, s_2_12, -1, -1, 0}, +/* 13 */ { 2, s_2_13, -1, -1, 0}, +/* 14 */ { 3, s_2_14, -1, -1, 0}, +/* 15 */ { 2, s_2_15, -1, -1, 0}, +/* 16 */ { 2, s_2_16, -1, -1, 0}, +/* 17 */ { 3, s_2_17, -1, -1, 0}, +/* 18 */ { 3, s_2_18, -1, -1, 0}, +/* 19 */ { 3, s_2_19, -1, -1, 0}, +/* 20 */ { 3, s_2_20, -1, -1, 0}, +/* 21 */ { 3, s_2_21, -1, -1, 0}, +/* 22 */ { 2, s_2_22, -1, -1, 0} +}; + +static const symbol s_3_0[2] = { 'a', 'l' }; +static const symbol s_3_1[2] = { 'e', 'l' }; + +static const struct among a_3[2] = +{ +/* 0 */ { 2, s_3_0, -1, 1, 0}, +/* 1 */ { 2, s_3_1, -1, 2, 0} +}; + +static const symbol s_4_0[2] = { 'b', 'a' }; +static const symbol s_4_1[2] = { 'r', 'a' }; +static const symbol s_4_2[2] = { 'b', 'e' }; +static const symbol s_4_3[2] = { 'r', 'e' }; +static const symbol s_4_4[2] = { 'i', 'g' }; +static const symbol s_4_5[3] = { 'n', 'a', 'k' }; +static const symbol s_4_6[3] = { 'n', 'e', 'k' }; +static const symbol s_4_7[3] = { 'v', 'a', 'l' }; +static const symbol s_4_8[3] = { 'v', 'e', 'l' }; +static const symbol s_4_9[2] = { 'u', 'l' }; +static const symbol s_4_10[3] = { 'n', 0xE1, 'l' }; +static const symbol s_4_11[3] = { 'n', 0xE9, 'l' }; +static const symbol s_4_12[3] = { 'b', 0xF3, 'l' }; +static const symbol s_4_13[3] = { 'r', 0xF3, 'l' }; +static const symbol s_4_14[3] = { 't', 0xF3, 'l' }; +static const symbol s_4_15[3] = { 'b', 0xF5, 'l' }; +static const symbol s_4_16[3] = { 'r', 0xF5, 'l' }; +static const symbol s_4_17[3] = { 't', 0xF5, 'l' }; +static const symbol s_4_18[2] = { 0xFC, 'l' }; +static const symbol s_4_19[1] = { 'n' }; +static const symbol s_4_20[2] = { 'a', 'n' }; +static const symbol s_4_21[3] = { 'b', 'a', 'n' }; +static const symbol s_4_22[2] = { 'e', 'n' }; +static const symbol s_4_23[3] = { 'b', 'e', 'n' }; +static const symbol s_4_24[6] = { 'k', 0xE9, 'p', 'p', 'e', 'n' }; +static const symbol s_4_25[2] = { 'o', 'n' }; +static const symbol s_4_26[2] = { 0xF6, 'n' }; +static const symbol s_4_27[4] = { 'k', 0xE9, 'p', 'p' }; +static const symbol s_4_28[3] = { 'k', 'o', 'r' }; +static const symbol s_4_29[1] = { 't' }; +static const symbol s_4_30[2] = { 'a', 't' }; +static const symbol s_4_31[2] = { 'e', 't' }; +static const symbol s_4_32[4] = { 'k', 0xE9, 'n', 't' }; +static const symbol s_4_33[6] = { 'a', 'n', 'k', 0xE9, 'n', 't' }; +static const symbol s_4_34[6] = { 'e', 'n', 'k', 0xE9, 'n', 't' }; +static const symbol s_4_35[6] = { 'o', 'n', 'k', 0xE9, 'n', 't' }; +static const symbol s_4_36[2] = { 'o', 't' }; +static const symbol s_4_37[3] = { 0xE9, 'r', 't' }; +static const symbol s_4_38[2] = { 0xF6, 't' }; +static const symbol s_4_39[3] = { 'h', 'e', 'z' }; +static const symbol s_4_40[3] = { 'h', 'o', 'z' }; +static const symbol s_4_41[3] = { 'h', 0xF6, 'z' }; +static const symbol s_4_42[2] = { 'v', 0xE1 }; +static const symbol s_4_43[2] = { 'v', 0xE9 }; + +static const struct among a_4[44] = +{ +/* 0 */ { 2, s_4_0, -1, -1, 0}, +/* 1 */ { 2, s_4_1, -1, -1, 0}, +/* 2 */ { 2, s_4_2, -1, -1, 0}, +/* 3 */ { 2, s_4_3, -1, -1, 0}, +/* 4 */ { 2, s_4_4, -1, -1, 0}, +/* 5 */ { 3, s_4_5, -1, -1, 0}, +/* 6 */ { 3, s_4_6, -1, -1, 0}, +/* 7 */ { 3, s_4_7, -1, -1, 0}, +/* 8 */ { 3, s_4_8, -1, -1, 0}, +/* 9 */ { 2, s_4_9, -1, -1, 0}, +/* 10 */ { 3, s_4_10, -1, -1, 0}, +/* 11 */ { 3, s_4_11, -1, -1, 0}, +/* 12 */ { 3, s_4_12, -1, -1, 0}, +/* 13 */ { 3, s_4_13, -1, -1, 0}, +/* 14 */ { 3, s_4_14, -1, -1, 0}, +/* 15 */ { 3, s_4_15, -1, -1, 0}, +/* 16 */ { 3, s_4_16, -1, -1, 0}, +/* 17 */ { 3, s_4_17, -1, -1, 0}, +/* 18 */ { 2, s_4_18, -1, -1, 0}, +/* 19 */ { 1, s_4_19, -1, -1, 0}, +/* 20 */ { 2, s_4_20, 19, -1, 0}, +/* 21 */ { 3, s_4_21, 20, -1, 0}, +/* 22 */ { 2, s_4_22, 19, -1, 0}, +/* 23 */ { 3, s_4_23, 22, -1, 0}, +/* 24 */ { 6, s_4_24, 22, -1, 0}, +/* 25 */ { 2, s_4_25, 19, -1, 0}, +/* 26 */ { 2, s_4_26, 19, -1, 0}, +/* 27 */ { 4, s_4_27, -1, -1, 0}, +/* 28 */ { 3, s_4_28, -1, -1, 0}, +/* 29 */ { 1, s_4_29, -1, -1, 0}, +/* 30 */ { 2, s_4_30, 29, -1, 0}, +/* 31 */ { 2, s_4_31, 29, -1, 0}, +/* 32 */ { 4, s_4_32, 29, -1, 0}, +/* 33 */ { 6, s_4_33, 32, -1, 0}, +/* 34 */ { 6, s_4_34, 32, -1, 0}, +/* 35 */ { 6, s_4_35, 32, -1, 0}, +/* 36 */ { 2, s_4_36, 29, -1, 0}, +/* 37 */ { 3, s_4_37, 29, -1, 0}, +/* 38 */ { 2, s_4_38, 29, -1, 0}, +/* 39 */ { 3, s_4_39, -1, -1, 0}, +/* 40 */ { 3, s_4_40, -1, -1, 0}, +/* 41 */ { 3, s_4_41, -1, -1, 0}, +/* 42 */ { 2, s_4_42, -1, -1, 0}, +/* 43 */ { 2, s_4_43, -1, -1, 0} +}; + +static const symbol s_5_0[2] = { 0xE1, 'n' }; +static const symbol s_5_1[2] = { 0xE9, 'n' }; +static const symbol s_5_2[6] = { 0xE1, 'n', 'k', 0xE9, 'n', 't' }; + +static const struct among a_5[3] = +{ +/* 0 */ { 2, s_5_0, -1, 2, 0}, +/* 1 */ { 2, s_5_1, -1, 1, 0}, +/* 2 */ { 6, s_5_2, -1, 3, 0} +}; + +static const symbol s_6_0[4] = { 's', 't', 'u', 'l' }; +static const symbol s_6_1[5] = { 'a', 's', 't', 'u', 'l' }; +static const symbol s_6_2[5] = { 0xE1, 's', 't', 'u', 'l' }; +static const symbol s_6_3[4] = { 's', 't', 0xFC, 'l' }; +static const symbol s_6_4[5] = { 'e', 's', 't', 0xFC, 'l' }; +static const symbol s_6_5[5] = { 0xE9, 's', 't', 0xFC, 'l' }; + +static const struct among a_6[6] = +{ +/* 0 */ { 4, s_6_0, -1, 2, 0}, +/* 1 */ { 5, s_6_1, 0, 1, 0}, +/* 2 */ { 5, s_6_2, 0, 3, 0}, +/* 3 */ { 4, s_6_3, -1, 2, 0}, +/* 4 */ { 5, s_6_4, 3, 1, 0}, +/* 5 */ { 5, s_6_5, 3, 4, 0} +}; + +static const symbol s_7_0[1] = { 0xE1 }; +static const symbol s_7_1[1] = { 0xE9 }; + +static const struct among a_7[2] = +{ +/* 0 */ { 1, s_7_0, -1, 1, 0}, +/* 1 */ { 1, s_7_1, -1, 2, 0} +}; + +static const symbol s_8_0[1] = { 'k' }; +static const symbol s_8_1[2] = { 'a', 'k' }; +static const symbol s_8_2[2] = { 'e', 'k' }; +static const symbol s_8_3[2] = { 'o', 'k' }; +static const symbol s_8_4[2] = { 0xE1, 'k' }; +static const symbol s_8_5[2] = { 0xE9, 'k' }; +static const symbol s_8_6[2] = { 0xF6, 'k' }; + +static const struct among a_8[7] = +{ +/* 0 */ { 1, s_8_0, -1, 7, 0}, +/* 1 */ { 2, s_8_1, 0, 4, 0}, +/* 2 */ { 2, s_8_2, 0, 6, 0}, +/* 3 */ { 2, s_8_3, 0, 5, 0}, +/* 4 */ { 2, s_8_4, 0, 1, 0}, +/* 5 */ { 2, s_8_5, 0, 2, 0}, +/* 6 */ { 2, s_8_6, 0, 3, 0} +}; + +static const symbol s_9_0[2] = { 0xE9, 'i' }; +static const symbol s_9_1[3] = { 0xE1, 0xE9, 'i' }; +static const symbol s_9_2[3] = { 0xE9, 0xE9, 'i' }; +static const symbol s_9_3[1] = { 0xE9 }; +static const symbol s_9_4[2] = { 'k', 0xE9 }; +static const symbol s_9_5[3] = { 'a', 'k', 0xE9 }; +static const symbol s_9_6[3] = { 'e', 'k', 0xE9 }; +static const symbol s_9_7[3] = { 'o', 'k', 0xE9 }; +static const symbol s_9_8[3] = { 0xE1, 'k', 0xE9 }; +static const symbol s_9_9[3] = { 0xE9, 'k', 0xE9 }; +static const symbol s_9_10[3] = { 0xF6, 'k', 0xE9 }; +static const symbol s_9_11[2] = { 0xE9, 0xE9 }; + +static const struct among a_9[12] = +{ +/* 0 */ { 2, s_9_0, -1, 7, 0}, +/* 1 */ { 3, s_9_1, 0, 6, 0}, +/* 2 */ { 3, s_9_2, 0, 5, 0}, +/* 3 */ { 1, s_9_3, -1, 9, 0}, +/* 4 */ { 2, s_9_4, 3, 4, 0}, +/* 5 */ { 3, s_9_5, 4, 1, 0}, +/* 6 */ { 3, s_9_6, 4, 1, 0}, +/* 7 */ { 3, s_9_7, 4, 1, 0}, +/* 8 */ { 3, s_9_8, 4, 3, 0}, +/* 9 */ { 3, s_9_9, 4, 2, 0}, +/* 10 */ { 3, s_9_10, 4, 1, 0}, +/* 11 */ { 2, s_9_11, 3, 8, 0} +}; + +static const symbol s_10_0[1] = { 'a' }; +static const symbol s_10_1[2] = { 'j', 'a' }; +static const symbol s_10_2[1] = { 'd' }; +static const symbol s_10_3[2] = { 'a', 'd' }; +static const symbol s_10_4[2] = { 'e', 'd' }; +static const symbol s_10_5[2] = { 'o', 'd' }; +static const symbol s_10_6[2] = { 0xE1, 'd' }; +static const symbol s_10_7[2] = { 0xE9, 'd' }; +static const symbol s_10_8[2] = { 0xF6, 'd' }; +static const symbol s_10_9[1] = { 'e' }; +static const symbol s_10_10[2] = { 'j', 'e' }; +static const symbol s_10_11[2] = { 'n', 'k' }; +static const symbol s_10_12[3] = { 'u', 'n', 'k' }; +static const symbol s_10_13[3] = { 0xE1, 'n', 'k' }; +static const symbol s_10_14[3] = { 0xE9, 'n', 'k' }; +static const symbol s_10_15[3] = { 0xFC, 'n', 'k' }; +static const symbol s_10_16[2] = { 'u', 'k' }; +static const symbol s_10_17[3] = { 'j', 'u', 'k' }; +static const symbol s_10_18[4] = { 0xE1, 'j', 'u', 'k' }; +static const symbol s_10_19[2] = { 0xFC, 'k' }; +static const symbol s_10_20[3] = { 'j', 0xFC, 'k' }; +static const symbol s_10_21[4] = { 0xE9, 'j', 0xFC, 'k' }; +static const symbol s_10_22[1] = { 'm' }; +static const symbol s_10_23[2] = { 'a', 'm' }; +static const symbol s_10_24[2] = { 'e', 'm' }; +static const symbol s_10_25[2] = { 'o', 'm' }; +static const symbol s_10_26[2] = { 0xE1, 'm' }; +static const symbol s_10_27[2] = { 0xE9, 'm' }; +static const symbol s_10_28[1] = { 'o' }; +static const symbol s_10_29[1] = { 0xE1 }; +static const symbol s_10_30[1] = { 0xE9 }; + +static const struct among a_10[31] = +{ +/* 0 */ { 1, s_10_0, -1, 18, 0}, +/* 1 */ { 2, s_10_1, 0, 17, 0}, +/* 2 */ { 1, s_10_2, -1, 16, 0}, +/* 3 */ { 2, s_10_3, 2, 13, 0}, +/* 4 */ { 2, s_10_4, 2, 13, 0}, +/* 5 */ { 2, s_10_5, 2, 13, 0}, +/* 6 */ { 2, s_10_6, 2, 14, 0}, +/* 7 */ { 2, s_10_7, 2, 15, 0}, +/* 8 */ { 2, s_10_8, 2, 13, 0}, +/* 9 */ { 1, s_10_9, -1, 18, 0}, +/* 10 */ { 2, s_10_10, 9, 17, 0}, +/* 11 */ { 2, s_10_11, -1, 4, 0}, +/* 12 */ { 3, s_10_12, 11, 1, 0}, +/* 13 */ { 3, s_10_13, 11, 2, 0}, +/* 14 */ { 3, s_10_14, 11, 3, 0}, +/* 15 */ { 3, s_10_15, 11, 1, 0}, +/* 16 */ { 2, s_10_16, -1, 8, 0}, +/* 17 */ { 3, s_10_17, 16, 7, 0}, +/* 18 */ { 4, s_10_18, 17, 5, 0}, +/* 19 */ { 2, s_10_19, -1, 8, 0}, +/* 20 */ { 3, s_10_20, 19, 7, 0}, +/* 21 */ { 4, s_10_21, 20, 6, 0}, +/* 22 */ { 1, s_10_22, -1, 12, 0}, +/* 23 */ { 2, s_10_23, 22, 9, 0}, +/* 24 */ { 2, s_10_24, 22, 9, 0}, +/* 25 */ { 2, s_10_25, 22, 9, 0}, +/* 26 */ { 2, s_10_26, 22, 10, 0}, +/* 27 */ { 2, s_10_27, 22, 11, 0}, +/* 28 */ { 1, s_10_28, -1, 18, 0}, +/* 29 */ { 1, s_10_29, -1, 19, 0}, +/* 30 */ { 1, s_10_30, -1, 20, 0} +}; + +static const symbol s_11_0[2] = { 'i', 'd' }; +static const symbol s_11_1[3] = { 'a', 'i', 'd' }; +static const symbol s_11_2[4] = { 'j', 'a', 'i', 'd' }; +static const symbol s_11_3[3] = { 'e', 'i', 'd' }; +static const symbol s_11_4[4] = { 'j', 'e', 'i', 'd' }; +static const symbol s_11_5[3] = { 0xE1, 'i', 'd' }; +static const symbol s_11_6[3] = { 0xE9, 'i', 'd' }; +static const symbol s_11_7[1] = { 'i' }; +static const symbol s_11_8[2] = { 'a', 'i' }; +static const symbol s_11_9[3] = { 'j', 'a', 'i' }; +static const symbol s_11_10[2] = { 'e', 'i' }; +static const symbol s_11_11[3] = { 'j', 'e', 'i' }; +static const symbol s_11_12[2] = { 0xE1, 'i' }; +static const symbol s_11_13[2] = { 0xE9, 'i' }; +static const symbol s_11_14[4] = { 'i', 't', 'e', 'k' }; +static const symbol s_11_15[5] = { 'e', 'i', 't', 'e', 'k' }; +static const symbol s_11_16[6] = { 'j', 'e', 'i', 't', 'e', 'k' }; +static const symbol s_11_17[5] = { 0xE9, 'i', 't', 'e', 'k' }; +static const symbol s_11_18[2] = { 'i', 'k' }; +static const symbol s_11_19[3] = { 'a', 'i', 'k' }; +static const symbol s_11_20[4] = { 'j', 'a', 'i', 'k' }; +static const symbol s_11_21[3] = { 'e', 'i', 'k' }; +static const symbol s_11_22[4] = { 'j', 'e', 'i', 'k' }; +static const symbol s_11_23[3] = { 0xE1, 'i', 'k' }; +static const symbol s_11_24[3] = { 0xE9, 'i', 'k' }; +static const symbol s_11_25[3] = { 'i', 'n', 'k' }; +static const symbol s_11_26[4] = { 'a', 'i', 'n', 'k' }; +static const symbol s_11_27[5] = { 'j', 'a', 'i', 'n', 'k' }; +static const symbol s_11_28[4] = { 'e', 'i', 'n', 'k' }; +static const symbol s_11_29[5] = { 'j', 'e', 'i', 'n', 'k' }; +static const symbol s_11_30[4] = { 0xE1, 'i', 'n', 'k' }; +static const symbol s_11_31[4] = { 0xE9, 'i', 'n', 'k' }; +static const symbol s_11_32[5] = { 'a', 'i', 't', 'o', 'k' }; +static const symbol s_11_33[6] = { 'j', 'a', 'i', 't', 'o', 'k' }; +static const symbol s_11_34[5] = { 0xE1, 'i', 't', 'o', 'k' }; +static const symbol s_11_35[2] = { 'i', 'm' }; +static const symbol s_11_36[3] = { 'a', 'i', 'm' }; +static const symbol s_11_37[4] = { 'j', 'a', 'i', 'm' }; +static const symbol s_11_38[3] = { 'e', 'i', 'm' }; +static const symbol s_11_39[4] = { 'j', 'e', 'i', 'm' }; +static const symbol s_11_40[3] = { 0xE1, 'i', 'm' }; +static const symbol s_11_41[3] = { 0xE9, 'i', 'm' }; + +static const struct among a_11[42] = +{ +/* 0 */ { 2, s_11_0, -1, 10, 0}, +/* 1 */ { 3, s_11_1, 0, 9, 0}, +/* 2 */ { 4, s_11_2, 1, 6, 0}, +/* 3 */ { 3, s_11_3, 0, 9, 0}, +/* 4 */ { 4, s_11_4, 3, 6, 0}, +/* 5 */ { 3, s_11_5, 0, 7, 0}, +/* 6 */ { 3, s_11_6, 0, 8, 0}, +/* 7 */ { 1, s_11_7, -1, 15, 0}, +/* 8 */ { 2, s_11_8, 7, 14, 0}, +/* 9 */ { 3, s_11_9, 8, 11, 0}, +/* 10 */ { 2, s_11_10, 7, 14, 0}, +/* 11 */ { 3, s_11_11, 10, 11, 0}, +/* 12 */ { 2, s_11_12, 7, 12, 0}, +/* 13 */ { 2, s_11_13, 7, 13, 0}, +/* 14 */ { 4, s_11_14, -1, 24, 0}, +/* 15 */ { 5, s_11_15, 14, 21, 0}, +/* 16 */ { 6, s_11_16, 15, 20, 0}, +/* 17 */ { 5, s_11_17, 14, 23, 0}, +/* 18 */ { 2, s_11_18, -1, 29, 0}, +/* 19 */ { 3, s_11_19, 18, 26, 0}, +/* 20 */ { 4, s_11_20, 19, 25, 0}, +/* 21 */ { 3, s_11_21, 18, 26, 0}, +/* 22 */ { 4, s_11_22, 21, 25, 0}, +/* 23 */ { 3, s_11_23, 18, 27, 0}, +/* 24 */ { 3, s_11_24, 18, 28, 0}, +/* 25 */ { 3, s_11_25, -1, 20, 0}, +/* 26 */ { 4, s_11_26, 25, 17, 0}, +/* 27 */ { 5, s_11_27, 26, 16, 0}, +/* 28 */ { 4, s_11_28, 25, 17, 0}, +/* 29 */ { 5, s_11_29, 28, 16, 0}, +/* 30 */ { 4, s_11_30, 25, 18, 0}, +/* 31 */ { 4, s_11_31, 25, 19, 0}, +/* 32 */ { 5, s_11_32, -1, 21, 0}, +/* 33 */ { 6, s_11_33, 32, 20, 0}, +/* 34 */ { 5, s_11_34, -1, 22, 0}, +/* 35 */ { 2, s_11_35, -1, 5, 0}, +/* 36 */ { 3, s_11_36, 35, 4, 0}, +/* 37 */ { 4, s_11_37, 36, 1, 0}, +/* 38 */ { 3, s_11_38, 35, 4, 0}, +/* 39 */ { 4, s_11_39, 38, 1, 0}, +/* 40 */ { 3, s_11_40, 35, 2, 0}, +/* 41 */ { 3, s_11_41, 35, 3, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; + +static const symbol s_0[] = { 'a' }; +static const symbol s_1[] = { 'e' }; +static const symbol s_2[] = { 'e' }; +static const symbol s_3[] = { 'a' }; +static const symbol s_4[] = { 'a' }; +static const symbol s_5[] = { 'a' }; +static const symbol s_6[] = { 'e' }; +static const symbol s_7[] = { 'a' }; +static const symbol s_8[] = { 'e' }; +static const symbol s_9[] = { 'e' }; +static const symbol s_10[] = { 'a' }; +static const symbol s_11[] = { 'e' }; +static const symbol s_12[] = { 'a' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'a' }; +static const symbol s_15[] = { 'e' }; +static const symbol s_16[] = { 'a' }; +static const symbol s_17[] = { 'e' }; +static const symbol s_18[] = { 'a' }; +static const symbol s_19[] = { 'e' }; +static const symbol s_20[] = { 'a' }; +static const symbol s_21[] = { 'e' }; +static const symbol s_22[] = { 'a' }; +static const symbol s_23[] = { 'e' }; +static const symbol s_24[] = { 'a' }; +static const symbol s_25[] = { 'e' }; +static const symbol s_26[] = { 'a' }; +static const symbol s_27[] = { 'e' }; +static const symbol s_28[] = { 'a' }; +static const symbol s_29[] = { 'e' }; +static const symbol s_30[] = { 'a' }; +static const symbol s_31[] = { 'e' }; +static const symbol s_32[] = { 'a' }; +static const symbol s_33[] = { 'e' }; +static const symbol s_34[] = { 'a' }; +static const symbol s_35[] = { 'e' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c1 = z->c; /* or, line 51 */ + if (in_grouping(z, g_v, 97, 252, 0)) goto lab1; + if (in_grouping(z, g_v, 97, 252, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */ + { int c2 = z->c; /* or, line 49 */ + if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 3 || !((101187584 >> (z->p[z->c + 1] & 0x1f)) & 1)) goto lab3; + if (!(find_among(z, a_0, 8))) goto lab3; /* among, line 49 */ + goto lab2; + lab3: + z->c = c2; + if (z->c >= z->l) goto lab1; + z->c++; /* next, line 49 */ + } + lab2: + z->I[0] = z->c; /* setmark p1, line 50 */ + goto lab0; + lab1: + z->c = c1; + if (out_grouping(z, g_v, 97, 252, 0)) return 0; + { /* gopast */ /* grouping v, line 53 */ + int ret = out_grouping(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 53 */ + } +lab0: + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_v_ending(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 61 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 225 && z->p[z->c - 1] != 233)) return 0; + among_var = find_among_b(z, a_1, 2); /* substring, line 61 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 61 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 61 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 1, s_0); /* <-, line 62 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 63 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_double(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 68 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((106790108 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_2, 23))) return 0; /* among, line 68 */ + z->c = z->l - m_test; + } + return 1; +} + +static int r_undouble(struct SN_env * z) { + if (z->c <= z->lb) return 0; + z->c--; /* next, line 73 */ + z->ket = z->c; /* [, line 73 */ + { int ret = z->c - 1; + if (z->lb > ret || ret > z->l) return 0; + z->c = ret; /* hop, line 73 */ + } + z->bra = z->c; /* ], line 73 */ + { int ret = slice_del(z); /* delete, line 73 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_instrum(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 77 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 108) return 0; + among_var = find_among_b(z, a_3, 2); /* substring, line 77 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 77 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 77 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 78 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 79 */ + if (ret < 0) return ret; + } + break; + } + { int ret = slice_del(z); /* delete, line 81 */ + if (ret < 0) return ret; + } + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 82 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_case(struct SN_env * z) { + z->ket = z->c; /* [, line 87 */ + if (!(find_among_b(z, a_4, 44))) return 0; /* substring, line 87 */ + z->bra = z->c; /* ], line 87 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 87 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 111 */ + if (ret < 0) return ret; + } + { int ret = r_v_ending(z); + if (ret == 0) return 0; /* call v_ending, line 112 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_case_special(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 116 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 110 && z->p[z->c - 1] != 116)) return 0; + among_var = find_among_b(z, a_5, 3); /* substring, line 116 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 116 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 116 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 117 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 118 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_case_other(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 124 */ + if (z->c - 3 <= z->lb || z->p[z->c - 1] != 108) return 0; + among_var = find_among_b(z, a_6, 6); /* substring, line 124 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 124 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 124 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 125 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 126 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_5); /* <-, line 127 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_6); /* <-, line 128 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_factive(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 133 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 225 && z->p[z->c - 1] != 233)) return 0; + among_var = find_among_b(z, a_7, 2); /* substring, line 133 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 133 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 133 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 134 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 135 */ + if (ret < 0) return ret; + } + break; + } + { int ret = slice_del(z); /* delete, line 137 */ + if (ret < 0) return ret; + } + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 138 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_plural(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 142 */ + if (z->c <= z->lb || z->p[z->c - 1] != 107) return 0; + among_var = find_among_b(z, a_8, 7); /* substring, line 142 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 142 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 142 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 1, s_7); /* <-, line 143 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_8); /* <-, line 144 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 145 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 146 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_del(z); /* delete, line 147 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_del(z); /* delete, line 148 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_del(z); /* delete, line 149 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_owned(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 154 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 233)) return 0; + among_var = find_among_b(z, a_9, 12); /* substring, line 154 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 154 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 154 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 155 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_9); /* <-, line 156 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_10); /* <-, line 157 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 158 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 159 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 160 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_del(z); /* delete, line 161 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 1, s_13); /* <-, line 162 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_del(z); /* delete, line 163 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_sing_owner(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 168 */ + among_var = find_among_b(z, a_10, 31); /* substring, line 168 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 168 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 168 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 169 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_14); /* <-, line 170 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_15); /* <-, line 171 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 172 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_16); /* <-, line 173 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 1, s_17); /* <-, line 174 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_del(z); /* delete, line 175 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_del(z); /* delete, line 176 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_del(z); /* delete, line 177 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 1, s_18); /* <-, line 178 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 1, s_19); /* <-, line 179 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_del(z); /* delete, line 180 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = slice_del(z); /* delete, line 181 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_from_s(z, 1, s_20); /* <-, line 182 */ + if (ret < 0) return ret; + } + break; + case 15: + { int ret = slice_from_s(z, 1, s_21); /* <-, line 183 */ + if (ret < 0) return ret; + } + break; + case 16: + { int ret = slice_del(z); /* delete, line 184 */ + if (ret < 0) return ret; + } + break; + case 17: + { int ret = slice_del(z); /* delete, line 185 */ + if (ret < 0) return ret; + } + break; + case 18: + { int ret = slice_del(z); /* delete, line 186 */ + if (ret < 0) return ret; + } + break; + case 19: + { int ret = slice_from_s(z, 1, s_22); /* <-, line 187 */ + if (ret < 0) return ret; + } + break; + case 20: + { int ret = slice_from_s(z, 1, s_23); /* <-, line 188 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_plur_owner(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 193 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((10768 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_11, 42); /* substring, line 193 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 193 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 193 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 194 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_24); /* <-, line 195 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_25); /* <-, line 196 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 197 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_del(z); /* delete, line 198 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_del(z); /* delete, line 199 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 1, s_26); /* <-, line 200 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 1, s_27); /* <-, line 201 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_del(z); /* delete, line 202 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_del(z); /* delete, line 203 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_del(z); /* delete, line 204 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_from_s(z, 1, s_28); /* <-, line 205 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = slice_from_s(z, 1, s_29); /* <-, line 206 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_del(z); /* delete, line 207 */ + if (ret < 0) return ret; + } + break; + case 15: + { int ret = slice_del(z); /* delete, line 208 */ + if (ret < 0) return ret; + } + break; + case 16: + { int ret = slice_del(z); /* delete, line 209 */ + if (ret < 0) return ret; + } + break; + case 17: + { int ret = slice_del(z); /* delete, line 210 */ + if (ret < 0) return ret; + } + break; + case 18: + { int ret = slice_from_s(z, 1, s_30); /* <-, line 211 */ + if (ret < 0) return ret; + } + break; + case 19: + { int ret = slice_from_s(z, 1, s_31); /* <-, line 212 */ + if (ret < 0) return ret; + } + break; + case 20: + { int ret = slice_del(z); /* delete, line 214 */ + if (ret < 0) return ret; + } + break; + case 21: + { int ret = slice_del(z); /* delete, line 215 */ + if (ret < 0) return ret; + } + break; + case 22: + { int ret = slice_from_s(z, 1, s_32); /* <-, line 216 */ + if (ret < 0) return ret; + } + break; + case 23: + { int ret = slice_from_s(z, 1, s_33); /* <-, line 217 */ + if (ret < 0) return ret; + } + break; + case 24: + { int ret = slice_del(z); /* delete, line 218 */ + if (ret < 0) return ret; + } + break; + case 25: + { int ret = slice_del(z); /* delete, line 219 */ + if (ret < 0) return ret; + } + break; + case 26: + { int ret = slice_del(z); /* delete, line 220 */ + if (ret < 0) return ret; + } + break; + case 27: + { int ret = slice_from_s(z, 1, s_34); /* <-, line 221 */ + if (ret < 0) return ret; + } + break; + case 28: + { int ret = slice_from_s(z, 1, s_35); /* <-, line 222 */ + if (ret < 0) return ret; + } + break; + case 29: + { int ret = slice_del(z); /* delete, line 223 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int hungarian_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 229 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 229 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 230 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 231 */ + { int ret = r_instrum(z); + if (ret == 0) goto lab1; /* call instrum, line 231 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 232 */ + { int ret = r_case(z); + if (ret == 0) goto lab2; /* call case, line 232 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 233 */ + { int ret = r_case_special(z); + if (ret == 0) goto lab3; /* call case_special, line 233 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 234 */ + { int ret = r_case_other(z); + if (ret == 0) goto lab4; /* call case_other, line 234 */ + if (ret < 0) return ret; + } + lab4: + z->c = z->l - m5; + } + { int m6 = z->l - z->c; (void)m6; /* do, line 235 */ + { int ret = r_factive(z); + if (ret == 0) goto lab5; /* call factive, line 235 */ + if (ret < 0) return ret; + } + lab5: + z->c = z->l - m6; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 236 */ + { int ret = r_owned(z); + if (ret == 0) goto lab6; /* call owned, line 236 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m7; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 237 */ + { int ret = r_sing_owner(z); + if (ret == 0) goto lab7; /* call sing_owner, line 237 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 238 */ + { int ret = r_plur_owner(z); + if (ret == 0) goto lab8; /* call plur_owner, line 238 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m9; + } + { int m10 = z->l - z->c; (void)m10; /* do, line 239 */ + { int ret = r_plural(z); + if (ret == 0) goto lab9; /* call plural, line 239 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m10; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * hungarian_ISO_8859_1_create_env(void) { return SN_create_env(0, 1, 0); } + +extern void hungarian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_italian.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_italian.c new file mode 100644 index 00000000000..d941b0f0363 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_italian.c @@ -0,0 +1,1065 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int italian_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_vowel_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_attached_pronoun(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * italian_ISO_8859_1_create_env(void); +extern void italian_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[2] = { 'q', 'u' }; +static const symbol s_0_2[1] = { 0xE1 }; +static const symbol s_0_3[1] = { 0xE9 }; +static const symbol s_0_4[1] = { 0xED }; +static const symbol s_0_5[1] = { 0xF3 }; +static const symbol s_0_6[1] = { 0xFA }; + +static const struct among a_0[7] = +{ +/* 0 */ { 0, 0, -1, 7, 0}, +/* 1 */ { 2, s_0_1, 0, 6, 0}, +/* 2 */ { 1, s_0_2, 0, 1, 0}, +/* 3 */ { 1, s_0_3, 0, 2, 0}, +/* 4 */ { 1, s_0_4, 0, 3, 0}, +/* 5 */ { 1, s_0_5, 0, 4, 0}, +/* 6 */ { 1, s_0_6, 0, 5, 0} +}; + +static const symbol s_1_1[1] = { 'I' }; +static const symbol s_1_2[1] = { 'U' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 1, s_1_1, 0, 1, 0}, +/* 2 */ { 1, s_1_2, 0, 2, 0} +}; + +static const symbol s_2_0[2] = { 'l', 'a' }; +static const symbol s_2_1[4] = { 'c', 'e', 'l', 'a' }; +static const symbol s_2_2[6] = { 'g', 'l', 'i', 'e', 'l', 'a' }; +static const symbol s_2_3[4] = { 'm', 'e', 'l', 'a' }; +static const symbol s_2_4[4] = { 't', 'e', 'l', 'a' }; +static const symbol s_2_5[4] = { 'v', 'e', 'l', 'a' }; +static const symbol s_2_6[2] = { 'l', 'e' }; +static const symbol s_2_7[4] = { 'c', 'e', 'l', 'e' }; +static const symbol s_2_8[6] = { 'g', 'l', 'i', 'e', 'l', 'e' }; +static const symbol s_2_9[4] = { 'm', 'e', 'l', 'e' }; +static const symbol s_2_10[4] = { 't', 'e', 'l', 'e' }; +static const symbol s_2_11[4] = { 'v', 'e', 'l', 'e' }; +static const symbol s_2_12[2] = { 'n', 'e' }; +static const symbol s_2_13[4] = { 'c', 'e', 'n', 'e' }; +static const symbol s_2_14[6] = { 'g', 'l', 'i', 'e', 'n', 'e' }; +static const symbol s_2_15[4] = { 'm', 'e', 'n', 'e' }; +static const symbol s_2_16[4] = { 's', 'e', 'n', 'e' }; +static const symbol s_2_17[4] = { 't', 'e', 'n', 'e' }; +static const symbol s_2_18[4] = { 'v', 'e', 'n', 'e' }; +static const symbol s_2_19[2] = { 'c', 'i' }; +static const symbol s_2_20[2] = { 'l', 'i' }; +static const symbol s_2_21[4] = { 'c', 'e', 'l', 'i' }; +static const symbol s_2_22[6] = { 'g', 'l', 'i', 'e', 'l', 'i' }; +static const symbol s_2_23[4] = { 'm', 'e', 'l', 'i' }; +static const symbol s_2_24[4] = { 't', 'e', 'l', 'i' }; +static const symbol s_2_25[4] = { 'v', 'e', 'l', 'i' }; +static const symbol s_2_26[3] = { 'g', 'l', 'i' }; +static const symbol s_2_27[2] = { 'm', 'i' }; +static const symbol s_2_28[2] = { 's', 'i' }; +static const symbol s_2_29[2] = { 't', 'i' }; +static const symbol s_2_30[2] = { 'v', 'i' }; +static const symbol s_2_31[2] = { 'l', 'o' }; +static const symbol s_2_32[4] = { 'c', 'e', 'l', 'o' }; +static const symbol s_2_33[6] = { 'g', 'l', 'i', 'e', 'l', 'o' }; +static const symbol s_2_34[4] = { 'm', 'e', 'l', 'o' }; +static const symbol s_2_35[4] = { 't', 'e', 'l', 'o' }; +static const symbol s_2_36[4] = { 'v', 'e', 'l', 'o' }; + +static const struct among a_2[37] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 4, s_2_1, 0, -1, 0}, +/* 2 */ { 6, s_2_2, 0, -1, 0}, +/* 3 */ { 4, s_2_3, 0, -1, 0}, +/* 4 */ { 4, s_2_4, 0, -1, 0}, +/* 5 */ { 4, s_2_5, 0, -1, 0}, +/* 6 */ { 2, s_2_6, -1, -1, 0}, +/* 7 */ { 4, s_2_7, 6, -1, 0}, +/* 8 */ { 6, s_2_8, 6, -1, 0}, +/* 9 */ { 4, s_2_9, 6, -1, 0}, +/* 10 */ { 4, s_2_10, 6, -1, 0}, +/* 11 */ { 4, s_2_11, 6, -1, 0}, +/* 12 */ { 2, s_2_12, -1, -1, 0}, +/* 13 */ { 4, s_2_13, 12, -1, 0}, +/* 14 */ { 6, s_2_14, 12, -1, 0}, +/* 15 */ { 4, s_2_15, 12, -1, 0}, +/* 16 */ { 4, s_2_16, 12, -1, 0}, +/* 17 */ { 4, s_2_17, 12, -1, 0}, +/* 18 */ { 4, s_2_18, 12, -1, 0}, +/* 19 */ { 2, s_2_19, -1, -1, 0}, +/* 20 */ { 2, s_2_20, -1, -1, 0}, +/* 21 */ { 4, s_2_21, 20, -1, 0}, +/* 22 */ { 6, s_2_22, 20, -1, 0}, +/* 23 */ { 4, s_2_23, 20, -1, 0}, +/* 24 */ { 4, s_2_24, 20, -1, 0}, +/* 25 */ { 4, s_2_25, 20, -1, 0}, +/* 26 */ { 3, s_2_26, 20, -1, 0}, +/* 27 */ { 2, s_2_27, -1, -1, 0}, +/* 28 */ { 2, s_2_28, -1, -1, 0}, +/* 29 */ { 2, s_2_29, -1, -1, 0}, +/* 30 */ { 2, s_2_30, -1, -1, 0}, +/* 31 */ { 2, s_2_31, -1, -1, 0}, +/* 32 */ { 4, s_2_32, 31, -1, 0}, +/* 33 */ { 6, s_2_33, 31, -1, 0}, +/* 34 */ { 4, s_2_34, 31, -1, 0}, +/* 35 */ { 4, s_2_35, 31, -1, 0}, +/* 36 */ { 4, s_2_36, 31, -1, 0} +}; + +static const symbol s_3_0[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_3_1[4] = { 'e', 'n', 'd', 'o' }; +static const symbol s_3_2[2] = { 'a', 'r' }; +static const symbol s_3_3[2] = { 'e', 'r' }; +static const symbol s_3_4[2] = { 'i', 'r' }; + +static const struct among a_3[5] = +{ +/* 0 */ { 4, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0}, +/* 2 */ { 2, s_3_2, -1, 2, 0}, +/* 3 */ { 2, s_3_3, -1, 2, 0}, +/* 4 */ { 2, s_3_4, -1, 2, 0} +}; + +static const symbol s_4_0[2] = { 'i', 'c' }; +static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_4_2[2] = { 'o', 's' }; +static const symbol s_4_3[2] = { 'i', 'v' }; + +static const struct among a_4[4] = +{ +/* 0 */ { 2, s_4_0, -1, -1, 0}, +/* 1 */ { 4, s_4_1, -1, -1, 0}, +/* 2 */ { 2, s_4_2, -1, -1, 0}, +/* 3 */ { 2, s_4_3, -1, 1, 0} +}; + +static const symbol s_5_0[2] = { 'i', 'c' }; +static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_5_2[2] = { 'i', 'v' }; + +static const struct among a_5[3] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 2, s_5_2, -1, 1, 0} +}; + +static const symbol s_6_0[3] = { 'i', 'c', 'a' }; +static const symbol s_6_1[5] = { 'l', 'o', 'g', 'i', 'a' }; +static const symbol s_6_2[3] = { 'o', 's', 'a' }; +static const symbol s_6_3[4] = { 'i', 's', 't', 'a' }; +static const symbol s_6_4[3] = { 'i', 'v', 'a' }; +static const symbol s_6_5[4] = { 'a', 'n', 'z', 'a' }; +static const symbol s_6_6[4] = { 'e', 'n', 'z', 'a' }; +static const symbol s_6_7[3] = { 'i', 'c', 'e' }; +static const symbol s_6_8[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; +static const symbol s_6_9[4] = { 'i', 'c', 'h', 'e' }; +static const symbol s_6_10[5] = { 'l', 'o', 'g', 'i', 'e' }; +static const symbol s_6_11[5] = { 'a', 'b', 'i', 'l', 'e' }; +static const symbol s_6_12[5] = { 'i', 'b', 'i', 'l', 'e' }; +static const symbol s_6_13[6] = { 'u', 's', 'i', 'o', 'n', 'e' }; +static const symbol s_6_14[6] = { 'a', 'z', 'i', 'o', 'n', 'e' }; +static const symbol s_6_15[6] = { 'u', 'z', 'i', 'o', 'n', 'e' }; +static const symbol s_6_16[5] = { 'a', 't', 'o', 'r', 'e' }; +static const symbol s_6_17[3] = { 'o', 's', 'e' }; +static const symbol s_6_18[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_6_19[5] = { 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_20[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_21[4] = { 'i', 's', 't', 'e' }; +static const symbol s_6_22[3] = { 'i', 'v', 'e' }; +static const symbol s_6_23[4] = { 'a', 'n', 'z', 'e' }; +static const symbol s_6_24[4] = { 'e', 'n', 'z', 'e' }; +static const symbol s_6_25[3] = { 'i', 'c', 'i' }; +static const symbol s_6_26[6] = { 'a', 't', 'r', 'i', 'c', 'i' }; +static const symbol s_6_27[4] = { 'i', 'c', 'h', 'i' }; +static const symbol s_6_28[5] = { 'a', 'b', 'i', 'l', 'i' }; +static const symbol s_6_29[5] = { 'i', 'b', 'i', 'l', 'i' }; +static const symbol s_6_30[4] = { 'i', 's', 'm', 'i' }; +static const symbol s_6_31[6] = { 'u', 's', 'i', 'o', 'n', 'i' }; +static const symbol s_6_32[6] = { 'a', 'z', 'i', 'o', 'n', 'i' }; +static const symbol s_6_33[6] = { 'u', 'z', 'i', 'o', 'n', 'i' }; +static const symbol s_6_34[5] = { 'a', 't', 'o', 'r', 'i' }; +static const symbol s_6_35[3] = { 'o', 's', 'i' }; +static const symbol s_6_36[4] = { 'a', 'n', 't', 'i' }; +static const symbol s_6_37[6] = { 'a', 'm', 'e', 'n', 't', 'i' }; +static const symbol s_6_38[6] = { 'i', 'm', 'e', 'n', 't', 'i' }; +static const symbol s_6_39[4] = { 'i', 's', 't', 'i' }; +static const symbol s_6_40[3] = { 'i', 'v', 'i' }; +static const symbol s_6_41[3] = { 'i', 'c', 'o' }; +static const symbol s_6_42[4] = { 'i', 's', 'm', 'o' }; +static const symbol s_6_43[3] = { 'o', 's', 'o' }; +static const symbol s_6_44[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_6_45[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_6_46[3] = { 'i', 'v', 'o' }; +static const symbol s_6_47[3] = { 'i', 't', 0xE0 }; +static const symbol s_6_48[4] = { 'i', 's', 't', 0xE0 }; +static const symbol s_6_49[4] = { 'i', 's', 't', 0xE8 }; +static const symbol s_6_50[4] = { 'i', 's', 't', 0xEC }; + +static const struct among a_6[51] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 5, s_6_1, -1, 3, 0}, +/* 2 */ { 3, s_6_2, -1, 1, 0}, +/* 3 */ { 4, s_6_3, -1, 1, 0}, +/* 4 */ { 3, s_6_4, -1, 9, 0}, +/* 5 */ { 4, s_6_5, -1, 1, 0}, +/* 6 */ { 4, s_6_6, -1, 5, 0}, +/* 7 */ { 3, s_6_7, -1, 1, 0}, +/* 8 */ { 6, s_6_8, 7, 1, 0}, +/* 9 */ { 4, s_6_9, -1, 1, 0}, +/* 10 */ { 5, s_6_10, -1, 3, 0}, +/* 11 */ { 5, s_6_11, -1, 1, 0}, +/* 12 */ { 5, s_6_12, -1, 1, 0}, +/* 13 */ { 6, s_6_13, -1, 4, 0}, +/* 14 */ { 6, s_6_14, -1, 2, 0}, +/* 15 */ { 6, s_6_15, -1, 4, 0}, +/* 16 */ { 5, s_6_16, -1, 2, 0}, +/* 17 */ { 3, s_6_17, -1, 1, 0}, +/* 18 */ { 4, s_6_18, -1, 1, 0}, +/* 19 */ { 5, s_6_19, -1, 1, 0}, +/* 20 */ { 6, s_6_20, 19, 7, 0}, +/* 21 */ { 4, s_6_21, -1, 1, 0}, +/* 22 */ { 3, s_6_22, -1, 9, 0}, +/* 23 */ { 4, s_6_23, -1, 1, 0}, +/* 24 */ { 4, s_6_24, -1, 5, 0}, +/* 25 */ { 3, s_6_25, -1, 1, 0}, +/* 26 */ { 6, s_6_26, 25, 1, 0}, +/* 27 */ { 4, s_6_27, -1, 1, 0}, +/* 28 */ { 5, s_6_28, -1, 1, 0}, +/* 29 */ { 5, s_6_29, -1, 1, 0}, +/* 30 */ { 4, s_6_30, -1, 1, 0}, +/* 31 */ { 6, s_6_31, -1, 4, 0}, +/* 32 */ { 6, s_6_32, -1, 2, 0}, +/* 33 */ { 6, s_6_33, -1, 4, 0}, +/* 34 */ { 5, s_6_34, -1, 2, 0}, +/* 35 */ { 3, s_6_35, -1, 1, 0}, +/* 36 */ { 4, s_6_36, -1, 1, 0}, +/* 37 */ { 6, s_6_37, -1, 6, 0}, +/* 38 */ { 6, s_6_38, -1, 6, 0}, +/* 39 */ { 4, s_6_39, -1, 1, 0}, +/* 40 */ { 3, s_6_40, -1, 9, 0}, +/* 41 */ { 3, s_6_41, -1, 1, 0}, +/* 42 */ { 4, s_6_42, -1, 1, 0}, +/* 43 */ { 3, s_6_43, -1, 1, 0}, +/* 44 */ { 6, s_6_44, -1, 6, 0}, +/* 45 */ { 6, s_6_45, -1, 6, 0}, +/* 46 */ { 3, s_6_46, -1, 9, 0}, +/* 47 */ { 3, s_6_47, -1, 8, 0}, +/* 48 */ { 4, s_6_48, -1, 1, 0}, +/* 49 */ { 4, s_6_49, -1, 1, 0}, +/* 50 */ { 4, s_6_50, -1, 1, 0} +}; + +static const symbol s_7_0[4] = { 'i', 's', 'c', 'a' }; +static const symbol s_7_1[4] = { 'e', 'n', 'd', 'a' }; +static const symbol s_7_2[3] = { 'a', 't', 'a' }; +static const symbol s_7_3[3] = { 'i', 't', 'a' }; +static const symbol s_7_4[3] = { 'u', 't', 'a' }; +static const symbol s_7_5[3] = { 'a', 'v', 'a' }; +static const symbol s_7_6[3] = { 'e', 'v', 'a' }; +static const symbol s_7_7[3] = { 'i', 'v', 'a' }; +static const symbol s_7_8[6] = { 'e', 'r', 'e', 'b', 'b', 'e' }; +static const symbol s_7_9[6] = { 'i', 'r', 'e', 'b', 'b', 'e' }; +static const symbol s_7_10[4] = { 'i', 's', 'c', 'e' }; +static const symbol s_7_11[4] = { 'e', 'n', 'd', 'e' }; +static const symbol s_7_12[3] = { 'a', 'r', 'e' }; +static const symbol s_7_13[3] = { 'e', 'r', 'e' }; +static const symbol s_7_14[3] = { 'i', 'r', 'e' }; +static const symbol s_7_15[4] = { 'a', 's', 's', 'e' }; +static const symbol s_7_16[3] = { 'a', 't', 'e' }; +static const symbol s_7_17[5] = { 'a', 'v', 'a', 't', 'e' }; +static const symbol s_7_18[5] = { 'e', 'v', 'a', 't', 'e' }; +static const symbol s_7_19[5] = { 'i', 'v', 'a', 't', 'e' }; +static const symbol s_7_20[3] = { 'e', 't', 'e' }; +static const symbol s_7_21[5] = { 'e', 'r', 'e', 't', 'e' }; +static const symbol s_7_22[5] = { 'i', 'r', 'e', 't', 'e' }; +static const symbol s_7_23[3] = { 'i', 't', 'e' }; +static const symbol s_7_24[6] = { 'e', 'r', 'e', 's', 't', 'e' }; +static const symbol s_7_25[6] = { 'i', 'r', 'e', 's', 't', 'e' }; +static const symbol s_7_26[3] = { 'u', 't', 'e' }; +static const symbol s_7_27[4] = { 'e', 'r', 'a', 'i' }; +static const symbol s_7_28[4] = { 'i', 'r', 'a', 'i' }; +static const symbol s_7_29[4] = { 'i', 's', 'c', 'i' }; +static const symbol s_7_30[4] = { 'e', 'n', 'd', 'i' }; +static const symbol s_7_31[4] = { 'e', 'r', 'e', 'i' }; +static const symbol s_7_32[4] = { 'i', 'r', 'e', 'i' }; +static const symbol s_7_33[4] = { 'a', 's', 's', 'i' }; +static const symbol s_7_34[3] = { 'a', 't', 'i' }; +static const symbol s_7_35[3] = { 'i', 't', 'i' }; +static const symbol s_7_36[6] = { 'e', 'r', 'e', 's', 't', 'i' }; +static const symbol s_7_37[6] = { 'i', 'r', 'e', 's', 't', 'i' }; +static const symbol s_7_38[3] = { 'u', 't', 'i' }; +static const symbol s_7_39[3] = { 'a', 'v', 'i' }; +static const symbol s_7_40[3] = { 'e', 'v', 'i' }; +static const symbol s_7_41[3] = { 'i', 'v', 'i' }; +static const symbol s_7_42[4] = { 'i', 's', 'c', 'o' }; +static const symbol s_7_43[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_7_44[4] = { 'e', 'n', 'd', 'o' }; +static const symbol s_7_45[4] = { 'Y', 'a', 'm', 'o' }; +static const symbol s_7_46[4] = { 'i', 'a', 'm', 'o' }; +static const symbol s_7_47[5] = { 'a', 'v', 'a', 'm', 'o' }; +static const symbol s_7_48[5] = { 'e', 'v', 'a', 'm', 'o' }; +static const symbol s_7_49[5] = { 'i', 'v', 'a', 'm', 'o' }; +static const symbol s_7_50[5] = { 'e', 'r', 'e', 'm', 'o' }; +static const symbol s_7_51[5] = { 'i', 'r', 'e', 'm', 'o' }; +static const symbol s_7_52[6] = { 'a', 's', 's', 'i', 'm', 'o' }; +static const symbol s_7_53[4] = { 'a', 'm', 'm', 'o' }; +static const symbol s_7_54[4] = { 'e', 'm', 'm', 'o' }; +static const symbol s_7_55[6] = { 'e', 'r', 'e', 'm', 'm', 'o' }; +static const symbol s_7_56[6] = { 'i', 'r', 'e', 'm', 'm', 'o' }; +static const symbol s_7_57[4] = { 'i', 'm', 'm', 'o' }; +static const symbol s_7_58[3] = { 'a', 'n', 'o' }; +static const symbol s_7_59[6] = { 'i', 's', 'c', 'a', 'n', 'o' }; +static const symbol s_7_60[5] = { 'a', 'v', 'a', 'n', 'o' }; +static const symbol s_7_61[5] = { 'e', 'v', 'a', 'n', 'o' }; +static const symbol s_7_62[5] = { 'i', 'v', 'a', 'n', 'o' }; +static const symbol s_7_63[6] = { 'e', 'r', 'a', 'n', 'n', 'o' }; +static const symbol s_7_64[6] = { 'i', 'r', 'a', 'n', 'n', 'o' }; +static const symbol s_7_65[3] = { 'o', 'n', 'o' }; +static const symbol s_7_66[6] = { 'i', 's', 'c', 'o', 'n', 'o' }; +static const symbol s_7_67[5] = { 'a', 'r', 'o', 'n', 'o' }; +static const symbol s_7_68[5] = { 'e', 'r', 'o', 'n', 'o' }; +static const symbol s_7_69[5] = { 'i', 'r', 'o', 'n', 'o' }; +static const symbol s_7_70[8] = { 'e', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; +static const symbol s_7_71[8] = { 'i', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; +static const symbol s_7_72[6] = { 'a', 's', 's', 'e', 'r', 'o' }; +static const symbol s_7_73[6] = { 'e', 's', 's', 'e', 'r', 'o' }; +static const symbol s_7_74[6] = { 'i', 's', 's', 'e', 'r', 'o' }; +static const symbol s_7_75[3] = { 'a', 't', 'o' }; +static const symbol s_7_76[3] = { 'i', 't', 'o' }; +static const symbol s_7_77[3] = { 'u', 't', 'o' }; +static const symbol s_7_78[3] = { 'a', 'v', 'o' }; +static const symbol s_7_79[3] = { 'e', 'v', 'o' }; +static const symbol s_7_80[3] = { 'i', 'v', 'o' }; +static const symbol s_7_81[2] = { 'a', 'r' }; +static const symbol s_7_82[2] = { 'i', 'r' }; +static const symbol s_7_83[3] = { 'e', 'r', 0xE0 }; +static const symbol s_7_84[3] = { 'i', 'r', 0xE0 }; +static const symbol s_7_85[3] = { 'e', 'r', 0xF2 }; +static const symbol s_7_86[3] = { 'i', 'r', 0xF2 }; + +static const struct among a_7[87] = +{ +/* 0 */ { 4, s_7_0, -1, 1, 0}, +/* 1 */ { 4, s_7_1, -1, 1, 0}, +/* 2 */ { 3, s_7_2, -1, 1, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 3, s_7_4, -1, 1, 0}, +/* 5 */ { 3, s_7_5, -1, 1, 0}, +/* 6 */ { 3, s_7_6, -1, 1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 6, s_7_8, -1, 1, 0}, +/* 9 */ { 6, s_7_9, -1, 1, 0}, +/* 10 */ { 4, s_7_10, -1, 1, 0}, +/* 11 */ { 4, s_7_11, -1, 1, 0}, +/* 12 */ { 3, s_7_12, -1, 1, 0}, +/* 13 */ { 3, s_7_13, -1, 1, 0}, +/* 14 */ { 3, s_7_14, -1, 1, 0}, +/* 15 */ { 4, s_7_15, -1, 1, 0}, +/* 16 */ { 3, s_7_16, -1, 1, 0}, +/* 17 */ { 5, s_7_17, 16, 1, 0}, +/* 18 */ { 5, s_7_18, 16, 1, 0}, +/* 19 */ { 5, s_7_19, 16, 1, 0}, +/* 20 */ { 3, s_7_20, -1, 1, 0}, +/* 21 */ { 5, s_7_21, 20, 1, 0}, +/* 22 */ { 5, s_7_22, 20, 1, 0}, +/* 23 */ { 3, s_7_23, -1, 1, 0}, +/* 24 */ { 6, s_7_24, -1, 1, 0}, +/* 25 */ { 6, s_7_25, -1, 1, 0}, +/* 26 */ { 3, s_7_26, -1, 1, 0}, +/* 27 */ { 4, s_7_27, -1, 1, 0}, +/* 28 */ { 4, s_7_28, -1, 1, 0}, +/* 29 */ { 4, s_7_29, -1, 1, 0}, +/* 30 */ { 4, s_7_30, -1, 1, 0}, +/* 31 */ { 4, s_7_31, -1, 1, 0}, +/* 32 */ { 4, s_7_32, -1, 1, 0}, +/* 33 */ { 4, s_7_33, -1, 1, 0}, +/* 34 */ { 3, s_7_34, -1, 1, 0}, +/* 35 */ { 3, s_7_35, -1, 1, 0}, +/* 36 */ { 6, s_7_36, -1, 1, 0}, +/* 37 */ { 6, s_7_37, -1, 1, 0}, +/* 38 */ { 3, s_7_38, -1, 1, 0}, +/* 39 */ { 3, s_7_39, -1, 1, 0}, +/* 40 */ { 3, s_7_40, -1, 1, 0}, +/* 41 */ { 3, s_7_41, -1, 1, 0}, +/* 42 */ { 4, s_7_42, -1, 1, 0}, +/* 43 */ { 4, s_7_43, -1, 1, 0}, +/* 44 */ { 4, s_7_44, -1, 1, 0}, +/* 45 */ { 4, s_7_45, -1, 1, 0}, +/* 46 */ { 4, s_7_46, -1, 1, 0}, +/* 47 */ { 5, s_7_47, -1, 1, 0}, +/* 48 */ { 5, s_7_48, -1, 1, 0}, +/* 49 */ { 5, s_7_49, -1, 1, 0}, +/* 50 */ { 5, s_7_50, -1, 1, 0}, +/* 51 */ { 5, s_7_51, -1, 1, 0}, +/* 52 */ { 6, s_7_52, -1, 1, 0}, +/* 53 */ { 4, s_7_53, -1, 1, 0}, +/* 54 */ { 4, s_7_54, -1, 1, 0}, +/* 55 */ { 6, s_7_55, 54, 1, 0}, +/* 56 */ { 6, s_7_56, 54, 1, 0}, +/* 57 */ { 4, s_7_57, -1, 1, 0}, +/* 58 */ { 3, s_7_58, -1, 1, 0}, +/* 59 */ { 6, s_7_59, 58, 1, 0}, +/* 60 */ { 5, s_7_60, 58, 1, 0}, +/* 61 */ { 5, s_7_61, 58, 1, 0}, +/* 62 */ { 5, s_7_62, 58, 1, 0}, +/* 63 */ { 6, s_7_63, -1, 1, 0}, +/* 64 */ { 6, s_7_64, -1, 1, 0}, +/* 65 */ { 3, s_7_65, -1, 1, 0}, +/* 66 */ { 6, s_7_66, 65, 1, 0}, +/* 67 */ { 5, s_7_67, 65, 1, 0}, +/* 68 */ { 5, s_7_68, 65, 1, 0}, +/* 69 */ { 5, s_7_69, 65, 1, 0}, +/* 70 */ { 8, s_7_70, -1, 1, 0}, +/* 71 */ { 8, s_7_71, -1, 1, 0}, +/* 72 */ { 6, s_7_72, -1, 1, 0}, +/* 73 */ { 6, s_7_73, -1, 1, 0}, +/* 74 */ { 6, s_7_74, -1, 1, 0}, +/* 75 */ { 3, s_7_75, -1, 1, 0}, +/* 76 */ { 3, s_7_76, -1, 1, 0}, +/* 77 */ { 3, s_7_77, -1, 1, 0}, +/* 78 */ { 3, s_7_78, -1, 1, 0}, +/* 79 */ { 3, s_7_79, -1, 1, 0}, +/* 80 */ { 3, s_7_80, -1, 1, 0}, +/* 81 */ { 2, s_7_81, -1, 1, 0}, +/* 82 */ { 2, s_7_82, -1, 1, 0}, +/* 83 */ { 3, s_7_83, -1, 1, 0}, +/* 84 */ { 3, s_7_84, -1, 1, 0}, +/* 85 */ { 3, s_7_85, -1, 1, 0}, +/* 86 */ { 3, s_7_86, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; + +static const unsigned char g_AEIO[] = { 17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; + +static const unsigned char g_CG[] = { 17 }; + +static const symbol s_0[] = { 0xE0 }; +static const symbol s_1[] = { 0xE8 }; +static const symbol s_2[] = { 0xEC }; +static const symbol s_3[] = { 0xF2 }; +static const symbol s_4[] = { 0xF9 }; +static const symbol s_5[] = { 'q', 'U' }; +static const symbol s_6[] = { 'u' }; +static const symbol s_7[] = { 'U' }; +static const symbol s_8[] = { 'i' }; +static const symbol s_9[] = { 'I' }; +static const symbol s_10[] = { 'i' }; +static const symbol s_11[] = { 'u' }; +static const symbol s_12[] = { 'e' }; +static const symbol s_13[] = { 'i', 'c' }; +static const symbol s_14[] = { 'l', 'o', 'g' }; +static const symbol s_15[] = { 'u' }; +static const symbol s_16[] = { 'e', 'n', 't', 'e' }; +static const symbol s_17[] = { 'a', 't' }; +static const symbol s_18[] = { 'a', 't' }; +static const symbol s_19[] = { 'i', 'c' }; +static const symbol s_20[] = { 'i' }; +static const symbol s_21[] = { 'h' }; + +static int r_prelude(struct SN_env * z) { + int among_var; + { int c_test = z->c; /* test, line 35 */ + while(1) { /* repeat, line 35 */ + int c1 = z->c; + z->bra = z->c; /* [, line 36 */ + among_var = find_among(z, a_0, 7); /* substring, line 36 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 36 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_0); /* <-, line 37 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 38 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 39 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 40 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 41 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 2, s_5); /* <-, line 42 */ + if (ret < 0) return ret; + } + break; + case 7: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 43 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + z->c = c_test; + } + while(1) { /* repeat, line 46 */ + int c2 = z->c; + while(1) { /* goto, line 46 */ + int c3 = z->c; + if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; + z->bra = z->c; /* [, line 47 */ + { int c4 = z->c; /* or, line 47 */ + if (!(eq_s(z, 1, s_6))) goto lab4; + z->ket = z->c; /* ], line 47 */ + if (in_grouping(z, g_v, 97, 249, 0)) goto lab4; + { int ret = slice_from_s(z, 1, s_7); /* <-, line 47 */ + if (ret < 0) return ret; + } + goto lab3; + lab4: + z->c = c4; + if (!(eq_s(z, 1, s_8))) goto lab2; + z->ket = z->c; /* ], line 48 */ + if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; + { int ret = slice_from_s(z, 1, s_9); /* <-, line 48 */ + if (ret < 0) return ret; + } + } + lab3: + z->c = c3; + break; + lab2: + z->c = c3; + if (z->c >= z->l) goto lab1; + z->c++; /* goto, line 46 */ + } + continue; + lab1: + z->c = c2; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 58 */ + { int c2 = z->c; /* or, line 60 */ + if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; + { int c3 = z->c; /* or, line 59 */ + if (out_grouping(z, g_v, 97, 249, 0)) goto lab4; + { /* gopast */ /* grouping v, line 59 */ + int ret = out_grouping(z, g_v, 97, 249, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; + { /* gopast */ /* non v, line 59 */ + int ret = in_grouping(z, g_v, 97, 249, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping(z, g_v, 97, 249, 0)) goto lab0; + { int c4 = z->c; /* or, line 61 */ + if (out_grouping(z, g_v, 97, 249, 0)) goto lab6; + { /* gopast */ /* grouping v, line 61 */ + int ret = out_grouping(z, g_v, 97, 249, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping(z, g_v, 97, 249, 0)) goto lab0; + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 61 */ + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 62 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 64 */ + { /* gopast */ /* grouping v, line 65 */ + int ret = out_grouping(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 65 */ + int ret = in_grouping(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 65 */ + { /* gopast */ /* grouping v, line 66 */ + int ret = out_grouping(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 66 */ + int ret = in_grouping(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 66 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 70 */ + int c1 = z->c; + z->bra = z->c; /* [, line 72 */ + if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else + among_var = find_among(z, a_1, 3); /* substring, line 72 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 72 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_10); /* <-, line 73 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 75 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_attached_pronoun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 87 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33314 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_2, 37))) return 0; /* substring, line 87 */ + z->bra = z->c; /* ], line 87 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; + among_var = find_among_b(z, a_3, 5); /* among, line 97 */ + if (!(among_var)) return 0; + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 97 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 98 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 99 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 104 */ + among_var = find_among_b(z, a_6, 51); /* substring, line 104 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 104 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 111 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 111 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 113 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 113 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ + z->ket = z->c; /* [, line 114 */ + if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 114 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 114 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 114 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 117 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_14); /* <-, line 117 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 119 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_15); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 121 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_16); /* <-, line 121 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 123 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 123 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 125 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 125 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 126 */ + z->ket = z->c; /* [, line 127 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4722696 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } + among_var = find_among_b(z, a_4, 4); /* substring, line 127 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 127 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 127 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab1; } + case 1: + z->ket = z->c; /* [, line 128 */ + if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 128 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 128 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 128 */ + if (ret < 0) return ret; + } + break; + } + lab1: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 134 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ + z->ket = z->c; /* [, line 136 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } + among_var = find_among_b(z, a_5, 3); /* substring, line 136 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } + z->bra = z->c; /* ], line 136 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab2; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 137 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 137 */ + if (ret < 0) return ret; + } + break; + } + lab2: + ; + } + break; + case 9: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 142 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 143 */ + z->ket = z->c; /* [, line 143 */ + if (!(eq_s_b(z, 2, s_18))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 143 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 143 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 143 */ + if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 143 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 143 */ + if (ret < 0) return ret; + } + lab3: + ; + } + break; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 148 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 148 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 149 */ + among_var = find_among_b(z, a_7, 87); /* substring, line 149 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 149 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = slice_del(z); /* delete, line 163 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_vowel_suffix(struct SN_env * z) { + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 171 */ + z->ket = z->c; /* [, line 172 */ + if (in_grouping_b(z, g_AEIO, 97, 242, 0)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 172 */ + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 172 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 172 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 173 */ + if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 173 */ + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 173 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 173 */ + if (ret < 0) return ret; + } + lab0: + ; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 175 */ + z->ket = z->c; /* [, line 176 */ + if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 176 */ + if (in_grouping_b(z, g_CG, 99, 103, 0)) { z->c = z->l - m_keep; goto lab1; } + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call RV, line 176 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 176 */ + if (ret < 0) return ret; + } + lab1: + ; + } + return 1; +} + +extern int italian_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 182 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 182 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 183 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 183 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 184 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 185 */ + { int ret = r_attached_pronoun(z); + if (ret == 0) goto lab2; /* call attached_pronoun, line 185 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 186 */ + { int m5 = z->l - z->c; (void)m5; /* or, line 186 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab5; /* call standard_suffix, line 186 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = z->l - m5; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab3; /* call verb_suffix, line 186 */ + if (ret < 0) return ret; + } + } + lab4: + lab3: + z->c = z->l - m4; + } + { int m6 = z->l - z->c; (void)m6; /* do, line 187 */ + { int ret = r_vowel_suffix(z); + if (ret == 0) goto lab6; /* call vowel_suffix, line 187 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m6; + } + z->c = z->lb; + { int c7 = z->c; /* do, line 189 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab7; /* call postlude, line 189 */ + if (ret < 0) return ret; + } + lab7: + z->c = c7; + } + return 1; +} + +extern struct SN_env * italian_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void italian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_norwegian.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_norwegian.c new file mode 100644 index 00000000000..2debf1082d8 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_norwegian.c @@ -0,0 +1,297 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int norwegian_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_other_suffix(struct SN_env * z); +static int r_consonant_pair(struct SN_env * z); +static int r_main_suffix(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * norwegian_ISO_8859_1_create_env(void); +extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[1] = { 'a' }; +static const symbol s_0_1[1] = { 'e' }; +static const symbol s_0_2[3] = { 'e', 'd', 'e' }; +static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; +static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; +static const symbol s_0_5[3] = { 'a', 'n', 'e' }; +static const symbol s_0_6[3] = { 'e', 'n', 'e' }; +static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; +static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; +static const symbol s_0_9[2] = { 'e', 'n' }; +static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; +static const symbol s_0_11[2] = { 'a', 'r' }; +static const symbol s_0_12[2] = { 'e', 'r' }; +static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; +static const symbol s_0_14[1] = { 's' }; +static const symbol s_0_15[2] = { 'a', 's' }; +static const symbol s_0_16[2] = { 'e', 's' }; +static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; +static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; +static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; +static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; +static const symbol s_0_21[3] = { 'e', 'n', 's' }; +static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; +static const symbol s_0_23[3] = { 'e', 'r', 's' }; +static const symbol s_0_24[3] = { 'e', 't', 's' }; +static const symbol s_0_25[2] = { 'e', 't' }; +static const symbol s_0_26[3] = { 'h', 'e', 't' }; +static const symbol s_0_27[3] = { 'e', 'r', 't' }; +static const symbol s_0_28[3] = { 'a', 's', 't' }; + +static const struct among a_0[29] = +{ +/* 0 */ { 1, s_0_0, -1, 1, 0}, +/* 1 */ { 1, s_0_1, -1, 1, 0}, +/* 2 */ { 3, s_0_2, 1, 1, 0}, +/* 3 */ { 4, s_0_3, 1, 1, 0}, +/* 4 */ { 4, s_0_4, 1, 1, 0}, +/* 5 */ { 3, s_0_5, 1, 1, 0}, +/* 6 */ { 3, s_0_6, 1, 1, 0}, +/* 7 */ { 6, s_0_7, 6, 1, 0}, +/* 8 */ { 4, s_0_8, 1, 3, 0}, +/* 9 */ { 2, s_0_9, -1, 1, 0}, +/* 10 */ { 5, s_0_10, 9, 1, 0}, +/* 11 */ { 2, s_0_11, -1, 1, 0}, +/* 12 */ { 2, s_0_12, -1, 1, 0}, +/* 13 */ { 5, s_0_13, 12, 1, 0}, +/* 14 */ { 1, s_0_14, -1, 2, 0}, +/* 15 */ { 2, s_0_15, 14, 1, 0}, +/* 16 */ { 2, s_0_16, 14, 1, 0}, +/* 17 */ { 4, s_0_17, 16, 1, 0}, +/* 18 */ { 5, s_0_18, 16, 1, 0}, +/* 19 */ { 4, s_0_19, 16, 1, 0}, +/* 20 */ { 7, s_0_20, 19, 1, 0}, +/* 21 */ { 3, s_0_21, 14, 1, 0}, +/* 22 */ { 6, s_0_22, 21, 1, 0}, +/* 23 */ { 3, s_0_23, 14, 1, 0}, +/* 24 */ { 3, s_0_24, 14, 1, 0}, +/* 25 */ { 2, s_0_25, -1, 1, 0}, +/* 26 */ { 3, s_0_26, 25, 1, 0}, +/* 27 */ { 3, s_0_27, -1, 3, 0}, +/* 28 */ { 3, s_0_28, -1, 1, 0} +}; + +static const symbol s_1_0[2] = { 'd', 't' }; +static const symbol s_1_1[2] = { 'v', 't' }; + +static const struct among a_1[2] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0} +}; + +static const symbol s_2_0[3] = { 'l', 'e', 'g' }; +static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; +static const symbol s_2_2[2] = { 'i', 'g' }; +static const symbol s_2_3[3] = { 'e', 'i', 'g' }; +static const symbol s_2_4[3] = { 'l', 'i', 'g' }; +static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; +static const symbol s_2_6[3] = { 'e', 'l', 's' }; +static const symbol s_2_7[3] = { 'l', 'o', 'v' }; +static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; +static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; +static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; + +static const struct among a_2[11] = +{ +/* 0 */ { 3, s_2_0, -1, 1, 0}, +/* 1 */ { 4, s_2_1, 0, 1, 0}, +/* 2 */ { 2, s_2_2, -1, 1, 0}, +/* 3 */ { 3, s_2_3, 2, 1, 0}, +/* 4 */ { 3, s_2_4, 2, 1, 0}, +/* 5 */ { 4, s_2_5, 4, 1, 0}, +/* 6 */ { 3, s_2_6, -1, 1, 0}, +/* 7 */ { 3, s_2_7, -1, 1, 0}, +/* 8 */ { 4, s_2_8, 7, 1, 0}, +/* 9 */ { 4, s_2_9, 7, 1, 0}, +/* 10 */ { 7, s_2_10, 9, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + +static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; + +static const symbol s_0[] = { 'k' }; +static const symbol s_1[] = { 'e', 'r' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c_test = z->c; /* test, line 30 */ + { int ret = z->c + 3; + if (0 > ret || ret > z->l) return 0; + z->c = ret; /* hop, line 30 */ + } + z->I[1] = z->c; /* setmark x, line 30 */ + z->c = c_test; + } + if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ + { /* gopast */ /* non v, line 31 */ + int ret = in_grouping(z, g_v, 97, 248, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 31 */ + /* try, line 32 */ + if (!(z->I[0] < z->I[1])) goto lab0; + z->I[0] = z->I[1]; +lab0: + return 1; +} + +static int r_main_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 38 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 38 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 38 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 38 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 44 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ + if (in_grouping_b(z, g_s_ending, 98, 122, 0)) goto lab1; + goto lab0; + lab1: + z->c = z->l - m2; + if (!(eq_s_b(z, 1, s_0))) return 0; + if (out_grouping_b(z, g_v, 97, 248, 0)) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 46 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_consonant_pair(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 53 */ + { int mlimit; /* setlimit, line 54 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 54 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 54 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ + z->bra = z->c; /* ], line 54 */ + z->lb = mlimit; + } + z->c = z->l - m_test; + } + if (z->c <= z->lb) return 0; + z->c--; /* next, line 59 */ + z->bra = z->c; /* ], line 59 */ + { int ret = slice_del(z); /* delete, line 59 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_other_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 63 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 63 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 63 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 63 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 67 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int norwegian_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 74 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 74 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 75 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ + { int ret = r_main_suffix(z); + if (ret == 0) goto lab1; /* call main_suffix, line 76 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ + { int ret = r_other_suffix(z); + if (ret == 0) goto lab3; /* call other_suffix, line 78 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * norwegian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } + +extern void norwegian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_porter.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_porter.c new file mode 100644 index 00000000000..69e4fc4c1f2 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_porter.c @@ -0,0 +1,749 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int porter_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_Step_5b(struct SN_env * z); +static int r_Step_5a(struct SN_env * z); +static int r_Step_4(struct SN_env * z); +static int r_Step_3(struct SN_env * z); +static int r_Step_2(struct SN_env * z); +static int r_Step_1c(struct SN_env * z); +static int r_Step_1b(struct SN_env * z); +static int r_Step_1a(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_shortv(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * porter_ISO_8859_1_create_env(void); +extern void porter_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[1] = { 's' }; +static const symbol s_0_1[3] = { 'i', 'e', 's' }; +static const symbol s_0_2[4] = { 's', 's', 'e', 's' }; +static const symbol s_0_3[2] = { 's', 's' }; + +static const struct among a_0[4] = +{ +/* 0 */ { 1, s_0_0, -1, 3, 0}, +/* 1 */ { 3, s_0_1, 0, 2, 0}, +/* 2 */ { 4, s_0_2, 0, 1, 0}, +/* 3 */ { 2, s_0_3, 0, -1, 0} +}; + +static const symbol s_1_1[2] = { 'b', 'b' }; +static const symbol s_1_2[2] = { 'd', 'd' }; +static const symbol s_1_3[2] = { 'f', 'f' }; +static const symbol s_1_4[2] = { 'g', 'g' }; +static const symbol s_1_5[2] = { 'b', 'l' }; +static const symbol s_1_6[2] = { 'm', 'm' }; +static const symbol s_1_7[2] = { 'n', 'n' }; +static const symbol s_1_8[2] = { 'p', 'p' }; +static const symbol s_1_9[2] = { 'r', 'r' }; +static const symbol s_1_10[2] = { 'a', 't' }; +static const symbol s_1_11[2] = { 't', 't' }; +static const symbol s_1_12[2] = { 'i', 'z' }; + +static const struct among a_1[13] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_1_1, 0, 2, 0}, +/* 2 */ { 2, s_1_2, 0, 2, 0}, +/* 3 */ { 2, s_1_3, 0, 2, 0}, +/* 4 */ { 2, s_1_4, 0, 2, 0}, +/* 5 */ { 2, s_1_5, 0, 1, 0}, +/* 6 */ { 2, s_1_6, 0, 2, 0}, +/* 7 */ { 2, s_1_7, 0, 2, 0}, +/* 8 */ { 2, s_1_8, 0, 2, 0}, +/* 9 */ { 2, s_1_9, 0, 2, 0}, +/* 10 */ { 2, s_1_10, 0, 1, 0}, +/* 11 */ { 2, s_1_11, 0, 2, 0}, +/* 12 */ { 2, s_1_12, 0, 1, 0} +}; + +static const symbol s_2_0[2] = { 'e', 'd' }; +static const symbol s_2_1[3] = { 'e', 'e', 'd' }; +static const symbol s_2_2[3] = { 'i', 'n', 'g' }; + +static const struct among a_2[3] = +{ +/* 0 */ { 2, s_2_0, -1, 2, 0}, +/* 1 */ { 3, s_2_1, 0, 1, 0}, +/* 2 */ { 3, s_2_2, -1, 2, 0} +}; + +static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' }; +static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' }; +static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' }; +static const symbol s_3_3[3] = { 'e', 'l', 'i' }; +static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' }; +static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' }; +static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' }; +static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' }; +static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; +static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' }; +static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' }; +static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' }; +static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; +static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' }; +static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; +static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; +static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; + +static const struct among a_3[20] = +{ +/* 0 */ { 4, s_3_0, -1, 3, 0}, +/* 1 */ { 4, s_3_1, -1, 2, 0}, +/* 2 */ { 4, s_3_2, -1, 4, 0}, +/* 3 */ { 3, s_3_3, -1, 6, 0}, +/* 4 */ { 4, s_3_4, -1, 9, 0}, +/* 5 */ { 5, s_3_5, -1, 12, 0}, +/* 6 */ { 5, s_3_6, -1, 5, 0}, +/* 7 */ { 5, s_3_7, -1, 10, 0}, +/* 8 */ { 6, s_3_8, -1, 14, 0}, +/* 9 */ { 5, s_3_9, -1, 13, 0}, +/* 10 */ { 6, s_3_10, -1, 1, 0}, +/* 11 */ { 7, s_3_11, 10, 8, 0}, +/* 12 */ { 5, s_3_12, -1, 10, 0}, +/* 13 */ { 5, s_3_13, -1, 8, 0}, +/* 14 */ { 7, s_3_14, 13, 7, 0}, +/* 15 */ { 4, s_3_15, -1, 7, 0}, +/* 16 */ { 4, s_3_16, -1, 8, 0}, +/* 17 */ { 7, s_3_17, -1, 13, 0}, +/* 18 */ { 7, s_3_18, -1, 11, 0}, +/* 19 */ { 7, s_3_19, -1, 12, 0} +}; + +static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' }; +static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' }; +static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' }; +static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' }; +static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' }; +static const symbol s_4_5[3] = { 'f', 'u', 'l' }; +static const symbol s_4_6[4] = { 'n', 'e', 's', 's' }; + +static const struct among a_4[7] = +{ +/* 0 */ { 5, s_4_0, -1, 2, 0}, +/* 1 */ { 5, s_4_1, -1, 3, 0}, +/* 2 */ { 5, s_4_2, -1, 1, 0}, +/* 3 */ { 5, s_4_3, -1, 2, 0}, +/* 4 */ { 4, s_4_4, -1, 2, 0}, +/* 5 */ { 3, s_4_5, -1, 3, 0}, +/* 6 */ { 4, s_4_6, -1, 3, 0} +}; + +static const symbol s_5_0[2] = { 'i', 'c' }; +static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' }; +static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' }; +static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_5_5[3] = { 'a', 't', 'e' }; +static const symbol s_5_6[3] = { 'i', 'v', 'e' }; +static const symbol s_5_7[3] = { 'i', 'z', 'e' }; +static const symbol s_5_8[3] = { 'i', 't', 'i' }; +static const symbol s_5_9[2] = { 'a', 'l' }; +static const symbol s_5_10[3] = { 'i', 's', 'm' }; +static const symbol s_5_11[3] = { 'i', 'o', 'n' }; +static const symbol s_5_12[2] = { 'e', 'r' }; +static const symbol s_5_13[3] = { 'o', 'u', 's' }; +static const symbol s_5_14[3] = { 'a', 'n', 't' }; +static const symbol s_5_15[3] = { 'e', 'n', 't' }; +static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' }; +static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' }; +static const symbol s_5_18[2] = { 'o', 'u' }; + +static const struct among a_5[19] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 4, s_5_2, -1, 1, 0}, +/* 3 */ { 4, s_5_3, -1, 1, 0}, +/* 4 */ { 4, s_5_4, -1, 1, 0}, +/* 5 */ { 3, s_5_5, -1, 1, 0}, +/* 6 */ { 3, s_5_6, -1, 1, 0}, +/* 7 */ { 3, s_5_7, -1, 1, 0}, +/* 8 */ { 3, s_5_8, -1, 1, 0}, +/* 9 */ { 2, s_5_9, -1, 1, 0}, +/* 10 */ { 3, s_5_10, -1, 1, 0}, +/* 11 */ { 3, s_5_11, -1, 2, 0}, +/* 12 */ { 2, s_5_12, -1, 1, 0}, +/* 13 */ { 3, s_5_13, -1, 1, 0}, +/* 14 */ { 3, s_5_14, -1, 1, 0}, +/* 15 */ { 3, s_5_15, -1, 1, 0}, +/* 16 */ { 4, s_5_16, 15, 1, 0}, +/* 17 */ { 5, s_5_17, 16, 1, 0}, +/* 18 */ { 2, s_5_18, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1 }; + +static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; + +static const symbol s_0[] = { 's', 's' }; +static const symbol s_1[] = { 'i' }; +static const symbol s_2[] = { 'e', 'e' }; +static const symbol s_3[] = { 'e' }; +static const symbol s_4[] = { 'e' }; +static const symbol s_5[] = { 'y' }; +static const symbol s_6[] = { 'Y' }; +static const symbol s_7[] = { 'i' }; +static const symbol s_8[] = { 't', 'i', 'o', 'n' }; +static const symbol s_9[] = { 'e', 'n', 'c', 'e' }; +static const symbol s_10[] = { 'a', 'n', 'c', 'e' }; +static const symbol s_11[] = { 'a', 'b', 'l', 'e' }; +static const symbol s_12[] = { 'e', 'n', 't' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'i', 'z', 'e' }; +static const symbol s_15[] = { 'a', 't', 'e' }; +static const symbol s_16[] = { 'a', 'l' }; +static const symbol s_17[] = { 'a', 'l' }; +static const symbol s_18[] = { 'f', 'u', 'l' }; +static const symbol s_19[] = { 'o', 'u', 's' }; +static const symbol s_20[] = { 'i', 'v', 'e' }; +static const symbol s_21[] = { 'b', 'l', 'e' }; +static const symbol s_22[] = { 'a', 'l' }; +static const symbol s_23[] = { 'i', 'c' }; +static const symbol s_24[] = { 's' }; +static const symbol s_25[] = { 't' }; +static const symbol s_26[] = { 'e' }; +static const symbol s_27[] = { 'l' }; +static const symbol s_28[] = { 'l' }; +static const symbol s_29[] = { 'y' }; +static const symbol s_30[] = { 'Y' }; +static const symbol s_31[] = { 'y' }; +static const symbol s_32[] = { 'Y' }; +static const symbol s_33[] = { 'Y' }; +static const symbol s_34[] = { 'y' }; + +static int r_shortv(struct SN_env * z) { + if (out_grouping_b(z, g_v_WXY, 89, 121, 0)) return 0; + if (in_grouping_b(z, g_v, 97, 121, 0)) return 0; + if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_Step_1a(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 25 */ + if (z->c <= z->lb || z->p[z->c - 1] != 115) return 0; + among_var = find_among_b(z, a_0, 4); /* substring, line 25 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 25 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 2, s_0); /* <-, line 26 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 27 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 29 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_1b(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 34 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; + among_var = find_among_b(z, a_2, 3); /* substring, line 34 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 34 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 35 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 2, s_2); /* <-, line 35 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m_test = z->l - z->c; /* test, line 38 */ + { /* gopast */ /* grouping v, line 38 */ + int ret = out_grouping_b(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 38 */ + if (ret < 0) return ret; + } + { int m_test = z->l - z->c; /* test, line 39 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else + among_var = find_among_b(z, a_1, 13); /* substring, line 39 */ + if (!(among_var)) return 0; + z->c = z->l - m_test; + } + switch(among_var) { + case 0: return 0; + case 1: + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_3); /* <+, line 41 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + case 2: + z->ket = z->c; /* [, line 44 */ + if (z->c <= z->lb) return 0; + z->c--; /* next, line 44 */ + z->bra = z->c; /* ], line 44 */ + { int ret = slice_del(z); /* delete, line 44 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c != z->I[0]) return 0; /* atmark, line 45 */ + { int m_test = z->l - z->c; /* test, line 45 */ + { int ret = r_shortv(z); + if (ret == 0) return 0; /* call shortv, line 45 */ + if (ret < 0) return ret; + } + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_4); /* <+, line 45 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + } + break; + } + return 1; +} + +static int r_Step_1c(struct SN_env * z) { + z->ket = z->c; /* [, line 52 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 52 */ + if (!(eq_s_b(z, 1, s_5))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_6))) return 0; + } +lab0: + z->bra = z->c; /* ], line 52 */ + { /* gopast */ /* grouping v, line 53 */ + int ret = out_grouping_b(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + { int ret = slice_from_s(z, 1, s_7); /* <-, line 54 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_Step_2(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 58 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_3, 20); /* substring, line 58 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 58 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 58 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 4, s_10); /* <-, line 61 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_3(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 82 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_4, 7); /* substring, line 82 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 82 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 82 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 2, s_22); /* <-, line 83 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 2, s_23); /* <-, line 85 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 87 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_4(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 92 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3961384 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 19); /* substring, line 92 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 92 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 92 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 95 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 96 */ + if (!(eq_s_b(z, 1, s_24))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_25))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 96 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_5a(struct SN_env * z) { + z->ket = z->c; /* [, line 101 */ + if (!(eq_s_b(z, 1, s_26))) return 0; + z->bra = z->c; /* ], line 101 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 102 */ + { int ret = r_R2(z); + if (ret == 0) goto lab1; /* call R2, line 102 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 102 */ + if (ret < 0) return ret; + } + { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ + { int ret = r_shortv(z); + if (ret == 0) goto lab2; /* call shortv, line 102 */ + if (ret < 0) return ret; + } + return 0; + lab2: + z->c = z->l - m2; + } + } +lab0: + { int ret = slice_del(z); /* delete, line 103 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_Step_5b(struct SN_env * z) { + z->ket = z->c; /* [, line 107 */ + if (!(eq_s_b(z, 1, s_27))) return 0; + z->bra = z->c; /* ], line 107 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 108 */ + if (ret < 0) return ret; + } + if (!(eq_s_b(z, 1, s_28))) return 0; + { int ret = slice_del(z); /* delete, line 109 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int porter_ISO_8859_1_stem(struct SN_env * z) { + z->B[0] = 0; /* unset Y_found, line 115 */ + { int c1 = z->c; /* do, line 116 */ + z->bra = z->c; /* [, line 116 */ + if (!(eq_s(z, 1, s_29))) goto lab0; + z->ket = z->c; /* ], line 116 */ + { int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 116 */ + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 117 */ + while(1) { /* repeat, line 117 */ + int c3 = z->c; + while(1) { /* goto, line 117 */ + int c4 = z->c; + if (in_grouping(z, g_v, 97, 121, 0)) goto lab3; + z->bra = z->c; /* [, line 117 */ + if (!(eq_s(z, 1, s_31))) goto lab3; + z->ket = z->c; /* ], line 117 */ + z->c = c4; + break; + lab3: + z->c = c4; + if (z->c >= z->l) goto lab2; + z->c++; /* goto, line 117 */ + } + { int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 117 */ + continue; + lab2: + z->c = c3; + break; + } + z->c = c2; + } + z->I[0] = z->l; + z->I[1] = z->l; + { int c5 = z->c; /* do, line 121 */ + { /* gopast */ /* grouping v, line 122 */ + int ret = out_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 122 */ + int ret = in_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 122 */ + { /* gopast */ /* grouping v, line 123 */ + int ret = out_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 123 */ + int ret = in_grouping(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 123 */ + lab4: + z->c = c5; + } + z->lb = z->c; z->c = z->l; /* backwards, line 126 */ + + { int m6 = z->l - z->c; (void)m6; /* do, line 127 */ + { int ret = r_Step_1a(z); + if (ret == 0) goto lab5; /* call Step_1a, line 127 */ + if (ret < 0) return ret; + } + lab5: + z->c = z->l - m6; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 128 */ + { int ret = r_Step_1b(z); + if (ret == 0) goto lab6; /* call Step_1b, line 128 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m7; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 129 */ + { int ret = r_Step_1c(z); + if (ret == 0) goto lab7; /* call Step_1c, line 129 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 130 */ + { int ret = r_Step_2(z); + if (ret == 0) goto lab8; /* call Step_2, line 130 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m9; + } + { int m10 = z->l - z->c; (void)m10; /* do, line 131 */ + { int ret = r_Step_3(z); + if (ret == 0) goto lab9; /* call Step_3, line 131 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m10; + } + { int m11 = z->l - z->c; (void)m11; /* do, line 132 */ + { int ret = r_Step_4(z); + if (ret == 0) goto lab10; /* call Step_4, line 132 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m11; + } + { int m12 = z->l - z->c; (void)m12; /* do, line 133 */ + { int ret = r_Step_5a(z); + if (ret == 0) goto lab11; /* call Step_5a, line 133 */ + if (ret < 0) return ret; + } + lab11: + z->c = z->l - m12; + } + { int m13 = z->l - z->c; (void)m13; /* do, line 134 */ + { int ret = r_Step_5b(z); + if (ret == 0) goto lab12; /* call Step_5b, line 134 */ + if (ret < 0) return ret; + } + lab12: + z->c = z->l - m13; + } + z->c = z->lb; + { int c14 = z->c; /* do, line 137 */ + if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */ + while(1) { /* repeat, line 137 */ + int c15 = z->c; + while(1) { /* goto, line 137 */ + int c16 = z->c; + z->bra = z->c; /* [, line 137 */ + if (!(eq_s(z, 1, s_33))) goto lab15; + z->ket = z->c; /* ], line 137 */ + z->c = c16; + break; + lab15: + z->c = c16; + if (z->c >= z->l) goto lab14; + z->c++; /* goto, line 137 */ + } + { int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */ + if (ret < 0) return ret; + } + continue; + lab14: + z->c = c15; + break; + } + lab13: + z->c = c14; + } + return 1; +} + +extern struct SN_env * porter_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } + +extern void porter_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_portuguese.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_portuguese.c new file mode 100644 index 00000000000..06d425d008f --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_portuguese.c @@ -0,0 +1,1017 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int portuguese_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_residual_form(struct SN_env * z); +static int r_residual_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * portuguese_ISO_8859_1_create_env(void); +extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[1] = { 0xE3 }; +static const symbol s_0_2[1] = { 0xF5 }; + +static const struct among a_0[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 1, s_0_1, 0, 1, 0}, +/* 2 */ { 1, s_0_2, 0, 2, 0} +}; + +static const symbol s_1_1[2] = { 'a', '~' }; +static const symbol s_1_2[2] = { 'o', '~' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_1_1, 0, 1, 0}, +/* 2 */ { 2, s_1_2, 0, 2, 0} +}; + +static const symbol s_2_0[2] = { 'i', 'c' }; +static const symbol s_2_1[2] = { 'a', 'd' }; +static const symbol s_2_2[2] = { 'o', 's' }; +static const symbol s_2_3[2] = { 'i', 'v' }; + +static const struct among a_2[4] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 2, s_2_2, -1, -1, 0}, +/* 3 */ { 2, s_2_3, -1, 1, 0} +}; + +static const symbol s_3_0[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_3_1[4] = { 'a', 'v', 'e', 'l' }; +static const symbol s_3_2[4] = { 0xED, 'v', 'e', 'l' }; + +static const struct among a_3[3] = +{ +/* 0 */ { 4, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0}, +/* 2 */ { 4, s_3_2, -1, 1, 0} +}; + +static const symbol s_4_0[2] = { 'i', 'c' }; +static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_4_2[2] = { 'i', 'v' }; + +static const struct among a_4[3] = +{ +/* 0 */ { 2, s_4_0, -1, 1, 0}, +/* 1 */ { 4, s_4_1, -1, 1, 0}, +/* 2 */ { 2, s_4_2, -1, 1, 0} +}; + +static const symbol s_5_0[3] = { 'i', 'c', 'a' }; +static const symbol s_5_1[5] = { 0xE2, 'n', 'c', 'i', 'a' }; +static const symbol s_5_2[5] = { 0xEA, 'n', 'c', 'i', 'a' }; +static const symbol s_5_3[3] = { 'i', 'r', 'a' }; +static const symbol s_5_4[5] = { 'a', 'd', 'o', 'r', 'a' }; +static const symbol s_5_5[3] = { 'o', 's', 'a' }; +static const symbol s_5_6[4] = { 'i', 's', 't', 'a' }; +static const symbol s_5_7[3] = { 'i', 'v', 'a' }; +static const symbol s_5_8[3] = { 'e', 'z', 'a' }; +static const symbol s_5_9[5] = { 'l', 'o', 'g', 0xED, 'a' }; +static const symbol s_5_10[5] = { 'i', 'd', 'a', 'd', 'e' }; +static const symbol s_5_11[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_5_12[5] = { 'm', 'e', 'n', 't', 'e' }; +static const symbol s_5_13[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; +static const symbol s_5_14[4] = { 0xE1, 'v', 'e', 'l' }; +static const symbol s_5_15[4] = { 0xED, 'v', 'e', 'l' }; +static const symbol s_5_16[5] = { 'u', 'c', 'i', 0xF3, 'n' }; +static const symbol s_5_17[3] = { 'i', 'c', 'o' }; +static const symbol s_5_18[4] = { 'i', 's', 'm', 'o' }; +static const symbol s_5_19[3] = { 'o', 's', 'o' }; +static const symbol s_5_20[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_5_21[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_5_22[3] = { 'i', 'v', 'o' }; +static const symbol s_5_23[5] = { 'a', 0xE7, 'a', '~', 'o' }; +static const symbol s_5_24[4] = { 'a', 'd', 'o', 'r' }; +static const symbol s_5_25[4] = { 'i', 'c', 'a', 's' }; +static const symbol s_5_26[6] = { 0xEA, 'n', 'c', 'i', 'a', 's' }; +static const symbol s_5_27[4] = { 'i', 'r', 'a', 's' }; +static const symbol s_5_28[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; +static const symbol s_5_29[4] = { 'o', 's', 'a', 's' }; +static const symbol s_5_30[5] = { 'i', 's', 't', 'a', 's' }; +static const symbol s_5_31[4] = { 'i', 'v', 'a', 's' }; +static const symbol s_5_32[4] = { 'e', 'z', 'a', 's' }; +static const symbol s_5_33[6] = { 'l', 'o', 'g', 0xED, 'a', 's' }; +static const symbol s_5_34[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; +static const symbol s_5_35[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; +static const symbol s_5_36[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; +static const symbol s_5_37[5] = { 'a', 'n', 't', 'e', 's' }; +static const symbol s_5_38[6] = { 'a', 0xE7, 'o', '~', 'e', 's' }; +static const symbol s_5_39[4] = { 'i', 'c', 'o', 's' }; +static const symbol s_5_40[5] = { 'i', 's', 'm', 'o', 's' }; +static const symbol s_5_41[4] = { 'o', 's', 'o', 's' }; +static const symbol s_5_42[7] = { 'a', 'm', 'e', 'n', 't', 'o', 's' }; +static const symbol s_5_43[7] = { 'i', 'm', 'e', 'n', 't', 'o', 's' }; +static const symbol s_5_44[4] = { 'i', 'v', 'o', 's' }; + +static const struct among a_5[45] = +{ +/* 0 */ { 3, s_5_0, -1, 1, 0}, +/* 1 */ { 5, s_5_1, -1, 1, 0}, +/* 2 */ { 5, s_5_2, -1, 4, 0}, +/* 3 */ { 3, s_5_3, -1, 9, 0}, +/* 4 */ { 5, s_5_4, -1, 1, 0}, +/* 5 */ { 3, s_5_5, -1, 1, 0}, +/* 6 */ { 4, s_5_6, -1, 1, 0}, +/* 7 */ { 3, s_5_7, -1, 8, 0}, +/* 8 */ { 3, s_5_8, -1, 1, 0}, +/* 9 */ { 5, s_5_9, -1, 2, 0}, +/* 10 */ { 5, s_5_10, -1, 7, 0}, +/* 11 */ { 4, s_5_11, -1, 1, 0}, +/* 12 */ { 5, s_5_12, -1, 6, 0}, +/* 13 */ { 6, s_5_13, 12, 5, 0}, +/* 14 */ { 4, s_5_14, -1, 1, 0}, +/* 15 */ { 4, s_5_15, -1, 1, 0}, +/* 16 */ { 5, s_5_16, -1, 3, 0}, +/* 17 */ { 3, s_5_17, -1, 1, 0}, +/* 18 */ { 4, s_5_18, -1, 1, 0}, +/* 19 */ { 3, s_5_19, -1, 1, 0}, +/* 20 */ { 6, s_5_20, -1, 1, 0}, +/* 21 */ { 6, s_5_21, -1, 1, 0}, +/* 22 */ { 3, s_5_22, -1, 8, 0}, +/* 23 */ { 5, s_5_23, -1, 1, 0}, +/* 24 */ { 4, s_5_24, -1, 1, 0}, +/* 25 */ { 4, s_5_25, -1, 1, 0}, +/* 26 */ { 6, s_5_26, -1, 4, 0}, +/* 27 */ { 4, s_5_27, -1, 9, 0}, +/* 28 */ { 6, s_5_28, -1, 1, 0}, +/* 29 */ { 4, s_5_29, -1, 1, 0}, +/* 30 */ { 5, s_5_30, -1, 1, 0}, +/* 31 */ { 4, s_5_31, -1, 8, 0}, +/* 32 */ { 4, s_5_32, -1, 1, 0}, +/* 33 */ { 6, s_5_33, -1, 2, 0}, +/* 34 */ { 6, s_5_34, -1, 7, 0}, +/* 35 */ { 7, s_5_35, -1, 3, 0}, +/* 36 */ { 6, s_5_36, -1, 1, 0}, +/* 37 */ { 5, s_5_37, -1, 1, 0}, +/* 38 */ { 6, s_5_38, -1, 1, 0}, +/* 39 */ { 4, s_5_39, -1, 1, 0}, +/* 40 */ { 5, s_5_40, -1, 1, 0}, +/* 41 */ { 4, s_5_41, -1, 1, 0}, +/* 42 */ { 7, s_5_42, -1, 1, 0}, +/* 43 */ { 7, s_5_43, -1, 1, 0}, +/* 44 */ { 4, s_5_44, -1, 8, 0} +}; + +static const symbol s_6_0[3] = { 'a', 'd', 'a' }; +static const symbol s_6_1[3] = { 'i', 'd', 'a' }; +static const symbol s_6_2[2] = { 'i', 'a' }; +static const symbol s_6_3[4] = { 'a', 'r', 'i', 'a' }; +static const symbol s_6_4[4] = { 'e', 'r', 'i', 'a' }; +static const symbol s_6_5[4] = { 'i', 'r', 'i', 'a' }; +static const symbol s_6_6[3] = { 'a', 'r', 'a' }; +static const symbol s_6_7[3] = { 'e', 'r', 'a' }; +static const symbol s_6_8[3] = { 'i', 'r', 'a' }; +static const symbol s_6_9[3] = { 'a', 'v', 'a' }; +static const symbol s_6_10[4] = { 'a', 's', 's', 'e' }; +static const symbol s_6_11[4] = { 'e', 's', 's', 'e' }; +static const symbol s_6_12[4] = { 'i', 's', 's', 'e' }; +static const symbol s_6_13[4] = { 'a', 's', 't', 'e' }; +static const symbol s_6_14[4] = { 'e', 's', 't', 'e' }; +static const symbol s_6_15[4] = { 'i', 's', 't', 'e' }; +static const symbol s_6_16[2] = { 'e', 'i' }; +static const symbol s_6_17[4] = { 'a', 'r', 'e', 'i' }; +static const symbol s_6_18[4] = { 'e', 'r', 'e', 'i' }; +static const symbol s_6_19[4] = { 'i', 'r', 'e', 'i' }; +static const symbol s_6_20[2] = { 'a', 'm' }; +static const symbol s_6_21[3] = { 'i', 'a', 'm' }; +static const symbol s_6_22[5] = { 'a', 'r', 'i', 'a', 'm' }; +static const symbol s_6_23[5] = { 'e', 'r', 'i', 'a', 'm' }; +static const symbol s_6_24[5] = { 'i', 'r', 'i', 'a', 'm' }; +static const symbol s_6_25[4] = { 'a', 'r', 'a', 'm' }; +static const symbol s_6_26[4] = { 'e', 'r', 'a', 'm' }; +static const symbol s_6_27[4] = { 'i', 'r', 'a', 'm' }; +static const symbol s_6_28[4] = { 'a', 'v', 'a', 'm' }; +static const symbol s_6_29[2] = { 'e', 'm' }; +static const symbol s_6_30[4] = { 'a', 'r', 'e', 'm' }; +static const symbol s_6_31[4] = { 'e', 'r', 'e', 'm' }; +static const symbol s_6_32[4] = { 'i', 'r', 'e', 'm' }; +static const symbol s_6_33[5] = { 'a', 's', 's', 'e', 'm' }; +static const symbol s_6_34[5] = { 'e', 's', 's', 'e', 'm' }; +static const symbol s_6_35[5] = { 'i', 's', 's', 'e', 'm' }; +static const symbol s_6_36[3] = { 'a', 'd', 'o' }; +static const symbol s_6_37[3] = { 'i', 'd', 'o' }; +static const symbol s_6_38[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_6_39[4] = { 'e', 'n', 'd', 'o' }; +static const symbol s_6_40[4] = { 'i', 'n', 'd', 'o' }; +static const symbol s_6_41[5] = { 'a', 'r', 'a', '~', 'o' }; +static const symbol s_6_42[5] = { 'e', 'r', 'a', '~', 'o' }; +static const symbol s_6_43[5] = { 'i', 'r', 'a', '~', 'o' }; +static const symbol s_6_44[2] = { 'a', 'r' }; +static const symbol s_6_45[2] = { 'e', 'r' }; +static const symbol s_6_46[2] = { 'i', 'r' }; +static const symbol s_6_47[2] = { 'a', 's' }; +static const symbol s_6_48[4] = { 'a', 'd', 'a', 's' }; +static const symbol s_6_49[4] = { 'i', 'd', 'a', 's' }; +static const symbol s_6_50[3] = { 'i', 'a', 's' }; +static const symbol s_6_51[5] = { 'a', 'r', 'i', 'a', 's' }; +static const symbol s_6_52[5] = { 'e', 'r', 'i', 'a', 's' }; +static const symbol s_6_53[5] = { 'i', 'r', 'i', 'a', 's' }; +static const symbol s_6_54[4] = { 'a', 'r', 'a', 's' }; +static const symbol s_6_55[4] = { 'e', 'r', 'a', 's' }; +static const symbol s_6_56[4] = { 'i', 'r', 'a', 's' }; +static const symbol s_6_57[4] = { 'a', 'v', 'a', 's' }; +static const symbol s_6_58[2] = { 'e', 's' }; +static const symbol s_6_59[5] = { 'a', 'r', 'd', 'e', 's' }; +static const symbol s_6_60[5] = { 'e', 'r', 'd', 'e', 's' }; +static const symbol s_6_61[5] = { 'i', 'r', 'd', 'e', 's' }; +static const symbol s_6_62[4] = { 'a', 'r', 'e', 's' }; +static const symbol s_6_63[4] = { 'e', 'r', 'e', 's' }; +static const symbol s_6_64[4] = { 'i', 'r', 'e', 's' }; +static const symbol s_6_65[5] = { 'a', 's', 's', 'e', 's' }; +static const symbol s_6_66[5] = { 'e', 's', 's', 'e', 's' }; +static const symbol s_6_67[5] = { 'i', 's', 's', 'e', 's' }; +static const symbol s_6_68[5] = { 'a', 's', 't', 'e', 's' }; +static const symbol s_6_69[5] = { 'e', 's', 't', 'e', 's' }; +static const symbol s_6_70[5] = { 'i', 's', 't', 'e', 's' }; +static const symbol s_6_71[2] = { 'i', 's' }; +static const symbol s_6_72[3] = { 'a', 'i', 's' }; +static const symbol s_6_73[3] = { 'e', 'i', 's' }; +static const symbol s_6_74[5] = { 'a', 'r', 'e', 'i', 's' }; +static const symbol s_6_75[5] = { 'e', 'r', 'e', 'i', 's' }; +static const symbol s_6_76[5] = { 'i', 'r', 'e', 'i', 's' }; +static const symbol s_6_77[5] = { 0xE1, 'r', 'e', 'i', 's' }; +static const symbol s_6_78[5] = { 0xE9, 'r', 'e', 'i', 's' }; +static const symbol s_6_79[5] = { 0xED, 'r', 'e', 'i', 's' }; +static const symbol s_6_80[6] = { 0xE1, 's', 's', 'e', 'i', 's' }; +static const symbol s_6_81[6] = { 0xE9, 's', 's', 'e', 'i', 's' }; +static const symbol s_6_82[6] = { 0xED, 's', 's', 'e', 'i', 's' }; +static const symbol s_6_83[5] = { 0xE1, 'v', 'e', 'i', 's' }; +static const symbol s_6_84[4] = { 0xED, 'e', 'i', 's' }; +static const symbol s_6_85[6] = { 'a', 'r', 0xED, 'e', 'i', 's' }; +static const symbol s_6_86[6] = { 'e', 'r', 0xED, 'e', 'i', 's' }; +static const symbol s_6_87[6] = { 'i', 'r', 0xED, 'e', 'i', 's' }; +static const symbol s_6_88[4] = { 'a', 'd', 'o', 's' }; +static const symbol s_6_89[4] = { 'i', 'd', 'o', 's' }; +static const symbol s_6_90[4] = { 'a', 'm', 'o', 's' }; +static const symbol s_6_91[6] = { 0xE1, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_6_92[6] = { 0xE9, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_6_93[6] = { 0xED, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_6_94[6] = { 0xE1, 'v', 'a', 'm', 'o', 's' }; +static const symbol s_6_95[5] = { 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_6_96[7] = { 'a', 'r', 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_6_97[7] = { 'e', 'r', 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_6_98[7] = { 'i', 'r', 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_6_99[4] = { 'e', 'm', 'o', 's' }; +static const symbol s_6_100[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_6_101[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_6_102[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_6_103[7] = { 0xE1, 's', 's', 'e', 'm', 'o', 's' }; +static const symbol s_6_104[7] = { 0xEA, 's', 's', 'e', 'm', 'o', 's' }; +static const symbol s_6_105[7] = { 0xED, 's', 's', 'e', 'm', 'o', 's' }; +static const symbol s_6_106[4] = { 'i', 'm', 'o', 's' }; +static const symbol s_6_107[5] = { 'a', 'r', 'm', 'o', 's' }; +static const symbol s_6_108[5] = { 'e', 'r', 'm', 'o', 's' }; +static const symbol s_6_109[5] = { 'i', 'r', 'm', 'o', 's' }; +static const symbol s_6_110[4] = { 0xE1, 'm', 'o', 's' }; +static const symbol s_6_111[4] = { 'a', 'r', 0xE1, 's' }; +static const symbol s_6_112[4] = { 'e', 'r', 0xE1, 's' }; +static const symbol s_6_113[4] = { 'i', 'r', 0xE1, 's' }; +static const symbol s_6_114[2] = { 'e', 'u' }; +static const symbol s_6_115[2] = { 'i', 'u' }; +static const symbol s_6_116[2] = { 'o', 'u' }; +static const symbol s_6_117[3] = { 'a', 'r', 0xE1 }; +static const symbol s_6_118[3] = { 'e', 'r', 0xE1 }; +static const symbol s_6_119[3] = { 'i', 'r', 0xE1 }; + +static const struct among a_6[120] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 3, s_6_1, -1, 1, 0}, +/* 2 */ { 2, s_6_2, -1, 1, 0}, +/* 3 */ { 4, s_6_3, 2, 1, 0}, +/* 4 */ { 4, s_6_4, 2, 1, 0}, +/* 5 */ { 4, s_6_5, 2, 1, 0}, +/* 6 */ { 3, s_6_6, -1, 1, 0}, +/* 7 */ { 3, s_6_7, -1, 1, 0}, +/* 8 */ { 3, s_6_8, -1, 1, 0}, +/* 9 */ { 3, s_6_9, -1, 1, 0}, +/* 10 */ { 4, s_6_10, -1, 1, 0}, +/* 11 */ { 4, s_6_11, -1, 1, 0}, +/* 12 */ { 4, s_6_12, -1, 1, 0}, +/* 13 */ { 4, s_6_13, -1, 1, 0}, +/* 14 */ { 4, s_6_14, -1, 1, 0}, +/* 15 */ { 4, s_6_15, -1, 1, 0}, +/* 16 */ { 2, s_6_16, -1, 1, 0}, +/* 17 */ { 4, s_6_17, 16, 1, 0}, +/* 18 */ { 4, s_6_18, 16, 1, 0}, +/* 19 */ { 4, s_6_19, 16, 1, 0}, +/* 20 */ { 2, s_6_20, -1, 1, 0}, +/* 21 */ { 3, s_6_21, 20, 1, 0}, +/* 22 */ { 5, s_6_22, 21, 1, 0}, +/* 23 */ { 5, s_6_23, 21, 1, 0}, +/* 24 */ { 5, s_6_24, 21, 1, 0}, +/* 25 */ { 4, s_6_25, 20, 1, 0}, +/* 26 */ { 4, s_6_26, 20, 1, 0}, +/* 27 */ { 4, s_6_27, 20, 1, 0}, +/* 28 */ { 4, s_6_28, 20, 1, 0}, +/* 29 */ { 2, s_6_29, -1, 1, 0}, +/* 30 */ { 4, s_6_30, 29, 1, 0}, +/* 31 */ { 4, s_6_31, 29, 1, 0}, +/* 32 */ { 4, s_6_32, 29, 1, 0}, +/* 33 */ { 5, s_6_33, 29, 1, 0}, +/* 34 */ { 5, s_6_34, 29, 1, 0}, +/* 35 */ { 5, s_6_35, 29, 1, 0}, +/* 36 */ { 3, s_6_36, -1, 1, 0}, +/* 37 */ { 3, s_6_37, -1, 1, 0}, +/* 38 */ { 4, s_6_38, -1, 1, 0}, +/* 39 */ { 4, s_6_39, -1, 1, 0}, +/* 40 */ { 4, s_6_40, -1, 1, 0}, +/* 41 */ { 5, s_6_41, -1, 1, 0}, +/* 42 */ { 5, s_6_42, -1, 1, 0}, +/* 43 */ { 5, s_6_43, -1, 1, 0}, +/* 44 */ { 2, s_6_44, -1, 1, 0}, +/* 45 */ { 2, s_6_45, -1, 1, 0}, +/* 46 */ { 2, s_6_46, -1, 1, 0}, +/* 47 */ { 2, s_6_47, -1, 1, 0}, +/* 48 */ { 4, s_6_48, 47, 1, 0}, +/* 49 */ { 4, s_6_49, 47, 1, 0}, +/* 50 */ { 3, s_6_50, 47, 1, 0}, +/* 51 */ { 5, s_6_51, 50, 1, 0}, +/* 52 */ { 5, s_6_52, 50, 1, 0}, +/* 53 */ { 5, s_6_53, 50, 1, 0}, +/* 54 */ { 4, s_6_54, 47, 1, 0}, +/* 55 */ { 4, s_6_55, 47, 1, 0}, +/* 56 */ { 4, s_6_56, 47, 1, 0}, +/* 57 */ { 4, s_6_57, 47, 1, 0}, +/* 58 */ { 2, s_6_58, -1, 1, 0}, +/* 59 */ { 5, s_6_59, 58, 1, 0}, +/* 60 */ { 5, s_6_60, 58, 1, 0}, +/* 61 */ { 5, s_6_61, 58, 1, 0}, +/* 62 */ { 4, s_6_62, 58, 1, 0}, +/* 63 */ { 4, s_6_63, 58, 1, 0}, +/* 64 */ { 4, s_6_64, 58, 1, 0}, +/* 65 */ { 5, s_6_65, 58, 1, 0}, +/* 66 */ { 5, s_6_66, 58, 1, 0}, +/* 67 */ { 5, s_6_67, 58, 1, 0}, +/* 68 */ { 5, s_6_68, 58, 1, 0}, +/* 69 */ { 5, s_6_69, 58, 1, 0}, +/* 70 */ { 5, s_6_70, 58, 1, 0}, +/* 71 */ { 2, s_6_71, -1, 1, 0}, +/* 72 */ { 3, s_6_72, 71, 1, 0}, +/* 73 */ { 3, s_6_73, 71, 1, 0}, +/* 74 */ { 5, s_6_74, 73, 1, 0}, +/* 75 */ { 5, s_6_75, 73, 1, 0}, +/* 76 */ { 5, s_6_76, 73, 1, 0}, +/* 77 */ { 5, s_6_77, 73, 1, 0}, +/* 78 */ { 5, s_6_78, 73, 1, 0}, +/* 79 */ { 5, s_6_79, 73, 1, 0}, +/* 80 */ { 6, s_6_80, 73, 1, 0}, +/* 81 */ { 6, s_6_81, 73, 1, 0}, +/* 82 */ { 6, s_6_82, 73, 1, 0}, +/* 83 */ { 5, s_6_83, 73, 1, 0}, +/* 84 */ { 4, s_6_84, 73, 1, 0}, +/* 85 */ { 6, s_6_85, 84, 1, 0}, +/* 86 */ { 6, s_6_86, 84, 1, 0}, +/* 87 */ { 6, s_6_87, 84, 1, 0}, +/* 88 */ { 4, s_6_88, -1, 1, 0}, +/* 89 */ { 4, s_6_89, -1, 1, 0}, +/* 90 */ { 4, s_6_90, -1, 1, 0}, +/* 91 */ { 6, s_6_91, 90, 1, 0}, +/* 92 */ { 6, s_6_92, 90, 1, 0}, +/* 93 */ { 6, s_6_93, 90, 1, 0}, +/* 94 */ { 6, s_6_94, 90, 1, 0}, +/* 95 */ { 5, s_6_95, 90, 1, 0}, +/* 96 */ { 7, s_6_96, 95, 1, 0}, +/* 97 */ { 7, s_6_97, 95, 1, 0}, +/* 98 */ { 7, s_6_98, 95, 1, 0}, +/* 99 */ { 4, s_6_99, -1, 1, 0}, +/*100 */ { 6, s_6_100, 99, 1, 0}, +/*101 */ { 6, s_6_101, 99, 1, 0}, +/*102 */ { 6, s_6_102, 99, 1, 0}, +/*103 */ { 7, s_6_103, 99, 1, 0}, +/*104 */ { 7, s_6_104, 99, 1, 0}, +/*105 */ { 7, s_6_105, 99, 1, 0}, +/*106 */ { 4, s_6_106, -1, 1, 0}, +/*107 */ { 5, s_6_107, -1, 1, 0}, +/*108 */ { 5, s_6_108, -1, 1, 0}, +/*109 */ { 5, s_6_109, -1, 1, 0}, +/*110 */ { 4, s_6_110, -1, 1, 0}, +/*111 */ { 4, s_6_111, -1, 1, 0}, +/*112 */ { 4, s_6_112, -1, 1, 0}, +/*113 */ { 4, s_6_113, -1, 1, 0}, +/*114 */ { 2, s_6_114, -1, 1, 0}, +/*115 */ { 2, s_6_115, -1, 1, 0}, +/*116 */ { 2, s_6_116, -1, 1, 0}, +/*117 */ { 3, s_6_117, -1, 1, 0}, +/*118 */ { 3, s_6_118, -1, 1, 0}, +/*119 */ { 3, s_6_119, -1, 1, 0} +}; + +static const symbol s_7_0[1] = { 'a' }; +static const symbol s_7_1[1] = { 'i' }; +static const symbol s_7_2[1] = { 'o' }; +static const symbol s_7_3[2] = { 'o', 's' }; +static const symbol s_7_4[1] = { 0xE1 }; +static const symbol s_7_5[1] = { 0xED }; +static const symbol s_7_6[1] = { 0xF3 }; + +static const struct among a_7[7] = +{ +/* 0 */ { 1, s_7_0, -1, 1, 0}, +/* 1 */ { 1, s_7_1, -1, 1, 0}, +/* 2 */ { 1, s_7_2, -1, 1, 0}, +/* 3 */ { 2, s_7_3, -1, 1, 0}, +/* 4 */ { 1, s_7_4, -1, 1, 0}, +/* 5 */ { 1, s_7_5, -1, 1, 0}, +/* 6 */ { 1, s_7_6, -1, 1, 0} +}; + +static const symbol s_8_0[1] = { 'e' }; +static const symbol s_8_1[1] = { 0xE7 }; +static const symbol s_8_2[1] = { 0xE9 }; +static const symbol s_8_3[1] = { 0xEA }; + +static const struct among a_8[4] = +{ +/* 0 */ { 1, s_8_0, -1, 1, 0}, +/* 1 */ { 1, s_8_1, -1, 2, 0}, +/* 2 */ { 1, s_8_2, -1, 1, 0}, +/* 3 */ { 1, s_8_3, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; + +static const symbol s_0[] = { 'a', '~' }; +static const symbol s_1[] = { 'o', '~' }; +static const symbol s_2[] = { 0xE3 }; +static const symbol s_3[] = { 0xF5 }; +static const symbol s_4[] = { 'l', 'o', 'g' }; +static const symbol s_5[] = { 'u' }; +static const symbol s_6[] = { 'e', 'n', 't', 'e' }; +static const symbol s_7[] = { 'a', 't' }; +static const symbol s_8[] = { 'a', 't' }; +static const symbol s_9[] = { 'e' }; +static const symbol s_10[] = { 'i', 'r' }; +static const symbol s_11[] = { 'u' }; +static const symbol s_12[] = { 'g' }; +static const symbol s_13[] = { 'i' }; +static const symbol s_14[] = { 'c' }; +static const symbol s_15[] = { 'c' }; +static const symbol s_16[] = { 'i' }; +static const symbol s_17[] = { 'c' }; + +static int r_prelude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 36 */ + int c1 = z->c; + z->bra = z->c; /* [, line 37 */ + if (z->c >= z->l || (z->p[z->c + 0] != 227 && z->p[z->c + 0] != 245)) among_var = 3; else + among_var = find_among(z, a_0, 3); /* substring, line 37 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 37 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 2, s_0); /* <-, line 38 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 2, s_1); /* <-, line 39 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 40 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 50 */ + { int c2 = z->c; /* or, line 52 */ + if (in_grouping(z, g_v, 97, 250, 0)) goto lab2; + { int c3 = z->c; /* or, line 51 */ + if (out_grouping(z, g_v, 97, 250, 0)) goto lab4; + { /* gopast */ /* grouping v, line 51 */ + int ret = out_grouping(z, g_v, 97, 250, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping(z, g_v, 97, 250, 0)) goto lab2; + { /* gopast */ /* non v, line 51 */ + int ret = in_grouping(z, g_v, 97, 250, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping(z, g_v, 97, 250, 0)) goto lab0; + { int c4 = z->c; /* or, line 53 */ + if (out_grouping(z, g_v, 97, 250, 0)) goto lab6; + { /* gopast */ /* grouping v, line 53 */ + int ret = out_grouping(z, g_v, 97, 250, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping(z, g_v, 97, 250, 0)) goto lab0; + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 53 */ + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 54 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 56 */ + { /* gopast */ /* grouping v, line 57 */ + int ret = out_grouping(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 57 */ + int ret = in_grouping(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 57 */ + { /* gopast */ /* grouping v, line 58 */ + int ret = out_grouping(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 58 */ + int ret = in_grouping(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 58 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 62 */ + int c1 = z->c; + z->bra = z->c; /* [, line 63 */ + if (z->c + 1 >= z->l || z->p[z->c + 1] != 126) among_var = 3; else + among_var = find_among(z, a_1, 3); /* substring, line 63 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 63 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 64 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 65 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 66 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 77 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((839714 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 45); /* substring, line 77 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 77 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 93 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 93 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 98 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_4); /* <-, line 98 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 102 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_5); /* <-, line 102 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 106 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_6); /* <-, line 106 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 110 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 110 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ + z->ket = z->c; /* [, line 112 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } + among_var = find_among_b(z, a_2, 4); /* substring, line 112 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 112 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 112 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 112 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab0; } + case 1: + z->ket = z->c; /* [, line 113 */ + if (!(eq_s_b(z, 2, s_7))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 113 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 113 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 113 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + break; + case 6: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 122 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 123 */ + z->ket = z->c; /* [, line 124 */ + if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) { z->c = z->l - m_keep; goto lab1; } + among_var = find_among_b(z, a_3, 3); /* substring, line 124 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 124 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab1; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 127 */ + if (ret < 0) return ret; + } + break; + } + lab1: + ; + } + break; + case 7: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 134 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ + z->ket = z->c; /* [, line 136 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } + among_var = find_among_b(z, a_4, 3); /* substring, line 136 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } + z->bra = z->c; /* ], line 136 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab2; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 139 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 139 */ + if (ret < 0) return ret; + } + break; + } + lab2: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 146 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 146 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 147 */ + z->ket = z->c; /* [, line 148 */ + if (!(eq_s_b(z, 2, s_8))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 148 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 148 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 148 */ + if (ret < 0) return ret; + } + lab3: + ; + } + break; + case 9: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 153 */ + if (ret < 0) return ret; + } + if (!(eq_s_b(z, 1, s_9))) return 0; + { int ret = slice_from_s(z, 2, s_10); /* <-, line 154 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 159 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 159 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 160 */ + among_var = find_among_b(z, a_6, 120); /* substring, line 160 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 160 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_residual_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 184 */ + among_var = find_among_b(z, a_7, 7); /* substring, line 184 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 184 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 187 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 187 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_residual_form(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 192 */ + among_var = find_among_b(z, a_8, 4); /* substring, line 192 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 192 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 194 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 194 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 194 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 194 */ + if (!(eq_s_b(z, 1, s_11))) goto lab1; + z->bra = z->c; /* ], line 194 */ + { int m_test = z->l - z->c; /* test, line 194 */ + if (!(eq_s_b(z, 1, s_12))) goto lab1; + z->c = z->l - m_test; + } + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_13))) return 0; + z->bra = z->c; /* ], line 195 */ + { int m_test = z->l - z->c; /* test, line 195 */ + if (!(eq_s_b(z, 1, s_14))) return 0; + z->c = z->l - m_test; + } + } + lab0: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 195 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 195 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_15); /* <-, line 196 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int portuguese_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 202 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 202 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 203 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 203 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 204 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 209 */ + { int m5 = z->l - z->c; (void)m5; /* and, line 207 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 206 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab6; /* call standard_suffix, line 206 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab4; /* call verb_suffix, line 206 */ + if (ret < 0) return ret; + } + } + lab5: + z->c = z->l - m5; + { int m7 = z->l - z->c; (void)m7; /* do, line 207 */ + z->ket = z->c; /* [, line 207 */ + if (!(eq_s_b(z, 1, s_16))) goto lab7; + z->bra = z->c; /* ], line 207 */ + { int m_test = z->l - z->c; /* test, line 207 */ + if (!(eq_s_b(z, 1, s_17))) goto lab7; + z->c = z->l - m_test; + } + { int ret = r_RV(z); + if (ret == 0) goto lab7; /* call RV, line 207 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 207 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m7; + } + } + goto lab3; + lab4: + z->c = z->l - m4; + { int ret = r_residual_suffix(z); + if (ret == 0) goto lab2; /* call residual_suffix, line 209 */ + if (ret < 0) return ret; + } + } + lab3: + lab2: + z->c = z->l - m3; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 211 */ + { int ret = r_residual_form(z); + if (ret == 0) goto lab8; /* call residual_form, line 211 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m8; + } + z->c = z->lb; + { int c9 = z->c; /* do, line 213 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab9; /* call postlude, line 213 */ + if (ret < 0) return ret; + } + lab9: + z->c = c9; + } + return 1; +} + +extern struct SN_env * portuguese_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void portuguese_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_spanish.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_spanish.c new file mode 100644 index 00000000000..27f26e7865e --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_spanish.c @@ -0,0 +1,1093 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int spanish_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_residual_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_y_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_attached_pronoun(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * spanish_ISO_8859_1_create_env(void); +extern void spanish_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[1] = { 0xE1 }; +static const symbol s_0_2[1] = { 0xE9 }; +static const symbol s_0_3[1] = { 0xED }; +static const symbol s_0_4[1] = { 0xF3 }; +static const symbol s_0_5[1] = { 0xFA }; + +static const struct among a_0[6] = +{ +/* 0 */ { 0, 0, -1, 6, 0}, +/* 1 */ { 1, s_0_1, 0, 1, 0}, +/* 2 */ { 1, s_0_2, 0, 2, 0}, +/* 3 */ { 1, s_0_3, 0, 3, 0}, +/* 4 */ { 1, s_0_4, 0, 4, 0}, +/* 5 */ { 1, s_0_5, 0, 5, 0} +}; + +static const symbol s_1_0[2] = { 'l', 'a' }; +static const symbol s_1_1[4] = { 's', 'e', 'l', 'a' }; +static const symbol s_1_2[2] = { 'l', 'e' }; +static const symbol s_1_3[2] = { 'm', 'e' }; +static const symbol s_1_4[2] = { 's', 'e' }; +static const symbol s_1_5[2] = { 'l', 'o' }; +static const symbol s_1_6[4] = { 's', 'e', 'l', 'o' }; +static const symbol s_1_7[3] = { 'l', 'a', 's' }; +static const symbol s_1_8[5] = { 's', 'e', 'l', 'a', 's' }; +static const symbol s_1_9[3] = { 'l', 'e', 's' }; +static const symbol s_1_10[3] = { 'l', 'o', 's' }; +static const symbol s_1_11[5] = { 's', 'e', 'l', 'o', 's' }; +static const symbol s_1_12[3] = { 'n', 'o', 's' }; + +static const struct among a_1[13] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 4, s_1_1, 0, -1, 0}, +/* 2 */ { 2, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0}, +/* 4 */ { 2, s_1_4, -1, -1, 0}, +/* 5 */ { 2, s_1_5, -1, -1, 0}, +/* 6 */ { 4, s_1_6, 5, -1, 0}, +/* 7 */ { 3, s_1_7, -1, -1, 0}, +/* 8 */ { 5, s_1_8, 7, -1, 0}, +/* 9 */ { 3, s_1_9, -1, -1, 0}, +/* 10 */ { 3, s_1_10, -1, -1, 0}, +/* 11 */ { 5, s_1_11, 10, -1, 0}, +/* 12 */ { 3, s_1_12, -1, -1, 0} +}; + +static const symbol s_2_0[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_2_1[5] = { 'i', 'e', 'n', 'd', 'o' }; +static const symbol s_2_2[5] = { 'y', 'e', 'n', 'd', 'o' }; +static const symbol s_2_3[4] = { 0xE1, 'n', 'd', 'o' }; +static const symbol s_2_4[5] = { 'i', 0xE9, 'n', 'd', 'o' }; +static const symbol s_2_5[2] = { 'a', 'r' }; +static const symbol s_2_6[2] = { 'e', 'r' }; +static const symbol s_2_7[2] = { 'i', 'r' }; +static const symbol s_2_8[2] = { 0xE1, 'r' }; +static const symbol s_2_9[2] = { 0xE9, 'r' }; +static const symbol s_2_10[2] = { 0xED, 'r' }; + +static const struct among a_2[11] = +{ +/* 0 */ { 4, s_2_0, -1, 6, 0}, +/* 1 */ { 5, s_2_1, -1, 6, 0}, +/* 2 */ { 5, s_2_2, -1, 7, 0}, +/* 3 */ { 4, s_2_3, -1, 2, 0}, +/* 4 */ { 5, s_2_4, -1, 1, 0}, +/* 5 */ { 2, s_2_5, -1, 6, 0}, +/* 6 */ { 2, s_2_6, -1, 6, 0}, +/* 7 */ { 2, s_2_7, -1, 6, 0}, +/* 8 */ { 2, s_2_8, -1, 3, 0}, +/* 9 */ { 2, s_2_9, -1, 4, 0}, +/* 10 */ { 2, s_2_10, -1, 5, 0} +}; + +static const symbol s_3_0[2] = { 'i', 'c' }; +static const symbol s_3_1[2] = { 'a', 'd' }; +static const symbol s_3_2[2] = { 'o', 's' }; +static const symbol s_3_3[2] = { 'i', 'v' }; + +static const struct among a_3[4] = +{ +/* 0 */ { 2, s_3_0, -1, -1, 0}, +/* 1 */ { 2, s_3_1, -1, -1, 0}, +/* 2 */ { 2, s_3_2, -1, -1, 0}, +/* 3 */ { 2, s_3_3, -1, 1, 0} +}; + +static const symbol s_4_0[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_4_1[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_4_2[4] = { 'a', 'n', 't', 'e' }; + +static const struct among a_4[3] = +{ +/* 0 */ { 4, s_4_0, -1, 1, 0}, +/* 1 */ { 4, s_4_1, -1, 1, 0}, +/* 2 */ { 4, s_4_2, -1, 1, 0} +}; + +static const symbol s_5_0[2] = { 'i', 'c' }; +static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_5_2[2] = { 'i', 'v' }; + +static const struct among a_5[3] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 2, s_5_2, -1, 1, 0} +}; + +static const symbol s_6_0[3] = { 'i', 'c', 'a' }; +static const symbol s_6_1[5] = { 'a', 'n', 'c', 'i', 'a' }; +static const symbol s_6_2[5] = { 'e', 'n', 'c', 'i', 'a' }; +static const symbol s_6_3[5] = { 'a', 'd', 'o', 'r', 'a' }; +static const symbol s_6_4[3] = { 'o', 's', 'a' }; +static const symbol s_6_5[4] = { 'i', 's', 't', 'a' }; +static const symbol s_6_6[3] = { 'i', 'v', 'a' }; +static const symbol s_6_7[4] = { 'a', 'n', 'z', 'a' }; +static const symbol s_6_8[5] = { 'l', 'o', 'g', 0xED, 'a' }; +static const symbol s_6_9[4] = { 'i', 'd', 'a', 'd' }; +static const symbol s_6_10[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_6_11[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_6_12[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_6_13[5] = { 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_14[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_15[5] = { 'a', 'c', 'i', 0xF3, 'n' }; +static const symbol s_6_16[5] = { 'u', 'c', 'i', 0xF3, 'n' }; +static const symbol s_6_17[3] = { 'i', 'c', 'o' }; +static const symbol s_6_18[4] = { 'i', 's', 'm', 'o' }; +static const symbol s_6_19[3] = { 'o', 's', 'o' }; +static const symbol s_6_20[7] = { 'a', 'm', 'i', 'e', 'n', 't', 'o' }; +static const symbol s_6_21[7] = { 'i', 'm', 'i', 'e', 'n', 't', 'o' }; +static const symbol s_6_22[3] = { 'i', 'v', 'o' }; +static const symbol s_6_23[4] = { 'a', 'd', 'o', 'r' }; +static const symbol s_6_24[4] = { 'i', 'c', 'a', 's' }; +static const symbol s_6_25[6] = { 'a', 'n', 'c', 'i', 'a', 's' }; +static const symbol s_6_26[6] = { 'e', 'n', 'c', 'i', 'a', 's' }; +static const symbol s_6_27[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; +static const symbol s_6_28[4] = { 'o', 's', 'a', 's' }; +static const symbol s_6_29[5] = { 'i', 's', 't', 'a', 's' }; +static const symbol s_6_30[4] = { 'i', 'v', 'a', 's' }; +static const symbol s_6_31[5] = { 'a', 'n', 'z', 'a', 's' }; +static const symbol s_6_32[6] = { 'l', 'o', 'g', 0xED, 'a', 's' }; +static const symbol s_6_33[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; +static const symbol s_6_34[5] = { 'a', 'b', 'l', 'e', 's' }; +static const symbol s_6_35[5] = { 'i', 'b', 'l', 'e', 's' }; +static const symbol s_6_36[7] = { 'a', 'c', 'i', 'o', 'n', 'e', 's' }; +static const symbol s_6_37[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; +static const symbol s_6_38[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; +static const symbol s_6_39[5] = { 'a', 'n', 't', 'e', 's' }; +static const symbol s_6_40[4] = { 'i', 'c', 'o', 's' }; +static const symbol s_6_41[5] = { 'i', 's', 'm', 'o', 's' }; +static const symbol s_6_42[4] = { 'o', 's', 'o', 's' }; +static const symbol s_6_43[8] = { 'a', 'm', 'i', 'e', 'n', 't', 'o', 's' }; +static const symbol s_6_44[8] = { 'i', 'm', 'i', 'e', 'n', 't', 'o', 's' }; +static const symbol s_6_45[4] = { 'i', 'v', 'o', 's' }; + +static const struct among a_6[46] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 5, s_6_1, -1, 2, 0}, +/* 2 */ { 5, s_6_2, -1, 5, 0}, +/* 3 */ { 5, s_6_3, -1, 2, 0}, +/* 4 */ { 3, s_6_4, -1, 1, 0}, +/* 5 */ { 4, s_6_5, -1, 1, 0}, +/* 6 */ { 3, s_6_6, -1, 9, 0}, +/* 7 */ { 4, s_6_7, -1, 1, 0}, +/* 8 */ { 5, s_6_8, -1, 3, 0}, +/* 9 */ { 4, s_6_9, -1, 8, 0}, +/* 10 */ { 4, s_6_10, -1, 1, 0}, +/* 11 */ { 4, s_6_11, -1, 1, 0}, +/* 12 */ { 4, s_6_12, -1, 2, 0}, +/* 13 */ { 5, s_6_13, -1, 7, 0}, +/* 14 */ { 6, s_6_14, 13, 6, 0}, +/* 15 */ { 5, s_6_15, -1, 2, 0}, +/* 16 */ { 5, s_6_16, -1, 4, 0}, +/* 17 */ { 3, s_6_17, -1, 1, 0}, +/* 18 */ { 4, s_6_18, -1, 1, 0}, +/* 19 */ { 3, s_6_19, -1, 1, 0}, +/* 20 */ { 7, s_6_20, -1, 1, 0}, +/* 21 */ { 7, s_6_21, -1, 1, 0}, +/* 22 */ { 3, s_6_22, -1, 9, 0}, +/* 23 */ { 4, s_6_23, -1, 2, 0}, +/* 24 */ { 4, s_6_24, -1, 1, 0}, +/* 25 */ { 6, s_6_25, -1, 2, 0}, +/* 26 */ { 6, s_6_26, -1, 5, 0}, +/* 27 */ { 6, s_6_27, -1, 2, 0}, +/* 28 */ { 4, s_6_28, -1, 1, 0}, +/* 29 */ { 5, s_6_29, -1, 1, 0}, +/* 30 */ { 4, s_6_30, -1, 9, 0}, +/* 31 */ { 5, s_6_31, -1, 1, 0}, +/* 32 */ { 6, s_6_32, -1, 3, 0}, +/* 33 */ { 6, s_6_33, -1, 8, 0}, +/* 34 */ { 5, s_6_34, -1, 1, 0}, +/* 35 */ { 5, s_6_35, -1, 1, 0}, +/* 36 */ { 7, s_6_36, -1, 2, 0}, +/* 37 */ { 7, s_6_37, -1, 4, 0}, +/* 38 */ { 6, s_6_38, -1, 2, 0}, +/* 39 */ { 5, s_6_39, -1, 2, 0}, +/* 40 */ { 4, s_6_40, -1, 1, 0}, +/* 41 */ { 5, s_6_41, -1, 1, 0}, +/* 42 */ { 4, s_6_42, -1, 1, 0}, +/* 43 */ { 8, s_6_43, -1, 1, 0}, +/* 44 */ { 8, s_6_44, -1, 1, 0}, +/* 45 */ { 4, s_6_45, -1, 9, 0} +}; + +static const symbol s_7_0[2] = { 'y', 'a' }; +static const symbol s_7_1[2] = { 'y', 'e' }; +static const symbol s_7_2[3] = { 'y', 'a', 'n' }; +static const symbol s_7_3[3] = { 'y', 'e', 'n' }; +static const symbol s_7_4[5] = { 'y', 'e', 'r', 'o', 'n' }; +static const symbol s_7_5[5] = { 'y', 'e', 'n', 'd', 'o' }; +static const symbol s_7_6[2] = { 'y', 'o' }; +static const symbol s_7_7[3] = { 'y', 'a', 's' }; +static const symbol s_7_8[3] = { 'y', 'e', 's' }; +static const symbol s_7_9[4] = { 'y', 'a', 'i', 's' }; +static const symbol s_7_10[5] = { 'y', 'a', 'm', 'o', 's' }; +static const symbol s_7_11[2] = { 'y', 0xF3 }; + +static const struct among a_7[12] = +{ +/* 0 */ { 2, s_7_0, -1, 1, 0}, +/* 1 */ { 2, s_7_1, -1, 1, 0}, +/* 2 */ { 3, s_7_2, -1, 1, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 5, s_7_4, -1, 1, 0}, +/* 5 */ { 5, s_7_5, -1, 1, 0}, +/* 6 */ { 2, s_7_6, -1, 1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 3, s_7_8, -1, 1, 0}, +/* 9 */ { 4, s_7_9, -1, 1, 0}, +/* 10 */ { 5, s_7_10, -1, 1, 0}, +/* 11 */ { 2, s_7_11, -1, 1, 0} +}; + +static const symbol s_8_0[3] = { 'a', 'b', 'a' }; +static const symbol s_8_1[3] = { 'a', 'd', 'a' }; +static const symbol s_8_2[3] = { 'i', 'd', 'a' }; +static const symbol s_8_3[3] = { 'a', 'r', 'a' }; +static const symbol s_8_4[4] = { 'i', 'e', 'r', 'a' }; +static const symbol s_8_5[2] = { 0xED, 'a' }; +static const symbol s_8_6[4] = { 'a', 'r', 0xED, 'a' }; +static const symbol s_8_7[4] = { 'e', 'r', 0xED, 'a' }; +static const symbol s_8_8[4] = { 'i', 'r', 0xED, 'a' }; +static const symbol s_8_9[2] = { 'a', 'd' }; +static const symbol s_8_10[2] = { 'e', 'd' }; +static const symbol s_8_11[2] = { 'i', 'd' }; +static const symbol s_8_12[3] = { 'a', 's', 'e' }; +static const symbol s_8_13[4] = { 'i', 'e', 's', 'e' }; +static const symbol s_8_14[4] = { 'a', 's', 't', 'e' }; +static const symbol s_8_15[4] = { 'i', 's', 't', 'e' }; +static const symbol s_8_16[2] = { 'a', 'n' }; +static const symbol s_8_17[4] = { 'a', 'b', 'a', 'n' }; +static const symbol s_8_18[4] = { 'a', 'r', 'a', 'n' }; +static const symbol s_8_19[5] = { 'i', 'e', 'r', 'a', 'n' }; +static const symbol s_8_20[3] = { 0xED, 'a', 'n' }; +static const symbol s_8_21[5] = { 'a', 'r', 0xED, 'a', 'n' }; +static const symbol s_8_22[5] = { 'e', 'r', 0xED, 'a', 'n' }; +static const symbol s_8_23[5] = { 'i', 'r', 0xED, 'a', 'n' }; +static const symbol s_8_24[2] = { 'e', 'n' }; +static const symbol s_8_25[4] = { 'a', 's', 'e', 'n' }; +static const symbol s_8_26[5] = { 'i', 'e', 's', 'e', 'n' }; +static const symbol s_8_27[4] = { 'a', 'r', 'o', 'n' }; +static const symbol s_8_28[5] = { 'i', 'e', 'r', 'o', 'n' }; +static const symbol s_8_29[4] = { 'a', 'r', 0xE1, 'n' }; +static const symbol s_8_30[4] = { 'e', 'r', 0xE1, 'n' }; +static const symbol s_8_31[4] = { 'i', 'r', 0xE1, 'n' }; +static const symbol s_8_32[3] = { 'a', 'd', 'o' }; +static const symbol s_8_33[3] = { 'i', 'd', 'o' }; +static const symbol s_8_34[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_8_35[5] = { 'i', 'e', 'n', 'd', 'o' }; +static const symbol s_8_36[2] = { 'a', 'r' }; +static const symbol s_8_37[2] = { 'e', 'r' }; +static const symbol s_8_38[2] = { 'i', 'r' }; +static const symbol s_8_39[2] = { 'a', 's' }; +static const symbol s_8_40[4] = { 'a', 'b', 'a', 's' }; +static const symbol s_8_41[4] = { 'a', 'd', 'a', 's' }; +static const symbol s_8_42[4] = { 'i', 'd', 'a', 's' }; +static const symbol s_8_43[4] = { 'a', 'r', 'a', 's' }; +static const symbol s_8_44[5] = { 'i', 'e', 'r', 'a', 's' }; +static const symbol s_8_45[3] = { 0xED, 'a', 's' }; +static const symbol s_8_46[5] = { 'a', 'r', 0xED, 'a', 's' }; +static const symbol s_8_47[5] = { 'e', 'r', 0xED, 'a', 's' }; +static const symbol s_8_48[5] = { 'i', 'r', 0xED, 'a', 's' }; +static const symbol s_8_49[2] = { 'e', 's' }; +static const symbol s_8_50[4] = { 'a', 's', 'e', 's' }; +static const symbol s_8_51[5] = { 'i', 'e', 's', 'e', 's' }; +static const symbol s_8_52[5] = { 'a', 'b', 'a', 'i', 's' }; +static const symbol s_8_53[5] = { 'a', 'r', 'a', 'i', 's' }; +static const symbol s_8_54[6] = { 'i', 'e', 'r', 'a', 'i', 's' }; +static const symbol s_8_55[4] = { 0xED, 'a', 'i', 's' }; +static const symbol s_8_56[6] = { 'a', 'r', 0xED, 'a', 'i', 's' }; +static const symbol s_8_57[6] = { 'e', 'r', 0xED, 'a', 'i', 's' }; +static const symbol s_8_58[6] = { 'i', 'r', 0xED, 'a', 'i', 's' }; +static const symbol s_8_59[5] = { 'a', 's', 'e', 'i', 's' }; +static const symbol s_8_60[6] = { 'i', 'e', 's', 'e', 'i', 's' }; +static const symbol s_8_61[6] = { 'a', 's', 't', 'e', 'i', 's' }; +static const symbol s_8_62[6] = { 'i', 's', 't', 'e', 'i', 's' }; +static const symbol s_8_63[3] = { 0xE1, 'i', 's' }; +static const symbol s_8_64[3] = { 0xE9, 'i', 's' }; +static const symbol s_8_65[5] = { 'a', 'r', 0xE9, 'i', 's' }; +static const symbol s_8_66[5] = { 'e', 'r', 0xE9, 'i', 's' }; +static const symbol s_8_67[5] = { 'i', 'r', 0xE9, 'i', 's' }; +static const symbol s_8_68[4] = { 'a', 'd', 'o', 's' }; +static const symbol s_8_69[4] = { 'i', 'd', 'o', 's' }; +static const symbol s_8_70[4] = { 'a', 'm', 'o', 's' }; +static const symbol s_8_71[6] = { 0xE1, 'b', 'a', 'm', 'o', 's' }; +static const symbol s_8_72[6] = { 0xE1, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_8_73[7] = { 'i', 0xE9, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_8_74[5] = { 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_8_75[7] = { 'a', 'r', 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_8_76[7] = { 'e', 'r', 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_8_77[7] = { 'i', 'r', 0xED, 'a', 'm', 'o', 's' }; +static const symbol s_8_78[4] = { 'e', 'm', 'o', 's' }; +static const symbol s_8_79[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_8_80[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_8_81[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_8_82[6] = { 0xE1, 's', 'e', 'm', 'o', 's' }; +static const symbol s_8_83[7] = { 'i', 0xE9, 's', 'e', 'm', 'o', 's' }; +static const symbol s_8_84[4] = { 'i', 'm', 'o', 's' }; +static const symbol s_8_85[4] = { 'a', 'r', 0xE1, 's' }; +static const symbol s_8_86[4] = { 'e', 'r', 0xE1, 's' }; +static const symbol s_8_87[4] = { 'i', 'r', 0xE1, 's' }; +static const symbol s_8_88[2] = { 0xED, 's' }; +static const symbol s_8_89[3] = { 'a', 'r', 0xE1 }; +static const symbol s_8_90[3] = { 'e', 'r', 0xE1 }; +static const symbol s_8_91[3] = { 'i', 'r', 0xE1 }; +static const symbol s_8_92[3] = { 'a', 'r', 0xE9 }; +static const symbol s_8_93[3] = { 'e', 'r', 0xE9 }; +static const symbol s_8_94[3] = { 'i', 'r', 0xE9 }; +static const symbol s_8_95[2] = { 'i', 0xF3 }; + +static const struct among a_8[96] = +{ +/* 0 */ { 3, s_8_0, -1, 2, 0}, +/* 1 */ { 3, s_8_1, -1, 2, 0}, +/* 2 */ { 3, s_8_2, -1, 2, 0}, +/* 3 */ { 3, s_8_3, -1, 2, 0}, +/* 4 */ { 4, s_8_4, -1, 2, 0}, +/* 5 */ { 2, s_8_5, -1, 2, 0}, +/* 6 */ { 4, s_8_6, 5, 2, 0}, +/* 7 */ { 4, s_8_7, 5, 2, 0}, +/* 8 */ { 4, s_8_8, 5, 2, 0}, +/* 9 */ { 2, s_8_9, -1, 2, 0}, +/* 10 */ { 2, s_8_10, -1, 2, 0}, +/* 11 */ { 2, s_8_11, -1, 2, 0}, +/* 12 */ { 3, s_8_12, -1, 2, 0}, +/* 13 */ { 4, s_8_13, -1, 2, 0}, +/* 14 */ { 4, s_8_14, -1, 2, 0}, +/* 15 */ { 4, s_8_15, -1, 2, 0}, +/* 16 */ { 2, s_8_16, -1, 2, 0}, +/* 17 */ { 4, s_8_17, 16, 2, 0}, +/* 18 */ { 4, s_8_18, 16, 2, 0}, +/* 19 */ { 5, s_8_19, 16, 2, 0}, +/* 20 */ { 3, s_8_20, 16, 2, 0}, +/* 21 */ { 5, s_8_21, 20, 2, 0}, +/* 22 */ { 5, s_8_22, 20, 2, 0}, +/* 23 */ { 5, s_8_23, 20, 2, 0}, +/* 24 */ { 2, s_8_24, -1, 1, 0}, +/* 25 */ { 4, s_8_25, 24, 2, 0}, +/* 26 */ { 5, s_8_26, 24, 2, 0}, +/* 27 */ { 4, s_8_27, -1, 2, 0}, +/* 28 */ { 5, s_8_28, -1, 2, 0}, +/* 29 */ { 4, s_8_29, -1, 2, 0}, +/* 30 */ { 4, s_8_30, -1, 2, 0}, +/* 31 */ { 4, s_8_31, -1, 2, 0}, +/* 32 */ { 3, s_8_32, -1, 2, 0}, +/* 33 */ { 3, s_8_33, -1, 2, 0}, +/* 34 */ { 4, s_8_34, -1, 2, 0}, +/* 35 */ { 5, s_8_35, -1, 2, 0}, +/* 36 */ { 2, s_8_36, -1, 2, 0}, +/* 37 */ { 2, s_8_37, -1, 2, 0}, +/* 38 */ { 2, s_8_38, -1, 2, 0}, +/* 39 */ { 2, s_8_39, -1, 2, 0}, +/* 40 */ { 4, s_8_40, 39, 2, 0}, +/* 41 */ { 4, s_8_41, 39, 2, 0}, +/* 42 */ { 4, s_8_42, 39, 2, 0}, +/* 43 */ { 4, s_8_43, 39, 2, 0}, +/* 44 */ { 5, s_8_44, 39, 2, 0}, +/* 45 */ { 3, s_8_45, 39, 2, 0}, +/* 46 */ { 5, s_8_46, 45, 2, 0}, +/* 47 */ { 5, s_8_47, 45, 2, 0}, +/* 48 */ { 5, s_8_48, 45, 2, 0}, +/* 49 */ { 2, s_8_49, -1, 1, 0}, +/* 50 */ { 4, s_8_50, 49, 2, 0}, +/* 51 */ { 5, s_8_51, 49, 2, 0}, +/* 52 */ { 5, s_8_52, -1, 2, 0}, +/* 53 */ { 5, s_8_53, -1, 2, 0}, +/* 54 */ { 6, s_8_54, -1, 2, 0}, +/* 55 */ { 4, s_8_55, -1, 2, 0}, +/* 56 */ { 6, s_8_56, 55, 2, 0}, +/* 57 */ { 6, s_8_57, 55, 2, 0}, +/* 58 */ { 6, s_8_58, 55, 2, 0}, +/* 59 */ { 5, s_8_59, -1, 2, 0}, +/* 60 */ { 6, s_8_60, -1, 2, 0}, +/* 61 */ { 6, s_8_61, -1, 2, 0}, +/* 62 */ { 6, s_8_62, -1, 2, 0}, +/* 63 */ { 3, s_8_63, -1, 2, 0}, +/* 64 */ { 3, s_8_64, -1, 1, 0}, +/* 65 */ { 5, s_8_65, 64, 2, 0}, +/* 66 */ { 5, s_8_66, 64, 2, 0}, +/* 67 */ { 5, s_8_67, 64, 2, 0}, +/* 68 */ { 4, s_8_68, -1, 2, 0}, +/* 69 */ { 4, s_8_69, -1, 2, 0}, +/* 70 */ { 4, s_8_70, -1, 2, 0}, +/* 71 */ { 6, s_8_71, 70, 2, 0}, +/* 72 */ { 6, s_8_72, 70, 2, 0}, +/* 73 */ { 7, s_8_73, 70, 2, 0}, +/* 74 */ { 5, s_8_74, 70, 2, 0}, +/* 75 */ { 7, s_8_75, 74, 2, 0}, +/* 76 */ { 7, s_8_76, 74, 2, 0}, +/* 77 */ { 7, s_8_77, 74, 2, 0}, +/* 78 */ { 4, s_8_78, -1, 1, 0}, +/* 79 */ { 6, s_8_79, 78, 2, 0}, +/* 80 */ { 6, s_8_80, 78, 2, 0}, +/* 81 */ { 6, s_8_81, 78, 2, 0}, +/* 82 */ { 6, s_8_82, 78, 2, 0}, +/* 83 */ { 7, s_8_83, 78, 2, 0}, +/* 84 */ { 4, s_8_84, -1, 2, 0}, +/* 85 */ { 4, s_8_85, -1, 2, 0}, +/* 86 */ { 4, s_8_86, -1, 2, 0}, +/* 87 */ { 4, s_8_87, -1, 2, 0}, +/* 88 */ { 2, s_8_88, -1, 2, 0}, +/* 89 */ { 3, s_8_89, -1, 2, 0}, +/* 90 */ { 3, s_8_90, -1, 2, 0}, +/* 91 */ { 3, s_8_91, -1, 2, 0}, +/* 92 */ { 3, s_8_92, -1, 2, 0}, +/* 93 */ { 3, s_8_93, -1, 2, 0}, +/* 94 */ { 3, s_8_94, -1, 2, 0}, +/* 95 */ { 2, s_8_95, -1, 2, 0} +}; + +static const symbol s_9_0[1] = { 'a' }; +static const symbol s_9_1[1] = { 'e' }; +static const symbol s_9_2[1] = { 'o' }; +static const symbol s_9_3[2] = { 'o', 's' }; +static const symbol s_9_4[1] = { 0xE1 }; +static const symbol s_9_5[1] = { 0xE9 }; +static const symbol s_9_6[1] = { 0xED }; +static const symbol s_9_7[1] = { 0xF3 }; + +static const struct among a_9[8] = +{ +/* 0 */ { 1, s_9_0, -1, 1, 0}, +/* 1 */ { 1, s_9_1, -1, 2, 0}, +/* 2 */ { 1, s_9_2, -1, 1, 0}, +/* 3 */ { 2, s_9_3, -1, 1, 0}, +/* 4 */ { 1, s_9_4, -1, 1, 0}, +/* 5 */ { 1, s_9_5, -1, 2, 0}, +/* 6 */ { 1, s_9_6, -1, 1, 0}, +/* 7 */ { 1, s_9_7, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; + +static const symbol s_0[] = { 'a' }; +static const symbol s_1[] = { 'e' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'o' }; +static const symbol s_4[] = { 'u' }; +static const symbol s_5[] = { 'i', 'e', 'n', 'd', 'o' }; +static const symbol s_6[] = { 'a', 'n', 'd', 'o' }; +static const symbol s_7[] = { 'a', 'r' }; +static const symbol s_8[] = { 'e', 'r' }; +static const symbol s_9[] = { 'i', 'r' }; +static const symbol s_10[] = { 'u' }; +static const symbol s_11[] = { 'i', 'c' }; +static const symbol s_12[] = { 'l', 'o', 'g' }; +static const symbol s_13[] = { 'u' }; +static const symbol s_14[] = { 'e', 'n', 't', 'e' }; +static const symbol s_15[] = { 'a', 't' }; +static const symbol s_16[] = { 'a', 't' }; +static const symbol s_17[] = { 'u' }; +static const symbol s_18[] = { 'u' }; +static const symbol s_19[] = { 'g' }; +static const symbol s_20[] = { 'u' }; +static const symbol s_21[] = { 'g' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 37 */ + { int c2 = z->c; /* or, line 39 */ + if (in_grouping(z, g_v, 97, 252, 0)) goto lab2; + { int c3 = z->c; /* or, line 38 */ + if (out_grouping(z, g_v, 97, 252, 0)) goto lab4; + { /* gopast */ /* grouping v, line 38 */ + int ret = out_grouping(z, g_v, 97, 252, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping(z, g_v, 97, 252, 0)) goto lab2; + { /* gopast */ /* non v, line 38 */ + int ret = in_grouping(z, g_v, 97, 252, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping(z, g_v, 97, 252, 0)) goto lab0; + { int c4 = z->c; /* or, line 40 */ + if (out_grouping(z, g_v, 97, 252, 0)) goto lab6; + { /* gopast */ /* grouping v, line 40 */ + int ret = out_grouping(z, g_v, 97, 252, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping(z, g_v, 97, 252, 0)) goto lab0; + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 40 */ + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 41 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 43 */ + { /* gopast */ /* grouping v, line 44 */ + int ret = out_grouping(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 44 */ + int ret = in_grouping(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 44 */ + { /* gopast */ /* grouping v, line 45 */ + int ret = out_grouping(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 45 */ + int ret = in_grouping(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 45 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 49 */ + int c1 = z->c; + z->bra = z->c; /* [, line 50 */ + if (z->c >= z->l || z->p[z->c + 0] >> 5 != 7 || !((67641858 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 6; else + among_var = find_among(z, a_0, 6); /* substring, line 50 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 50 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_0); /* <-, line 51 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 52 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 53 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 54 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 55 */ + if (ret < 0) return ret; + } + break; + case 6: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 57 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_attached_pronoun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 68 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((557090 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_1, 13))) return 0; /* substring, line 68 */ + z->bra = z->c; /* ], line 68 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; + among_var = find_among_b(z, a_2, 11); /* substring, line 72 */ + if (!(among_var)) return 0; + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 72 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + z->bra = z->c; /* ], line 73 */ + { int ret = slice_from_s(z, 5, s_5); /* <-, line 73 */ + if (ret < 0) return ret; + } + break; + case 2: + z->bra = z->c; /* ], line 74 */ + { int ret = slice_from_s(z, 4, s_6); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 3: + z->bra = z->c; /* ], line 75 */ + { int ret = slice_from_s(z, 2, s_7); /* <-, line 75 */ + if (ret < 0) return ret; + } + break; + case 4: + z->bra = z->c; /* ], line 76 */ + { int ret = slice_from_s(z, 2, s_8); /* <-, line 76 */ + if (ret < 0) return ret; + } + break; + case 5: + z->bra = z->c; /* ], line 77 */ + { int ret = slice_from_s(z, 2, s_9); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_del(z); /* delete, line 81 */ + if (ret < 0) return ret; + } + break; + case 7: + if (!(eq_s_b(z, 1, s_10))) return 0; + { int ret = slice_del(z); /* delete, line 82 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 87 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((835634 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_6, 46); /* substring, line 87 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 87 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 99 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 99 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 105 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 105 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 106 */ + z->ket = z->c; /* [, line 106 */ + if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 106 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 106 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 106 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 111 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_12); /* <-, line 111 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 115 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_13); /* <-, line 115 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 119 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_14); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 123 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 123 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 124 */ + z->ket = z->c; /* [, line 125 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } + among_var = find_among_b(z, a_3, 4); /* substring, line 125 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 125 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 125 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 125 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab1; } + case 1: + z->ket = z->c; /* [, line 126 */ + if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 126 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 126 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 126 */ + if (ret < 0) return ret; + } + break; + } + lab1: + ; + } + break; + case 7: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 135 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 135 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 136 */ + z->ket = z->c; /* [, line 137 */ + if (z->c - 3 <= z->lb || z->p[z->c - 1] != 101) { z->c = z->l - m_keep; goto lab2; } + among_var = find_among_b(z, a_4, 3); /* substring, line 137 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } + z->bra = z->c; /* ], line 137 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab2; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 140 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 140 */ + if (ret < 0) return ret; + } + break; + } + lab2: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 147 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 147 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 148 */ + z->ket = z->c; /* [, line 149 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } + among_var = find_among_b(z, a_5, 3); /* substring, line 149 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 149 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab3; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 152 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 152 */ + if (ret < 0) return ret; + } + break; + } + lab3: + ; + } + break; + case 9: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 159 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 159 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 160 */ + z->ket = z->c; /* [, line 161 */ + if (!(eq_s_b(z, 2, s_16))) { z->c = z->l - m_keep; goto lab4; } + z->bra = z->c; /* ], line 161 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 161 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 161 */ + if (ret < 0) return ret; + } + lab4: + ; + } + break; + } + return 1; +} + +static int r_y_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 168 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 168 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 168 */ + among_var = find_among_b(z, a_7, 12); /* substring, line 168 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 168 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + if (!(eq_s_b(z, 1, s_17))) return 0; + { int ret = slice_del(z); /* delete, line 171 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 176 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 176 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 176 */ + among_var = find_among_b(z, a_8, 96); /* substring, line 176 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 176 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 179 */ + if (!(eq_s_b(z, 1, s_18))) { z->c = z->l - m_keep; goto lab0; } + { int m_test = z->l - z->c; /* test, line 179 */ + if (!(eq_s_b(z, 1, s_19))) { z->c = z->l - m_keep; goto lab0; } + z->c = z->l - m_test; + } + lab0: + ; + } + z->bra = z->c; /* ], line 179 */ + { int ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 200 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_residual_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 205 */ + among_var = find_among_b(z, a_9, 8); /* substring, line 205 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 205 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 208 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 208 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 210 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 210 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 210 */ + z->ket = z->c; /* [, line 210 */ + if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 210 */ + { int m_test = z->l - z->c; /* test, line 210 */ + if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab0; } + z->c = z->l - m_test; + } + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 210 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 210 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + } + return 1; +} + +extern int spanish_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 216 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 216 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 217 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 218 */ + { int ret = r_attached_pronoun(z); + if (ret == 0) goto lab1; /* call attached_pronoun, line 218 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 219 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 219 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab4; /* call standard_suffix, line 219 */ + if (ret < 0) return ret; + } + goto lab3; + lab4: + z->c = z->l - m4; + { int ret = r_y_verb_suffix(z); + if (ret == 0) goto lab5; /* call y_verb_suffix, line 220 */ + if (ret < 0) return ret; + } + goto lab3; + lab5: + z->c = z->l - m4; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab2; /* call verb_suffix, line 221 */ + if (ret < 0) return ret; + } + } + lab3: + lab2: + z->c = z->l - m3; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 223 */ + { int ret = r_residual_suffix(z); + if (ret == 0) goto lab6; /* call residual_suffix, line 223 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m5; + } + z->c = z->lb; + { int c6 = z->c; /* do, line 225 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab7; /* call postlude, line 225 */ + if (ret < 0) return ret; + } + lab7: + z->c = c6; + } + return 1; +} + +extern struct SN_env * spanish_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void spanish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_1_swedish.c b/src/backend/snowball/libstemmer/stem_ISO_8859_1_swedish.c new file mode 100644 index 00000000000..f9bef1ada56 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_1_swedish.c @@ -0,0 +1,307 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int swedish_ISO_8859_1_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_other_suffix(struct SN_env * z); +static int r_consonant_pair(struct SN_env * z); +static int r_main_suffix(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * swedish_ISO_8859_1_create_env(void); +extern void swedish_ISO_8859_1_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[1] = { 'a' }; +static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; +static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; +static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; +static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; +static const symbol s_0_5[2] = { 'a', 'd' }; +static const symbol s_0_6[1] = { 'e' }; +static const symbol s_0_7[3] = { 'a', 'd', 'e' }; +static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; +static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; +static const symbol s_0_10[3] = { 'a', 'r', 'e' }; +static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; +static const symbol s_0_12[2] = { 'e', 'n' }; +static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; +static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; +static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; +static const symbol s_0_16[3] = { 'e', 'r', 'n' }; +static const symbol s_0_17[2] = { 'a', 'r' }; +static const symbol s_0_18[2] = { 'e', 'r' }; +static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; +static const symbol s_0_20[2] = { 'o', 'r' }; +static const symbol s_0_21[1] = { 's' }; +static const symbol s_0_22[2] = { 'a', 's' }; +static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; +static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; +static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; +static const symbol s_0_26[2] = { 'e', 's' }; +static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; +static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; +static const symbol s_0_29[3] = { 'e', 'n', 's' }; +static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; +static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; +static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; +static const symbol s_0_33[2] = { 'a', 't' }; +static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; +static const symbol s_0_35[3] = { 'h', 'e', 't' }; +static const symbol s_0_36[3] = { 'a', 's', 't' }; + +static const struct among a_0[37] = +{ +/* 0 */ { 1, s_0_0, -1, 1, 0}, +/* 1 */ { 4, s_0_1, 0, 1, 0}, +/* 2 */ { 4, s_0_2, 0, 1, 0}, +/* 3 */ { 7, s_0_3, 2, 1, 0}, +/* 4 */ { 4, s_0_4, 0, 1, 0}, +/* 5 */ { 2, s_0_5, -1, 1, 0}, +/* 6 */ { 1, s_0_6, -1, 1, 0}, +/* 7 */ { 3, s_0_7, 6, 1, 0}, +/* 8 */ { 4, s_0_8, 6, 1, 0}, +/* 9 */ { 4, s_0_9, 6, 1, 0}, +/* 10 */ { 3, s_0_10, 6, 1, 0}, +/* 11 */ { 4, s_0_11, 6, 1, 0}, +/* 12 */ { 2, s_0_12, -1, 1, 0}, +/* 13 */ { 5, s_0_13, 12, 1, 0}, +/* 14 */ { 4, s_0_14, 12, 1, 0}, +/* 15 */ { 5, s_0_15, 12, 1, 0}, +/* 16 */ { 3, s_0_16, -1, 1, 0}, +/* 17 */ { 2, s_0_17, -1, 1, 0}, +/* 18 */ { 2, s_0_18, -1, 1, 0}, +/* 19 */ { 5, s_0_19, 18, 1, 0}, +/* 20 */ { 2, s_0_20, -1, 1, 0}, +/* 21 */ { 1, s_0_21, -1, 2, 0}, +/* 22 */ { 2, s_0_22, 21, 1, 0}, +/* 23 */ { 5, s_0_23, 22, 1, 0}, +/* 24 */ { 5, s_0_24, 22, 1, 0}, +/* 25 */ { 5, s_0_25, 22, 1, 0}, +/* 26 */ { 2, s_0_26, 21, 1, 0}, +/* 27 */ { 4, s_0_27, 26, 1, 0}, +/* 28 */ { 5, s_0_28, 26, 1, 0}, +/* 29 */ { 3, s_0_29, 21, 1, 0}, +/* 30 */ { 5, s_0_30, 29, 1, 0}, +/* 31 */ { 6, s_0_31, 29, 1, 0}, +/* 32 */ { 4, s_0_32, 21, 1, 0}, +/* 33 */ { 2, s_0_33, -1, 1, 0}, +/* 34 */ { 5, s_0_34, -1, 1, 0}, +/* 35 */ { 3, s_0_35, -1, 1, 0}, +/* 36 */ { 3, s_0_36, -1, 1, 0} +}; + +static const symbol s_1_0[2] = { 'd', 'd' }; +static const symbol s_1_1[2] = { 'g', 'd' }; +static const symbol s_1_2[2] = { 'n', 'n' }; +static const symbol s_1_3[2] = { 'd', 't' }; +static const symbol s_1_4[2] = { 'g', 't' }; +static const symbol s_1_5[2] = { 'k', 't' }; +static const symbol s_1_6[2] = { 't', 't' }; + +static const struct among a_1[7] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0}, +/* 2 */ { 2, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0}, +/* 4 */ { 2, s_1_4, -1, -1, 0}, +/* 5 */ { 2, s_1_5, -1, -1, 0}, +/* 6 */ { 2, s_1_6, -1, -1, 0} +}; + +static const symbol s_2_0[2] = { 'i', 'g' }; +static const symbol s_2_1[3] = { 'l', 'i', 'g' }; +static const symbol s_2_2[3] = { 'e', 'l', 's' }; +static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; +static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' }; + +static const struct among a_2[5] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 3, s_2_1, 0, 1, 0}, +/* 2 */ { 3, s_2_2, -1, 1, 0}, +/* 3 */ { 5, s_2_3, -1, 3, 0}, +/* 4 */ { 4, s_2_4, -1, 2, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; + +static const unsigned char g_s_ending[] = { 119, 127, 149 }; + +static const symbol s_0[] = { 'l', 0xF6, 's' }; +static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c_test = z->c; /* test, line 29 */ + { int ret = z->c + 3; + if (0 > ret || ret > z->l) return 0; + z->c = ret; /* hop, line 29 */ + } + z->I[1] = z->c; /* setmark x, line 29 */ + z->c = c_test; + } + if (out_grouping(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ + { /* gopast */ /* non v, line 30 */ + int ret = in_grouping(z, g_v, 97, 246, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 30 */ + /* try, line 31 */ + if (!(z->I[0] < z->I[1])) goto lab0; + z->I[0] = z->I[1]; +lab0: + return 1; +} + +static int r_main_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 37 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 37 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 37 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 37 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 44 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b(z, g_s_ending, 98, 121, 0)) return 0; + { int ret = slice_del(z); /* delete, line 46 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_consonant_pair(struct SN_env * z) { + { int mlimit; /* setlimit, line 50 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 50 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* and, line 52 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ + z->c = z->l - m2; + z->ket = z->c; /* [, line 52 */ + if (z->c <= z->lb) { z->lb = mlimit; return 0; } + z->c--; /* next, line 52 */ + z->bra = z->c; /* ], line 52 */ + { int ret = slice_del(z); /* delete, line 52 */ + if (ret < 0) return ret; + } + } + z->lb = mlimit; + } + return 1; +} + +static int r_other_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 55 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 55 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 56 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 56 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = slice_del(z); /* delete, line 57 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_0); /* <-, line 58 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +extern int swedish_ISO_8859_1_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 66 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 66 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 67 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ + { int ret = r_main_suffix(z); + if (ret == 0) goto lab1; /* call main_suffix, line 68 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ + { int ret = r_other_suffix(z); + if (ret == 0) goto lab3; /* call other_suffix, line 70 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } + +extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_ISO_8859_2_romanian.c b/src/backend/snowball/libstemmer/stem_ISO_8859_2_romanian.c new file mode 100644 index 00000000000..d5cc2bec3fc --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_ISO_8859_2_romanian.c @@ -0,0 +1,998 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int romanian_ISO_8859_2_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_vowel_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_combo_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_step_0(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * romanian_ISO_8859_2_create_env(void); +extern void romanian_ISO_8859_2_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[1] = { 'I' }; +static const symbol s_0_2[1] = { 'U' }; + +static const struct among a_0[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 1, s_0_1, 0, 1, 0}, +/* 2 */ { 1, s_0_2, 0, 2, 0} +}; + +static const symbol s_1_0[2] = { 'e', 'a' }; +static const symbol s_1_1[4] = { 'a', 0xFE, 'i', 'a' }; +static const symbol s_1_2[3] = { 'a', 'u', 'a' }; +static const symbol s_1_3[3] = { 'i', 'u', 'a' }; +static const symbol s_1_4[4] = { 'a', 0xFE, 'i', 'e' }; +static const symbol s_1_5[3] = { 'e', 'l', 'e' }; +static const symbol s_1_6[3] = { 'i', 'l', 'e' }; +static const symbol s_1_7[4] = { 'i', 'i', 'l', 'e' }; +static const symbol s_1_8[3] = { 'i', 'e', 'i' }; +static const symbol s_1_9[4] = { 'a', 't', 'e', 'i' }; +static const symbol s_1_10[2] = { 'i', 'i' }; +static const symbol s_1_11[4] = { 'u', 'l', 'u', 'i' }; +static const symbol s_1_12[2] = { 'u', 'l' }; +static const symbol s_1_13[4] = { 'e', 'l', 'o', 'r' }; +static const symbol s_1_14[4] = { 'i', 'l', 'o', 'r' }; +static const symbol s_1_15[5] = { 'i', 'i', 'l', 'o', 'r' }; + +static const struct among a_1[16] = +{ +/* 0 */ { 2, s_1_0, -1, 3, 0}, +/* 1 */ { 4, s_1_1, -1, 7, 0}, +/* 2 */ { 3, s_1_2, -1, 2, 0}, +/* 3 */ { 3, s_1_3, -1, 4, 0}, +/* 4 */ { 4, s_1_4, -1, 7, 0}, +/* 5 */ { 3, s_1_5, -1, 3, 0}, +/* 6 */ { 3, s_1_6, -1, 5, 0}, +/* 7 */ { 4, s_1_7, 6, 4, 0}, +/* 8 */ { 3, s_1_8, -1, 4, 0}, +/* 9 */ { 4, s_1_9, -1, 6, 0}, +/* 10 */ { 2, s_1_10, -1, 4, 0}, +/* 11 */ { 4, s_1_11, -1, 1, 0}, +/* 12 */ { 2, s_1_12, -1, 1, 0}, +/* 13 */ { 4, s_1_13, -1, 3, 0}, +/* 14 */ { 4, s_1_14, -1, 4, 0}, +/* 15 */ { 5, s_1_15, 14, 4, 0} +}; + +static const symbol s_2_0[5] = { 'i', 'c', 'a', 'l', 'a' }; +static const symbol s_2_1[5] = { 'i', 'c', 'i', 'v', 'a' }; +static const symbol s_2_2[5] = { 'a', 't', 'i', 'v', 'a' }; +static const symbol s_2_3[5] = { 'i', 't', 'i', 'v', 'a' }; +static const symbol s_2_4[5] = { 'i', 'c', 'a', 'l', 'e' }; +static const symbol s_2_5[6] = { 'a', 0xFE, 'i', 'u', 'n', 'e' }; +static const symbol s_2_6[6] = { 'i', 0xFE, 'i', 'u', 'n', 'e' }; +static const symbol s_2_7[6] = { 'a', 't', 'o', 'a', 'r', 'e' }; +static const symbol s_2_8[6] = { 'i', 't', 'o', 'a', 'r', 'e' }; +static const symbol s_2_9[6] = { 0xE3, 't', 'o', 'a', 'r', 'e' }; +static const symbol s_2_10[7] = { 'i', 'c', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_11[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_12[9] = { 'i', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_13[7] = { 'i', 'v', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_14[5] = { 'i', 'c', 'i', 'v', 'e' }; +static const symbol s_2_15[5] = { 'a', 't', 'i', 'v', 'e' }; +static const symbol s_2_16[5] = { 'i', 't', 'i', 'v', 'e' }; +static const symbol s_2_17[5] = { 'i', 'c', 'a', 'l', 'i' }; +static const symbol s_2_18[5] = { 'a', 't', 'o', 'r', 'i' }; +static const symbol s_2_19[7] = { 'i', 'c', 'a', 't', 'o', 'r', 'i' }; +static const symbol s_2_20[5] = { 'i', 't', 'o', 'r', 'i' }; +static const symbol s_2_21[5] = { 0xE3, 't', 'o', 'r', 'i' }; +static const symbol s_2_22[7] = { 'i', 'c', 'i', 't', 'a', 't', 'i' }; +static const symbol s_2_23[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'i' }; +static const symbol s_2_24[7] = { 'i', 'v', 'i', 't', 'a', 't', 'i' }; +static const symbol s_2_25[5] = { 'i', 'c', 'i', 'v', 'i' }; +static const symbol s_2_26[5] = { 'a', 't', 'i', 'v', 'i' }; +static const symbol s_2_27[5] = { 'i', 't', 'i', 'v', 'i' }; +static const symbol s_2_28[6] = { 'i', 'c', 'i', 't', 0xE3, 'i' }; +static const symbol s_2_29[8] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 'i' }; +static const symbol s_2_30[6] = { 'i', 'v', 'i', 't', 0xE3, 'i' }; +static const symbol s_2_31[7] = { 'i', 'c', 'i', 't', 0xE3, 0xFE, 'i' }; +static const symbol s_2_32[9] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 0xFE, 'i' }; +static const symbol s_2_33[7] = { 'i', 'v', 'i', 't', 0xE3, 0xFE, 'i' }; +static const symbol s_2_34[4] = { 'i', 'c', 'a', 'l' }; +static const symbol s_2_35[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_2_36[6] = { 'i', 'c', 'a', 't', 'o', 'r' }; +static const symbol s_2_37[4] = { 'i', 't', 'o', 'r' }; +static const symbol s_2_38[4] = { 0xE3, 't', 'o', 'r' }; +static const symbol s_2_39[4] = { 'i', 'c', 'i', 'v' }; +static const symbol s_2_40[4] = { 'a', 't', 'i', 'v' }; +static const symbol s_2_41[4] = { 'i', 't', 'i', 'v' }; +static const symbol s_2_42[5] = { 'i', 'c', 'a', 'l', 0xE3 }; +static const symbol s_2_43[5] = { 'i', 'c', 'i', 'v', 0xE3 }; +static const symbol s_2_44[5] = { 'a', 't', 'i', 'v', 0xE3 }; +static const symbol s_2_45[5] = { 'i', 't', 'i', 'v', 0xE3 }; + +static const struct among a_2[46] = +{ +/* 0 */ { 5, s_2_0, -1, 4, 0}, +/* 1 */ { 5, s_2_1, -1, 4, 0}, +/* 2 */ { 5, s_2_2, -1, 5, 0}, +/* 3 */ { 5, s_2_3, -1, 6, 0}, +/* 4 */ { 5, s_2_4, -1, 4, 0}, +/* 5 */ { 6, s_2_5, -1, 5, 0}, +/* 6 */ { 6, s_2_6, -1, 6, 0}, +/* 7 */ { 6, s_2_7, -1, 5, 0}, +/* 8 */ { 6, s_2_8, -1, 6, 0}, +/* 9 */ { 6, s_2_9, -1, 5, 0}, +/* 10 */ { 7, s_2_10, -1, 4, 0}, +/* 11 */ { 9, s_2_11, -1, 1, 0}, +/* 12 */ { 9, s_2_12, -1, 2, 0}, +/* 13 */ { 7, s_2_13, -1, 3, 0}, +/* 14 */ { 5, s_2_14, -1, 4, 0}, +/* 15 */ { 5, s_2_15, -1, 5, 0}, +/* 16 */ { 5, s_2_16, -1, 6, 0}, +/* 17 */ { 5, s_2_17, -1, 4, 0}, +/* 18 */ { 5, s_2_18, -1, 5, 0}, +/* 19 */ { 7, s_2_19, 18, 4, 0}, +/* 20 */ { 5, s_2_20, -1, 6, 0}, +/* 21 */ { 5, s_2_21, -1, 5, 0}, +/* 22 */ { 7, s_2_22, -1, 4, 0}, +/* 23 */ { 9, s_2_23, -1, 1, 0}, +/* 24 */ { 7, s_2_24, -1, 3, 0}, +/* 25 */ { 5, s_2_25, -1, 4, 0}, +/* 26 */ { 5, s_2_26, -1, 5, 0}, +/* 27 */ { 5, s_2_27, -1, 6, 0}, +/* 28 */ { 6, s_2_28, -1, 4, 0}, +/* 29 */ { 8, s_2_29, -1, 1, 0}, +/* 30 */ { 6, s_2_30, -1, 3, 0}, +/* 31 */ { 7, s_2_31, -1, 4, 0}, +/* 32 */ { 9, s_2_32, -1, 1, 0}, +/* 33 */ { 7, s_2_33, -1, 3, 0}, +/* 34 */ { 4, s_2_34, -1, 4, 0}, +/* 35 */ { 4, s_2_35, -1, 5, 0}, +/* 36 */ { 6, s_2_36, 35, 4, 0}, +/* 37 */ { 4, s_2_37, -1, 6, 0}, +/* 38 */ { 4, s_2_38, -1, 5, 0}, +/* 39 */ { 4, s_2_39, -1, 4, 0}, +/* 40 */ { 4, s_2_40, -1, 5, 0}, +/* 41 */ { 4, s_2_41, -1, 6, 0}, +/* 42 */ { 5, s_2_42, -1, 4, 0}, +/* 43 */ { 5, s_2_43, -1, 4, 0}, +/* 44 */ { 5, s_2_44, -1, 5, 0}, +/* 45 */ { 5, s_2_45, -1, 6, 0} +}; + +static const symbol s_3_0[3] = { 'i', 'c', 'a' }; +static const symbol s_3_1[5] = { 'a', 'b', 'i', 'l', 'a' }; +static const symbol s_3_2[5] = { 'i', 'b', 'i', 'l', 'a' }; +static const symbol s_3_3[4] = { 'o', 'a', 's', 'a' }; +static const symbol s_3_4[3] = { 'a', 't', 'a' }; +static const symbol s_3_5[3] = { 'i', 't', 'a' }; +static const symbol s_3_6[4] = { 'a', 'n', 't', 'a' }; +static const symbol s_3_7[4] = { 'i', 's', 't', 'a' }; +static const symbol s_3_8[3] = { 'u', 't', 'a' }; +static const symbol s_3_9[3] = { 'i', 'v', 'a' }; +static const symbol s_3_10[2] = { 'i', 'c' }; +static const symbol s_3_11[3] = { 'i', 'c', 'e' }; +static const symbol s_3_12[5] = { 'a', 'b', 'i', 'l', 'e' }; +static const symbol s_3_13[5] = { 'i', 'b', 'i', 'l', 'e' }; +static const symbol s_3_14[4] = { 'i', 's', 'm', 'e' }; +static const symbol s_3_15[4] = { 'i', 'u', 'n', 'e' }; +static const symbol s_3_16[4] = { 'o', 'a', 's', 'e' }; +static const symbol s_3_17[3] = { 'a', 't', 'e' }; +static const symbol s_3_18[5] = { 'i', 't', 'a', 't', 'e' }; +static const symbol s_3_19[3] = { 'i', 't', 'e' }; +static const symbol s_3_20[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_3_21[4] = { 'i', 's', 't', 'e' }; +static const symbol s_3_22[3] = { 'u', 't', 'e' }; +static const symbol s_3_23[3] = { 'i', 'v', 'e' }; +static const symbol s_3_24[3] = { 'i', 'c', 'i' }; +static const symbol s_3_25[5] = { 'a', 'b', 'i', 'l', 'i' }; +static const symbol s_3_26[5] = { 'i', 'b', 'i', 'l', 'i' }; +static const symbol s_3_27[4] = { 'i', 'u', 'n', 'i' }; +static const symbol s_3_28[5] = { 'a', 't', 'o', 'r', 'i' }; +static const symbol s_3_29[3] = { 'o', 's', 'i' }; +static const symbol s_3_30[3] = { 'a', 't', 'i' }; +static const symbol s_3_31[5] = { 'i', 't', 'a', 't', 'i' }; +static const symbol s_3_32[3] = { 'i', 't', 'i' }; +static const symbol s_3_33[4] = { 'a', 'n', 't', 'i' }; +static const symbol s_3_34[4] = { 'i', 's', 't', 'i' }; +static const symbol s_3_35[3] = { 'u', 't', 'i' }; +static const symbol s_3_36[4] = { 'i', 0xBA, 't', 'i' }; +static const symbol s_3_37[3] = { 'i', 'v', 'i' }; +static const symbol s_3_38[3] = { 'o', 0xBA, 'i' }; +static const symbol s_3_39[4] = { 'i', 't', 0xE3, 'i' }; +static const symbol s_3_40[5] = { 'i', 't', 0xE3, 0xFE, 'i' }; +static const symbol s_3_41[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_3_42[4] = { 'i', 'b', 'i', 'l' }; +static const symbol s_3_43[3] = { 'i', 's', 'm' }; +static const symbol s_3_44[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_3_45[2] = { 'o', 's' }; +static const symbol s_3_46[2] = { 'a', 't' }; +static const symbol s_3_47[2] = { 'i', 't' }; +static const symbol s_3_48[3] = { 'a', 'n', 't' }; +static const symbol s_3_49[3] = { 'i', 's', 't' }; +static const symbol s_3_50[2] = { 'u', 't' }; +static const symbol s_3_51[2] = { 'i', 'v' }; +static const symbol s_3_52[3] = { 'i', 'c', 0xE3 }; +static const symbol s_3_53[5] = { 'a', 'b', 'i', 'l', 0xE3 }; +static const symbol s_3_54[5] = { 'i', 'b', 'i', 'l', 0xE3 }; +static const symbol s_3_55[4] = { 'o', 'a', 's', 0xE3 }; +static const symbol s_3_56[3] = { 'a', 't', 0xE3 }; +static const symbol s_3_57[3] = { 'i', 't', 0xE3 }; +static const symbol s_3_58[4] = { 'a', 'n', 't', 0xE3 }; +static const symbol s_3_59[4] = { 'i', 's', 't', 0xE3 }; +static const symbol s_3_60[3] = { 'u', 't', 0xE3 }; +static const symbol s_3_61[3] = { 'i', 'v', 0xE3 }; + +static const struct among a_3[62] = +{ +/* 0 */ { 3, s_3_0, -1, 1, 0}, +/* 1 */ { 5, s_3_1, -1, 1, 0}, +/* 2 */ { 5, s_3_2, -1, 1, 0}, +/* 3 */ { 4, s_3_3, -1, 1, 0}, +/* 4 */ { 3, s_3_4, -1, 1, 0}, +/* 5 */ { 3, s_3_5, -1, 1, 0}, +/* 6 */ { 4, s_3_6, -1, 1, 0}, +/* 7 */ { 4, s_3_7, -1, 3, 0}, +/* 8 */ { 3, s_3_8, -1, 1, 0}, +/* 9 */ { 3, s_3_9, -1, 1, 0}, +/* 10 */ { 2, s_3_10, -1, 1, 0}, +/* 11 */ { 3, s_3_11, -1, 1, 0}, +/* 12 */ { 5, s_3_12, -1, 1, 0}, +/* 13 */ { 5, s_3_13, -1, 1, 0}, +/* 14 */ { 4, s_3_14, -1, 3, 0}, +/* 15 */ { 4, s_3_15, -1, 2, 0}, +/* 16 */ { 4, s_3_16, -1, 1, 0}, +/* 17 */ { 3, s_3_17, -1, 1, 0}, +/* 18 */ { 5, s_3_18, 17, 1, 0}, +/* 19 */ { 3, s_3_19, -1, 1, 0}, +/* 20 */ { 4, s_3_20, -1, 1, 0}, +/* 21 */ { 4, s_3_21, -1, 3, 0}, +/* 22 */ { 3, s_3_22, -1, 1, 0}, +/* 23 */ { 3, s_3_23, -1, 1, 0}, +/* 24 */ { 3, s_3_24, -1, 1, 0}, +/* 25 */ { 5, s_3_25, -1, 1, 0}, +/* 26 */ { 5, s_3_26, -1, 1, 0}, +/* 27 */ { 4, s_3_27, -1, 2, 0}, +/* 28 */ { 5, s_3_28, -1, 1, 0}, +/* 29 */ { 3, s_3_29, -1, 1, 0}, +/* 30 */ { 3, s_3_30, -1, 1, 0}, +/* 31 */ { 5, s_3_31, 30, 1, 0}, +/* 32 */ { 3, s_3_32, -1, 1, 0}, +/* 33 */ { 4, s_3_33, -1, 1, 0}, +/* 34 */ { 4, s_3_34, -1, 3, 0}, +/* 35 */ { 3, s_3_35, -1, 1, 0}, +/* 36 */ { 4, s_3_36, -1, 3, 0}, +/* 37 */ { 3, s_3_37, -1, 1, 0}, +/* 38 */ { 3, s_3_38, -1, 1, 0}, +/* 39 */ { 4, s_3_39, -1, 1, 0}, +/* 40 */ { 5, s_3_40, -1, 1, 0}, +/* 41 */ { 4, s_3_41, -1, 1, 0}, +/* 42 */ { 4, s_3_42, -1, 1, 0}, +/* 43 */ { 3, s_3_43, -1, 3, 0}, +/* 44 */ { 4, s_3_44, -1, 1, 0}, +/* 45 */ { 2, s_3_45, -1, 1, 0}, +/* 46 */ { 2, s_3_46, -1, 1, 0}, +/* 47 */ { 2, s_3_47, -1, 1, 0}, +/* 48 */ { 3, s_3_48, -1, 1, 0}, +/* 49 */ { 3, s_3_49, -1, 3, 0}, +/* 50 */ { 2, s_3_50, -1, 1, 0}, +/* 51 */ { 2, s_3_51, -1, 1, 0}, +/* 52 */ { 3, s_3_52, -1, 1, 0}, +/* 53 */ { 5, s_3_53, -1, 1, 0}, +/* 54 */ { 5, s_3_54, -1, 1, 0}, +/* 55 */ { 4, s_3_55, -1, 1, 0}, +/* 56 */ { 3, s_3_56, -1, 1, 0}, +/* 57 */ { 3, s_3_57, -1, 1, 0}, +/* 58 */ { 4, s_3_58, -1, 1, 0}, +/* 59 */ { 4, s_3_59, -1, 3, 0}, +/* 60 */ { 3, s_3_60, -1, 1, 0}, +/* 61 */ { 3, s_3_61, -1, 1, 0} +}; + +static const symbol s_4_0[2] = { 'e', 'a' }; +static const symbol s_4_1[2] = { 'i', 'a' }; +static const symbol s_4_2[3] = { 'e', 's', 'c' }; +static const symbol s_4_3[3] = { 0xE3, 's', 'c' }; +static const symbol s_4_4[3] = { 'i', 'n', 'd' }; +static const symbol s_4_5[3] = { 0xE2, 'n', 'd' }; +static const symbol s_4_6[3] = { 'a', 'r', 'e' }; +static const symbol s_4_7[3] = { 'e', 'r', 'e' }; +static const symbol s_4_8[3] = { 'i', 'r', 'e' }; +static const symbol s_4_9[3] = { 0xE2, 'r', 'e' }; +static const symbol s_4_10[2] = { 's', 'e' }; +static const symbol s_4_11[3] = { 'a', 's', 'e' }; +static const symbol s_4_12[4] = { 's', 'e', 's', 'e' }; +static const symbol s_4_13[3] = { 'i', 's', 'e' }; +static const symbol s_4_14[3] = { 'u', 's', 'e' }; +static const symbol s_4_15[3] = { 0xE2, 's', 'e' }; +static const symbol s_4_16[4] = { 'e', 0xBA, 't', 'e' }; +static const symbol s_4_17[4] = { 0xE3, 0xBA, 't', 'e' }; +static const symbol s_4_18[3] = { 'e', 'z', 'e' }; +static const symbol s_4_19[2] = { 'a', 'i' }; +static const symbol s_4_20[3] = { 'e', 'a', 'i' }; +static const symbol s_4_21[3] = { 'i', 'a', 'i' }; +static const symbol s_4_22[3] = { 's', 'e', 'i' }; +static const symbol s_4_23[4] = { 'e', 0xBA, 't', 'i' }; +static const symbol s_4_24[4] = { 0xE3, 0xBA, 't', 'i' }; +static const symbol s_4_25[2] = { 'u', 'i' }; +static const symbol s_4_26[3] = { 'e', 'z', 'i' }; +static const symbol s_4_27[3] = { 'a', 0xBA, 'i' }; +static const symbol s_4_28[4] = { 's', 'e', 0xBA, 'i' }; +static const symbol s_4_29[5] = { 'a', 's', 'e', 0xBA, 'i' }; +static const symbol s_4_30[6] = { 's', 'e', 's', 'e', 0xBA, 'i' }; +static const symbol s_4_31[5] = { 'i', 's', 'e', 0xBA, 'i' }; +static const symbol s_4_32[5] = { 'u', 's', 'e', 0xBA, 'i' }; +static const symbol s_4_33[5] = { 0xE2, 's', 'e', 0xBA, 'i' }; +static const symbol s_4_34[3] = { 'i', 0xBA, 'i' }; +static const symbol s_4_35[3] = { 'u', 0xBA, 'i' }; +static const symbol s_4_36[3] = { 0xE2, 0xBA, 'i' }; +static const symbol s_4_37[2] = { 0xE2, 'i' }; +static const symbol s_4_38[3] = { 'a', 0xFE, 'i' }; +static const symbol s_4_39[4] = { 'e', 'a', 0xFE, 'i' }; +static const symbol s_4_40[4] = { 'i', 'a', 0xFE, 'i' }; +static const symbol s_4_41[3] = { 'e', 0xFE, 'i' }; +static const symbol s_4_42[3] = { 'i', 0xFE, 'i' }; +static const symbol s_4_43[3] = { 0xE2, 0xFE, 'i' }; +static const symbol s_4_44[5] = { 'a', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_45[6] = { 's', 'e', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_46[7] = { 'a', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_47[8] = { 's', 'e', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_48[7] = { 'i', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_49[7] = { 'u', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_50[7] = { 0xE2, 's', 'e', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_51[5] = { 'i', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_52[5] = { 'u', 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_53[5] = { 0xE2, 'r', 0xE3, 0xFE, 'i' }; +static const symbol s_4_54[2] = { 'a', 'm' }; +static const symbol s_4_55[3] = { 'e', 'a', 'm' }; +static const symbol s_4_56[3] = { 'i', 'a', 'm' }; +static const symbol s_4_57[2] = { 'e', 'm' }; +static const symbol s_4_58[4] = { 'a', 's', 'e', 'm' }; +static const symbol s_4_59[5] = { 's', 'e', 's', 'e', 'm' }; +static const symbol s_4_60[4] = { 'i', 's', 'e', 'm' }; +static const symbol s_4_61[4] = { 'u', 's', 'e', 'm' }; +static const symbol s_4_62[4] = { 0xE2, 's', 'e', 'm' }; +static const symbol s_4_63[2] = { 'i', 'm' }; +static const symbol s_4_64[2] = { 0xE2, 'm' }; +static const symbol s_4_65[2] = { 0xE3, 'm' }; +static const symbol s_4_66[4] = { 'a', 'r', 0xE3, 'm' }; +static const symbol s_4_67[5] = { 's', 'e', 'r', 0xE3, 'm' }; +static const symbol s_4_68[6] = { 'a', 's', 'e', 'r', 0xE3, 'm' }; +static const symbol s_4_69[7] = { 's', 'e', 's', 'e', 'r', 0xE3, 'm' }; +static const symbol s_4_70[6] = { 'i', 's', 'e', 'r', 0xE3, 'm' }; +static const symbol s_4_71[6] = { 'u', 's', 'e', 'r', 0xE3, 'm' }; +static const symbol s_4_72[6] = { 0xE2, 's', 'e', 'r', 0xE3, 'm' }; +static const symbol s_4_73[4] = { 'i', 'r', 0xE3, 'm' }; +static const symbol s_4_74[4] = { 'u', 'r', 0xE3, 'm' }; +static const symbol s_4_75[4] = { 0xE2, 'r', 0xE3, 'm' }; +static const symbol s_4_76[2] = { 'a', 'u' }; +static const symbol s_4_77[3] = { 'e', 'a', 'u' }; +static const symbol s_4_78[3] = { 'i', 'a', 'u' }; +static const symbol s_4_79[4] = { 'i', 'n', 'd', 'u' }; +static const symbol s_4_80[4] = { 0xE2, 'n', 'd', 'u' }; +static const symbol s_4_81[2] = { 'e', 'z' }; +static const symbol s_4_82[5] = { 'e', 'a', 's', 'c', 0xE3 }; +static const symbol s_4_83[3] = { 'a', 'r', 0xE3 }; +static const symbol s_4_84[4] = { 's', 'e', 'r', 0xE3 }; +static const symbol s_4_85[5] = { 'a', 's', 'e', 'r', 0xE3 }; +static const symbol s_4_86[6] = { 's', 'e', 's', 'e', 'r', 0xE3 }; +static const symbol s_4_87[5] = { 'i', 's', 'e', 'r', 0xE3 }; +static const symbol s_4_88[5] = { 'u', 's', 'e', 'r', 0xE3 }; +static const symbol s_4_89[5] = { 0xE2, 's', 'e', 'r', 0xE3 }; +static const symbol s_4_90[3] = { 'i', 'r', 0xE3 }; +static const symbol s_4_91[3] = { 'u', 'r', 0xE3 }; +static const symbol s_4_92[3] = { 0xE2, 'r', 0xE3 }; +static const symbol s_4_93[4] = { 'e', 'a', 'z', 0xE3 }; + +static const struct among a_4[94] = +{ +/* 0 */ { 2, s_4_0, -1, 1, 0}, +/* 1 */ { 2, s_4_1, -1, 1, 0}, +/* 2 */ { 3, s_4_2, -1, 1, 0}, +/* 3 */ { 3, s_4_3, -1, 1, 0}, +/* 4 */ { 3, s_4_4, -1, 1, 0}, +/* 5 */ { 3, s_4_5, -1, 1, 0}, +/* 6 */ { 3, s_4_6, -1, 1, 0}, +/* 7 */ { 3, s_4_7, -1, 1, 0}, +/* 8 */ { 3, s_4_8, -1, 1, 0}, +/* 9 */ { 3, s_4_9, -1, 1, 0}, +/* 10 */ { 2, s_4_10, -1, 2, 0}, +/* 11 */ { 3, s_4_11, 10, 1, 0}, +/* 12 */ { 4, s_4_12, 10, 2, 0}, +/* 13 */ { 3, s_4_13, 10, 1, 0}, +/* 14 */ { 3, s_4_14, 10, 1, 0}, +/* 15 */ { 3, s_4_15, 10, 1, 0}, +/* 16 */ { 4, s_4_16, -1, 1, 0}, +/* 17 */ { 4, s_4_17, -1, 1, 0}, +/* 18 */ { 3, s_4_18, -1, 1, 0}, +/* 19 */ { 2, s_4_19, -1, 1, 0}, +/* 20 */ { 3, s_4_20, 19, 1, 0}, +/* 21 */ { 3, s_4_21, 19, 1, 0}, +/* 22 */ { 3, s_4_22, -1, 2, 0}, +/* 23 */ { 4, s_4_23, -1, 1, 0}, +/* 24 */ { 4, s_4_24, -1, 1, 0}, +/* 25 */ { 2, s_4_25, -1, 1, 0}, +/* 26 */ { 3, s_4_26, -1, 1, 0}, +/* 27 */ { 3, s_4_27, -1, 1, 0}, +/* 28 */ { 4, s_4_28, -1, 2, 0}, +/* 29 */ { 5, s_4_29, 28, 1, 0}, +/* 30 */ { 6, s_4_30, 28, 2, 0}, +/* 31 */ { 5, s_4_31, 28, 1, 0}, +/* 32 */ { 5, s_4_32, 28, 1, 0}, +/* 33 */ { 5, s_4_33, 28, 1, 0}, +/* 34 */ { 3, s_4_34, -1, 1, 0}, +/* 35 */ { 3, s_4_35, -1, 1, 0}, +/* 36 */ { 3, s_4_36, -1, 1, 0}, +/* 37 */ { 2, s_4_37, -1, 1, 0}, +/* 38 */ { 3, s_4_38, -1, 2, 0}, +/* 39 */ { 4, s_4_39, 38, 1, 0}, +/* 40 */ { 4, s_4_40, 38, 1, 0}, +/* 41 */ { 3, s_4_41, -1, 2, 0}, +/* 42 */ { 3, s_4_42, -1, 2, 0}, +/* 43 */ { 3, s_4_43, -1, 2, 0}, +/* 44 */ { 5, s_4_44, -1, 1, 0}, +/* 45 */ { 6, s_4_45, -1, 2, 0}, +/* 46 */ { 7, s_4_46, 45, 1, 0}, +/* 47 */ { 8, s_4_47, 45, 2, 0}, +/* 48 */ { 7, s_4_48, 45, 1, 0}, +/* 49 */ { 7, s_4_49, 45, 1, 0}, +/* 50 */ { 7, s_4_50, 45, 1, 0}, +/* 51 */ { 5, s_4_51, -1, 1, 0}, +/* 52 */ { 5, s_4_52, -1, 1, 0}, +/* 53 */ { 5, s_4_53, -1, 1, 0}, +/* 54 */ { 2, s_4_54, -1, 1, 0}, +/* 55 */ { 3, s_4_55, 54, 1, 0}, +/* 56 */ { 3, s_4_56, 54, 1, 0}, +/* 57 */ { 2, s_4_57, -1, 2, 0}, +/* 58 */ { 4, s_4_58, 57, 1, 0}, +/* 59 */ { 5, s_4_59, 57, 2, 0}, +/* 60 */ { 4, s_4_60, 57, 1, 0}, +/* 61 */ { 4, s_4_61, 57, 1, 0}, +/* 62 */ { 4, s_4_62, 57, 1, 0}, +/* 63 */ { 2, s_4_63, -1, 2, 0}, +/* 64 */ { 2, s_4_64, -1, 2, 0}, +/* 65 */ { 2, s_4_65, -1, 2, 0}, +/* 66 */ { 4, s_4_66, 65, 1, 0}, +/* 67 */ { 5, s_4_67, 65, 2, 0}, +/* 68 */ { 6, s_4_68, 67, 1, 0}, +/* 69 */ { 7, s_4_69, 67, 2, 0}, +/* 70 */ { 6, s_4_70, 67, 1, 0}, +/* 71 */ { 6, s_4_71, 67, 1, 0}, +/* 72 */ { 6, s_4_72, 67, 1, 0}, +/* 73 */ { 4, s_4_73, 65, 1, 0}, +/* 74 */ { 4, s_4_74, 65, 1, 0}, +/* 75 */ { 4, s_4_75, 65, 1, 0}, +/* 76 */ { 2, s_4_76, -1, 1, 0}, +/* 77 */ { 3, s_4_77, 76, 1, 0}, +/* 78 */ { 3, s_4_78, 76, 1, 0}, +/* 79 */ { 4, s_4_79, -1, 1, 0}, +/* 80 */ { 4, s_4_80, -1, 1, 0}, +/* 81 */ { 2, s_4_81, -1, 1, 0}, +/* 82 */ { 5, s_4_82, -1, 1, 0}, +/* 83 */ { 3, s_4_83, -1, 1, 0}, +/* 84 */ { 4, s_4_84, -1, 2, 0}, +/* 85 */ { 5, s_4_85, 84, 1, 0}, +/* 86 */ { 6, s_4_86, 84, 2, 0}, +/* 87 */ { 5, s_4_87, 84, 1, 0}, +/* 88 */ { 5, s_4_88, 84, 1, 0}, +/* 89 */ { 5, s_4_89, 84, 1, 0}, +/* 90 */ { 3, s_4_90, -1, 1, 0}, +/* 91 */ { 3, s_4_91, -1, 1, 0}, +/* 92 */ { 3, s_4_92, -1, 1, 0}, +/* 93 */ { 4, s_4_93, -1, 1, 0} +}; + +static const symbol s_5_0[1] = { 'a' }; +static const symbol s_5_1[1] = { 'e' }; +static const symbol s_5_2[2] = { 'i', 'e' }; +static const symbol s_5_3[1] = { 'i' }; +static const symbol s_5_4[1] = { 0xE3 }; + +static const struct among a_5[5] = +{ +/* 0 */ { 1, s_5_0, -1, 1, 0}, +/* 1 */ { 1, s_5_1, -1, 1, 0}, +/* 2 */ { 2, s_5_2, 1, 1, 0}, +/* 3 */ { 1, s_5_3, -1, 1, 0}, +/* 4 */ { 1, s_5_4, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 32 }; + +static const symbol s_0[] = { 'u' }; +static const symbol s_1[] = { 'U' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'I' }; +static const symbol s_4[] = { 'i' }; +static const symbol s_5[] = { 'u' }; +static const symbol s_6[] = { 'a' }; +static const symbol s_7[] = { 'e' }; +static const symbol s_8[] = { 'i' }; +static const symbol s_9[] = { 'a', 'b' }; +static const symbol s_10[] = { 'i' }; +static const symbol s_11[] = { 'a', 't' }; +static const symbol s_12[] = { 'a', 0xFE, 'i' }; +static const symbol s_13[] = { 'a', 'b', 'i', 'l' }; +static const symbol s_14[] = { 'i', 'b', 'i', 'l' }; +static const symbol s_15[] = { 'i', 'v' }; +static const symbol s_16[] = { 'i', 'c' }; +static const symbol s_17[] = { 'a', 't' }; +static const symbol s_18[] = { 'i', 't' }; +static const symbol s_19[] = { 0xFE }; +static const symbol s_20[] = { 't' }; +static const symbol s_21[] = { 'i', 's', 't' }; +static const symbol s_22[] = { 'u' }; + +static int r_prelude(struct SN_env * z) { + while(1) { /* repeat, line 32 */ + int c1 = z->c; + while(1) { /* goto, line 32 */ + int c2 = z->c; + if (in_grouping(z, g_v, 97, 238, 0)) goto lab1; + z->bra = z->c; /* [, line 33 */ + { int c3 = z->c; /* or, line 33 */ + if (!(eq_s(z, 1, s_0))) goto lab3; + z->ket = z->c; /* ], line 33 */ + if (in_grouping(z, g_v, 97, 238, 0)) goto lab3; + { int ret = slice_from_s(z, 1, s_1); /* <-, line 33 */ + if (ret < 0) return ret; + } + goto lab2; + lab3: + z->c = c3; + if (!(eq_s(z, 1, s_2))) goto lab1; + z->ket = z->c; /* ], line 34 */ + if (in_grouping(z, g_v, 97, 238, 0)) goto lab1; + { int ret = slice_from_s(z, 1, s_3); /* <-, line 34 */ + if (ret < 0) return ret; + } + } + lab2: + z->c = c2; + break; + lab1: + z->c = c2; + if (z->c >= z->l) goto lab0; + z->c++; /* goto, line 32 */ + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 44 */ + { int c2 = z->c; /* or, line 46 */ + if (in_grouping(z, g_v, 97, 238, 0)) goto lab2; + { int c3 = z->c; /* or, line 45 */ + if (out_grouping(z, g_v, 97, 238, 0)) goto lab4; + { /* gopast */ /* grouping v, line 45 */ + int ret = out_grouping(z, g_v, 97, 238, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping(z, g_v, 97, 238, 0)) goto lab2; + { /* gopast */ /* non v, line 45 */ + int ret = in_grouping(z, g_v, 97, 238, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping(z, g_v, 97, 238, 0)) goto lab0; + { int c4 = z->c; /* or, line 47 */ + if (out_grouping(z, g_v, 97, 238, 0)) goto lab6; + { /* gopast */ /* grouping v, line 47 */ + int ret = out_grouping(z, g_v, 97, 238, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping(z, g_v, 97, 238, 0)) goto lab0; + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 47 */ + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 48 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 50 */ + { /* gopast */ /* grouping v, line 51 */ + int ret = out_grouping(z, g_v, 97, 238, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 51 */ + int ret = in_grouping(z, g_v, 97, 238, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 51 */ + { /* gopast */ /* grouping v, line 52 */ + int ret = out_grouping(z, g_v, 97, 238, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 52 */ + int ret = in_grouping(z, g_v, 97, 238, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 52 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 56 */ + int c1 = z->c; + z->bra = z->c; /* [, line 58 */ + if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else + among_var = find_among(z, a_0, 3); /* substring, line 58 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 58 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_5); /* <-, line 60 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c >= z->l) goto lab0; + z->c++; /* next, line 61 */ + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_step_0(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 73 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((266786 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_1, 16); /* substring, line 73 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 73 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 73 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 75 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_6); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_7); /* <-, line 79 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_8); /* <-, line 81 */ + if (ret < 0) return ret; + } + break; + case 5: + { int m1 = z->l - z->c; (void)m1; /* not, line 83 */ + if (!(eq_s_b(z, 2, s_9))) goto lab0; + return 0; + lab0: + z->c = z->l - m1; + } + { int ret = slice_from_s(z, 1, s_10); /* <-, line 83 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 2, s_11); /* <-, line 85 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 3, s_12); /* <-, line 87 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_combo_suffix(struct SN_env * z) { + int among_var; + { int m_test = z->l - z->c; /* test, line 91 */ + z->ket = z->c; /* [, line 92 */ + among_var = find_among_b(z, a_2, 46); /* substring, line 92 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 92 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 92 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_13); /* <-, line 101 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_14); /* <-, line 104 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_15); /* <-, line 107 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 2, s_16); /* <-, line 113 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 2, s_17); /* <-, line 118 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 2, s_18); /* <-, line 122 */ + if (ret < 0) return ret; + } + break; + } + z->B[0] = 1; /* set standard_suffix_removed, line 125 */ + z->c = z->l - m_test; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->B[0] = 0; /* unset standard_suffix_removed, line 130 */ + while(1) { /* repeat, line 131 */ + int m1 = z->l - z->c; (void)m1; + { int ret = r_combo_suffix(z); + if (ret == 0) goto lab0; /* call combo_suffix, line 131 */ + if (ret < 0) return ret; + } + continue; + lab0: + z->c = z->l - m1; + break; + } + z->ket = z->c; /* [, line 132 */ + among_var = find_among_b(z, a_3, 62); /* substring, line 132 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 132 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 132 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 149 */ + if (ret < 0) return ret; + } + break; + case 2: + if (!(eq_s_b(z, 1, s_19))) return 0; + z->bra = z->c; /* ], line 152 */ + { int ret = slice_from_s(z, 1, s_20); /* <-, line 152 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 3, s_21); /* <-, line 156 */ + if (ret < 0) return ret; + } + break; + } + z->B[0] = 1; /* set standard_suffix_removed, line 160 */ + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 164 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 164 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 165 */ + among_var = find_among_b(z, a_4, 94); /* substring, line 165 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 165 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int m2 = z->l - z->c; (void)m2; /* or, line 200 */ + if (out_grouping_b(z, g_v, 97, 238, 0)) goto lab1; + goto lab0; + lab1: + z->c = z->l - m2; + if (!(eq_s_b(z, 1, s_22))) { z->lb = mlimit; return 0; } + } + lab0: + { int ret = slice_del(z); /* delete, line 200 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 214 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_vowel_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 219 */ + among_var = find_among_b(z, a_5, 5); /* substring, line 219 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 219 */ + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 219 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 220 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int romanian_ISO_8859_2_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 226 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 226 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 227 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 227 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 228 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 229 */ + { int ret = r_step_0(z); + if (ret == 0) goto lab2; /* call step_0, line 229 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 230 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab3; /* call standard_suffix, line 230 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 231 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 231 */ + if (!(z->B[0])) goto lab6; /* Boolean test standard_suffix_removed, line 231 */ + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ + if (ret < 0) return ret; + } + } + lab5: + lab4: + z->c = z->l - m5; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 232 */ + { int ret = r_vowel_suffix(z); + if (ret == 0) goto lab7; /* call vowel_suffix, line 232 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m7; + } + z->c = z->lb; + { int c8 = z->c; /* do, line 234 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab8; /* call postlude, line 234 */ + if (ret < 0) return ret; + } + lab8: + z->c = c8; + } + return 1; +} + +extern struct SN_env * romanian_ISO_8859_2_create_env(void) { return SN_create_env(0, 3, 1); } + +extern void romanian_ISO_8859_2_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_KOI8_R_russian.c b/src/backend/snowball/libstemmer/stem_KOI8_R_russian.c new file mode 100644 index 00000000000..be7feb752ee --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_KOI8_R_russian.c @@ -0,0 +1,700 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int russian_KOI8_R_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_tidy_up(struct SN_env * z); +static int r_derivational(struct SN_env * z); +static int r_noun(struct SN_env * z); +static int r_verb(struct SN_env * z); +static int r_reflexive(struct SN_env * z); +static int r_adjectival(struct SN_env * z); +static int r_adjective(struct SN_env * z); +static int r_perfective_gerund(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * russian_KOI8_R_create_env(void); +extern void russian_KOI8_R_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[3] = { 0xD7, 0xDB, 0xC9 }; +static const symbol s_0_1[4] = { 0xC9, 0xD7, 0xDB, 0xC9 }; +static const symbol s_0_2[4] = { 0xD9, 0xD7, 0xDB, 0xC9 }; +static const symbol s_0_3[1] = { 0xD7 }; +static const symbol s_0_4[2] = { 0xC9, 0xD7 }; +static const symbol s_0_5[2] = { 0xD9, 0xD7 }; +static const symbol s_0_6[5] = { 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; +static const symbol s_0_7[6] = { 0xC9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; +static const symbol s_0_8[6] = { 0xD9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; + +static const struct among a_0[9] = +{ +/* 0 */ { 3, s_0_0, -1, 1, 0}, +/* 1 */ { 4, s_0_1, 0, 2, 0}, +/* 2 */ { 4, s_0_2, 0, 2, 0}, +/* 3 */ { 1, s_0_3, -1, 1, 0}, +/* 4 */ { 2, s_0_4, 3, 2, 0}, +/* 5 */ { 2, s_0_5, 3, 2, 0}, +/* 6 */ { 5, s_0_6, -1, 1, 0}, +/* 7 */ { 6, s_0_7, 6, 2, 0}, +/* 8 */ { 6, s_0_8, 6, 2, 0} +}; + +static const symbol s_1_0[2] = { 0xC0, 0xC0 }; +static const symbol s_1_1[2] = { 0xC5, 0xC0 }; +static const symbol s_1_2[2] = { 0xCF, 0xC0 }; +static const symbol s_1_3[2] = { 0xD5, 0xC0 }; +static const symbol s_1_4[2] = { 0xC5, 0xC5 }; +static const symbol s_1_5[2] = { 0xC9, 0xC5 }; +static const symbol s_1_6[2] = { 0xCF, 0xC5 }; +static const symbol s_1_7[2] = { 0xD9, 0xC5 }; +static const symbol s_1_8[2] = { 0xC9, 0xC8 }; +static const symbol s_1_9[2] = { 0xD9, 0xC8 }; +static const symbol s_1_10[3] = { 0xC9, 0xCD, 0xC9 }; +static const symbol s_1_11[3] = { 0xD9, 0xCD, 0xC9 }; +static const symbol s_1_12[2] = { 0xC5, 0xCA }; +static const symbol s_1_13[2] = { 0xC9, 0xCA }; +static const symbol s_1_14[2] = { 0xCF, 0xCA }; +static const symbol s_1_15[2] = { 0xD9, 0xCA }; +static const symbol s_1_16[2] = { 0xC5, 0xCD }; +static const symbol s_1_17[2] = { 0xC9, 0xCD }; +static const symbol s_1_18[2] = { 0xCF, 0xCD }; +static const symbol s_1_19[2] = { 0xD9, 0xCD }; +static const symbol s_1_20[3] = { 0xC5, 0xC7, 0xCF }; +static const symbol s_1_21[3] = { 0xCF, 0xC7, 0xCF }; +static const symbol s_1_22[2] = { 0xC1, 0xD1 }; +static const symbol s_1_23[2] = { 0xD1, 0xD1 }; +static const symbol s_1_24[3] = { 0xC5, 0xCD, 0xD5 }; +static const symbol s_1_25[3] = { 0xCF, 0xCD, 0xD5 }; + +static const struct among a_1[26] = +{ +/* 0 */ { 2, s_1_0, -1, 1, 0}, +/* 1 */ { 2, s_1_1, -1, 1, 0}, +/* 2 */ { 2, s_1_2, -1, 1, 0}, +/* 3 */ { 2, s_1_3, -1, 1, 0}, +/* 4 */ { 2, s_1_4, -1, 1, 0}, +/* 5 */ { 2, s_1_5, -1, 1, 0}, +/* 6 */ { 2, s_1_6, -1, 1, 0}, +/* 7 */ { 2, s_1_7, -1, 1, 0}, +/* 8 */ { 2, s_1_8, -1, 1, 0}, +/* 9 */ { 2, s_1_9, -1, 1, 0}, +/* 10 */ { 3, s_1_10, -1, 1, 0}, +/* 11 */ { 3, s_1_11, -1, 1, 0}, +/* 12 */ { 2, s_1_12, -1, 1, 0}, +/* 13 */ { 2, s_1_13, -1, 1, 0}, +/* 14 */ { 2, s_1_14, -1, 1, 0}, +/* 15 */ { 2, s_1_15, -1, 1, 0}, +/* 16 */ { 2, s_1_16, -1, 1, 0}, +/* 17 */ { 2, s_1_17, -1, 1, 0}, +/* 18 */ { 2, s_1_18, -1, 1, 0}, +/* 19 */ { 2, s_1_19, -1, 1, 0}, +/* 20 */ { 3, s_1_20, -1, 1, 0}, +/* 21 */ { 3, s_1_21, -1, 1, 0}, +/* 22 */ { 2, s_1_22, -1, 1, 0}, +/* 23 */ { 2, s_1_23, -1, 1, 0}, +/* 24 */ { 3, s_1_24, -1, 1, 0}, +/* 25 */ { 3, s_1_25, -1, 1, 0} +}; + +static const symbol s_2_0[2] = { 0xC5, 0xCD }; +static const symbol s_2_1[2] = { 0xCE, 0xCE }; +static const symbol s_2_2[2] = { 0xD7, 0xDB }; +static const symbol s_2_3[3] = { 0xC9, 0xD7, 0xDB }; +static const symbol s_2_4[3] = { 0xD9, 0xD7, 0xDB }; +static const symbol s_2_5[1] = { 0xDD }; +static const symbol s_2_6[2] = { 0xC0, 0xDD }; +static const symbol s_2_7[3] = { 0xD5, 0xC0, 0xDD }; + +static const struct among a_2[8] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 2, s_2_1, -1, 1, 0}, +/* 2 */ { 2, s_2_2, -1, 1, 0}, +/* 3 */ { 3, s_2_3, 2, 2, 0}, +/* 4 */ { 3, s_2_4, 2, 2, 0}, +/* 5 */ { 1, s_2_5, -1, 1, 0}, +/* 6 */ { 2, s_2_6, 5, 1, 0}, +/* 7 */ { 3, s_2_7, 6, 2, 0} +}; + +static const symbol s_3_0[2] = { 0xD3, 0xD1 }; +static const symbol s_3_1[2] = { 0xD3, 0xD8 }; + +static const struct among a_3[2] = +{ +/* 0 */ { 2, s_3_0, -1, 1, 0}, +/* 1 */ { 2, s_3_1, -1, 1, 0} +}; + +static const symbol s_4_0[1] = { 0xC0 }; +static const symbol s_4_1[2] = { 0xD5, 0xC0 }; +static const symbol s_4_2[2] = { 0xCC, 0xC1 }; +static const symbol s_4_3[3] = { 0xC9, 0xCC, 0xC1 }; +static const symbol s_4_4[3] = { 0xD9, 0xCC, 0xC1 }; +static const symbol s_4_5[2] = { 0xCE, 0xC1 }; +static const symbol s_4_6[3] = { 0xC5, 0xCE, 0xC1 }; +static const symbol s_4_7[3] = { 0xC5, 0xD4, 0xC5 }; +static const symbol s_4_8[3] = { 0xC9, 0xD4, 0xC5 }; +static const symbol s_4_9[3] = { 0xCA, 0xD4, 0xC5 }; +static const symbol s_4_10[4] = { 0xC5, 0xCA, 0xD4, 0xC5 }; +static const symbol s_4_11[4] = { 0xD5, 0xCA, 0xD4, 0xC5 }; +static const symbol s_4_12[2] = { 0xCC, 0xC9 }; +static const symbol s_4_13[3] = { 0xC9, 0xCC, 0xC9 }; +static const symbol s_4_14[3] = { 0xD9, 0xCC, 0xC9 }; +static const symbol s_4_15[1] = { 0xCA }; +static const symbol s_4_16[2] = { 0xC5, 0xCA }; +static const symbol s_4_17[2] = { 0xD5, 0xCA }; +static const symbol s_4_18[1] = { 0xCC }; +static const symbol s_4_19[2] = { 0xC9, 0xCC }; +static const symbol s_4_20[2] = { 0xD9, 0xCC }; +static const symbol s_4_21[2] = { 0xC5, 0xCD }; +static const symbol s_4_22[2] = { 0xC9, 0xCD }; +static const symbol s_4_23[2] = { 0xD9, 0xCD }; +static const symbol s_4_24[1] = { 0xCE }; +static const symbol s_4_25[2] = { 0xC5, 0xCE }; +static const symbol s_4_26[2] = { 0xCC, 0xCF }; +static const symbol s_4_27[3] = { 0xC9, 0xCC, 0xCF }; +static const symbol s_4_28[3] = { 0xD9, 0xCC, 0xCF }; +static const symbol s_4_29[2] = { 0xCE, 0xCF }; +static const symbol s_4_30[3] = { 0xC5, 0xCE, 0xCF }; +static const symbol s_4_31[3] = { 0xCE, 0xCE, 0xCF }; +static const symbol s_4_32[2] = { 0xC0, 0xD4 }; +static const symbol s_4_33[3] = { 0xD5, 0xC0, 0xD4 }; +static const symbol s_4_34[2] = { 0xC5, 0xD4 }; +static const symbol s_4_35[3] = { 0xD5, 0xC5, 0xD4 }; +static const symbol s_4_36[2] = { 0xC9, 0xD4 }; +static const symbol s_4_37[2] = { 0xD1, 0xD4 }; +static const symbol s_4_38[2] = { 0xD9, 0xD4 }; +static const symbol s_4_39[2] = { 0xD4, 0xD8 }; +static const symbol s_4_40[3] = { 0xC9, 0xD4, 0xD8 }; +static const symbol s_4_41[3] = { 0xD9, 0xD4, 0xD8 }; +static const symbol s_4_42[3] = { 0xC5, 0xDB, 0xD8 }; +static const symbol s_4_43[3] = { 0xC9, 0xDB, 0xD8 }; +static const symbol s_4_44[2] = { 0xCE, 0xD9 }; +static const symbol s_4_45[3] = { 0xC5, 0xCE, 0xD9 }; + +static const struct among a_4[46] = +{ +/* 0 */ { 1, s_4_0, -1, 2, 0}, +/* 1 */ { 2, s_4_1, 0, 2, 0}, +/* 2 */ { 2, s_4_2, -1, 1, 0}, +/* 3 */ { 3, s_4_3, 2, 2, 0}, +/* 4 */ { 3, s_4_4, 2, 2, 0}, +/* 5 */ { 2, s_4_5, -1, 1, 0}, +/* 6 */ { 3, s_4_6, 5, 2, 0}, +/* 7 */ { 3, s_4_7, -1, 1, 0}, +/* 8 */ { 3, s_4_8, -1, 2, 0}, +/* 9 */ { 3, s_4_9, -1, 1, 0}, +/* 10 */ { 4, s_4_10, 9, 2, 0}, +/* 11 */ { 4, s_4_11, 9, 2, 0}, +/* 12 */ { 2, s_4_12, -1, 1, 0}, +/* 13 */ { 3, s_4_13, 12, 2, 0}, +/* 14 */ { 3, s_4_14, 12, 2, 0}, +/* 15 */ { 1, s_4_15, -1, 1, 0}, +/* 16 */ { 2, s_4_16, 15, 2, 0}, +/* 17 */ { 2, s_4_17, 15, 2, 0}, +/* 18 */ { 1, s_4_18, -1, 1, 0}, +/* 19 */ { 2, s_4_19, 18, 2, 0}, +/* 20 */ { 2, s_4_20, 18, 2, 0}, +/* 21 */ { 2, s_4_21, -1, 1, 0}, +/* 22 */ { 2, s_4_22, -1, 2, 0}, +/* 23 */ { 2, s_4_23, -1, 2, 0}, +/* 24 */ { 1, s_4_24, -1, 1, 0}, +/* 25 */ { 2, s_4_25, 24, 2, 0}, +/* 26 */ { 2, s_4_26, -1, 1, 0}, +/* 27 */ { 3, s_4_27, 26, 2, 0}, +/* 28 */ { 3, s_4_28, 26, 2, 0}, +/* 29 */ { 2, s_4_29, -1, 1, 0}, +/* 30 */ { 3, s_4_30, 29, 2, 0}, +/* 31 */ { 3, s_4_31, 29, 1, 0}, +/* 32 */ { 2, s_4_32, -1, 1, 0}, +/* 33 */ { 3, s_4_33, 32, 2, 0}, +/* 34 */ { 2, s_4_34, -1, 1, 0}, +/* 35 */ { 3, s_4_35, 34, 2, 0}, +/* 36 */ { 2, s_4_36, -1, 2, 0}, +/* 37 */ { 2, s_4_37, -1, 2, 0}, +/* 38 */ { 2, s_4_38, -1, 2, 0}, +/* 39 */ { 2, s_4_39, -1, 1, 0}, +/* 40 */ { 3, s_4_40, 39, 2, 0}, +/* 41 */ { 3, s_4_41, 39, 2, 0}, +/* 42 */ { 3, s_4_42, -1, 1, 0}, +/* 43 */ { 3, s_4_43, -1, 2, 0}, +/* 44 */ { 2, s_4_44, -1, 1, 0}, +/* 45 */ { 3, s_4_45, 44, 2, 0} +}; + +static const symbol s_5_0[1] = { 0xC0 }; +static const symbol s_5_1[2] = { 0xC9, 0xC0 }; +static const symbol s_5_2[2] = { 0xD8, 0xC0 }; +static const symbol s_5_3[1] = { 0xC1 }; +static const symbol s_5_4[1] = { 0xC5 }; +static const symbol s_5_5[2] = { 0xC9, 0xC5 }; +static const symbol s_5_6[2] = { 0xD8, 0xC5 }; +static const symbol s_5_7[2] = { 0xC1, 0xC8 }; +static const symbol s_5_8[2] = { 0xD1, 0xC8 }; +static const symbol s_5_9[3] = { 0xC9, 0xD1, 0xC8 }; +static const symbol s_5_10[1] = { 0xC9 }; +static const symbol s_5_11[2] = { 0xC5, 0xC9 }; +static const symbol s_5_12[2] = { 0xC9, 0xC9 }; +static const symbol s_5_13[3] = { 0xC1, 0xCD, 0xC9 }; +static const symbol s_5_14[3] = { 0xD1, 0xCD, 0xC9 }; +static const symbol s_5_15[4] = { 0xC9, 0xD1, 0xCD, 0xC9 }; +static const symbol s_5_16[1] = { 0xCA }; +static const symbol s_5_17[2] = { 0xC5, 0xCA }; +static const symbol s_5_18[3] = { 0xC9, 0xC5, 0xCA }; +static const symbol s_5_19[2] = { 0xC9, 0xCA }; +static const symbol s_5_20[2] = { 0xCF, 0xCA }; +static const symbol s_5_21[2] = { 0xC1, 0xCD }; +static const symbol s_5_22[2] = { 0xC5, 0xCD }; +static const symbol s_5_23[3] = { 0xC9, 0xC5, 0xCD }; +static const symbol s_5_24[2] = { 0xCF, 0xCD }; +static const symbol s_5_25[2] = { 0xD1, 0xCD }; +static const symbol s_5_26[3] = { 0xC9, 0xD1, 0xCD }; +static const symbol s_5_27[1] = { 0xCF }; +static const symbol s_5_28[1] = { 0xD1 }; +static const symbol s_5_29[2] = { 0xC9, 0xD1 }; +static const symbol s_5_30[2] = { 0xD8, 0xD1 }; +static const symbol s_5_31[1] = { 0xD5 }; +static const symbol s_5_32[2] = { 0xC5, 0xD7 }; +static const symbol s_5_33[2] = { 0xCF, 0xD7 }; +static const symbol s_5_34[1] = { 0xD8 }; +static const symbol s_5_35[1] = { 0xD9 }; + +static const struct among a_5[36] = +{ +/* 0 */ { 1, s_5_0, -1, 1, 0}, +/* 1 */ { 2, s_5_1, 0, 1, 0}, +/* 2 */ { 2, s_5_2, 0, 1, 0}, +/* 3 */ { 1, s_5_3, -1, 1, 0}, +/* 4 */ { 1, s_5_4, -1, 1, 0}, +/* 5 */ { 2, s_5_5, 4, 1, 0}, +/* 6 */ { 2, s_5_6, 4, 1, 0}, +/* 7 */ { 2, s_5_7, -1, 1, 0}, +/* 8 */ { 2, s_5_8, -1, 1, 0}, +/* 9 */ { 3, s_5_9, 8, 1, 0}, +/* 10 */ { 1, s_5_10, -1, 1, 0}, +/* 11 */ { 2, s_5_11, 10, 1, 0}, +/* 12 */ { 2, s_5_12, 10, 1, 0}, +/* 13 */ { 3, s_5_13, 10, 1, 0}, +/* 14 */ { 3, s_5_14, 10, 1, 0}, +/* 15 */ { 4, s_5_15, 14, 1, 0}, +/* 16 */ { 1, s_5_16, -1, 1, 0}, +/* 17 */ { 2, s_5_17, 16, 1, 0}, +/* 18 */ { 3, s_5_18, 17, 1, 0}, +/* 19 */ { 2, s_5_19, 16, 1, 0}, +/* 20 */ { 2, s_5_20, 16, 1, 0}, +/* 21 */ { 2, s_5_21, -1, 1, 0}, +/* 22 */ { 2, s_5_22, -1, 1, 0}, +/* 23 */ { 3, s_5_23, 22, 1, 0}, +/* 24 */ { 2, s_5_24, -1, 1, 0}, +/* 25 */ { 2, s_5_25, -1, 1, 0}, +/* 26 */ { 3, s_5_26, 25, 1, 0}, +/* 27 */ { 1, s_5_27, -1, 1, 0}, +/* 28 */ { 1, s_5_28, -1, 1, 0}, +/* 29 */ { 2, s_5_29, 28, 1, 0}, +/* 30 */ { 2, s_5_30, 28, 1, 0}, +/* 31 */ { 1, s_5_31, -1, 1, 0}, +/* 32 */ { 2, s_5_32, -1, 1, 0}, +/* 33 */ { 2, s_5_33, -1, 1, 0}, +/* 34 */ { 1, s_5_34, -1, 1, 0}, +/* 35 */ { 1, s_5_35, -1, 1, 0} +}; + +static const symbol s_6_0[3] = { 0xCF, 0xD3, 0xD4 }; +static const symbol s_6_1[4] = { 0xCF, 0xD3, 0xD4, 0xD8 }; + +static const struct among a_6[2] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 4, s_6_1, -1, 1, 0} +}; + +static const symbol s_7_0[4] = { 0xC5, 0xCA, 0xDB, 0xC5 }; +static const symbol s_7_1[1] = { 0xCE }; +static const symbol s_7_2[1] = { 0xD8 }; +static const symbol s_7_3[3] = { 0xC5, 0xCA, 0xDB }; + +static const struct among a_7[4] = +{ +/* 0 */ { 4, s_7_0, -1, 1, 0}, +/* 1 */ { 1, s_7_1, -1, 2, 0}, +/* 2 */ { 1, s_7_2, -1, 3, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 35, 130, 34, 18 }; + +static const symbol s_0[] = { 0xC1 }; +static const symbol s_1[] = { 0xD1 }; +static const symbol s_2[] = { 0xC1 }; +static const symbol s_3[] = { 0xD1 }; +static const symbol s_4[] = { 0xC1 }; +static const symbol s_5[] = { 0xD1 }; +static const symbol s_6[] = { 0xCE }; +static const symbol s_7[] = { 0xCE }; +static const symbol s_8[] = { 0xCE }; +static const symbol s_9[] = { 0xC9 }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c1 = z->c; /* do, line 63 */ + { /* gopast */ /* grouping v, line 64 */ + int ret = out_grouping(z, g_v, 192, 220, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + z->I[0] = z->c; /* setmark pV, line 64 */ + { /* gopast */ /* non v, line 64 */ + int ret = in_grouping(z, g_v, 192, 220, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* grouping v, line 65 */ + int ret = out_grouping(z, g_v, 192, 220, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 65 */ + int ret = in_grouping(z, g_v, 192, 220, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 65 */ + lab0: + z->c = c1; + } + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_perfective_gerund(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 74 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((25166336 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_0, 9); /* substring, line 74 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 74 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 78 */ + if (!(eq_s_b(z, 1, s_0))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_1))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 78 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 85 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_adjective(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 90 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((2271009 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_1, 26); /* substring, line 90 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 90 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 99 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_adjectival(struct SN_env * z) { + int among_var; + { int ret = r_adjective(z); + if (ret == 0) return 0; /* call adjective, line 104 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ + z->ket = z->c; /* [, line 112 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((671113216 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } + among_var = find_among_b(z, a_2, 8); /* substring, line 112 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 112 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab0; } + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 117 */ + if (!(eq_s_b(z, 1, s_2))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; } + } + lab1: + { int ret = slice_del(z); /* delete, line 117 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 124 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + return 1; +} + +static int r_reflexive(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 131 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 209 && z->p[z->c - 1] != 216)) return 0; + among_var = find_among_b(z, a_3, 2); /* substring, line 131 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 131 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_verb(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 139 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((51443235 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_4, 46); /* substring, line 139 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 139 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ + if (!(eq_s_b(z, 1, s_4))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_5))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 145 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 153 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_noun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 162 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((60991267 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 36); /* substring, line 162 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 162 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 169 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_derivational(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 178 */ + if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 212 && z->p[z->c - 1] != 216)) return 0; + among_var = find_among_b(z, a_6, 2); /* substring, line 178 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 178 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 178 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 181 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_tidy_up(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 186 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((151011360 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_7, 4); /* substring, line 186 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 186 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 190 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 191 */ + if (!(eq_s_b(z, 1, s_6))) return 0; + z->bra = z->c; /* ], line 191 */ + if (!(eq_s_b(z, 1, s_7))) return 0; + { int ret = slice_del(z); /* delete, line 191 */ + if (ret < 0) return ret; + } + break; + case 2: + if (!(eq_s_b(z, 1, s_8))) return 0; + { int ret = slice_del(z); /* delete, line 194 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 196 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int russian_KOI8_R_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 203 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 203 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 204 */ + + { int mlimit; /* setlimit, line 204 */ + int m2 = z->l - z->c; (void)m2; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 204 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m2; + { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 206 */ + { int ret = r_perfective_gerund(z); + if (ret == 0) goto lab3; /* call perfective_gerund, line 206 */ + if (ret < 0) return ret; + } + goto lab2; + lab3: + z->c = z->l - m4; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 207 */ + { int ret = r_reflexive(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 207 */ + if (ret < 0) return ret; + } + lab4: + ; + } + { int m5 = z->l - z->c; (void)m5; /* or, line 208 */ + { int ret = r_adjectival(z); + if (ret == 0) goto lab6; /* call adjectival, line 208 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m5; + { int ret = r_verb(z); + if (ret == 0) goto lab7; /* call verb, line 208 */ + if (ret < 0) return ret; + } + goto lab5; + lab7: + z->c = z->l - m5; + { int ret = r_noun(z); + if (ret == 0) goto lab1; /* call noun, line 208 */ + if (ret < 0) return ret; + } + } + lab5: + ; + } + lab2: + lab1: + z->c = z->l - m3; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 211 */ + z->ket = z->c; /* [, line 211 */ + if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m_keep; goto lab8; } + z->bra = z->c; /* ], line 211 */ + { int ret = slice_del(z); /* delete, line 211 */ + if (ret < 0) return ret; + } + lab8: + ; + } + { int m6 = z->l - z->c; (void)m6; /* do, line 214 */ + { int ret = r_derivational(z); + if (ret == 0) goto lab9; /* call derivational, line 214 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m6; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 215 */ + { int ret = r_tidy_up(z); + if (ret == 0) goto lab10; /* call tidy_up, line 215 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m7; + } + z->lb = mlimit; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * russian_KOI8_R_create_env(void) { return SN_create_env(0, 2, 0); } + +extern void russian_KOI8_R_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_danish.c b/src/backend/snowball/libstemmer/stem_UTF_8_danish.c new file mode 100644 index 00000000000..cfd41376da2 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_danish.c @@ -0,0 +1,339 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int danish_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_undouble(struct SN_env * z); +static int r_other_suffix(struct SN_env * z); +static int r_consonant_pair(struct SN_env * z); +static int r_main_suffix(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * danish_UTF_8_create_env(void); +extern void danish_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[3] = { 'h', 'e', 'd' }; +static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; +static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; +static const symbol s_0_3[1] = { 'e' }; +static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; +static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; +static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; +static const symbol s_0_7[3] = { 'e', 'n', 'e' }; +static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; +static const symbol s_0_9[3] = { 'e', 'r', 'e' }; +static const symbol s_0_10[2] = { 'e', 'n' }; +static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; +static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; +static const symbol s_0_13[2] = { 'e', 'r' }; +static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; +static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; +static const symbol s_0_16[1] = { 's' }; +static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; +static const symbol s_0_18[2] = { 'e', 's' }; +static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; +static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; +static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; +static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; +static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; +static const symbol s_0_24[3] = { 'e', 'n', 's' }; +static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; +static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; +static const symbol s_0_27[3] = { 'e', 'r', 's' }; +static const symbol s_0_28[3] = { 'e', 't', 's' }; +static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; +static const symbol s_0_30[2] = { 'e', 't' }; +static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; + +static const struct among a_0[32] = +{ +/* 0 */ { 3, s_0_0, -1, 1, 0}, +/* 1 */ { 5, s_0_1, 0, 1, 0}, +/* 2 */ { 4, s_0_2, -1, 1, 0}, +/* 3 */ { 1, s_0_3, -1, 1, 0}, +/* 4 */ { 5, s_0_4, 3, 1, 0}, +/* 5 */ { 4, s_0_5, 3, 1, 0}, +/* 6 */ { 6, s_0_6, 5, 1, 0}, +/* 7 */ { 3, s_0_7, 3, 1, 0}, +/* 8 */ { 4, s_0_8, 3, 1, 0}, +/* 9 */ { 3, s_0_9, 3, 1, 0}, +/* 10 */ { 2, s_0_10, -1, 1, 0}, +/* 11 */ { 5, s_0_11, 10, 1, 0}, +/* 12 */ { 4, s_0_12, 10, 1, 0}, +/* 13 */ { 2, s_0_13, -1, 1, 0}, +/* 14 */ { 5, s_0_14, 13, 1, 0}, +/* 15 */ { 4, s_0_15, 13, 1, 0}, +/* 16 */ { 1, s_0_16, -1, 2, 0}, +/* 17 */ { 4, s_0_17, 16, 1, 0}, +/* 18 */ { 2, s_0_18, 16, 1, 0}, +/* 19 */ { 5, s_0_19, 18, 1, 0}, +/* 20 */ { 7, s_0_20, 19, 1, 0}, +/* 21 */ { 4, s_0_21, 18, 1, 0}, +/* 22 */ { 5, s_0_22, 18, 1, 0}, +/* 23 */ { 4, s_0_23, 18, 1, 0}, +/* 24 */ { 3, s_0_24, 16, 1, 0}, +/* 25 */ { 6, s_0_25, 24, 1, 0}, +/* 26 */ { 5, s_0_26, 24, 1, 0}, +/* 27 */ { 3, s_0_27, 16, 1, 0}, +/* 28 */ { 3, s_0_28, 16, 1, 0}, +/* 29 */ { 5, s_0_29, 28, 1, 0}, +/* 30 */ { 2, s_0_30, -1, 1, 0}, +/* 31 */ { 4, s_0_31, 30, 1, 0} +}; + +static const symbol s_1_0[2] = { 'g', 'd' }; +static const symbol s_1_1[2] = { 'd', 't' }; +static const symbol s_1_2[2] = { 'g', 't' }; +static const symbol s_1_3[2] = { 'k', 't' }; + +static const struct among a_1[4] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0}, +/* 2 */ { 2, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0} +}; + +static const symbol s_2_0[2] = { 'i', 'g' }; +static const symbol s_2_1[3] = { 'l', 'i', 'g' }; +static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; +static const symbol s_2_3[3] = { 'e', 'l', 's' }; +static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' }; + +static const struct among a_2[5] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 3, s_2_1, 0, 1, 0}, +/* 2 */ { 4, s_2_2, 1, 1, 0}, +/* 3 */ { 3, s_2_3, -1, 1, 0}, +/* 4 */ { 5, s_2_4, -1, 2, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + +static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; + +static const symbol s_0[] = { 's', 't' }; +static const symbol s_1[] = { 'i', 'g' }; +static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c_test = z->c; /* test, line 33 */ + { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); + if (ret < 0) return 0; + z->c = ret; /* hop, line 33 */ + } + z->I[1] = z->c; /* setmark x, line 33 */ + z->c = c_test; + } + if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ + { /* gopast */ /* non v, line 34 */ + int ret = in_grouping_U(z, g_v, 97, 248, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 34 */ + /* try, line 35 */ + if (!(z->I[0] < z->I[1])) goto lab0; + z->I[0] = z->I[1]; +lab0: + return 1; +} + +static int r_main_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 41 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 41 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 41 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 41 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 48 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; + { int ret = slice_del(z); /* delete, line 50 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_consonant_pair(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 55 */ + { int mlimit; /* setlimit, line 56 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 56 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 56 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ + z->bra = z->c; /* ], line 56 */ + z->lb = mlimit; + } + z->c = z->l - m_test; + } + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 62 */ + } + z->bra = z->c; /* ], line 62 */ + { int ret = slice_del(z); /* delete, line 62 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_other_suffix(struct SN_env * z) { + int among_var; + { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ + z->ket = z->c; /* [, line 66 */ + if (!(eq_s_b(z, 2, s_0))) goto lab0; + z->bra = z->c; /* ], line 66 */ + if (!(eq_s_b(z, 2, s_1))) goto lab0; + { int ret = slice_del(z); /* delete, line 66 */ + if (ret < 0) return ret; + } + lab0: + z->c = z->l - m1; + } + { int mlimit; /* setlimit, line 67 */ + int m2 = z->l - z->c; (void)m2; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 67 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m2; + z->ket = z->c; /* [, line 67 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 67 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 70 */ + if (ret < 0) return ret; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m3; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_undouble(struct SN_env * z) { + { int mlimit; /* setlimit, line 76 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 76 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 76 */ + if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 76 */ + z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ + if (z->S[0] == 0) return -1; /* -> ch, line 76 */ + z->lb = mlimit; + } + if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ + { int ret = slice_del(z); /* delete, line 78 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int danish_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 84 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 84 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 85 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ + { int ret = r_main_suffix(z); + if (ret == 0) goto lab1; /* call main_suffix, line 86 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ + { int ret = r_other_suffix(z); + if (ret == 0) goto lab3; /* call other_suffix, line 88 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ + { int ret = r_undouble(z); + if (ret == 0) goto lab4; /* call undouble, line 89 */ + if (ret < 0) return ret; + } + lab4: + z->c = z->l - m5; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); } + +extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_dutch.c b/src/backend/snowball/libstemmer/stem_UTF_8_dutch.c new file mode 100644 index 00000000000..f04c88d3e6a --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_dutch.c @@ -0,0 +1,634 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int dutch_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_standard_suffix(struct SN_env * z); +static int r_undouble(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_en_ending(struct SN_env * z); +static int r_e_ending(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * dutch_UTF_8_create_env(void); +extern void dutch_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[2] = { 0xC3, 0xA1 }; +static const symbol s_0_2[2] = { 0xC3, 0xA4 }; +static const symbol s_0_3[2] = { 0xC3, 0xA9 }; +static const symbol s_0_4[2] = { 0xC3, 0xAB }; +static const symbol s_0_5[2] = { 0xC3, 0xAD }; +static const symbol s_0_6[2] = { 0xC3, 0xAF }; +static const symbol s_0_7[2] = { 0xC3, 0xB3 }; +static const symbol s_0_8[2] = { 0xC3, 0xB6 }; +static const symbol s_0_9[2] = { 0xC3, 0xBA }; +static const symbol s_0_10[2] = { 0xC3, 0xBC }; + +static const struct among a_0[11] = +{ +/* 0 */ { 0, 0, -1, 6, 0}, +/* 1 */ { 2, s_0_1, 0, 1, 0}, +/* 2 */ { 2, s_0_2, 0, 1, 0}, +/* 3 */ { 2, s_0_3, 0, 2, 0}, +/* 4 */ { 2, s_0_4, 0, 2, 0}, +/* 5 */ { 2, s_0_5, 0, 3, 0}, +/* 6 */ { 2, s_0_6, 0, 3, 0}, +/* 7 */ { 2, s_0_7, 0, 4, 0}, +/* 8 */ { 2, s_0_8, 0, 4, 0}, +/* 9 */ { 2, s_0_9, 0, 5, 0}, +/* 10 */ { 2, s_0_10, 0, 5, 0} +}; + +static const symbol s_1_1[1] = { 'I' }; +static const symbol s_1_2[1] = { 'Y' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 1, s_1_1, 0, 2, 0}, +/* 2 */ { 1, s_1_2, 0, 1, 0} +}; + +static const symbol s_2_0[2] = { 'd', 'd' }; +static const symbol s_2_1[2] = { 'k', 'k' }; +static const symbol s_2_2[2] = { 't', 't' }; + +static const struct among a_2[3] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 2, s_2_2, -1, -1, 0} +}; + +static const symbol s_3_0[3] = { 'e', 'n', 'e' }; +static const symbol s_3_1[2] = { 's', 'e' }; +static const symbol s_3_2[2] = { 'e', 'n' }; +static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' }; +static const symbol s_3_4[1] = { 's' }; + +static const struct among a_3[5] = +{ +/* 0 */ { 3, s_3_0, -1, 2, 0}, +/* 1 */ { 2, s_3_1, -1, 3, 0}, +/* 2 */ { 2, s_3_2, -1, 2, 0}, +/* 3 */ { 5, s_3_3, 2, 1, 0}, +/* 4 */ { 1, s_3_4, -1, 3, 0} +}; + +static const symbol s_4_0[3] = { 'e', 'n', 'd' }; +static const symbol s_4_1[2] = { 'i', 'g' }; +static const symbol s_4_2[3] = { 'i', 'n', 'g' }; +static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' }; +static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' }; +static const symbol s_4_5[3] = { 'b', 'a', 'r' }; + +static const struct among a_4[6] = +{ +/* 0 */ { 3, s_4_0, -1, 1, 0}, +/* 1 */ { 2, s_4_1, -1, 2, 0}, +/* 2 */ { 3, s_4_2, -1, 1, 0}, +/* 3 */ { 4, s_4_3, -1, 3, 0}, +/* 4 */ { 4, s_4_4, -1, 4, 0}, +/* 5 */ { 3, s_4_5, -1, 5, 0} +}; + +static const symbol s_5_0[2] = { 'a', 'a' }; +static const symbol s_5_1[2] = { 'e', 'e' }; +static const symbol s_5_2[2] = { 'o', 'o' }; +static const symbol s_5_3[2] = { 'u', 'u' }; + +static const struct among a_5[4] = +{ +/* 0 */ { 2, s_5_0, -1, -1, 0}, +/* 1 */ { 2, s_5_1, -1, -1, 0}, +/* 2 */ { 2, s_5_2, -1, -1, 0}, +/* 3 */ { 2, s_5_3, -1, -1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const symbol s_0[] = { 'a' }; +static const symbol s_1[] = { 'e' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'o' }; +static const symbol s_4[] = { 'u' }; +static const symbol s_5[] = { 'y' }; +static const symbol s_6[] = { 'Y' }; +static const symbol s_7[] = { 'i' }; +static const symbol s_8[] = { 'I' }; +static const symbol s_9[] = { 'y' }; +static const symbol s_10[] = { 'Y' }; +static const symbol s_11[] = { 'y' }; +static const symbol s_12[] = { 'i' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'g', 'e', 'm' }; +static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; +static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; +static const symbol s_17[] = { 'c' }; +static const symbol s_18[] = { 'e', 'n' }; +static const symbol s_19[] = { 'i', 'g' }; +static const symbol s_20[] = { 'e' }; +static const symbol s_21[] = { 'e' }; + +static int r_prelude(struct SN_env * z) { + int among_var; + { int c_test = z->c; /* test, line 42 */ + while(1) { /* repeat, line 42 */ + int c1 = z->c; + z->bra = z->c; /* [, line 43 */ + if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((340306450 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else + among_var = find_among(z, a_0, 11); /* substring, line 43 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 43 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 54 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + z->c = c_test; + } + { int c_keep = z->c; /* try, line 57 */ + z->bra = z->c; /* [, line 57 */ + if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } + z->ket = z->c; /* ], line 57 */ + { int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */ + if (ret < 0) return ret; + } + lab1: + ; + } + while(1) { /* repeat, line 58 */ + int c2 = z->c; + while(1) { /* goto, line 58 */ + int c3 = z->c; + if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab3; + z->bra = z->c; /* [, line 59 */ + { int c4 = z->c; /* or, line 59 */ + if (!(eq_s(z, 1, s_7))) goto lab5; + z->ket = z->c; /* ], line 59 */ + if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab5; + { int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = c4; + if (!(eq_s(z, 1, s_9))) goto lab3; + z->ket = z->c; /* ], line 60 */ + { int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */ + if (ret < 0) return ret; + } + } + lab4: + z->c = c3; + break; + lab3: + z->c = c3; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab2; + z->c = ret; /* goto, line 58 */ + } + } + continue; + lab2: + z->c = c2; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { /* gopast */ /* grouping v, line 69 */ + int ret = out_grouping_U(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 69 */ + int ret = in_grouping_U(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 69 */ + /* try, line 70 */ + if (!(z->I[0] < 3)) goto lab0; + z->I[0] = 3; +lab0: + { /* gopast */ /* grouping v, line 71 */ + int ret = out_grouping_U(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 71 */ + int ret = in_grouping_U(z, g_v, 97, 232, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 71 */ + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 75 */ + int c1 = z->c; + z->bra = z->c; /* [, line 77 */ + if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else + among_var = find_among(z, a_1, 3); /* substring, line 77 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 77 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 80 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_undouble(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 91 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */ + z->c = z->l - m_test; + } + z->ket = z->c; /* [, line 91 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 91 */ + } + z->bra = z->c; /* ], line 91 */ + { int ret = slice_del(z); /* delete, line 91 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_e_ending(struct SN_env * z) { + z->B[0] = 0; /* unset e_found, line 95 */ + z->ket = z->c; /* [, line 96 */ + if (!(eq_s_b(z, 1, s_13))) return 0; + z->bra = z->c; /* ], line 96 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 96 */ + if (ret < 0) return ret; + } + { int m_test = z->l - z->c; /* test, line 96 */ + if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 96 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set e_found, line 97 */ + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 98 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_en_ending(struct SN_env * z) { + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 102 */ + if (ret < 0) return ret; + } + { int m1 = z->l - z->c; (void)m1; /* and, line 102 */ + if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ + if (!(eq_s_b(z, 3, s_14))) goto lab0; + return 0; + lab0: + z->c = z->l - m2; + } + } + { int ret = slice_del(z); /* delete, line 102 */ + if (ret < 0) return ret; + } + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 103 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + { int m1 = z->l - z->c; (void)m1; /* do, line 107 */ + z->ket = z->c; /* [, line 108 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; + among_var = find_among_b(z, a_3, 5); /* substring, line 108 */ + if (!(among_var)) goto lab0; + z->bra = z->c; /* ], line 108 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = r_R1(z); + if (ret == 0) goto lab0; /* call R1, line 110 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_en_ending(z); + if (ret == 0) goto lab0; /* call en_ending, line 113 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = r_R1(z); + if (ret == 0) goto lab0; /* call R1, line 116 */ + if (ret < 0) return ret; + } + if (out_grouping_b_U(z, g_v_j, 97, 232, 0)) goto lab0; + { int ret = slice_del(z); /* delete, line 116 */ + if (ret < 0) return ret; + } + break; + } + lab0: + z->c = z->l - m1; + } + { int m2 = z->l - z->c; (void)m2; /* do, line 120 */ + { int ret = r_e_ending(z); + if (ret == 0) goto lab1; /* call e_ending, line 120 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 122 */ + z->ket = z->c; /* [, line 122 */ + if (!(eq_s_b(z, 4, s_16))) goto lab2; + z->bra = z->c; /* ], line 122 */ + { int ret = r_R2(z); + if (ret == 0) goto lab2; /* call R2, line 122 */ + if (ret < 0) return ret; + } + { int m4 = z->l - z->c; (void)m4; /* not, line 122 */ + if (!(eq_s_b(z, 1, s_17))) goto lab3; + goto lab2; + lab3: + z->c = z->l - m4; + } + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 123 */ + if (!(eq_s_b(z, 2, s_18))) goto lab2; + z->bra = z->c; /* ], line 123 */ + { int ret = r_en_ending(z); + if (ret == 0) goto lab2; /* call en_ending, line 123 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 126 */ + z->ket = z->c; /* [, line 127 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; + among_var = find_among_b(z, a_4, 6); /* substring, line 127 */ + if (!(among_var)) goto lab4; + z->bra = z->c; /* ], line 127 */ + switch(among_var) { + case 0: goto lab4; + case 1: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 129 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 129 */ + if (ret < 0) return ret; + } + { int m6 = z->l - z->c; (void)m6; /* or, line 130 */ + z->ket = z->c; /* [, line 130 */ + if (!(eq_s_b(z, 2, s_19))) goto lab6; + z->bra = z->c; /* ], line 130 */ + { int ret = r_R2(z); + if (ret == 0) goto lab6; /* call R2, line 130 */ + if (ret < 0) return ret; + } + { int m7 = z->l - z->c; (void)m7; /* not, line 130 */ + if (!(eq_s_b(z, 1, s_20))) goto lab7; + goto lab6; + lab7: + z->c = z->l - m7; + } + { int ret = slice_del(z); /* delete, line 130 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_undouble(z); + if (ret == 0) goto lab4; /* call undouble, line 130 */ + if (ret < 0) return ret; + } + } + lab5: + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 133 */ + if (ret < 0) return ret; + } + { int m8 = z->l - z->c; (void)m8; /* not, line 133 */ + if (!(eq_s_b(z, 1, s_21))) goto lab8; + goto lab4; + lab8: + z->c = z->l - m8; + } + { int ret = slice_del(z); /* delete, line 133 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 136 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 136 */ + if (ret < 0) return ret; + } + { int ret = r_e_ending(z); + if (ret == 0) goto lab4; /* call e_ending, line 136 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 139 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 139 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) goto lab4; /* call R2, line 142 */ + if (ret < 0) return ret; + } + if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */ + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + break; + } + lab4: + z->c = z->l - m5; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 146 */ + if (out_grouping_b_U(z, g_v_I, 73, 232, 0)) goto lab9; + { int m_test = z->l - z->c; /* test, line 148 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; + if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */ + if (out_grouping_b_U(z, g_v, 97, 232, 0)) goto lab9; + z->c = z->l - m_test; + } + z->ket = z->c; /* [, line 152 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) goto lab9; + z->c = ret; /* next, line 152 */ + } + z->bra = z->c; /* ], line 152 */ + { int ret = slice_del(z); /* delete, line 152 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m9; + } + return 1; +} + +extern int dutch_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 159 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 159 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 160 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 160 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 161 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 162 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab2; /* call standard_suffix, line 162 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + z->c = z->lb; + { int c4 = z->c; /* do, line 163 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab3; /* call postlude, line 163 */ + if (ret < 0) return ret; + } + lab3: + z->c = c4; + } + return 1; +} + +extern struct SN_env * dutch_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } + +extern void dutch_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_english.c b/src/backend/snowball/libstemmer/stem_UTF_8_english.c new file mode 100644 index 00000000000..c5d4c2a445d --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_english.c @@ -0,0 +1,1125 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int english_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_exception2(struct SN_env * z); +static int r_exception1(struct SN_env * z); +static int r_Step_5(struct SN_env * z); +static int r_Step_4(struct SN_env * z); +static int r_Step_3(struct SN_env * z); +static int r_Step_2(struct SN_env * z); +static int r_Step_1c(struct SN_env * z); +static int r_Step_1b(struct SN_env * z); +static int r_Step_1a(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_shortv(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * english_UTF_8_create_env(void); +extern void english_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[5] = { 'a', 'r', 's', 'e', 'n' }; +static const symbol s_0_1[6] = { 'c', 'o', 'm', 'm', 'u', 'n' }; +static const symbol s_0_2[5] = { 'g', 'e', 'n', 'e', 'r' }; + +static const struct among a_0[3] = +{ +/* 0 */ { 5, s_0_0, -1, -1, 0}, +/* 1 */ { 6, s_0_1, -1, -1, 0}, +/* 2 */ { 5, s_0_2, -1, -1, 0} +}; + +static const symbol s_1_0[1] = { '\'' }; +static const symbol s_1_1[3] = { '\'', 's', '\'' }; +static const symbol s_1_2[2] = { '\'', 's' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 1, s_1_0, -1, 1, 0}, +/* 1 */ { 3, s_1_1, 0, 1, 0}, +/* 2 */ { 2, s_1_2, -1, 1, 0} +}; + +static const symbol s_2_0[3] = { 'i', 'e', 'd' }; +static const symbol s_2_1[1] = { 's' }; +static const symbol s_2_2[3] = { 'i', 'e', 's' }; +static const symbol s_2_3[4] = { 's', 's', 'e', 's' }; +static const symbol s_2_4[2] = { 's', 's' }; +static const symbol s_2_5[2] = { 'u', 's' }; + +static const struct among a_2[6] = +{ +/* 0 */ { 3, s_2_0, -1, 2, 0}, +/* 1 */ { 1, s_2_1, -1, 3, 0}, +/* 2 */ { 3, s_2_2, 1, 2, 0}, +/* 3 */ { 4, s_2_3, 1, 1, 0}, +/* 4 */ { 2, s_2_4, 1, -1, 0}, +/* 5 */ { 2, s_2_5, 1, -1, 0} +}; + +static const symbol s_3_1[2] = { 'b', 'b' }; +static const symbol s_3_2[2] = { 'd', 'd' }; +static const symbol s_3_3[2] = { 'f', 'f' }; +static const symbol s_3_4[2] = { 'g', 'g' }; +static const symbol s_3_5[2] = { 'b', 'l' }; +static const symbol s_3_6[2] = { 'm', 'm' }; +static const symbol s_3_7[2] = { 'n', 'n' }; +static const symbol s_3_8[2] = { 'p', 'p' }; +static const symbol s_3_9[2] = { 'r', 'r' }; +static const symbol s_3_10[2] = { 'a', 't' }; +static const symbol s_3_11[2] = { 't', 't' }; +static const symbol s_3_12[2] = { 'i', 'z' }; + +static const struct among a_3[13] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_3_1, 0, 2, 0}, +/* 2 */ { 2, s_3_2, 0, 2, 0}, +/* 3 */ { 2, s_3_3, 0, 2, 0}, +/* 4 */ { 2, s_3_4, 0, 2, 0}, +/* 5 */ { 2, s_3_5, 0, 1, 0}, +/* 6 */ { 2, s_3_6, 0, 2, 0}, +/* 7 */ { 2, s_3_7, 0, 2, 0}, +/* 8 */ { 2, s_3_8, 0, 2, 0}, +/* 9 */ { 2, s_3_9, 0, 2, 0}, +/* 10 */ { 2, s_3_10, 0, 1, 0}, +/* 11 */ { 2, s_3_11, 0, 2, 0}, +/* 12 */ { 2, s_3_12, 0, 1, 0} +}; + +static const symbol s_4_0[2] = { 'e', 'd' }; +static const symbol s_4_1[3] = { 'e', 'e', 'd' }; +static const symbol s_4_2[3] = { 'i', 'n', 'g' }; +static const symbol s_4_3[4] = { 'e', 'd', 'l', 'y' }; +static const symbol s_4_4[5] = { 'e', 'e', 'd', 'l', 'y' }; +static const symbol s_4_5[5] = { 'i', 'n', 'g', 'l', 'y' }; + +static const struct among a_4[6] = +{ +/* 0 */ { 2, s_4_0, -1, 2, 0}, +/* 1 */ { 3, s_4_1, 0, 1, 0}, +/* 2 */ { 3, s_4_2, -1, 2, 0}, +/* 3 */ { 4, s_4_3, -1, 2, 0}, +/* 4 */ { 5, s_4_4, 3, 1, 0}, +/* 5 */ { 5, s_4_5, -1, 2, 0} +}; + +static const symbol s_5_0[4] = { 'a', 'n', 'c', 'i' }; +static const symbol s_5_1[4] = { 'e', 'n', 'c', 'i' }; +static const symbol s_5_2[3] = { 'o', 'g', 'i' }; +static const symbol s_5_3[2] = { 'l', 'i' }; +static const symbol s_5_4[3] = { 'b', 'l', 'i' }; +static const symbol s_5_5[4] = { 'a', 'b', 'l', 'i' }; +static const symbol s_5_6[4] = { 'a', 'l', 'l', 'i' }; +static const symbol s_5_7[5] = { 'f', 'u', 'l', 'l', 'i' }; +static const symbol s_5_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; +static const symbol s_5_9[5] = { 'o', 'u', 's', 'l', 'i' }; +static const symbol s_5_10[5] = { 'e', 'n', 't', 'l', 'i' }; +static const symbol s_5_11[5] = { 'a', 'l', 'i', 't', 'i' }; +static const symbol s_5_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; +static const symbol s_5_13[5] = { 'i', 'v', 'i', 't', 'i' }; +static const symbol s_5_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_5_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_5_16[5] = { 'a', 'l', 'i', 's', 'm' }; +static const symbol s_5_17[5] = { 'a', 't', 'i', 'o', 'n' }; +static const symbol s_5_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; +static const symbol s_5_19[4] = { 'i', 'z', 'e', 'r' }; +static const symbol s_5_20[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_5_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; +static const symbol s_5_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; +static const symbol s_5_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; + +static const struct among a_5[24] = +{ +/* 0 */ { 4, s_5_0, -1, 3, 0}, +/* 1 */ { 4, s_5_1, -1, 2, 0}, +/* 2 */ { 3, s_5_2, -1, 13, 0}, +/* 3 */ { 2, s_5_3, -1, 16, 0}, +/* 4 */ { 3, s_5_4, 3, 12, 0}, +/* 5 */ { 4, s_5_5, 4, 4, 0}, +/* 6 */ { 4, s_5_6, 3, 8, 0}, +/* 7 */ { 5, s_5_7, 3, 14, 0}, +/* 8 */ { 6, s_5_8, 3, 15, 0}, +/* 9 */ { 5, s_5_9, 3, 10, 0}, +/* 10 */ { 5, s_5_10, 3, 5, 0}, +/* 11 */ { 5, s_5_11, -1, 8, 0}, +/* 12 */ { 6, s_5_12, -1, 12, 0}, +/* 13 */ { 5, s_5_13, -1, 11, 0}, +/* 14 */ { 6, s_5_14, -1, 1, 0}, +/* 15 */ { 7, s_5_15, 14, 7, 0}, +/* 16 */ { 5, s_5_16, -1, 8, 0}, +/* 17 */ { 5, s_5_17, -1, 7, 0}, +/* 18 */ { 7, s_5_18, 17, 6, 0}, +/* 19 */ { 4, s_5_19, -1, 6, 0}, +/* 20 */ { 4, s_5_20, -1, 7, 0}, +/* 21 */ { 7, s_5_21, -1, 11, 0}, +/* 22 */ { 7, s_5_22, -1, 9, 0}, +/* 23 */ { 7, s_5_23, -1, 10, 0} +}; + +static const symbol s_6_0[5] = { 'i', 'c', 'a', 't', 'e' }; +static const symbol s_6_1[5] = { 'a', 't', 'i', 'v', 'e' }; +static const symbol s_6_2[5] = { 'a', 'l', 'i', 'z', 'e' }; +static const symbol s_6_3[5] = { 'i', 'c', 'i', 't', 'i' }; +static const symbol s_6_4[4] = { 'i', 'c', 'a', 'l' }; +static const symbol s_6_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_6_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_6_7[3] = { 'f', 'u', 'l' }; +static const symbol s_6_8[4] = { 'n', 'e', 's', 's' }; + +static const struct among a_6[9] = +{ +/* 0 */ { 5, s_6_0, -1, 4, 0}, +/* 1 */ { 5, s_6_1, -1, 6, 0}, +/* 2 */ { 5, s_6_2, -1, 3, 0}, +/* 3 */ { 5, s_6_3, -1, 4, 0}, +/* 4 */ { 4, s_6_4, -1, 4, 0}, +/* 5 */ { 6, s_6_5, -1, 1, 0}, +/* 6 */ { 7, s_6_6, 5, 2, 0}, +/* 7 */ { 3, s_6_7, -1, 5, 0}, +/* 8 */ { 4, s_6_8, -1, 5, 0} +}; + +static const symbol s_7_0[2] = { 'i', 'c' }; +static const symbol s_7_1[4] = { 'a', 'n', 'c', 'e' }; +static const symbol s_7_2[4] = { 'e', 'n', 'c', 'e' }; +static const symbol s_7_3[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_7_4[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_7_5[3] = { 'a', 't', 'e' }; +static const symbol s_7_6[3] = { 'i', 'v', 'e' }; +static const symbol s_7_7[3] = { 'i', 'z', 'e' }; +static const symbol s_7_8[3] = { 'i', 't', 'i' }; +static const symbol s_7_9[2] = { 'a', 'l' }; +static const symbol s_7_10[3] = { 'i', 's', 'm' }; +static const symbol s_7_11[3] = { 'i', 'o', 'n' }; +static const symbol s_7_12[2] = { 'e', 'r' }; +static const symbol s_7_13[3] = { 'o', 'u', 's' }; +static const symbol s_7_14[3] = { 'a', 'n', 't' }; +static const symbol s_7_15[3] = { 'e', 'n', 't' }; +static const symbol s_7_16[4] = { 'm', 'e', 'n', 't' }; +static const symbol s_7_17[5] = { 'e', 'm', 'e', 'n', 't' }; + +static const struct among a_7[18] = +{ +/* 0 */ { 2, s_7_0, -1, 1, 0}, +/* 1 */ { 4, s_7_1, -1, 1, 0}, +/* 2 */ { 4, s_7_2, -1, 1, 0}, +/* 3 */ { 4, s_7_3, -1, 1, 0}, +/* 4 */ { 4, s_7_4, -1, 1, 0}, +/* 5 */ { 3, s_7_5, -1, 1, 0}, +/* 6 */ { 3, s_7_6, -1, 1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 3, s_7_8, -1, 1, 0}, +/* 9 */ { 2, s_7_9, -1, 1, 0}, +/* 10 */ { 3, s_7_10, -1, 1, 0}, +/* 11 */ { 3, s_7_11, -1, 2, 0}, +/* 12 */ { 2, s_7_12, -1, 1, 0}, +/* 13 */ { 3, s_7_13, -1, 1, 0}, +/* 14 */ { 3, s_7_14, -1, 1, 0}, +/* 15 */ { 3, s_7_15, -1, 1, 0}, +/* 16 */ { 4, s_7_16, 15, 1, 0}, +/* 17 */ { 5, s_7_17, 16, 1, 0} +}; + +static const symbol s_8_0[1] = { 'e' }; +static const symbol s_8_1[1] = { 'l' }; + +static const struct among a_8[2] = +{ +/* 0 */ { 1, s_8_0, -1, 1, 0}, +/* 1 */ { 1, s_8_1, -1, 2, 0} +}; + +static const symbol s_9_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; +static const symbol s_9_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; +static const symbol s_9_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; +static const symbol s_9_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; +static const symbol s_9_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; +static const symbol s_9_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; +static const symbol s_9_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; +static const symbol s_9_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; + +static const struct among a_9[8] = +{ +/* 0 */ { 7, s_9_0, -1, -1, 0}, +/* 1 */ { 7, s_9_1, -1, -1, 0}, +/* 2 */ { 6, s_9_2, -1, -1, 0}, +/* 3 */ { 7, s_9_3, -1, -1, 0}, +/* 4 */ { 6, s_9_4, -1, -1, 0}, +/* 5 */ { 7, s_9_5, -1, -1, 0}, +/* 6 */ { 7, s_9_6, -1, -1, 0}, +/* 7 */ { 6, s_9_7, -1, -1, 0} +}; + +static const symbol s_10_0[5] = { 'a', 'n', 'd', 'e', 's' }; +static const symbol s_10_1[5] = { 'a', 't', 'l', 'a', 's' }; +static const symbol s_10_2[4] = { 'b', 'i', 'a', 's' }; +static const symbol s_10_3[6] = { 'c', 'o', 's', 'm', 'o', 's' }; +static const symbol s_10_4[5] = { 'd', 'y', 'i', 'n', 'g' }; +static const symbol s_10_5[5] = { 'e', 'a', 'r', 'l', 'y' }; +static const symbol s_10_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; +static const symbol s_10_7[4] = { 'h', 'o', 'w', 'e' }; +static const symbol s_10_8[4] = { 'i', 'd', 'l', 'y' }; +static const symbol s_10_9[5] = { 'l', 'y', 'i', 'n', 'g' }; +static const symbol s_10_10[4] = { 'n', 'e', 'w', 's' }; +static const symbol s_10_11[4] = { 'o', 'n', 'l', 'y' }; +static const symbol s_10_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; +static const symbol s_10_13[5] = { 's', 'k', 'i', 'e', 's' }; +static const symbol s_10_14[4] = { 's', 'k', 'i', 's' }; +static const symbol s_10_15[3] = { 's', 'k', 'y' }; +static const symbol s_10_16[5] = { 't', 'y', 'i', 'n', 'g' }; +static const symbol s_10_17[4] = { 'u', 'g', 'l', 'y' }; + +static const struct among a_10[18] = +{ +/* 0 */ { 5, s_10_0, -1, -1, 0}, +/* 1 */ { 5, s_10_1, -1, -1, 0}, +/* 2 */ { 4, s_10_2, -1, -1, 0}, +/* 3 */ { 6, s_10_3, -1, -1, 0}, +/* 4 */ { 5, s_10_4, -1, 3, 0}, +/* 5 */ { 5, s_10_5, -1, 9, 0}, +/* 6 */ { 6, s_10_6, -1, 7, 0}, +/* 7 */ { 4, s_10_7, -1, -1, 0}, +/* 8 */ { 4, s_10_8, -1, 6, 0}, +/* 9 */ { 5, s_10_9, -1, 4, 0}, +/* 10 */ { 4, s_10_10, -1, -1, 0}, +/* 11 */ { 4, s_10_11, -1, 10, 0}, +/* 12 */ { 6, s_10_12, -1, 11, 0}, +/* 13 */ { 5, s_10_13, -1, 2, 0}, +/* 14 */ { 4, s_10_14, -1, 1, 0}, +/* 15 */ { 3, s_10_15, -1, -1, 0}, +/* 16 */ { 5, s_10_16, -1, 5, 0}, +/* 17 */ { 4, s_10_17, -1, 8, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1 }; + +static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; + +static const unsigned char g_valid_LI[] = { 55, 141, 2 }; + +static const symbol s_0[] = { '\'' }; +static const symbol s_1[] = { 'y' }; +static const symbol s_2[] = { 'Y' }; +static const symbol s_3[] = { 'y' }; +static const symbol s_4[] = { 'Y' }; +static const symbol s_5[] = { 's', 's' }; +static const symbol s_6[] = { 'i' }; +static const symbol s_7[] = { 'i', 'e' }; +static const symbol s_8[] = { 'e', 'e' }; +static const symbol s_9[] = { 'e' }; +static const symbol s_10[] = { 'e' }; +static const symbol s_11[] = { 'y' }; +static const symbol s_12[] = { 'Y' }; +static const symbol s_13[] = { 'i' }; +static const symbol s_14[] = { 't', 'i', 'o', 'n' }; +static const symbol s_15[] = { 'e', 'n', 'c', 'e' }; +static const symbol s_16[] = { 'a', 'n', 'c', 'e' }; +static const symbol s_17[] = { 'a', 'b', 'l', 'e' }; +static const symbol s_18[] = { 'e', 'n', 't' }; +static const symbol s_19[] = { 'i', 'z', 'e' }; +static const symbol s_20[] = { 'a', 't', 'e' }; +static const symbol s_21[] = { 'a', 'l' }; +static const symbol s_22[] = { 'f', 'u', 'l' }; +static const symbol s_23[] = { 'o', 'u', 's' }; +static const symbol s_24[] = { 'i', 'v', 'e' }; +static const symbol s_25[] = { 'b', 'l', 'e' }; +static const symbol s_26[] = { 'l' }; +static const symbol s_27[] = { 'o', 'g' }; +static const symbol s_28[] = { 'f', 'u', 'l' }; +static const symbol s_29[] = { 'l', 'e', 's', 's' }; +static const symbol s_30[] = { 't', 'i', 'o', 'n' }; +static const symbol s_31[] = { 'a', 't', 'e' }; +static const symbol s_32[] = { 'a', 'l' }; +static const symbol s_33[] = { 'i', 'c' }; +static const symbol s_34[] = { 's' }; +static const symbol s_35[] = { 't' }; +static const symbol s_36[] = { 'l' }; +static const symbol s_37[] = { 's', 'k', 'i' }; +static const symbol s_38[] = { 's', 'k', 'y' }; +static const symbol s_39[] = { 'd', 'i', 'e' }; +static const symbol s_40[] = { 'l', 'i', 'e' }; +static const symbol s_41[] = { 't', 'i', 'e' }; +static const symbol s_42[] = { 'i', 'd', 'l' }; +static const symbol s_43[] = { 'g', 'e', 'n', 't', 'l' }; +static const symbol s_44[] = { 'u', 'g', 'l', 'i' }; +static const symbol s_45[] = { 'e', 'a', 'r', 'l', 'i' }; +static const symbol s_46[] = { 'o', 'n', 'l', 'i' }; +static const symbol s_47[] = { 's', 'i', 'n', 'g', 'l' }; +static const symbol s_48[] = { 'Y' }; +static const symbol s_49[] = { 'y' }; + +static int r_prelude(struct SN_env * z) { + z->B[0] = 0; /* unset Y_found, line 26 */ + { int c1 = z->c; /* do, line 27 */ + z->bra = z->c; /* [, line 27 */ + if (!(eq_s(z, 1, s_0))) goto lab0; + z->ket = z->c; /* ], line 27 */ + { int ret = slice_del(z); /* delete, line 27 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 28 */ + z->bra = z->c; /* [, line 28 */ + if (!(eq_s(z, 1, s_1))) goto lab1; + z->ket = z->c; /* ], line 28 */ + { int ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 28 */ + lab1: + z->c = c2; + } + { int c3 = z->c; /* do, line 29 */ + while(1) { /* repeat, line 29 */ + int c4 = z->c; + while(1) { /* goto, line 29 */ + int c5 = z->c; + if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab4; + z->bra = z->c; /* [, line 29 */ + if (!(eq_s(z, 1, s_3))) goto lab4; + z->ket = z->c; /* ], line 29 */ + z->c = c5; + break; + lab4: + z->c = c5; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab3; + z->c = ret; /* goto, line 29 */ + } + } + { int ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 29 */ + continue; + lab3: + z->c = c4; + break; + } + z->c = c3; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c1 = z->c; /* do, line 35 */ + { int c2 = z->c; /* or, line 41 */ + if (z->c + 4 >= z->l || z->p[z->c + 4] >> 5 != 3 || !((2375680 >> (z->p[z->c + 4] & 0x1f)) & 1)) goto lab2; + if (!(find_among(z, a_0, 3))) goto lab2; /* among, line 36 */ + goto lab1; + lab2: + z->c = c2; + { /* gopast */ /* grouping v, line 41 */ + int ret = out_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 41 */ + int ret = in_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + } + lab1: + z->I[0] = z->c; /* setmark p1, line 42 */ + { /* gopast */ /* grouping v, line 43 */ + int ret = out_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 43 */ + int ret = in_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 43 */ + lab0: + z->c = c1; + } + return 1; +} + +static int r_shortv(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 51 */ + if (out_grouping_b_U(z, g_v_WXY, 89, 121, 0)) goto lab1; + if (in_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1; + if (out_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; + if (in_grouping_b_U(z, g_v, 97, 121, 0)) return 0; + if (z->c > z->lb) return 0; /* atlimit, line 52 */ + } +lab0: + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_Step_1a(struct SN_env * z) { + int among_var; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 59 */ + z->ket = z->c; /* [, line 60 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 39 && z->p[z->c - 1] != 115)) { z->c = z->l - m_keep; goto lab0; } + among_var = find_among_b(z, a_1, 3); /* substring, line 60 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 60 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab0; } + case 1: + { int ret = slice_del(z); /* delete, line 62 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + z->ket = z->c; /* [, line 65 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 115)) return 0; + among_var = find_among_b(z, a_2, 6); /* substring, line 65 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 65 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 2, s_5); /* <-, line 66 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 68 */ + { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 2); + if (ret < 0) goto lab2; + z->c = ret; /* hop, line 68 */ + } + { int ret = slice_from_s(z, 1, s_6); /* <-, line 68 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = z->l - m1; + { int ret = slice_from_s(z, 2, s_7); /* <-, line 68 */ + if (ret < 0) return ret; + } + } + lab1: + break; + case 3: + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 69 */ + } + { /* gopast */ /* grouping v, line 69 */ + int ret = out_grouping_b_U(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + { int ret = slice_del(z); /* delete, line 69 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_1b(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 75 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33554576 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_4, 6); /* substring, line 75 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 75 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 77 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 2, s_8); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m_test = z->l - z->c; /* test, line 80 */ + { /* gopast */ /* grouping v, line 80 */ + int ret = out_grouping_b_U(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 80 */ + if (ret < 0) return ret; + } + { int m_test = z->l - z->c; /* test, line 81 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else + among_var = find_among_b(z, a_3, 13); /* substring, line 81 */ + if (!(among_var)) return 0; + z->c = z->l - m_test; + } + switch(among_var) { + case 0: return 0; + case 1: + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 83 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + case 2: + z->ket = z->c; /* [, line 86 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 86 */ + } + z->bra = z->c; /* ], line 86 */ + { int ret = slice_del(z); /* delete, line 86 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c != z->I[0]) return 0; /* atmark, line 87 */ + { int m_test = z->l - z->c; /* test, line 87 */ + { int ret = r_shortv(z); + if (ret == 0) return 0; /* call shortv, line 87 */ + if (ret < 0) return ret; + } + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 87 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + } + break; + } + return 1; +} + +static int r_Step_1c(struct SN_env * z) { + z->ket = z->c; /* [, line 94 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 94 */ + if (!(eq_s_b(z, 1, s_11))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_12))) return 0; + } +lab0: + z->bra = z->c; /* ], line 94 */ + if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; + { int m2 = z->l - z->c; (void)m2; /* not, line 95 */ + if (z->c > z->lb) goto lab2; /* atlimit, line 95 */ + return 0; + lab2: + z->c = z->l - m2; + } + { int ret = slice_from_s(z, 1, s_13); /* <-, line 96 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_Step_2(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 100 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 24); /* substring, line 100 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 100 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 100 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_14); /* <-, line 101 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_15); /* <-, line 102 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 4, s_16); /* <-, line 103 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 4, s_17); /* <-, line 104 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_18); /* <-, line 105 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 3, s_19); /* <-, line 107 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 3, s_20); /* <-, line 109 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 2, s_21); /* <-, line 111 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_from_s(z, 3, s_22); /* <-, line 112 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 3, s_23); /* <-, line 114 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 3, s_24); /* <-, line 116 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_from_s(z, 3, s_25); /* <-, line 118 */ + if (ret < 0) return ret; + } + break; + case 13: + if (!(eq_s_b(z, 1, s_26))) return 0; + { int ret = slice_from_s(z, 2, s_27); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_from_s(z, 3, s_28); /* <-, line 120 */ + if (ret < 0) return ret; + } + break; + case 15: + { int ret = slice_from_s(z, 4, s_29); /* <-, line 121 */ + if (ret < 0) return ret; + } + break; + case 16: + if (in_grouping_b_U(z, g_valid_LI, 99, 116, 0)) return 0; + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_3(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 127 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_6, 9); /* substring, line 127 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 127 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 127 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_30); /* <-, line 128 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_31); /* <-, line 129 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_32); /* <-, line 130 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 2, s_33); /* <-, line 132 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 136 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 136 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_4(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 141 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1864232 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_7, 18); /* substring, line 141 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 141 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 141 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 144 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ + if (!(eq_s_b(z, 1, s_34))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_35))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 145 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_5(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 150 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) return 0; + among_var = find_among_b(z, a_8, 2); /* substring, line 150 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 150 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 151 */ + { int ret = r_R2(z); + if (ret == 0) goto lab1; /* call R2, line 151 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 151 */ + if (ret < 0) return ret; + } + { int m2 = z->l - z->c; (void)m2; /* not, line 151 */ + { int ret = r_shortv(z); + if (ret == 0) goto lab2; /* call shortv, line 151 */ + if (ret < 0) return ret; + } + return 0; + lab2: + z->c = z->l - m2; + } + } + lab0: + { int ret = slice_del(z); /* delete, line 151 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 152 */ + if (ret < 0) return ret; + } + if (!(eq_s_b(z, 1, s_36))) return 0; + { int ret = slice_del(z); /* delete, line 152 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_exception2(struct SN_env * z) { + z->ket = z->c; /* [, line 158 */ + if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; + if (!(find_among_b(z, a_9, 8))) return 0; /* substring, line 158 */ + z->bra = z->c; /* ], line 158 */ + if (z->c > z->lb) return 0; /* atlimit, line 158 */ + return 1; +} + +static int r_exception1(struct SN_env * z) { + int among_var; + z->bra = z->c; /* [, line 170 */ + if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((42750482 >> (z->p[z->c + 2] & 0x1f)) & 1)) return 0; + among_var = find_among(z, a_10, 18); /* substring, line 170 */ + if (!(among_var)) return 0; + z->ket = z->c; /* ], line 170 */ + if (z->c < z->l) return 0; /* atlimit, line 170 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 3, s_37); /* <-, line 174 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 3, s_38); /* <-, line 175 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 3, s_39); /* <-, line 176 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 3, s_40); /* <-, line 177 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_41); /* <-, line 178 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 3, s_42); /* <-, line 182 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 5, s_43); /* <-, line 183 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 4, s_44); /* <-, line 184 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_from_s(z, 5, s_45); /* <-, line 185 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 4, s_46); /* <-, line 186 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 5, s_47); /* <-, line 187 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + if (!(z->B[0])) return 0; /* Boolean test Y_found, line 203 */ + while(1) { /* repeat, line 203 */ + int c1 = z->c; + while(1) { /* goto, line 203 */ + int c2 = z->c; + z->bra = z->c; /* [, line 203 */ + if (!(eq_s(z, 1, s_48))) goto lab1; + z->ket = z->c; /* ], line 203 */ + z->c = c2; + break; + lab1: + z->c = c2; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* goto, line 203 */ + } + } + { int ret = slice_from_s(z, 1, s_49); /* <-, line 203 */ + if (ret < 0) return ret; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +extern int english_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* or, line 207 */ + { int ret = r_exception1(z); + if (ret == 0) goto lab1; /* call exception1, line 207 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = c1; + { int c2 = z->c; /* not, line 208 */ + { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); + if (ret < 0) goto lab3; + z->c = ret; /* hop, line 208 */ + } + goto lab2; + lab3: + z->c = c2; + } + goto lab0; + lab2: + z->c = c1; + { int c3 = z->c; /* do, line 209 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab4; /* call prelude, line 209 */ + if (ret < 0) return ret; + } + lab4: + z->c = c3; + } + { int c4 = z->c; /* do, line 210 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab5; /* call mark_regions, line 210 */ + if (ret < 0) return ret; + } + lab5: + z->c = c4; + } + z->lb = z->c; z->c = z->l; /* backwards, line 211 */ + + { int m5 = z->l - z->c; (void)m5; /* do, line 213 */ + { int ret = r_Step_1a(z); + if (ret == 0) goto lab6; /* call Step_1a, line 213 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m5; + } + { int m6 = z->l - z->c; (void)m6; /* or, line 215 */ + { int ret = r_exception2(z); + if (ret == 0) goto lab8; /* call exception2, line 215 */ + if (ret < 0) return ret; + } + goto lab7; + lab8: + z->c = z->l - m6; + { int m7 = z->l - z->c; (void)m7; /* do, line 217 */ + { int ret = r_Step_1b(z); + if (ret == 0) goto lab9; /* call Step_1b, line 217 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m7; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 218 */ + { int ret = r_Step_1c(z); + if (ret == 0) goto lab10; /* call Step_1c, line 218 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 220 */ + { int ret = r_Step_2(z); + if (ret == 0) goto lab11; /* call Step_2, line 220 */ + if (ret < 0) return ret; + } + lab11: + z->c = z->l - m9; + } + { int m10 = z->l - z->c; (void)m10; /* do, line 221 */ + { int ret = r_Step_3(z); + if (ret == 0) goto lab12; /* call Step_3, line 221 */ + if (ret < 0) return ret; + } + lab12: + z->c = z->l - m10; + } + { int m11 = z->l - z->c; (void)m11; /* do, line 222 */ + { int ret = r_Step_4(z); + if (ret == 0) goto lab13; /* call Step_4, line 222 */ + if (ret < 0) return ret; + } + lab13: + z->c = z->l - m11; + } + { int m12 = z->l - z->c; (void)m12; /* do, line 224 */ + { int ret = r_Step_5(z); + if (ret == 0) goto lab14; /* call Step_5, line 224 */ + if (ret < 0) return ret; + } + lab14: + z->c = z->l - m12; + } + } + lab7: + z->c = z->lb; + { int c13 = z->c; /* do, line 227 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab15; /* call postlude, line 227 */ + if (ret < 0) return ret; + } + lab15: + z->c = c13; + } + } +lab0: + return 1; +} + +extern struct SN_env * english_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } + +extern void english_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_finnish.c b/src/backend/snowball/libstemmer/stem_UTF_8_finnish.c new file mode 100644 index 00000000000..55fba0a732d --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_finnish.c @@ -0,0 +1,768 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int finnish_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_tidy(struct SN_env * z); +static int r_other_endings(struct SN_env * z); +static int r_t_plural(struct SN_env * z); +static int r_i_plural(struct SN_env * z); +static int r_case_ending(struct SN_env * z); +static int r_VI(struct SN_env * z); +static int r_LONG(struct SN_env * z); +static int r_possessive(struct SN_env * z); +static int r_particle_etc(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * finnish_UTF_8_create_env(void); +extern void finnish_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[2] = { 'p', 'a' }; +static const symbol s_0_1[3] = { 's', 't', 'i' }; +static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' }; +static const symbol s_0_3[3] = { 'h', 'a', 'n' }; +static const symbol s_0_4[3] = { 'k', 'i', 'n' }; +static const symbol s_0_5[4] = { 'h', 0xC3, 0xA4, 'n' }; +static const symbol s_0_6[6] = { 'k', 0xC3, 0xA4, 0xC3, 0xA4, 'n' }; +static const symbol s_0_7[2] = { 'k', 'o' }; +static const symbol s_0_8[3] = { 'p', 0xC3, 0xA4 }; +static const symbol s_0_9[3] = { 'k', 0xC3, 0xB6 }; + +static const struct among a_0[10] = +{ +/* 0 */ { 2, s_0_0, -1, 1, 0}, +/* 1 */ { 3, s_0_1, -1, 2, 0}, +/* 2 */ { 4, s_0_2, -1, 1, 0}, +/* 3 */ { 3, s_0_3, -1, 1, 0}, +/* 4 */ { 3, s_0_4, -1, 1, 0}, +/* 5 */ { 4, s_0_5, -1, 1, 0}, +/* 6 */ { 6, s_0_6, -1, 1, 0}, +/* 7 */ { 2, s_0_7, -1, 1, 0}, +/* 8 */ { 3, s_0_8, -1, 1, 0}, +/* 9 */ { 3, s_0_9, -1, 1, 0} +}; + +static const symbol s_1_0[3] = { 'l', 'l', 'a' }; +static const symbol s_1_1[2] = { 'n', 'a' }; +static const symbol s_1_2[3] = { 's', 's', 'a' }; +static const symbol s_1_3[2] = { 't', 'a' }; +static const symbol s_1_4[3] = { 'l', 't', 'a' }; +static const symbol s_1_5[3] = { 's', 't', 'a' }; + +static const struct among a_1[6] = +{ +/* 0 */ { 3, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0}, +/* 2 */ { 3, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0}, +/* 4 */ { 3, s_1_4, 3, -1, 0}, +/* 5 */ { 3, s_1_5, 3, -1, 0} +}; + +static const symbol s_2_0[4] = { 'l', 'l', 0xC3, 0xA4 }; +static const symbol s_2_1[3] = { 'n', 0xC3, 0xA4 }; +static const symbol s_2_2[4] = { 's', 's', 0xC3, 0xA4 }; +static const symbol s_2_3[3] = { 't', 0xC3, 0xA4 }; +static const symbol s_2_4[4] = { 'l', 't', 0xC3, 0xA4 }; +static const symbol s_2_5[4] = { 's', 't', 0xC3, 0xA4 }; + +static const struct among a_2[6] = +{ +/* 0 */ { 4, s_2_0, -1, -1, 0}, +/* 1 */ { 3, s_2_1, -1, -1, 0}, +/* 2 */ { 4, s_2_2, -1, -1, 0}, +/* 3 */ { 3, s_2_3, -1, -1, 0}, +/* 4 */ { 4, s_2_4, 3, -1, 0}, +/* 5 */ { 4, s_2_5, 3, -1, 0} +}; + +static const symbol s_3_0[3] = { 'l', 'l', 'e' }; +static const symbol s_3_1[3] = { 'i', 'n', 'e' }; + +static const struct among a_3[2] = +{ +/* 0 */ { 3, s_3_0, -1, -1, 0}, +/* 1 */ { 3, s_3_1, -1, -1, 0} +}; + +static const symbol s_4_0[3] = { 'n', 's', 'a' }; +static const symbol s_4_1[3] = { 'm', 'm', 'e' }; +static const symbol s_4_2[3] = { 'n', 'n', 'e' }; +static const symbol s_4_3[2] = { 'n', 'i' }; +static const symbol s_4_4[2] = { 's', 'i' }; +static const symbol s_4_5[2] = { 'a', 'n' }; +static const symbol s_4_6[2] = { 'e', 'n' }; +static const symbol s_4_7[3] = { 0xC3, 0xA4, 'n' }; +static const symbol s_4_8[4] = { 'n', 's', 0xC3, 0xA4 }; + +static const struct among a_4[9] = +{ +/* 0 */ { 3, s_4_0, -1, 3, 0}, +/* 1 */ { 3, s_4_1, -1, 3, 0}, +/* 2 */ { 3, s_4_2, -1, 3, 0}, +/* 3 */ { 2, s_4_3, -1, 2, 0}, +/* 4 */ { 2, s_4_4, -1, 1, 0}, +/* 5 */ { 2, s_4_5, -1, 4, 0}, +/* 6 */ { 2, s_4_6, -1, 6, 0}, +/* 7 */ { 3, s_4_7, -1, 5, 0}, +/* 8 */ { 4, s_4_8, -1, 3, 0} +}; + +static const symbol s_5_0[2] = { 'a', 'a' }; +static const symbol s_5_1[2] = { 'e', 'e' }; +static const symbol s_5_2[2] = { 'i', 'i' }; +static const symbol s_5_3[2] = { 'o', 'o' }; +static const symbol s_5_4[2] = { 'u', 'u' }; +static const symbol s_5_5[4] = { 0xC3, 0xA4, 0xC3, 0xA4 }; +static const symbol s_5_6[4] = { 0xC3, 0xB6, 0xC3, 0xB6 }; + +static const struct among a_5[7] = +{ +/* 0 */ { 2, s_5_0, -1, -1, 0}, +/* 1 */ { 2, s_5_1, -1, -1, 0}, +/* 2 */ { 2, s_5_2, -1, -1, 0}, +/* 3 */ { 2, s_5_3, -1, -1, 0}, +/* 4 */ { 2, s_5_4, -1, -1, 0}, +/* 5 */ { 4, s_5_5, -1, -1, 0}, +/* 6 */ { 4, s_5_6, -1, -1, 0} +}; + +static const symbol s_6_0[1] = { 'a' }; +static const symbol s_6_1[3] = { 'l', 'l', 'a' }; +static const symbol s_6_2[2] = { 'n', 'a' }; +static const symbol s_6_3[3] = { 's', 's', 'a' }; +static const symbol s_6_4[2] = { 't', 'a' }; +static const symbol s_6_5[3] = { 'l', 't', 'a' }; +static const symbol s_6_6[3] = { 's', 't', 'a' }; +static const symbol s_6_7[3] = { 't', 't', 'a' }; +static const symbol s_6_8[3] = { 'l', 'l', 'e' }; +static const symbol s_6_9[3] = { 'i', 'n', 'e' }; +static const symbol s_6_10[3] = { 'k', 's', 'i' }; +static const symbol s_6_11[1] = { 'n' }; +static const symbol s_6_12[3] = { 'h', 'a', 'n' }; +static const symbol s_6_13[3] = { 'd', 'e', 'n' }; +static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' }; +static const symbol s_6_15[3] = { 'h', 'e', 'n' }; +static const symbol s_6_16[4] = { 't', 't', 'e', 'n' }; +static const symbol s_6_17[3] = { 'h', 'i', 'n' }; +static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' }; +static const symbol s_6_19[3] = { 'h', 'o', 'n' }; +static const symbol s_6_20[4] = { 'h', 0xC3, 0xA4, 'n' }; +static const symbol s_6_21[4] = { 'h', 0xC3, 0xB6, 'n' }; +static const symbol s_6_22[2] = { 0xC3, 0xA4 }; +static const symbol s_6_23[4] = { 'l', 'l', 0xC3, 0xA4 }; +static const symbol s_6_24[3] = { 'n', 0xC3, 0xA4 }; +static const symbol s_6_25[4] = { 's', 's', 0xC3, 0xA4 }; +static const symbol s_6_26[3] = { 't', 0xC3, 0xA4 }; +static const symbol s_6_27[4] = { 'l', 't', 0xC3, 0xA4 }; +static const symbol s_6_28[4] = { 's', 't', 0xC3, 0xA4 }; +static const symbol s_6_29[4] = { 't', 't', 0xC3, 0xA4 }; + +static const struct among a_6[30] = +{ +/* 0 */ { 1, s_6_0, -1, 8, 0}, +/* 1 */ { 3, s_6_1, 0, -1, 0}, +/* 2 */ { 2, s_6_2, 0, -1, 0}, +/* 3 */ { 3, s_6_3, 0, -1, 0}, +/* 4 */ { 2, s_6_4, 0, -1, 0}, +/* 5 */ { 3, s_6_5, 4, -1, 0}, +/* 6 */ { 3, s_6_6, 4, -1, 0}, +/* 7 */ { 3, s_6_7, 4, 9, 0}, +/* 8 */ { 3, s_6_8, -1, -1, 0}, +/* 9 */ { 3, s_6_9, -1, -1, 0}, +/* 10 */ { 3, s_6_10, -1, -1, 0}, +/* 11 */ { 1, s_6_11, -1, 7, 0}, +/* 12 */ { 3, s_6_12, 11, 1, 0}, +/* 13 */ { 3, s_6_13, 11, -1, r_VI}, +/* 14 */ { 4, s_6_14, 11, -1, r_LONG}, +/* 15 */ { 3, s_6_15, 11, 2, 0}, +/* 16 */ { 4, s_6_16, 11, -1, r_VI}, +/* 17 */ { 3, s_6_17, 11, 3, 0}, +/* 18 */ { 4, s_6_18, 11, -1, r_VI}, +/* 19 */ { 3, s_6_19, 11, 4, 0}, +/* 20 */ { 4, s_6_20, 11, 5, 0}, +/* 21 */ { 4, s_6_21, 11, 6, 0}, +/* 22 */ { 2, s_6_22, -1, 8, 0}, +/* 23 */ { 4, s_6_23, 22, -1, 0}, +/* 24 */ { 3, s_6_24, 22, -1, 0}, +/* 25 */ { 4, s_6_25, 22, -1, 0}, +/* 26 */ { 3, s_6_26, 22, -1, 0}, +/* 27 */ { 4, s_6_27, 26, -1, 0}, +/* 28 */ { 4, s_6_28, 26, -1, 0}, +/* 29 */ { 4, s_6_29, 26, 9, 0} +}; + +static const symbol s_7_0[3] = { 'e', 'j', 'a' }; +static const symbol s_7_1[3] = { 'm', 'm', 'a' }; +static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' }; +static const symbol s_7_3[3] = { 'm', 'p', 'a' }; +static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' }; +static const symbol s_7_5[3] = { 'm', 'm', 'i' }; +static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' }; +static const symbol s_7_7[3] = { 'm', 'p', 'i' }; +static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' }; +static const symbol s_7_9[4] = { 'e', 'j', 0xC3, 0xA4 }; +static const symbol s_7_10[4] = { 'm', 'm', 0xC3, 0xA4 }; +static const symbol s_7_11[5] = { 'i', 'm', 'm', 0xC3, 0xA4 }; +static const symbol s_7_12[4] = { 'm', 'p', 0xC3, 0xA4 }; +static const symbol s_7_13[5] = { 'i', 'm', 'p', 0xC3, 0xA4 }; + +static const struct among a_7[14] = +{ +/* 0 */ { 3, s_7_0, -1, -1, 0}, +/* 1 */ { 3, s_7_1, -1, 1, 0}, +/* 2 */ { 4, s_7_2, 1, -1, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 4, s_7_4, 3, -1, 0}, +/* 5 */ { 3, s_7_5, -1, 1, 0}, +/* 6 */ { 4, s_7_6, 5, -1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 4, s_7_8, 7, -1, 0}, +/* 9 */ { 4, s_7_9, -1, -1, 0}, +/* 10 */ { 4, s_7_10, -1, 1, 0}, +/* 11 */ { 5, s_7_11, 10, -1, 0}, +/* 12 */ { 4, s_7_12, -1, 1, 0}, +/* 13 */ { 5, s_7_13, 12, -1, 0} +}; + +static const symbol s_8_0[1] = { 'i' }; +static const symbol s_8_1[1] = { 'j' }; + +static const struct among a_8[2] = +{ +/* 0 */ { 1, s_8_0, -1, -1, 0}, +/* 1 */ { 1, s_8_1, -1, -1, 0} +}; + +static const symbol s_9_0[3] = { 'm', 'm', 'a' }; +static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' }; + +static const struct among a_9[2] = +{ +/* 0 */ { 3, s_9_0, -1, 1, 0}, +/* 1 */ { 4, s_9_1, 0, -1, 0} +}; + +static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; + +static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + +static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + +static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + +static const symbol s_0[] = { 'k' }; +static const symbol s_1[] = { 'k', 's', 'e' }; +static const symbol s_2[] = { 'k', 's', 'i' }; +static const symbol s_3[] = { 'i' }; +static const symbol s_4[] = { 'a' }; +static const symbol s_5[] = { 'e' }; +static const symbol s_6[] = { 'i' }; +static const symbol s_7[] = { 'o' }; +static const symbol s_8[] = { 0xC3, 0xA4 }; +static const symbol s_9[] = { 0xC3, 0xB6 }; +static const symbol s_10[] = { 'i', 'e' }; +static const symbol s_11[] = { 'e' }; +static const symbol s_12[] = { 'p', 'o' }; +static const symbol s_13[] = { 't' }; +static const symbol s_14[] = { 'p', 'o' }; +static const symbol s_15[] = { 'j' }; +static const symbol s_16[] = { 'o' }; +static const symbol s_17[] = { 'u' }; +static const symbol s_18[] = { 'o' }; +static const symbol s_19[] = { 'j' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + if (out_grouping_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 46 */ + { /* gopast */ /* non V1, line 46 */ + int ret = in_grouping_U(z, g_V1, 97, 246, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 46 */ + if (out_grouping_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 47 */ + { /* gopast */ /* non V1, line 47 */ + int ret = in_grouping_U(z, g_V1, 97, 246, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 47 */ + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_particle_etc(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 55 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 55 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 55 */ + among_var = find_among_b(z, a_0, 10); /* substring, line 55 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 55 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + if (in_grouping_b_U(z, g_particle_end, 97, 246, 0)) return 0; + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 64 */ + if (ret < 0) return ret; + } + break; + } + { int ret = slice_del(z); /* delete, line 66 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_possessive(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 69 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 69 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 69 */ + among_var = find_among_b(z, a_4, 9); /* substring, line 69 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 69 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m2 = z->l - z->c; (void)m2; /* not, line 72 */ + if (!(eq_s_b(z, 1, s_0))) goto lab0; + return 0; + lab0: + z->c = z->l - m2; + } + { int ret = slice_del(z); /* delete, line 72 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 74 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 74 */ + if (!(eq_s_b(z, 3, s_1))) return 0; + z->bra = z->c; /* ], line 74 */ + { int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 78 */ + if (ret < 0) return ret; + } + break; + case 4: + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0; + if (!(find_among_b(z, a_1, 6))) return 0; /* among, line 81 */ + { int ret = slice_del(z); /* delete, line 81 */ + if (ret < 0) return ret; + } + break; + case 5: + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 164) return 0; + if (!(find_among_b(z, a_2, 6))) return 0; /* among, line 83 */ + { int ret = slice_del(z); /* delete, line 84 */ + if (ret < 0) return ret; + } + break; + case 6: + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0; + if (!(find_among_b(z, a_3, 2))) return 0; /* among, line 86 */ + { int ret = slice_del(z); /* delete, line 86 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_LONG(struct SN_env * z) { + if (!(find_among_b(z, a_5, 7))) return 0; /* among, line 91 */ + return 1; +} + +static int r_VI(struct SN_env * z) { + if (!(eq_s_b(z, 1, s_3))) return 0; + if (in_grouping_b_U(z, g_V2, 97, 246, 0)) return 0; + return 1; +} + +static int r_case_ending(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 96 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 96 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 96 */ + among_var = find_among_b(z, a_6, 30); /* substring, line 96 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 96 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + if (!(eq_s_b(z, 1, s_4))) return 0; + break; + case 2: + if (!(eq_s_b(z, 1, s_5))) return 0; + break; + case 3: + if (!(eq_s_b(z, 1, s_6))) return 0; + break; + case 4: + if (!(eq_s_b(z, 1, s_7))) return 0; + break; + case 5: + if (!(eq_s_b(z, 2, s_8))) return 0; + break; + case 6: + if (!(eq_s_b(z, 2, s_9))) return 0; + break; + case 7: + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ + { int m2 = z->l - z->c; (void)m2; /* and, line 113 */ + { int m3 = z->l - z->c; (void)m3; /* or, line 112 */ + { int ret = r_LONG(z); + if (ret == 0) goto lab2; /* call LONG, line 111 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = z->l - m3; + if (!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; } + } + lab1: + z->c = z->l - m2; + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) { z->c = z->l - m_keep; goto lab0; } + z->c = ret; /* next, line 113 */ + } + } + z->bra = z->c; /* ], line 113 */ + lab0: + ; + } + break; + case 8: + if (in_grouping_b_U(z, g_V1, 97, 246, 0)) return 0; + if (out_grouping_b_U(z, g_V1, 97, 246, 0)) return 0; + break; + case 9: + if (!(eq_s_b(z, 1, s_11))) return 0; + break; + } + { int ret = slice_del(z); /* delete, line 138 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set ending_removed, line 139 */ + return 1; +} + +static int r_other_endings(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 142 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[1]) return 0; + z->c = z->I[1]; /* tomark, line 142 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 142 */ + among_var = find_among_b(z, a_7, 14); /* substring, line 142 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 142 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ + if (!(eq_s_b(z, 2, s_12))) goto lab0; + return 0; + lab0: + z->c = z->l - m2; + } + break; + } + { int ret = slice_del(z); /* delete, line 151 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_i_plural(struct SN_env * z) { + { int mlimit; /* setlimit, line 154 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 154 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 154 */ + if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 106)) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_8, 2))) { z->lb = mlimit; return 0; } /* substring, line 154 */ + z->bra = z->c; /* ], line 154 */ + z->lb = mlimit; + } + { int ret = slice_del(z); /* delete, line 158 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_t_plural(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 161 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 161 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 162 */ + if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 162 */ + { int m_test = z->l - z->c; /* test, line 162 */ + if (in_grouping_b_U(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 163 */ + if (ret < 0) return ret; + } + z->lb = mlimit; + } + { int mlimit; /* setlimit, line 165 */ + int m2 = z->l - z->c; (void)m2; + if (z->c < z->I[1]) return 0; + z->c = z->I[1]; /* tomark, line 165 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m2; + z->ket = z->c; /* [, line 165 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_9, 2); /* substring, line 165 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 165 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m3 = z->l - z->c; (void)m3; /* not, line 167 */ + if (!(eq_s_b(z, 2, s_14))) goto lab0; + return 0; + lab0: + z->c = z->l - m3; + } + break; + } + { int ret = slice_del(z); /* delete, line 170 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_tidy(struct SN_env * z) { + { int mlimit; /* setlimit, line 173 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 173 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* do, line 174 */ + { int m3 = z->l - z->c; (void)m3; /* and, line 174 */ + { int ret = r_LONG(z); + if (ret == 0) goto lab0; /* call LONG, line 174 */ + if (ret < 0) return ret; + } + z->c = z->l - m3; + z->ket = z->c; /* [, line 174 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 174 */ + } + z->bra = z->c; /* ], line 174 */ + { int ret = slice_del(z); /* delete, line 174 */ + if (ret < 0) return ret; + } + } + lab0: + z->c = z->l - m2; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 175 */ + z->ket = z->c; /* [, line 175 */ + if (in_grouping_b_U(z, g_AEI, 97, 228, 0)) goto lab1; + z->bra = z->c; /* ], line 175 */ + if (out_grouping_b_U(z, g_V1, 97, 246, 0)) goto lab1; + { int ret = slice_del(z); /* delete, line 175 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 176 */ + z->ket = z->c; /* [, line 176 */ + if (!(eq_s_b(z, 1, s_15))) goto lab2; + z->bra = z->c; /* ], line 176 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 176 */ + if (!(eq_s_b(z, 1, s_16))) goto lab4; + goto lab3; + lab4: + z->c = z->l - m6; + if (!(eq_s_b(z, 1, s_17))) goto lab2; + } + lab3: + { int ret = slice_del(z); /* delete, line 176 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m5; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 177 */ + z->ket = z->c; /* [, line 177 */ + if (!(eq_s_b(z, 1, s_18))) goto lab5; + z->bra = z->c; /* ], line 177 */ + if (!(eq_s_b(z, 1, s_19))) goto lab5; + { int ret = slice_del(z); /* delete, line 177 */ + if (ret < 0) return ret; + } + lab5: + z->c = z->l - m7; + } + z->lb = mlimit; + } + if (in_grouping_b_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */ + z->ket = z->c; /* [, line 179 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 179 */ + } + z->bra = z->c; /* ], line 179 */ + z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */ + if (z->S[0] == 0) return -1; /* -> x, line 179 */ + if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */ + { int ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int finnish_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 185 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 185 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->B[0] = 0; /* unset ending_removed, line 186 */ + z->lb = z->c; z->c = z->l; /* backwards, line 187 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 188 */ + { int ret = r_particle_etc(z); + if (ret == 0) goto lab1; /* call particle_etc, line 188 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 189 */ + { int ret = r_possessive(z); + if (ret == 0) goto lab2; /* call possessive, line 189 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 190 */ + { int ret = r_case_ending(z); + if (ret == 0) goto lab3; /* call case_ending, line 190 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 191 */ + { int ret = r_other_endings(z); + if (ret == 0) goto lab4; /* call other_endings, line 191 */ + if (ret < 0) return ret; + } + lab4: + z->c = z->l - m5; + } + { int m6 = z->l - z->c; (void)m6; /* or, line 192 */ + if (!(z->B[0])) goto lab6; /* Boolean test ending_removed, line 192 */ + { int m7 = z->l - z->c; (void)m7; /* do, line 192 */ + { int ret = r_i_plural(z); + if (ret == 0) goto lab7; /* call i_plural, line 192 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m7; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int m8 = z->l - z->c; (void)m8; /* do, line 192 */ + { int ret = r_t_plural(z); + if (ret == 0) goto lab8; /* call t_plural, line 192 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m8; + } + } +lab5: + { int m9 = z->l - z->c; (void)m9; /* do, line 193 */ + { int ret = r_tidy(z); + if (ret == 0) goto lab9; /* call tidy, line 193 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m9; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * finnish_UTF_8_create_env(void) { return SN_create_env(1, 2, 1); } + +extern void finnish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_french.c b/src/backend/snowball/libstemmer/stem_UTF_8_french.c new file mode 100644 index 00000000000..fa1507f2c63 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_french.c @@ -0,0 +1,1256 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int french_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_un_accent(struct SN_env * z); +static int r_un_double(struct SN_env * z); +static int r_residual_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_i_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * french_UTF_8_create_env(void); +extern void french_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[3] = { 'c', 'o', 'l' }; +static const symbol s_0_1[3] = { 'p', 'a', 'r' }; +static const symbol s_0_2[3] = { 't', 'a', 'p' }; + +static const struct among a_0[3] = +{ +/* 0 */ { 3, s_0_0, -1, -1, 0}, +/* 1 */ { 3, s_0_1, -1, -1, 0}, +/* 2 */ { 3, s_0_2, -1, -1, 0} +}; + +static const symbol s_1_1[1] = { 'I' }; +static const symbol s_1_2[1] = { 'U' }; +static const symbol s_1_3[1] = { 'Y' }; + +static const struct among a_1[4] = +{ +/* 0 */ { 0, 0, -1, 4, 0}, +/* 1 */ { 1, s_1_1, 0, 1, 0}, +/* 2 */ { 1, s_1_2, 0, 2, 0}, +/* 3 */ { 1, s_1_3, 0, 3, 0} +}; + +static const symbol s_2_0[3] = { 'i', 'q', 'U' }; +static const symbol s_2_1[3] = { 'a', 'b', 'l' }; +static const symbol s_2_2[4] = { 'I', 0xC3, 0xA8, 'r' }; +static const symbol s_2_3[4] = { 'i', 0xC3, 0xA8, 'r' }; +static const symbol s_2_4[3] = { 'e', 'u', 's' }; +static const symbol s_2_5[2] = { 'i', 'v' }; + +static const struct among a_2[6] = +{ +/* 0 */ { 3, s_2_0, -1, 3, 0}, +/* 1 */ { 3, s_2_1, -1, 3, 0}, +/* 2 */ { 4, s_2_2, -1, 4, 0}, +/* 3 */ { 4, s_2_3, -1, 4, 0}, +/* 4 */ { 3, s_2_4, -1, 2, 0}, +/* 5 */ { 2, s_2_5, -1, 1, 0} +}; + +static const symbol s_3_0[2] = { 'i', 'c' }; +static const symbol s_3_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_3_2[2] = { 'i', 'v' }; + +static const struct among a_3[3] = +{ +/* 0 */ { 2, s_3_0, -1, 2, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0}, +/* 2 */ { 2, s_3_2, -1, 3, 0} +}; + +static const symbol s_4_0[4] = { 'i', 'q', 'U', 'e' }; +static const symbol s_4_1[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; +static const symbol s_4_2[4] = { 'a', 'n', 'c', 'e' }; +static const symbol s_4_3[4] = { 'e', 'n', 'c', 'e' }; +static const symbol s_4_4[5] = { 'l', 'o', 'g', 'i', 'e' }; +static const symbol s_4_5[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_4_6[4] = { 'i', 's', 'm', 'e' }; +static const symbol s_4_7[4] = { 'e', 'u', 's', 'e' }; +static const symbol s_4_8[4] = { 'i', 's', 't', 'e' }; +static const symbol s_4_9[3] = { 'i', 'v', 'e' }; +static const symbol s_4_10[2] = { 'i', 'f' }; +static const symbol s_4_11[5] = { 'u', 's', 'i', 'o', 'n' }; +static const symbol s_4_12[5] = { 'a', 't', 'i', 'o', 'n' }; +static const symbol s_4_13[5] = { 'u', 't', 'i', 'o', 'n' }; +static const symbol s_4_14[5] = { 'a', 't', 'e', 'u', 'r' }; +static const symbol s_4_15[5] = { 'i', 'q', 'U', 'e', 's' }; +static const symbol s_4_16[7] = { 'a', 't', 'r', 'i', 'c', 'e', 's' }; +static const symbol s_4_17[5] = { 'a', 'n', 'c', 'e', 's' }; +static const symbol s_4_18[5] = { 'e', 'n', 'c', 'e', 's' }; +static const symbol s_4_19[6] = { 'l', 'o', 'g', 'i', 'e', 's' }; +static const symbol s_4_20[5] = { 'a', 'b', 'l', 'e', 's' }; +static const symbol s_4_21[5] = { 'i', 's', 'm', 'e', 's' }; +static const symbol s_4_22[5] = { 'e', 'u', 's', 'e', 's' }; +static const symbol s_4_23[5] = { 'i', 's', 't', 'e', 's' }; +static const symbol s_4_24[4] = { 'i', 'v', 'e', 's' }; +static const symbol s_4_25[3] = { 'i', 'f', 's' }; +static const symbol s_4_26[6] = { 'u', 's', 'i', 'o', 'n', 's' }; +static const symbol s_4_27[6] = { 'a', 't', 'i', 'o', 'n', 's' }; +static const symbol s_4_28[6] = { 'u', 't', 'i', 'o', 'n', 's' }; +static const symbol s_4_29[6] = { 'a', 't', 'e', 'u', 'r', 's' }; +static const symbol s_4_30[5] = { 'm', 'e', 'n', 't', 's' }; +static const symbol s_4_31[6] = { 'e', 'm', 'e', 'n', 't', 's' }; +static const symbol s_4_32[9] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't', 's' }; +static const symbol s_4_33[5] = { 'i', 't', 0xC3, 0xA9, 's' }; +static const symbol s_4_34[4] = { 'm', 'e', 'n', 't' }; +static const symbol s_4_35[5] = { 'e', 'm', 'e', 'n', 't' }; +static const symbol s_4_36[8] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't' }; +static const symbol s_4_37[6] = { 'a', 'm', 'm', 'e', 'n', 't' }; +static const symbol s_4_38[6] = { 'e', 'm', 'm', 'e', 'n', 't' }; +static const symbol s_4_39[3] = { 'a', 'u', 'x' }; +static const symbol s_4_40[4] = { 'e', 'a', 'u', 'x' }; +static const symbol s_4_41[3] = { 'e', 'u', 'x' }; +static const symbol s_4_42[4] = { 'i', 't', 0xC3, 0xA9 }; + +static const struct among a_4[43] = +{ +/* 0 */ { 4, s_4_0, -1, 1, 0}, +/* 1 */ { 6, s_4_1, -1, 2, 0}, +/* 2 */ { 4, s_4_2, -1, 1, 0}, +/* 3 */ { 4, s_4_3, -1, 5, 0}, +/* 4 */ { 5, s_4_4, -1, 3, 0}, +/* 5 */ { 4, s_4_5, -1, 1, 0}, +/* 6 */ { 4, s_4_6, -1, 1, 0}, +/* 7 */ { 4, s_4_7, -1, 11, 0}, +/* 8 */ { 4, s_4_8, -1, 1, 0}, +/* 9 */ { 3, s_4_9, -1, 8, 0}, +/* 10 */ { 2, s_4_10, -1, 8, 0}, +/* 11 */ { 5, s_4_11, -1, 4, 0}, +/* 12 */ { 5, s_4_12, -1, 2, 0}, +/* 13 */ { 5, s_4_13, -1, 4, 0}, +/* 14 */ { 5, s_4_14, -1, 2, 0}, +/* 15 */ { 5, s_4_15, -1, 1, 0}, +/* 16 */ { 7, s_4_16, -1, 2, 0}, +/* 17 */ { 5, s_4_17, -1, 1, 0}, +/* 18 */ { 5, s_4_18, -1, 5, 0}, +/* 19 */ { 6, s_4_19, -1, 3, 0}, +/* 20 */ { 5, s_4_20, -1, 1, 0}, +/* 21 */ { 5, s_4_21, -1, 1, 0}, +/* 22 */ { 5, s_4_22, -1, 11, 0}, +/* 23 */ { 5, s_4_23, -1, 1, 0}, +/* 24 */ { 4, s_4_24, -1, 8, 0}, +/* 25 */ { 3, s_4_25, -1, 8, 0}, +/* 26 */ { 6, s_4_26, -1, 4, 0}, +/* 27 */ { 6, s_4_27, -1, 2, 0}, +/* 28 */ { 6, s_4_28, -1, 4, 0}, +/* 29 */ { 6, s_4_29, -1, 2, 0}, +/* 30 */ { 5, s_4_30, -1, 15, 0}, +/* 31 */ { 6, s_4_31, 30, 6, 0}, +/* 32 */ { 9, s_4_32, 31, 12, 0}, +/* 33 */ { 5, s_4_33, -1, 7, 0}, +/* 34 */ { 4, s_4_34, -1, 15, 0}, +/* 35 */ { 5, s_4_35, 34, 6, 0}, +/* 36 */ { 8, s_4_36, 35, 12, 0}, +/* 37 */ { 6, s_4_37, 34, 13, 0}, +/* 38 */ { 6, s_4_38, 34, 14, 0}, +/* 39 */ { 3, s_4_39, -1, 10, 0}, +/* 40 */ { 4, s_4_40, 39, 9, 0}, +/* 41 */ { 3, s_4_41, -1, 1, 0}, +/* 42 */ { 4, s_4_42, -1, 7, 0} +}; + +static const symbol s_5_0[3] = { 'i', 'r', 'a' }; +static const symbol s_5_1[2] = { 'i', 'e' }; +static const symbol s_5_2[4] = { 'i', 's', 's', 'e' }; +static const symbol s_5_3[7] = { 'i', 's', 's', 'a', 'n', 't', 'e' }; +static const symbol s_5_4[1] = { 'i' }; +static const symbol s_5_5[4] = { 'i', 'r', 'a', 'i' }; +static const symbol s_5_6[2] = { 'i', 'r' }; +static const symbol s_5_7[4] = { 'i', 'r', 'a', 's' }; +static const symbol s_5_8[3] = { 'i', 'e', 's' }; +static const symbol s_5_9[5] = { 0xC3, 0xAE, 'm', 'e', 's' }; +static const symbol s_5_10[5] = { 'i', 's', 's', 'e', 's' }; +static const symbol s_5_11[8] = { 'i', 's', 's', 'a', 'n', 't', 'e', 's' }; +static const symbol s_5_12[5] = { 0xC3, 0xAE, 't', 'e', 's' }; +static const symbol s_5_13[2] = { 'i', 's' }; +static const symbol s_5_14[5] = { 'i', 'r', 'a', 'i', 's' }; +static const symbol s_5_15[6] = { 'i', 's', 's', 'a', 'i', 's' }; +static const symbol s_5_16[6] = { 'i', 'r', 'i', 'o', 'n', 's' }; +static const symbol s_5_17[7] = { 'i', 's', 's', 'i', 'o', 'n', 's' }; +static const symbol s_5_18[5] = { 'i', 'r', 'o', 'n', 's' }; +static const symbol s_5_19[6] = { 'i', 's', 's', 'o', 'n', 's' }; +static const symbol s_5_20[7] = { 'i', 's', 's', 'a', 'n', 't', 's' }; +static const symbol s_5_21[2] = { 'i', 't' }; +static const symbol s_5_22[5] = { 'i', 'r', 'a', 'i', 't' }; +static const symbol s_5_23[6] = { 'i', 's', 's', 'a', 'i', 't' }; +static const symbol s_5_24[6] = { 'i', 's', 's', 'a', 'n', 't' }; +static const symbol s_5_25[7] = { 'i', 'r', 'a', 'I', 'e', 'n', 't' }; +static const symbol s_5_26[8] = { 'i', 's', 's', 'a', 'I', 'e', 'n', 't' }; +static const symbol s_5_27[5] = { 'i', 'r', 'e', 'n', 't' }; +static const symbol s_5_28[6] = { 'i', 's', 's', 'e', 'n', 't' }; +static const symbol s_5_29[5] = { 'i', 'r', 'o', 'n', 't' }; +static const symbol s_5_30[3] = { 0xC3, 0xAE, 't' }; +static const symbol s_5_31[5] = { 'i', 'r', 'i', 'e', 'z' }; +static const symbol s_5_32[6] = { 'i', 's', 's', 'i', 'e', 'z' }; +static const symbol s_5_33[4] = { 'i', 'r', 'e', 'z' }; +static const symbol s_5_34[5] = { 'i', 's', 's', 'e', 'z' }; + +static const struct among a_5[35] = +{ +/* 0 */ { 3, s_5_0, -1, 1, 0}, +/* 1 */ { 2, s_5_1, -1, 1, 0}, +/* 2 */ { 4, s_5_2, -1, 1, 0}, +/* 3 */ { 7, s_5_3, -1, 1, 0}, +/* 4 */ { 1, s_5_4, -1, 1, 0}, +/* 5 */ { 4, s_5_5, 4, 1, 0}, +/* 6 */ { 2, s_5_6, -1, 1, 0}, +/* 7 */ { 4, s_5_7, -1, 1, 0}, +/* 8 */ { 3, s_5_8, -1, 1, 0}, +/* 9 */ { 5, s_5_9, -1, 1, 0}, +/* 10 */ { 5, s_5_10, -1, 1, 0}, +/* 11 */ { 8, s_5_11, -1, 1, 0}, +/* 12 */ { 5, s_5_12, -1, 1, 0}, +/* 13 */ { 2, s_5_13, -1, 1, 0}, +/* 14 */ { 5, s_5_14, 13, 1, 0}, +/* 15 */ { 6, s_5_15, 13, 1, 0}, +/* 16 */ { 6, s_5_16, -1, 1, 0}, +/* 17 */ { 7, s_5_17, -1, 1, 0}, +/* 18 */ { 5, s_5_18, -1, 1, 0}, +/* 19 */ { 6, s_5_19, -1, 1, 0}, +/* 20 */ { 7, s_5_20, -1, 1, 0}, +/* 21 */ { 2, s_5_21, -1, 1, 0}, +/* 22 */ { 5, s_5_22, 21, 1, 0}, +/* 23 */ { 6, s_5_23, 21, 1, 0}, +/* 24 */ { 6, s_5_24, -1, 1, 0}, +/* 25 */ { 7, s_5_25, -1, 1, 0}, +/* 26 */ { 8, s_5_26, -1, 1, 0}, +/* 27 */ { 5, s_5_27, -1, 1, 0}, +/* 28 */ { 6, s_5_28, -1, 1, 0}, +/* 29 */ { 5, s_5_29, -1, 1, 0}, +/* 30 */ { 3, s_5_30, -1, 1, 0}, +/* 31 */ { 5, s_5_31, -1, 1, 0}, +/* 32 */ { 6, s_5_32, -1, 1, 0}, +/* 33 */ { 4, s_5_33, -1, 1, 0}, +/* 34 */ { 5, s_5_34, -1, 1, 0} +}; + +static const symbol s_6_0[1] = { 'a' }; +static const symbol s_6_1[3] = { 'e', 'r', 'a' }; +static const symbol s_6_2[4] = { 'a', 's', 's', 'e' }; +static const symbol s_6_3[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_6_4[3] = { 0xC3, 0xA9, 'e' }; +static const symbol s_6_5[2] = { 'a', 'i' }; +static const symbol s_6_6[4] = { 'e', 'r', 'a', 'i' }; +static const symbol s_6_7[2] = { 'e', 'r' }; +static const symbol s_6_8[2] = { 'a', 's' }; +static const symbol s_6_9[4] = { 'e', 'r', 'a', 's' }; +static const symbol s_6_10[5] = { 0xC3, 0xA2, 'm', 'e', 's' }; +static const symbol s_6_11[5] = { 'a', 's', 's', 'e', 's' }; +static const symbol s_6_12[5] = { 'a', 'n', 't', 'e', 's' }; +static const symbol s_6_13[5] = { 0xC3, 0xA2, 't', 'e', 's' }; +static const symbol s_6_14[4] = { 0xC3, 0xA9, 'e', 's' }; +static const symbol s_6_15[3] = { 'a', 'i', 's' }; +static const symbol s_6_16[5] = { 'e', 'r', 'a', 'i', 's' }; +static const symbol s_6_17[4] = { 'i', 'o', 'n', 's' }; +static const symbol s_6_18[6] = { 'e', 'r', 'i', 'o', 'n', 's' }; +static const symbol s_6_19[7] = { 'a', 's', 's', 'i', 'o', 'n', 's' }; +static const symbol s_6_20[5] = { 'e', 'r', 'o', 'n', 's' }; +static const symbol s_6_21[4] = { 'a', 'n', 't', 's' }; +static const symbol s_6_22[3] = { 0xC3, 0xA9, 's' }; +static const symbol s_6_23[3] = { 'a', 'i', 't' }; +static const symbol s_6_24[5] = { 'e', 'r', 'a', 'i', 't' }; +static const symbol s_6_25[3] = { 'a', 'n', 't' }; +static const symbol s_6_26[5] = { 'a', 'I', 'e', 'n', 't' }; +static const symbol s_6_27[7] = { 'e', 'r', 'a', 'I', 'e', 'n', 't' }; +static const symbol s_6_28[6] = { 0xC3, 0xA8, 'r', 'e', 'n', 't' }; +static const symbol s_6_29[6] = { 'a', 's', 's', 'e', 'n', 't' }; +static const symbol s_6_30[5] = { 'e', 'r', 'o', 'n', 't' }; +static const symbol s_6_31[3] = { 0xC3, 0xA2, 't' }; +static const symbol s_6_32[2] = { 'e', 'z' }; +static const symbol s_6_33[3] = { 'i', 'e', 'z' }; +static const symbol s_6_34[5] = { 'e', 'r', 'i', 'e', 'z' }; +static const symbol s_6_35[6] = { 'a', 's', 's', 'i', 'e', 'z' }; +static const symbol s_6_36[4] = { 'e', 'r', 'e', 'z' }; +static const symbol s_6_37[2] = { 0xC3, 0xA9 }; + +static const struct among a_6[38] = +{ +/* 0 */ { 1, s_6_0, -1, 3, 0}, +/* 1 */ { 3, s_6_1, 0, 2, 0}, +/* 2 */ { 4, s_6_2, -1, 3, 0}, +/* 3 */ { 4, s_6_3, -1, 3, 0}, +/* 4 */ { 3, s_6_4, -1, 2, 0}, +/* 5 */ { 2, s_6_5, -1, 3, 0}, +/* 6 */ { 4, s_6_6, 5, 2, 0}, +/* 7 */ { 2, s_6_7, -1, 2, 0}, +/* 8 */ { 2, s_6_8, -1, 3, 0}, +/* 9 */ { 4, s_6_9, 8, 2, 0}, +/* 10 */ { 5, s_6_10, -1, 3, 0}, +/* 11 */ { 5, s_6_11, -1, 3, 0}, +/* 12 */ { 5, s_6_12, -1, 3, 0}, +/* 13 */ { 5, s_6_13, -1, 3, 0}, +/* 14 */ { 4, s_6_14, -1, 2, 0}, +/* 15 */ { 3, s_6_15, -1, 3, 0}, +/* 16 */ { 5, s_6_16, 15, 2, 0}, +/* 17 */ { 4, s_6_17, -1, 1, 0}, +/* 18 */ { 6, s_6_18, 17, 2, 0}, +/* 19 */ { 7, s_6_19, 17, 3, 0}, +/* 20 */ { 5, s_6_20, -1, 2, 0}, +/* 21 */ { 4, s_6_21, -1, 3, 0}, +/* 22 */ { 3, s_6_22, -1, 2, 0}, +/* 23 */ { 3, s_6_23, -1, 3, 0}, +/* 24 */ { 5, s_6_24, 23, 2, 0}, +/* 25 */ { 3, s_6_25, -1, 3, 0}, +/* 26 */ { 5, s_6_26, -1, 3, 0}, +/* 27 */ { 7, s_6_27, 26, 2, 0}, +/* 28 */ { 6, s_6_28, -1, 2, 0}, +/* 29 */ { 6, s_6_29, -1, 3, 0}, +/* 30 */ { 5, s_6_30, -1, 2, 0}, +/* 31 */ { 3, s_6_31, -1, 3, 0}, +/* 32 */ { 2, s_6_32, -1, 2, 0}, +/* 33 */ { 3, s_6_33, 32, 2, 0}, +/* 34 */ { 5, s_6_34, 33, 2, 0}, +/* 35 */ { 6, s_6_35, 33, 3, 0}, +/* 36 */ { 4, s_6_36, 32, 2, 0}, +/* 37 */ { 2, s_6_37, -1, 2, 0} +}; + +static const symbol s_7_0[1] = { 'e' }; +static const symbol s_7_1[5] = { 'I', 0xC3, 0xA8, 'r', 'e' }; +static const symbol s_7_2[5] = { 'i', 0xC3, 0xA8, 'r', 'e' }; +static const symbol s_7_3[3] = { 'i', 'o', 'n' }; +static const symbol s_7_4[3] = { 'I', 'e', 'r' }; +static const symbol s_7_5[3] = { 'i', 'e', 'r' }; +static const symbol s_7_6[2] = { 0xC3, 0xAB }; + +static const struct among a_7[7] = +{ +/* 0 */ { 1, s_7_0, -1, 3, 0}, +/* 1 */ { 5, s_7_1, 0, 2, 0}, +/* 2 */ { 5, s_7_2, 0, 2, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 3, s_7_4, -1, 2, 0}, +/* 5 */ { 3, s_7_5, -1, 2, 0}, +/* 6 */ { 2, s_7_6, -1, 4, 0} +}; + +static const symbol s_8_0[3] = { 'e', 'l', 'l' }; +static const symbol s_8_1[4] = { 'e', 'i', 'l', 'l' }; +static const symbol s_8_2[3] = { 'e', 'n', 'n' }; +static const symbol s_8_3[3] = { 'o', 'n', 'n' }; +static const symbol s_8_4[3] = { 'e', 't', 't' }; + +static const struct among a_8[5] = +{ +/* 0 */ { 3, s_8_0, -1, -1, 0}, +/* 1 */ { 4, s_8_1, -1, -1, 0}, +/* 2 */ { 3, s_8_2, -1, -1, 0}, +/* 3 */ { 3, s_8_3, -1, -1, 0}, +/* 4 */ { 3, s_8_4, -1, -1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; + +static const unsigned char g_keep_with_s[] = { 1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + +static const symbol s_0[] = { 'u' }; +static const symbol s_1[] = { 'U' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'I' }; +static const symbol s_4[] = { 'y' }; +static const symbol s_5[] = { 'Y' }; +static const symbol s_6[] = { 'y' }; +static const symbol s_7[] = { 'Y' }; +static const symbol s_8[] = { 'q' }; +static const symbol s_9[] = { 'u' }; +static const symbol s_10[] = { 'U' }; +static const symbol s_11[] = { 'i' }; +static const symbol s_12[] = { 'u' }; +static const symbol s_13[] = { 'y' }; +static const symbol s_14[] = { 'i', 'c' }; +static const symbol s_15[] = { 'i', 'q', 'U' }; +static const symbol s_16[] = { 'l', 'o', 'g' }; +static const symbol s_17[] = { 'u' }; +static const symbol s_18[] = { 'e', 'n', 't' }; +static const symbol s_19[] = { 'a', 't' }; +static const symbol s_20[] = { 'e', 'u', 'x' }; +static const symbol s_21[] = { 'i' }; +static const symbol s_22[] = { 'a', 'b', 'l' }; +static const symbol s_23[] = { 'i', 'q', 'U' }; +static const symbol s_24[] = { 'a', 't' }; +static const symbol s_25[] = { 'i', 'c' }; +static const symbol s_26[] = { 'i', 'q', 'U' }; +static const symbol s_27[] = { 'e', 'a', 'u' }; +static const symbol s_28[] = { 'a', 'l' }; +static const symbol s_29[] = { 'e', 'u', 'x' }; +static const symbol s_30[] = { 'a', 'n', 't' }; +static const symbol s_31[] = { 'e', 'n', 't' }; +static const symbol s_32[] = { 'e' }; +static const symbol s_33[] = { 's' }; +static const symbol s_34[] = { 's' }; +static const symbol s_35[] = { 't' }; +static const symbol s_36[] = { 'i' }; +static const symbol s_37[] = { 'g', 'u' }; +static const symbol s_38[] = { 0xC3, 0xA9 }; +static const symbol s_39[] = { 0xC3, 0xA8 }; +static const symbol s_40[] = { 'e' }; +static const symbol s_41[] = { 'Y' }; +static const symbol s_42[] = { 'i' }; +static const symbol s_43[] = { 0xC3, 0xA7 }; +static const symbol s_44[] = { 'c' }; + +static int r_prelude(struct SN_env * z) { + while(1) { /* repeat, line 38 */ + int c1 = z->c; + while(1) { /* goto, line 38 */ + int c2 = z->c; + { int c3 = z->c; /* or, line 44 */ + if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab3; + z->bra = z->c; /* [, line 40 */ + { int c4 = z->c; /* or, line 40 */ + if (!(eq_s(z, 1, s_0))) goto lab5; + z->ket = z->c; /* ], line 40 */ + if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab5; + { int ret = slice_from_s(z, 1, s_1); /* <-, line 40 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = c4; + if (!(eq_s(z, 1, s_2))) goto lab6; + z->ket = z->c; /* ], line 41 */ + if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab6; + { int ret = slice_from_s(z, 1, s_3); /* <-, line 41 */ + if (ret < 0) return ret; + } + goto lab4; + lab6: + z->c = c4; + if (!(eq_s(z, 1, s_4))) goto lab3; + z->ket = z->c; /* ], line 42 */ + { int ret = slice_from_s(z, 1, s_5); /* <-, line 42 */ + if (ret < 0) return ret; + } + } + lab4: + goto lab2; + lab3: + z->c = c3; + z->bra = z->c; /* [, line 45 */ + if (!(eq_s(z, 1, s_6))) goto lab7; + z->ket = z->c; /* ], line 45 */ + if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab7; + { int ret = slice_from_s(z, 1, s_7); /* <-, line 45 */ + if (ret < 0) return ret; + } + goto lab2; + lab7: + z->c = c3; + if (!(eq_s(z, 1, s_8))) goto lab1; + z->bra = z->c; /* [, line 47 */ + if (!(eq_s(z, 1, s_9))) goto lab1; + z->ket = z->c; /* ], line 47 */ + { int ret = slice_from_s(z, 1, s_10); /* <-, line 47 */ + if (ret < 0) return ret; + } + } + lab2: + z->c = c2; + break; + lab1: + z->c = c2; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* goto, line 38 */ + } + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 56 */ + { int c2 = z->c; /* or, line 58 */ + if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab2; + if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab2; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab2; + z->c = ret; /* next, line 57 */ + } + goto lab1; + lab2: + z->c = c2; + if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((331776 >> (z->p[z->c + 2] & 0x1f)) & 1)) goto lab3; + if (!(find_among(z, a_0, 3))) goto lab3; /* among, line 59 */ + goto lab1; + lab3: + z->c = c2; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 66 */ + } + { /* gopast */ /* grouping v, line 66 */ + int ret = out_grouping_U(z, g_v, 97, 251, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + } + lab1: + z->I[0] = z->c; /* setmark pV, line 67 */ + lab0: + z->c = c1; + } + { int c3 = z->c; /* do, line 69 */ + { /* gopast */ /* grouping v, line 70 */ + int ret = out_grouping_U(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 70 */ + int ret = in_grouping_U(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 70 */ + { /* gopast */ /* grouping v, line 71 */ + int ret = out_grouping_U(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 71 */ + int ret = in_grouping_U(z, g_v, 97, 251, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 71 */ + lab4: + z->c = c3; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 75 */ + int c1 = z->c; + z->bra = z->c; /* [, line 77 */ + if (z->c >= z->l || z->p[z->c + 0] >> 5 != 2 || !((35652096 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 4; else + among_var = find_among(z, a_1, 4); /* substring, line 77 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 77 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_13); /* <-, line 80 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 81 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 92 */ + among_var = find_among_b(z, a_4, 43); /* substring, line 92 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 92 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 96 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 96 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 99 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 99 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 100 */ + z->ket = z->c; /* [, line 100 */ + if (!(eq_s_b(z, 2, s_14))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 100 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 100 */ + { int ret = r_R2(z); + if (ret == 0) goto lab2; /* call R2, line 100 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 100 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = z->l - m1; + { int ret = slice_from_s(z, 3, s_15); /* <-, line 100 */ + if (ret < 0) return ret; + } + } + lab1: + lab0: + ; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 104 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_16); /* <-, line 104 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 107 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_17); /* <-, line 107 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 110 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_18); /* <-, line 110 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 114 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 114 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 115 */ + z->ket = z->c; /* [, line 116 */ + among_var = find_among_b(z, a_2, 6); /* substring, line 116 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 116 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab3; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 117 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 117 */ + if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 117 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 117 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m2 = z->l - z->c; (void)m2; /* or, line 118 */ + { int ret = r_R2(z); + if (ret == 0) goto lab5; /* call R2, line 118 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 118 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = z->l - m2; + { int ret = r_R1(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R1, line 118 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_20); /* <-, line 118 */ + if (ret < 0) return ret; + } + } + lab4: + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 120 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 120 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call RV, line 122 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_21); /* <-, line 122 */ + if (ret < 0) return ret; + } + break; + } + lab3: + ; + } + break; + case 7: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 129 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 129 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 130 */ + z->ket = z->c; /* [, line 131 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab6; } + among_var = find_among_b(z, a_3, 3); /* substring, line 131 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab6; } + z->bra = z->c; /* ], line 131 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab6; } + case 1: + { int m3 = z->l - z->c; (void)m3; /* or, line 132 */ + { int ret = r_R2(z); + if (ret == 0) goto lab8; /* call R2, line 132 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 132 */ + if (ret < 0) return ret; + } + goto lab7; + lab8: + z->c = z->l - m3; + { int ret = slice_from_s(z, 3, s_22); /* <-, line 132 */ + if (ret < 0) return ret; + } + } + lab7: + break; + case 2: + { int m4 = z->l - z->c; (void)m4; /* or, line 133 */ + { int ret = r_R2(z); + if (ret == 0) goto lab10; /* call R2, line 133 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 133 */ + if (ret < 0) return ret; + } + goto lab9; + lab10: + z->c = z->l - m4; + { int ret = slice_from_s(z, 3, s_23); /* <-, line 133 */ + if (ret < 0) return ret; + } + } + lab9: + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R2, line 134 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + break; + } + lab6: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 141 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 141 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 142 */ + z->ket = z->c; /* [, line 142 */ + if (!(eq_s_b(z, 2, s_24))) { z->c = z->l - m_keep; goto lab11; } + z->bra = z->c; /* ], line 142 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab11; } /* call R2, line 142 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 142 */ + if (!(eq_s_b(z, 2, s_25))) { z->c = z->l - m_keep; goto lab11; } + z->bra = z->c; /* ], line 142 */ + { int m5 = z->l - z->c; (void)m5; /* or, line 142 */ + { int ret = r_R2(z); + if (ret == 0) goto lab13; /* call R2, line 142 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + goto lab12; + lab13: + z->c = z->l - m5; + { int ret = slice_from_s(z, 3, s_26); /* <-, line 142 */ + if (ret < 0) return ret; + } + } + lab12: + lab11: + ; + } + break; + case 9: + { int ret = slice_from_s(z, 3, s_27); /* <-, line 144 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 145 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 2, s_28); /* <-, line 145 */ + if (ret < 0) return ret; + } + break; + case 11: + { int m6 = z->l - z->c; (void)m6; /* or, line 147 */ + { int ret = r_R2(z); + if (ret == 0) goto lab15; /* call R2, line 147 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 147 */ + if (ret < 0) return ret; + } + goto lab14; + lab15: + z->c = z->l - m6; + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 147 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_29); /* <-, line 147 */ + if (ret < 0) return ret; + } + } + lab14: + break; + case 12: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 150 */ + if (ret < 0) return ret; + } + if (out_grouping_b_U(z, g_v, 97, 251, 0)) return 0; + { int ret = slice_del(z); /* delete, line 150 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 155 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_30); /* <-, line 155 */ + if (ret < 0) return ret; + } + return 0; /* fail, line 155 */ + break; + case 14: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 156 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_31); /* <-, line 156 */ + if (ret < 0) return ret; + } + return 0; /* fail, line 156 */ + break; + case 15: + { int m_test = z->l - z->c; /* test, line 158 */ + if (in_grouping_b_U(z, g_v, 97, 251, 0)) return 0; + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 158 */ + if (ret < 0) return ret; + } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 158 */ + if (ret < 0) return ret; + } + return 0; /* fail, line 158 */ + break; + } + return 1; +} + +static int r_i_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 163 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 163 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 164 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68944418 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_5, 35); /* substring, line 164 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 164 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + if (out_grouping_b_U(z, g_v, 97, 251, 0)) { z->lb = mlimit; return 0; } + { int ret = slice_del(z); /* delete, line 170 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 174 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 174 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 175 */ + among_var = find_among_b(z, a_6, 38); /* substring, line 175 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 175 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 177 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 177 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 185 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 190 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 191 */ + z->ket = z->c; /* [, line 191 */ + if (!(eq_s_b(z, 1, s_32))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 191 */ + { int ret = slice_del(z); /* delete, line 191 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_residual_suffix(struct SN_env * z) { + int among_var; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 199 */ + z->ket = z->c; /* [, line 199 */ + if (!(eq_s_b(z, 1, s_33))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 199 */ + { int m_test = z->l - z->c; /* test, line 199 */ + if (out_grouping_b_U(z, g_keep_with_s, 97, 232, 0)) { z->c = z->l - m_keep; goto lab0; } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 199 */ + if (ret < 0) return ret; + } + lab0: + ; + } + { int mlimit; /* setlimit, line 200 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 200 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 201 */ + among_var = find_among_b(z, a_7, 7); /* substring, line 201 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 201 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 202 */ + if (ret < 0) return ret; + } + { int m2 = z->l - z->c; (void)m2; /* or, line 202 */ + if (!(eq_s_b(z, 1, s_34))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m2; + if (!(eq_s_b(z, 1, s_35))) { z->lb = mlimit; return 0; } + } + lab1: + { int ret = slice_del(z); /* delete, line 202 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_36); /* <-, line 204 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 205 */ + if (ret < 0) return ret; + } + break; + case 4: + if (!(eq_s_b(z, 2, s_37))) { z->lb = mlimit; return 0; } + { int ret = slice_del(z); /* delete, line 206 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_un_double(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 212 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1069056 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_8, 5))) return 0; /* among, line 212 */ + z->c = z->l - m_test; + } + z->ket = z->c; /* [, line 212 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 212 */ + } + z->bra = z->c; /* ], line 212 */ + { int ret = slice_del(z); /* delete, line 212 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_un_accent(struct SN_env * z) { + { int i = 1; + while(1) { /* atleast, line 216 */ + if (out_grouping_b_U(z, g_v, 97, 251, 0)) goto lab0; + i--; + continue; + lab0: + break; + } + if (i > 0) return 0; + } + z->ket = z->c; /* [, line 217 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 217 */ + if (!(eq_s_b(z, 2, s_38))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m1; + if (!(eq_s_b(z, 2, s_39))) return 0; + } +lab1: + z->bra = z->c; /* ], line 217 */ + { int ret = slice_from_s(z, 1, s_40); /* <-, line 217 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int french_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 223 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 223 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 224 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 224 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 225 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 227 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 237 */ + { int m5 = z->l - z->c; (void)m5; /* and, line 233 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 229 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab6; /* call standard_suffix, line 229 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_i_verb_suffix(z); + if (ret == 0) goto lab7; /* call i_verb_suffix, line 230 */ + if (ret < 0) return ret; + } + goto lab5; + lab7: + z->c = z->l - m6; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ + if (ret < 0) return ret; + } + } + lab5: + z->c = z->l - m5; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 234 */ + z->ket = z->c; /* [, line 234 */ + { int m7 = z->l - z->c; (void)m7; /* or, line 234 */ + if (!(eq_s_b(z, 1, s_41))) goto lab10; + z->bra = z->c; /* ], line 234 */ + { int ret = slice_from_s(z, 1, s_42); /* <-, line 234 */ + if (ret < 0) return ret; + } + goto lab9; + lab10: + z->c = z->l - m7; + if (!(eq_s_b(z, 2, s_43))) { z->c = z->l - m_keep; goto lab8; } + z->bra = z->c; /* ], line 235 */ + { int ret = slice_from_s(z, 1, s_44); /* <-, line 235 */ + if (ret < 0) return ret; + } + } + lab9: + lab8: + ; + } + } + goto lab3; + lab4: + z->c = z->l - m4; + { int ret = r_residual_suffix(z); + if (ret == 0) goto lab2; /* call residual_suffix, line 238 */ + if (ret < 0) return ret; + } + } + lab3: + lab2: + z->c = z->l - m3; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 243 */ + { int ret = r_un_double(z); + if (ret == 0) goto lab11; /* call un_double, line 243 */ + if (ret < 0) return ret; + } + lab11: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 244 */ + { int ret = r_un_accent(z); + if (ret == 0) goto lab12; /* call un_accent, line 244 */ + if (ret < 0) return ret; + } + lab12: + z->c = z->l - m9; + } + z->c = z->lb; + { int c10 = z->c; /* do, line 246 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab13; /* call postlude, line 246 */ + if (ret < 0) return ret; + } + lab13: + z->c = c10; + } + return 1; +} + +extern struct SN_env * french_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void french_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_german.c b/src/backend/snowball/libstemmer/stem_UTF_8_german.c new file mode 100644 index 00000000000..5d406e50921 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_german.c @@ -0,0 +1,509 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int german_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_standard_suffix(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * german_UTF_8_create_env(void); +extern void german_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[1] = { 'U' }; +static const symbol s_0_2[1] = { 'Y' }; +static const symbol s_0_3[2] = { 0xC3, 0xA4 }; +static const symbol s_0_4[2] = { 0xC3, 0xB6 }; +static const symbol s_0_5[2] = { 0xC3, 0xBC }; + +static const struct among a_0[6] = +{ +/* 0 */ { 0, 0, -1, 6, 0}, +/* 1 */ { 1, s_0_1, 0, 2, 0}, +/* 2 */ { 1, s_0_2, 0, 1, 0}, +/* 3 */ { 2, s_0_3, 0, 3, 0}, +/* 4 */ { 2, s_0_4, 0, 4, 0}, +/* 5 */ { 2, s_0_5, 0, 5, 0} +}; + +static const symbol s_1_0[1] = { 'e' }; +static const symbol s_1_1[2] = { 'e', 'm' }; +static const symbol s_1_2[2] = { 'e', 'n' }; +static const symbol s_1_3[3] = { 'e', 'r', 'n' }; +static const symbol s_1_4[2] = { 'e', 'r' }; +static const symbol s_1_5[1] = { 's' }; +static const symbol s_1_6[2] = { 'e', 's' }; + +static const struct among a_1[7] = +{ +/* 0 */ { 1, s_1_0, -1, 1, 0}, +/* 1 */ { 2, s_1_1, -1, 1, 0}, +/* 2 */ { 2, s_1_2, -1, 1, 0}, +/* 3 */ { 3, s_1_3, -1, 1, 0}, +/* 4 */ { 2, s_1_4, -1, 1, 0}, +/* 5 */ { 1, s_1_5, -1, 2, 0}, +/* 6 */ { 2, s_1_6, 5, 1, 0} +}; + +static const symbol s_2_0[2] = { 'e', 'n' }; +static const symbol s_2_1[2] = { 'e', 'r' }; +static const symbol s_2_2[2] = { 's', 't' }; +static const symbol s_2_3[3] = { 'e', 's', 't' }; + +static const struct among a_2[4] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 2, s_2_1, -1, 1, 0}, +/* 2 */ { 2, s_2_2, -1, 2, 0}, +/* 3 */ { 3, s_2_3, 2, 1, 0} +}; + +static const symbol s_3_0[2] = { 'i', 'g' }; +static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; + +static const struct among a_3[2] = +{ +/* 0 */ { 2, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0} +}; + +static const symbol s_4_0[3] = { 'e', 'n', 'd' }; +static const symbol s_4_1[2] = { 'i', 'g' }; +static const symbol s_4_2[3] = { 'u', 'n', 'g' }; +static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; +static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; +static const symbol s_4_5[2] = { 'i', 'k' }; +static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; +static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; + +static const struct among a_4[8] = +{ +/* 0 */ { 3, s_4_0, -1, 1, 0}, +/* 1 */ { 2, s_4_1, -1, 2, 0}, +/* 2 */ { 3, s_4_2, -1, 1, 0}, +/* 3 */ { 4, s_4_3, -1, 3, 0}, +/* 4 */ { 4, s_4_4, -1, 2, 0}, +/* 5 */ { 2, s_4_5, -1, 2, 0}, +/* 6 */ { 4, s_4_6, -1, 3, 0}, +/* 7 */ { 4, s_4_7, -1, 4, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; + +static const unsigned char g_s_ending[] = { 117, 30, 5 }; + +static const unsigned char g_st_ending[] = { 117, 30, 4 }; + +static const symbol s_0[] = { 0xC3, 0x9F }; +static const symbol s_1[] = { 's', 's' }; +static const symbol s_2[] = { 'u' }; +static const symbol s_3[] = { 'U' }; +static const symbol s_4[] = { 'y' }; +static const symbol s_5[] = { 'Y' }; +static const symbol s_6[] = { 'y' }; +static const symbol s_7[] = { 'u' }; +static const symbol s_8[] = { 'a' }; +static const symbol s_9[] = { 'o' }; +static const symbol s_10[] = { 'u' }; +static const symbol s_11[] = { 'i', 'g' }; +static const symbol s_12[] = { 'e' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'e', 'r' }; +static const symbol s_15[] = { 'e', 'n' }; + +static int r_prelude(struct SN_env * z) { + { int c_test = z->c; /* test, line 30 */ + while(1) { /* repeat, line 30 */ + int c1 = z->c; + { int c2 = z->c; /* or, line 33 */ + z->bra = z->c; /* [, line 32 */ + if (!(eq_s(z, 2, s_0))) goto lab2; + z->ket = z->c; /* ], line 32 */ + { int ret = slice_from_s(z, 2, s_1); /* <-, line 32 */ + if (ret < 0) return ret; + } + goto lab1; + lab2: + z->c = c2; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 33 */ + } + } + lab1: + continue; + lab0: + z->c = c1; + break; + } + z->c = c_test; + } + while(1) { /* repeat, line 36 */ + int c3 = z->c; + while(1) { /* goto, line 36 */ + int c4 = z->c; + if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; + z->bra = z->c; /* [, line 37 */ + { int c5 = z->c; /* or, line 37 */ + if (!(eq_s(z, 1, s_2))) goto lab6; + z->ket = z->c; /* ], line 37 */ + if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6; + { int ret = slice_from_s(z, 1, s_3); /* <-, line 37 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = c5; + if (!(eq_s(z, 1, s_4))) goto lab4; + z->ket = z->c; /* ], line 38 */ + if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; + { int ret = slice_from_s(z, 1, s_5); /* <-, line 38 */ + if (ret < 0) return ret; + } + } + lab5: + z->c = c4; + break; + lab4: + z->c = c4; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab3; + z->c = ret; /* goto, line 36 */ + } + } + continue; + lab3: + z->c = c3; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c_test = z->c; /* test, line 47 */ + { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); + if (ret < 0) return 0; + z->c = ret; /* hop, line 47 */ + } + z->I[2] = z->c; /* setmark x, line 47 */ + z->c = c_test; + } + { /* gopast */ /* grouping v, line 49 */ + int ret = out_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 49 */ + int ret = in_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 49 */ + /* try, line 50 */ + if (!(z->I[0] < z->I[2])) goto lab0; + z->I[0] = z->I[2]; +lab0: + { /* gopast */ /* grouping v, line 51 */ + int ret = out_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + { /* gopast */ /* non v, line 51 */ + int ret = in_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 51 */ + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 55 */ + int c1 = z->c; + z->bra = z->c; /* [, line 57 */ + among_var = find_among(z, a_0, 6); /* substring, line 57 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 57 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_6); /* <-, line 58 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_7); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_8); /* <-, line 60 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_9); /* <-, line 61 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_10); /* <-, line 62 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 63 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + { int m1 = z->l - z->c; (void)m1; /* do, line 74 */ + z->ket = z->c; /* [, line 75 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; + among_var = find_among_b(z, a_1, 7); /* substring, line 75 */ + if (!(among_var)) goto lab0; + z->bra = z->c; /* ], line 75 */ + { int ret = r_R1(z); + if (ret == 0) goto lab0; /* call R1, line 75 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_del(z); /* delete, line 77 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0; + { int ret = slice_del(z); /* delete, line 80 */ + if (ret < 0) return ret; + } + break; + } + lab0: + z->c = z->l - m1; + } + { int m2 = z->l - z->c; (void)m2; /* do, line 84 */ + z->ket = z->c; /* [, line 85 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1; + among_var = find_among_b(z, a_2, 4); /* substring, line 85 */ + if (!(among_var)) goto lab1; + z->bra = z->c; /* ], line 85 */ + { int ret = r_R1(z); + if (ret == 0) goto lab1; /* call R1, line 85 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: goto lab1; + case 1: + { int ret = slice_del(z); /* delete, line 87 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab1; + { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 3); + if (ret < 0) goto lab1; + z->c = ret; /* hop, line 90 */ + } + { int ret = slice_del(z); /* delete, line 90 */ + if (ret < 0) return ret; + } + break; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 94 */ + z->ket = z->c; /* [, line 95 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; + among_var = find_among_b(z, a_4, 8); /* substring, line 95 */ + if (!(among_var)) goto lab2; + z->bra = z->c; /* ], line 95 */ + { int ret = r_R2(z); + if (ret == 0) goto lab2; /* call R2, line 95 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: goto lab2; + case 1: + { int ret = slice_del(z); /* delete, line 97 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 98 */ + z->ket = z->c; /* [, line 98 */ + if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 98 */ + { int m4 = z->l - z->c; (void)m4; /* not, line 98 */ + if (!(eq_s_b(z, 1, s_12))) goto lab4; + { z->c = z->l - m_keep; goto lab3; } + lab4: + z->c = z->l - m4; + } + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 98 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 98 */ + if (ret < 0) return ret; + } + lab3: + ; + } + break; + case 2: + { int m5 = z->l - z->c; (void)m5; /* not, line 101 */ + if (!(eq_s_b(z, 1, s_13))) goto lab5; + goto lab2; + lab5: + z->c = z->l - m5; + } + { int ret = slice_del(z); /* delete, line 101 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 104 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 105 */ + z->ket = z->c; /* [, line 106 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 106 */ + if (!(eq_s_b(z, 2, s_14))) goto lab8; + goto lab7; + lab8: + z->c = z->l - m6; + if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab6; } + } + lab7: + z->bra = z->c; /* ], line 106 */ + { int ret = r_R1(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R1, line 106 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 106 */ + if (ret < 0) return ret; + } + lab6: + ; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 110 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ + z->ket = z->c; /* [, line 112 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab9; } + among_var = find_among_b(z, a_3, 2); /* substring, line 112 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab9; } + z->bra = z->c; /* ], line 112 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab9; } /* call R2, line 112 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab9; } + case 1: + { int ret = slice_del(z); /* delete, line 114 */ + if (ret < 0) return ret; + } + break; + } + lab9: + ; + } + break; + } + lab2: + z->c = z->l - m3; + } + return 1; +} + +extern int german_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 125 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 125 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 126 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 126 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 127 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 128 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab2; /* call standard_suffix, line 128 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + z->c = z->lb; + { int c4 = z->c; /* do, line 129 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab3; /* call postlude, line 129 */ + if (ret < 0) return ret; + } + lab3: + z->c = c4; + } + return 1; +} + +extern struct SN_env * german_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void german_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_hungarian.c b/src/backend/snowball/libstemmer/stem_UTF_8_hungarian.c new file mode 100644 index 00000000000..e40df8e6c18 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_hungarian.c @@ -0,0 +1,1234 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int hungarian_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_double(struct SN_env * z); +static int r_undouble(struct SN_env * z); +static int r_factive(struct SN_env * z); +static int r_instrum(struct SN_env * z); +static int r_plur_owner(struct SN_env * z); +static int r_sing_owner(struct SN_env * z); +static int r_owned(struct SN_env * z); +static int r_plural(struct SN_env * z); +static int r_case_other(struct SN_env * z); +static int r_case_special(struct SN_env * z); +static int r_case(struct SN_env * z); +static int r_v_ending(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * hungarian_UTF_8_create_env(void); +extern void hungarian_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[2] = { 'c', 's' }; +static const symbol s_0_1[3] = { 'd', 'z', 's' }; +static const symbol s_0_2[2] = { 'g', 'y' }; +static const symbol s_0_3[2] = { 'l', 'y' }; +static const symbol s_0_4[2] = { 'n', 'y' }; +static const symbol s_0_5[2] = { 's', 'z' }; +static const symbol s_0_6[2] = { 't', 'y' }; +static const symbol s_0_7[2] = { 'z', 's' }; + +static const struct among a_0[8] = +{ +/* 0 */ { 2, s_0_0, -1, -1, 0}, +/* 1 */ { 3, s_0_1, -1, -1, 0}, +/* 2 */ { 2, s_0_2, -1, -1, 0}, +/* 3 */ { 2, s_0_3, -1, -1, 0}, +/* 4 */ { 2, s_0_4, -1, -1, 0}, +/* 5 */ { 2, s_0_5, -1, -1, 0}, +/* 6 */ { 2, s_0_6, -1, -1, 0}, +/* 7 */ { 2, s_0_7, -1, -1, 0} +}; + +static const symbol s_1_0[2] = { 0xC3, 0xA1 }; +static const symbol s_1_1[2] = { 0xC3, 0xA9 }; + +static const struct among a_1[2] = +{ +/* 0 */ { 2, s_1_0, -1, 1, 0}, +/* 1 */ { 2, s_1_1, -1, 2, 0} +}; + +static const symbol s_2_0[2] = { 'b', 'b' }; +static const symbol s_2_1[2] = { 'c', 'c' }; +static const symbol s_2_2[2] = { 'd', 'd' }; +static const symbol s_2_3[2] = { 'f', 'f' }; +static const symbol s_2_4[2] = { 'g', 'g' }; +static const symbol s_2_5[2] = { 'j', 'j' }; +static const symbol s_2_6[2] = { 'k', 'k' }; +static const symbol s_2_7[2] = { 'l', 'l' }; +static const symbol s_2_8[2] = { 'm', 'm' }; +static const symbol s_2_9[2] = { 'n', 'n' }; +static const symbol s_2_10[2] = { 'p', 'p' }; +static const symbol s_2_11[2] = { 'r', 'r' }; +static const symbol s_2_12[3] = { 'c', 'c', 's' }; +static const symbol s_2_13[2] = { 's', 's' }; +static const symbol s_2_14[3] = { 'z', 'z', 's' }; +static const symbol s_2_15[2] = { 't', 't' }; +static const symbol s_2_16[2] = { 'v', 'v' }; +static const symbol s_2_17[3] = { 'g', 'g', 'y' }; +static const symbol s_2_18[3] = { 'l', 'l', 'y' }; +static const symbol s_2_19[3] = { 'n', 'n', 'y' }; +static const symbol s_2_20[3] = { 't', 't', 'y' }; +static const symbol s_2_21[3] = { 's', 's', 'z' }; +static const symbol s_2_22[2] = { 'z', 'z' }; + +static const struct among a_2[23] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 2, s_2_2, -1, -1, 0}, +/* 3 */ { 2, s_2_3, -1, -1, 0}, +/* 4 */ { 2, s_2_4, -1, -1, 0}, +/* 5 */ { 2, s_2_5, -1, -1, 0}, +/* 6 */ { 2, s_2_6, -1, -1, 0}, +/* 7 */ { 2, s_2_7, -1, -1, 0}, +/* 8 */ { 2, s_2_8, -1, -1, 0}, +/* 9 */ { 2, s_2_9, -1, -1, 0}, +/* 10 */ { 2, s_2_10, -1, -1, 0}, +/* 11 */ { 2, s_2_11, -1, -1, 0}, +/* 12 */ { 3, s_2_12, -1, -1, 0}, +/* 13 */ { 2, s_2_13, -1, -1, 0}, +/* 14 */ { 3, s_2_14, -1, -1, 0}, +/* 15 */ { 2, s_2_15, -1, -1, 0}, +/* 16 */ { 2, s_2_16, -1, -1, 0}, +/* 17 */ { 3, s_2_17, -1, -1, 0}, +/* 18 */ { 3, s_2_18, -1, -1, 0}, +/* 19 */ { 3, s_2_19, -1, -1, 0}, +/* 20 */ { 3, s_2_20, -1, -1, 0}, +/* 21 */ { 3, s_2_21, -1, -1, 0}, +/* 22 */ { 2, s_2_22, -1, -1, 0} +}; + +static const symbol s_3_0[2] = { 'a', 'l' }; +static const symbol s_3_1[2] = { 'e', 'l' }; + +static const struct among a_3[2] = +{ +/* 0 */ { 2, s_3_0, -1, 1, 0}, +/* 1 */ { 2, s_3_1, -1, 2, 0} +}; + +static const symbol s_4_0[2] = { 'b', 'a' }; +static const symbol s_4_1[2] = { 'r', 'a' }; +static const symbol s_4_2[2] = { 'b', 'e' }; +static const symbol s_4_3[2] = { 'r', 'e' }; +static const symbol s_4_4[2] = { 'i', 'g' }; +static const symbol s_4_5[3] = { 'n', 'a', 'k' }; +static const symbol s_4_6[3] = { 'n', 'e', 'k' }; +static const symbol s_4_7[3] = { 'v', 'a', 'l' }; +static const symbol s_4_8[3] = { 'v', 'e', 'l' }; +static const symbol s_4_9[2] = { 'u', 'l' }; +static const symbol s_4_10[4] = { 'n', 0xC3, 0xA1, 'l' }; +static const symbol s_4_11[4] = { 'n', 0xC3, 0xA9, 'l' }; +static const symbol s_4_12[4] = { 'b', 0xC3, 0xB3, 'l' }; +static const symbol s_4_13[4] = { 'r', 0xC3, 0xB3, 'l' }; +static const symbol s_4_14[4] = { 't', 0xC3, 0xB3, 'l' }; +static const symbol s_4_15[4] = { 'b', 0xC3, 0xB5, 'l' }; +static const symbol s_4_16[4] = { 'r', 0xC3, 0xB5, 'l' }; +static const symbol s_4_17[4] = { 't', 0xC3, 0xB5, 'l' }; +static const symbol s_4_18[3] = { 0xC3, 0xBC, 'l' }; +static const symbol s_4_19[1] = { 'n' }; +static const symbol s_4_20[2] = { 'a', 'n' }; +static const symbol s_4_21[3] = { 'b', 'a', 'n' }; +static const symbol s_4_22[2] = { 'e', 'n' }; +static const symbol s_4_23[3] = { 'b', 'e', 'n' }; +static const symbol s_4_24[7] = { 'k', 0xC3, 0xA9, 'p', 'p', 'e', 'n' }; +static const symbol s_4_25[2] = { 'o', 'n' }; +static const symbol s_4_26[3] = { 0xC3, 0xB6, 'n' }; +static const symbol s_4_27[5] = { 'k', 0xC3, 0xA9, 'p', 'p' }; +static const symbol s_4_28[3] = { 'k', 'o', 'r' }; +static const symbol s_4_29[1] = { 't' }; +static const symbol s_4_30[2] = { 'a', 't' }; +static const symbol s_4_31[2] = { 'e', 't' }; +static const symbol s_4_32[5] = { 'k', 0xC3, 0xA9, 'n', 't' }; +static const symbol s_4_33[7] = { 'a', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; +static const symbol s_4_34[7] = { 'e', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; +static const symbol s_4_35[7] = { 'o', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; +static const symbol s_4_36[2] = { 'o', 't' }; +static const symbol s_4_37[4] = { 0xC3, 0xA9, 'r', 't' }; +static const symbol s_4_38[3] = { 0xC3, 0xB6, 't' }; +static const symbol s_4_39[3] = { 'h', 'e', 'z' }; +static const symbol s_4_40[3] = { 'h', 'o', 'z' }; +static const symbol s_4_41[4] = { 'h', 0xC3, 0xB6, 'z' }; +static const symbol s_4_42[3] = { 'v', 0xC3, 0xA1 }; +static const symbol s_4_43[3] = { 'v', 0xC3, 0xA9 }; + +static const struct among a_4[44] = +{ +/* 0 */ { 2, s_4_0, -1, -1, 0}, +/* 1 */ { 2, s_4_1, -1, -1, 0}, +/* 2 */ { 2, s_4_2, -1, -1, 0}, +/* 3 */ { 2, s_4_3, -1, -1, 0}, +/* 4 */ { 2, s_4_4, -1, -1, 0}, +/* 5 */ { 3, s_4_5, -1, -1, 0}, +/* 6 */ { 3, s_4_6, -1, -1, 0}, +/* 7 */ { 3, s_4_7, -1, -1, 0}, +/* 8 */ { 3, s_4_8, -1, -1, 0}, +/* 9 */ { 2, s_4_9, -1, -1, 0}, +/* 10 */ { 4, s_4_10, -1, -1, 0}, +/* 11 */ { 4, s_4_11, -1, -1, 0}, +/* 12 */ { 4, s_4_12, -1, -1, 0}, +/* 13 */ { 4, s_4_13, -1, -1, 0}, +/* 14 */ { 4, s_4_14, -1, -1, 0}, +/* 15 */ { 4, s_4_15, -1, -1, 0}, +/* 16 */ { 4, s_4_16, -1, -1, 0}, +/* 17 */ { 4, s_4_17, -1, -1, 0}, +/* 18 */ { 3, s_4_18, -1, -1, 0}, +/* 19 */ { 1, s_4_19, -1, -1, 0}, +/* 20 */ { 2, s_4_20, 19, -1, 0}, +/* 21 */ { 3, s_4_21, 20, -1, 0}, +/* 22 */ { 2, s_4_22, 19, -1, 0}, +/* 23 */ { 3, s_4_23, 22, -1, 0}, +/* 24 */ { 7, s_4_24, 22, -1, 0}, +/* 25 */ { 2, s_4_25, 19, -1, 0}, +/* 26 */ { 3, s_4_26, 19, -1, 0}, +/* 27 */ { 5, s_4_27, -1, -1, 0}, +/* 28 */ { 3, s_4_28, -1, -1, 0}, +/* 29 */ { 1, s_4_29, -1, -1, 0}, +/* 30 */ { 2, s_4_30, 29, -1, 0}, +/* 31 */ { 2, s_4_31, 29, -1, 0}, +/* 32 */ { 5, s_4_32, 29, -1, 0}, +/* 33 */ { 7, s_4_33, 32, -1, 0}, +/* 34 */ { 7, s_4_34, 32, -1, 0}, +/* 35 */ { 7, s_4_35, 32, -1, 0}, +/* 36 */ { 2, s_4_36, 29, -1, 0}, +/* 37 */ { 4, s_4_37, 29, -1, 0}, +/* 38 */ { 3, s_4_38, 29, -1, 0}, +/* 39 */ { 3, s_4_39, -1, -1, 0}, +/* 40 */ { 3, s_4_40, -1, -1, 0}, +/* 41 */ { 4, s_4_41, -1, -1, 0}, +/* 42 */ { 3, s_4_42, -1, -1, 0}, +/* 43 */ { 3, s_4_43, -1, -1, 0} +}; + +static const symbol s_5_0[3] = { 0xC3, 0xA1, 'n' }; +static const symbol s_5_1[3] = { 0xC3, 0xA9, 'n' }; +static const symbol s_5_2[8] = { 0xC3, 0xA1, 'n', 'k', 0xC3, 0xA9, 'n', 't' }; + +static const struct among a_5[3] = +{ +/* 0 */ { 3, s_5_0, -1, 2, 0}, +/* 1 */ { 3, s_5_1, -1, 1, 0}, +/* 2 */ { 8, s_5_2, -1, 3, 0} +}; + +static const symbol s_6_0[4] = { 's', 't', 'u', 'l' }; +static const symbol s_6_1[5] = { 'a', 's', 't', 'u', 'l' }; +static const symbol s_6_2[6] = { 0xC3, 0xA1, 's', 't', 'u', 'l' }; +static const symbol s_6_3[5] = { 's', 't', 0xC3, 0xBC, 'l' }; +static const symbol s_6_4[6] = { 'e', 's', 't', 0xC3, 0xBC, 'l' }; +static const symbol s_6_5[7] = { 0xC3, 0xA9, 's', 't', 0xC3, 0xBC, 'l' }; + +static const struct among a_6[6] = +{ +/* 0 */ { 4, s_6_0, -1, 2, 0}, +/* 1 */ { 5, s_6_1, 0, 1, 0}, +/* 2 */ { 6, s_6_2, 0, 3, 0}, +/* 3 */ { 5, s_6_3, -1, 2, 0}, +/* 4 */ { 6, s_6_4, 3, 1, 0}, +/* 5 */ { 7, s_6_5, 3, 4, 0} +}; + +static const symbol s_7_0[2] = { 0xC3, 0xA1 }; +static const symbol s_7_1[2] = { 0xC3, 0xA9 }; + +static const struct among a_7[2] = +{ +/* 0 */ { 2, s_7_0, -1, 1, 0}, +/* 1 */ { 2, s_7_1, -1, 2, 0} +}; + +static const symbol s_8_0[1] = { 'k' }; +static const symbol s_8_1[2] = { 'a', 'k' }; +static const symbol s_8_2[2] = { 'e', 'k' }; +static const symbol s_8_3[2] = { 'o', 'k' }; +static const symbol s_8_4[3] = { 0xC3, 0xA1, 'k' }; +static const symbol s_8_5[3] = { 0xC3, 0xA9, 'k' }; +static const symbol s_8_6[3] = { 0xC3, 0xB6, 'k' }; + +static const struct among a_8[7] = +{ +/* 0 */ { 1, s_8_0, -1, 7, 0}, +/* 1 */ { 2, s_8_1, 0, 4, 0}, +/* 2 */ { 2, s_8_2, 0, 6, 0}, +/* 3 */ { 2, s_8_3, 0, 5, 0}, +/* 4 */ { 3, s_8_4, 0, 1, 0}, +/* 5 */ { 3, s_8_5, 0, 2, 0}, +/* 6 */ { 3, s_8_6, 0, 3, 0} +}; + +static const symbol s_9_0[3] = { 0xC3, 0xA9, 'i' }; +static const symbol s_9_1[5] = { 0xC3, 0xA1, 0xC3, 0xA9, 'i' }; +static const symbol s_9_2[5] = { 0xC3, 0xA9, 0xC3, 0xA9, 'i' }; +static const symbol s_9_3[2] = { 0xC3, 0xA9 }; +static const symbol s_9_4[3] = { 'k', 0xC3, 0xA9 }; +static const symbol s_9_5[4] = { 'a', 'k', 0xC3, 0xA9 }; +static const symbol s_9_6[4] = { 'e', 'k', 0xC3, 0xA9 }; +static const symbol s_9_7[4] = { 'o', 'k', 0xC3, 0xA9 }; +static const symbol s_9_8[5] = { 0xC3, 0xA1, 'k', 0xC3, 0xA9 }; +static const symbol s_9_9[5] = { 0xC3, 0xA9, 'k', 0xC3, 0xA9 }; +static const symbol s_9_10[5] = { 0xC3, 0xB6, 'k', 0xC3, 0xA9 }; +static const symbol s_9_11[4] = { 0xC3, 0xA9, 0xC3, 0xA9 }; + +static const struct among a_9[12] = +{ +/* 0 */ { 3, s_9_0, -1, 7, 0}, +/* 1 */ { 5, s_9_1, 0, 6, 0}, +/* 2 */ { 5, s_9_2, 0, 5, 0}, +/* 3 */ { 2, s_9_3, -1, 9, 0}, +/* 4 */ { 3, s_9_4, 3, 4, 0}, +/* 5 */ { 4, s_9_5, 4, 1, 0}, +/* 6 */ { 4, s_9_6, 4, 1, 0}, +/* 7 */ { 4, s_9_7, 4, 1, 0}, +/* 8 */ { 5, s_9_8, 4, 3, 0}, +/* 9 */ { 5, s_9_9, 4, 2, 0}, +/* 10 */ { 5, s_9_10, 4, 1, 0}, +/* 11 */ { 4, s_9_11, 3, 8, 0} +}; + +static const symbol s_10_0[1] = { 'a' }; +static const symbol s_10_1[2] = { 'j', 'a' }; +static const symbol s_10_2[1] = { 'd' }; +static const symbol s_10_3[2] = { 'a', 'd' }; +static const symbol s_10_4[2] = { 'e', 'd' }; +static const symbol s_10_5[2] = { 'o', 'd' }; +static const symbol s_10_6[3] = { 0xC3, 0xA1, 'd' }; +static const symbol s_10_7[3] = { 0xC3, 0xA9, 'd' }; +static const symbol s_10_8[3] = { 0xC3, 0xB6, 'd' }; +static const symbol s_10_9[1] = { 'e' }; +static const symbol s_10_10[2] = { 'j', 'e' }; +static const symbol s_10_11[2] = { 'n', 'k' }; +static const symbol s_10_12[3] = { 'u', 'n', 'k' }; +static const symbol s_10_13[4] = { 0xC3, 0xA1, 'n', 'k' }; +static const symbol s_10_14[4] = { 0xC3, 0xA9, 'n', 'k' }; +static const symbol s_10_15[4] = { 0xC3, 0xBC, 'n', 'k' }; +static const symbol s_10_16[2] = { 'u', 'k' }; +static const symbol s_10_17[3] = { 'j', 'u', 'k' }; +static const symbol s_10_18[5] = { 0xC3, 0xA1, 'j', 'u', 'k' }; +static const symbol s_10_19[3] = { 0xC3, 0xBC, 'k' }; +static const symbol s_10_20[4] = { 'j', 0xC3, 0xBC, 'k' }; +static const symbol s_10_21[6] = { 0xC3, 0xA9, 'j', 0xC3, 0xBC, 'k' }; +static const symbol s_10_22[1] = { 'm' }; +static const symbol s_10_23[2] = { 'a', 'm' }; +static const symbol s_10_24[2] = { 'e', 'm' }; +static const symbol s_10_25[2] = { 'o', 'm' }; +static const symbol s_10_26[3] = { 0xC3, 0xA1, 'm' }; +static const symbol s_10_27[3] = { 0xC3, 0xA9, 'm' }; +static const symbol s_10_28[1] = { 'o' }; +static const symbol s_10_29[2] = { 0xC3, 0xA1 }; +static const symbol s_10_30[2] = { 0xC3, 0xA9 }; + +static const struct among a_10[31] = +{ +/* 0 */ { 1, s_10_0, -1, 18, 0}, +/* 1 */ { 2, s_10_1, 0, 17, 0}, +/* 2 */ { 1, s_10_2, -1, 16, 0}, +/* 3 */ { 2, s_10_3, 2, 13, 0}, +/* 4 */ { 2, s_10_4, 2, 13, 0}, +/* 5 */ { 2, s_10_5, 2, 13, 0}, +/* 6 */ { 3, s_10_6, 2, 14, 0}, +/* 7 */ { 3, s_10_7, 2, 15, 0}, +/* 8 */ { 3, s_10_8, 2, 13, 0}, +/* 9 */ { 1, s_10_9, -1, 18, 0}, +/* 10 */ { 2, s_10_10, 9, 17, 0}, +/* 11 */ { 2, s_10_11, -1, 4, 0}, +/* 12 */ { 3, s_10_12, 11, 1, 0}, +/* 13 */ { 4, s_10_13, 11, 2, 0}, +/* 14 */ { 4, s_10_14, 11, 3, 0}, +/* 15 */ { 4, s_10_15, 11, 1, 0}, +/* 16 */ { 2, s_10_16, -1, 8, 0}, +/* 17 */ { 3, s_10_17, 16, 7, 0}, +/* 18 */ { 5, s_10_18, 17, 5, 0}, +/* 19 */ { 3, s_10_19, -1, 8, 0}, +/* 20 */ { 4, s_10_20, 19, 7, 0}, +/* 21 */ { 6, s_10_21, 20, 6, 0}, +/* 22 */ { 1, s_10_22, -1, 12, 0}, +/* 23 */ { 2, s_10_23, 22, 9, 0}, +/* 24 */ { 2, s_10_24, 22, 9, 0}, +/* 25 */ { 2, s_10_25, 22, 9, 0}, +/* 26 */ { 3, s_10_26, 22, 10, 0}, +/* 27 */ { 3, s_10_27, 22, 11, 0}, +/* 28 */ { 1, s_10_28, -1, 18, 0}, +/* 29 */ { 2, s_10_29, -1, 19, 0}, +/* 30 */ { 2, s_10_30, -1, 20, 0} +}; + +static const symbol s_11_0[2] = { 'i', 'd' }; +static const symbol s_11_1[3] = { 'a', 'i', 'd' }; +static const symbol s_11_2[4] = { 'j', 'a', 'i', 'd' }; +static const symbol s_11_3[3] = { 'e', 'i', 'd' }; +static const symbol s_11_4[4] = { 'j', 'e', 'i', 'd' }; +static const symbol s_11_5[4] = { 0xC3, 0xA1, 'i', 'd' }; +static const symbol s_11_6[4] = { 0xC3, 0xA9, 'i', 'd' }; +static const symbol s_11_7[1] = { 'i' }; +static const symbol s_11_8[2] = { 'a', 'i' }; +static const symbol s_11_9[3] = { 'j', 'a', 'i' }; +static const symbol s_11_10[2] = { 'e', 'i' }; +static const symbol s_11_11[3] = { 'j', 'e', 'i' }; +static const symbol s_11_12[3] = { 0xC3, 0xA1, 'i' }; +static const symbol s_11_13[3] = { 0xC3, 0xA9, 'i' }; +static const symbol s_11_14[4] = { 'i', 't', 'e', 'k' }; +static const symbol s_11_15[5] = { 'e', 'i', 't', 'e', 'k' }; +static const symbol s_11_16[6] = { 'j', 'e', 'i', 't', 'e', 'k' }; +static const symbol s_11_17[6] = { 0xC3, 0xA9, 'i', 't', 'e', 'k' }; +static const symbol s_11_18[2] = { 'i', 'k' }; +static const symbol s_11_19[3] = { 'a', 'i', 'k' }; +static const symbol s_11_20[4] = { 'j', 'a', 'i', 'k' }; +static const symbol s_11_21[3] = { 'e', 'i', 'k' }; +static const symbol s_11_22[4] = { 'j', 'e', 'i', 'k' }; +static const symbol s_11_23[4] = { 0xC3, 0xA1, 'i', 'k' }; +static const symbol s_11_24[4] = { 0xC3, 0xA9, 'i', 'k' }; +static const symbol s_11_25[3] = { 'i', 'n', 'k' }; +static const symbol s_11_26[4] = { 'a', 'i', 'n', 'k' }; +static const symbol s_11_27[5] = { 'j', 'a', 'i', 'n', 'k' }; +static const symbol s_11_28[4] = { 'e', 'i', 'n', 'k' }; +static const symbol s_11_29[5] = { 'j', 'e', 'i', 'n', 'k' }; +static const symbol s_11_30[5] = { 0xC3, 0xA1, 'i', 'n', 'k' }; +static const symbol s_11_31[5] = { 0xC3, 0xA9, 'i', 'n', 'k' }; +static const symbol s_11_32[5] = { 'a', 'i', 't', 'o', 'k' }; +static const symbol s_11_33[6] = { 'j', 'a', 'i', 't', 'o', 'k' }; +static const symbol s_11_34[6] = { 0xC3, 0xA1, 'i', 't', 'o', 'k' }; +static const symbol s_11_35[2] = { 'i', 'm' }; +static const symbol s_11_36[3] = { 'a', 'i', 'm' }; +static const symbol s_11_37[4] = { 'j', 'a', 'i', 'm' }; +static const symbol s_11_38[3] = { 'e', 'i', 'm' }; +static const symbol s_11_39[4] = { 'j', 'e', 'i', 'm' }; +static const symbol s_11_40[4] = { 0xC3, 0xA1, 'i', 'm' }; +static const symbol s_11_41[4] = { 0xC3, 0xA9, 'i', 'm' }; + +static const struct among a_11[42] = +{ +/* 0 */ { 2, s_11_0, -1, 10, 0}, +/* 1 */ { 3, s_11_1, 0, 9, 0}, +/* 2 */ { 4, s_11_2, 1, 6, 0}, +/* 3 */ { 3, s_11_3, 0, 9, 0}, +/* 4 */ { 4, s_11_4, 3, 6, 0}, +/* 5 */ { 4, s_11_5, 0, 7, 0}, +/* 6 */ { 4, s_11_6, 0, 8, 0}, +/* 7 */ { 1, s_11_7, -1, 15, 0}, +/* 8 */ { 2, s_11_8, 7, 14, 0}, +/* 9 */ { 3, s_11_9, 8, 11, 0}, +/* 10 */ { 2, s_11_10, 7, 14, 0}, +/* 11 */ { 3, s_11_11, 10, 11, 0}, +/* 12 */ { 3, s_11_12, 7, 12, 0}, +/* 13 */ { 3, s_11_13, 7, 13, 0}, +/* 14 */ { 4, s_11_14, -1, 24, 0}, +/* 15 */ { 5, s_11_15, 14, 21, 0}, +/* 16 */ { 6, s_11_16, 15, 20, 0}, +/* 17 */ { 6, s_11_17, 14, 23, 0}, +/* 18 */ { 2, s_11_18, -1, 29, 0}, +/* 19 */ { 3, s_11_19, 18, 26, 0}, +/* 20 */ { 4, s_11_20, 19, 25, 0}, +/* 21 */ { 3, s_11_21, 18, 26, 0}, +/* 22 */ { 4, s_11_22, 21, 25, 0}, +/* 23 */ { 4, s_11_23, 18, 27, 0}, +/* 24 */ { 4, s_11_24, 18, 28, 0}, +/* 25 */ { 3, s_11_25, -1, 20, 0}, +/* 26 */ { 4, s_11_26, 25, 17, 0}, +/* 27 */ { 5, s_11_27, 26, 16, 0}, +/* 28 */ { 4, s_11_28, 25, 17, 0}, +/* 29 */ { 5, s_11_29, 28, 16, 0}, +/* 30 */ { 5, s_11_30, 25, 18, 0}, +/* 31 */ { 5, s_11_31, 25, 19, 0}, +/* 32 */ { 5, s_11_32, -1, 21, 0}, +/* 33 */ { 6, s_11_33, 32, 20, 0}, +/* 34 */ { 6, s_11_34, -1, 22, 0}, +/* 35 */ { 2, s_11_35, -1, 5, 0}, +/* 36 */ { 3, s_11_36, 35, 4, 0}, +/* 37 */ { 4, s_11_37, 36, 1, 0}, +/* 38 */ { 3, s_11_38, 35, 4, 0}, +/* 39 */ { 4, s_11_39, 38, 1, 0}, +/* 40 */ { 4, s_11_40, 35, 2, 0}, +/* 41 */ { 4, s_11_41, 35, 3, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; + +static const symbol s_0[] = { 'a' }; +static const symbol s_1[] = { 'e' }; +static const symbol s_2[] = { 'e' }; +static const symbol s_3[] = { 'a' }; +static const symbol s_4[] = { 'a' }; +static const symbol s_5[] = { 'a' }; +static const symbol s_6[] = { 'e' }; +static const symbol s_7[] = { 'a' }; +static const symbol s_8[] = { 'e' }; +static const symbol s_9[] = { 'e' }; +static const symbol s_10[] = { 'a' }; +static const symbol s_11[] = { 'e' }; +static const symbol s_12[] = { 'a' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'a' }; +static const symbol s_15[] = { 'e' }; +static const symbol s_16[] = { 'a' }; +static const symbol s_17[] = { 'e' }; +static const symbol s_18[] = { 'a' }; +static const symbol s_19[] = { 'e' }; +static const symbol s_20[] = { 'a' }; +static const symbol s_21[] = { 'e' }; +static const symbol s_22[] = { 'a' }; +static const symbol s_23[] = { 'e' }; +static const symbol s_24[] = { 'a' }; +static const symbol s_25[] = { 'e' }; +static const symbol s_26[] = { 'a' }; +static const symbol s_27[] = { 'e' }; +static const symbol s_28[] = { 'a' }; +static const symbol s_29[] = { 'e' }; +static const symbol s_30[] = { 'a' }; +static const symbol s_31[] = { 'e' }; +static const symbol s_32[] = { 'a' }; +static const symbol s_33[] = { 'e' }; +static const symbol s_34[] = { 'a' }; +static const symbol s_35[] = { 'e' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c1 = z->c; /* or, line 51 */ + if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab1; + if (in_grouping_U(z, g_v, 97, 252, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */ + { int c2 = z->c; /* or, line 49 */ + if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 3 || !((101187584 >> (z->p[z->c + 1] & 0x1f)) & 1)) goto lab3; + if (!(find_among(z, a_0, 8))) goto lab3; /* among, line 49 */ + goto lab2; + lab3: + z->c = c2; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab1; + z->c = ret; /* next, line 49 */ + } + } + lab2: + z->I[0] = z->c; /* setmark p1, line 50 */ + goto lab0; + lab1: + z->c = c1; + if (out_grouping_U(z, g_v, 97, 252, 0)) return 0; + { /* gopast */ /* grouping v, line 53 */ + int ret = out_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 53 */ + } +lab0: + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_v_ending(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 61 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 161 && z->p[z->c - 1] != 169)) return 0; + among_var = find_among_b(z, a_1, 2); /* substring, line 61 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 61 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 61 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 1, s_0); /* <-, line 62 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 63 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_double(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 68 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((106790108 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_2, 23))) return 0; /* among, line 68 */ + z->c = z->l - m_test; + } + return 1; +} + +static int r_undouble(struct SN_env * z) { + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 73 */ + } + z->ket = z->c; /* [, line 73 */ + { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 1); + if (ret < 0) return 0; + z->c = ret; /* hop, line 73 */ + } + z->bra = z->c; /* ], line 73 */ + { int ret = slice_del(z); /* delete, line 73 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_instrum(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 77 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 108) return 0; + among_var = find_among_b(z, a_3, 2); /* substring, line 77 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 77 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 77 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 78 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 79 */ + if (ret < 0) return ret; + } + break; + } + { int ret = slice_del(z); /* delete, line 81 */ + if (ret < 0) return ret; + } + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 82 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_case(struct SN_env * z) { + z->ket = z->c; /* [, line 87 */ + if (!(find_among_b(z, a_4, 44))) return 0; /* substring, line 87 */ + z->bra = z->c; /* ], line 87 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 87 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 111 */ + if (ret < 0) return ret; + } + { int ret = r_v_ending(z); + if (ret == 0) return 0; /* call v_ending, line 112 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_case_special(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 116 */ + if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 110 && z->p[z->c - 1] != 116)) return 0; + among_var = find_among_b(z, a_5, 3); /* substring, line 116 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 116 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 116 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 117 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 118 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_case_other(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 124 */ + if (z->c - 3 <= z->lb || z->p[z->c - 1] != 108) return 0; + among_var = find_among_b(z, a_6, 6); /* substring, line 124 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 124 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 124 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 125 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 126 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_5); /* <-, line 127 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_6); /* <-, line 128 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_factive(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 133 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 161 && z->p[z->c - 1] != 169)) return 0; + among_var = find_among_b(z, a_7, 2); /* substring, line 133 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 133 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 133 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 134 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_double(z); + if (ret == 0) return 0; /* call double, line 135 */ + if (ret < 0) return ret; + } + break; + } + { int ret = slice_del(z); /* delete, line 137 */ + if (ret < 0) return ret; + } + { int ret = r_undouble(z); + if (ret == 0) return 0; /* call undouble, line 138 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_plural(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 142 */ + if (z->c <= z->lb || z->p[z->c - 1] != 107) return 0; + among_var = find_among_b(z, a_8, 7); /* substring, line 142 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 142 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 142 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 1, s_7); /* <-, line 143 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_8); /* <-, line 144 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 145 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 146 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_del(z); /* delete, line 147 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_del(z); /* delete, line 148 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_del(z); /* delete, line 149 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_owned(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 154 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 169)) return 0; + among_var = find_among_b(z, a_9, 12); /* substring, line 154 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 154 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 154 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 155 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_9); /* <-, line 156 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_10); /* <-, line 157 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 158 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 159 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 160 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_del(z); /* delete, line 161 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 1, s_13); /* <-, line 162 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_del(z); /* delete, line 163 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_sing_owner(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 168 */ + among_var = find_among_b(z, a_10, 31); /* substring, line 168 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 168 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 168 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 169 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_14); /* <-, line 170 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_15); /* <-, line 171 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 172 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_16); /* <-, line 173 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 1, s_17); /* <-, line 174 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_del(z); /* delete, line 175 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_del(z); /* delete, line 176 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_del(z); /* delete, line 177 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 1, s_18); /* <-, line 178 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 1, s_19); /* <-, line 179 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_del(z); /* delete, line 180 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = slice_del(z); /* delete, line 181 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_from_s(z, 1, s_20); /* <-, line 182 */ + if (ret < 0) return ret; + } + break; + case 15: + { int ret = slice_from_s(z, 1, s_21); /* <-, line 183 */ + if (ret < 0) return ret; + } + break; + case 16: + { int ret = slice_del(z); /* delete, line 184 */ + if (ret < 0) return ret; + } + break; + case 17: + { int ret = slice_del(z); /* delete, line 185 */ + if (ret < 0) return ret; + } + break; + case 18: + { int ret = slice_del(z); /* delete, line 186 */ + if (ret < 0) return ret; + } + break; + case 19: + { int ret = slice_from_s(z, 1, s_22); /* <-, line 187 */ + if (ret < 0) return ret; + } + break; + case 20: + { int ret = slice_from_s(z, 1, s_23); /* <-, line 188 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_plur_owner(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 193 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((10768 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_11, 42); /* substring, line 193 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 193 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 193 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 194 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_24); /* <-, line 195 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_25); /* <-, line 196 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_del(z); /* delete, line 197 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_del(z); /* delete, line 198 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_del(z); /* delete, line 199 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 1, s_26); /* <-, line 200 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 1, s_27); /* <-, line 201 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_del(z); /* delete, line 202 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_del(z); /* delete, line 203 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_del(z); /* delete, line 204 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_from_s(z, 1, s_28); /* <-, line 205 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = slice_from_s(z, 1, s_29); /* <-, line 206 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_del(z); /* delete, line 207 */ + if (ret < 0) return ret; + } + break; + case 15: + { int ret = slice_del(z); /* delete, line 208 */ + if (ret < 0) return ret; + } + break; + case 16: + { int ret = slice_del(z); /* delete, line 209 */ + if (ret < 0) return ret; + } + break; + case 17: + { int ret = slice_del(z); /* delete, line 210 */ + if (ret < 0) return ret; + } + break; + case 18: + { int ret = slice_from_s(z, 1, s_30); /* <-, line 211 */ + if (ret < 0) return ret; + } + break; + case 19: + { int ret = slice_from_s(z, 1, s_31); /* <-, line 212 */ + if (ret < 0) return ret; + } + break; + case 20: + { int ret = slice_del(z); /* delete, line 214 */ + if (ret < 0) return ret; + } + break; + case 21: + { int ret = slice_del(z); /* delete, line 215 */ + if (ret < 0) return ret; + } + break; + case 22: + { int ret = slice_from_s(z, 1, s_32); /* <-, line 216 */ + if (ret < 0) return ret; + } + break; + case 23: + { int ret = slice_from_s(z, 1, s_33); /* <-, line 217 */ + if (ret < 0) return ret; + } + break; + case 24: + { int ret = slice_del(z); /* delete, line 218 */ + if (ret < 0) return ret; + } + break; + case 25: + { int ret = slice_del(z); /* delete, line 219 */ + if (ret < 0) return ret; + } + break; + case 26: + { int ret = slice_del(z); /* delete, line 220 */ + if (ret < 0) return ret; + } + break; + case 27: + { int ret = slice_from_s(z, 1, s_34); /* <-, line 221 */ + if (ret < 0) return ret; + } + break; + case 28: + { int ret = slice_from_s(z, 1, s_35); /* <-, line 222 */ + if (ret < 0) return ret; + } + break; + case 29: + { int ret = slice_del(z); /* delete, line 223 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int hungarian_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 229 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 229 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 230 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 231 */ + { int ret = r_instrum(z); + if (ret == 0) goto lab1; /* call instrum, line 231 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 232 */ + { int ret = r_case(z); + if (ret == 0) goto lab2; /* call case, line 232 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 233 */ + { int ret = r_case_special(z); + if (ret == 0) goto lab3; /* call case_special, line 233 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 234 */ + { int ret = r_case_other(z); + if (ret == 0) goto lab4; /* call case_other, line 234 */ + if (ret < 0) return ret; + } + lab4: + z->c = z->l - m5; + } + { int m6 = z->l - z->c; (void)m6; /* do, line 235 */ + { int ret = r_factive(z); + if (ret == 0) goto lab5; /* call factive, line 235 */ + if (ret < 0) return ret; + } + lab5: + z->c = z->l - m6; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 236 */ + { int ret = r_owned(z); + if (ret == 0) goto lab6; /* call owned, line 236 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m7; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 237 */ + { int ret = r_sing_owner(z); + if (ret == 0) goto lab7; /* call sing_owner, line 237 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 238 */ + { int ret = r_plur_owner(z); + if (ret == 0) goto lab8; /* call plur_owner, line 238 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m9; + } + { int m10 = z->l - z->c; (void)m10; /* do, line 239 */ + { int ret = r_plural(z); + if (ret == 0) goto lab9; /* call plural, line 239 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m10; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * hungarian_UTF_8_create_env(void) { return SN_create_env(0, 1, 0); } + +extern void hungarian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_italian.c b/src/backend/snowball/libstemmer/stem_UTF_8_italian.c new file mode 100644 index 00000000000..395e38a548e --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_italian.c @@ -0,0 +1,1073 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int italian_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_vowel_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_attached_pronoun(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * italian_UTF_8_create_env(void); +extern void italian_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[2] = { 'q', 'u' }; +static const symbol s_0_2[2] = { 0xC3, 0xA1 }; +static const symbol s_0_3[2] = { 0xC3, 0xA9 }; +static const symbol s_0_4[2] = { 0xC3, 0xAD }; +static const symbol s_0_5[2] = { 0xC3, 0xB3 }; +static const symbol s_0_6[2] = { 0xC3, 0xBA }; + +static const struct among a_0[7] = +{ +/* 0 */ { 0, 0, -1, 7, 0}, +/* 1 */ { 2, s_0_1, 0, 6, 0}, +/* 2 */ { 2, s_0_2, 0, 1, 0}, +/* 3 */ { 2, s_0_3, 0, 2, 0}, +/* 4 */ { 2, s_0_4, 0, 3, 0}, +/* 5 */ { 2, s_0_5, 0, 4, 0}, +/* 6 */ { 2, s_0_6, 0, 5, 0} +}; + +static const symbol s_1_1[1] = { 'I' }; +static const symbol s_1_2[1] = { 'U' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 1, s_1_1, 0, 1, 0}, +/* 2 */ { 1, s_1_2, 0, 2, 0} +}; + +static const symbol s_2_0[2] = { 'l', 'a' }; +static const symbol s_2_1[4] = { 'c', 'e', 'l', 'a' }; +static const symbol s_2_2[6] = { 'g', 'l', 'i', 'e', 'l', 'a' }; +static const symbol s_2_3[4] = { 'm', 'e', 'l', 'a' }; +static const symbol s_2_4[4] = { 't', 'e', 'l', 'a' }; +static const symbol s_2_5[4] = { 'v', 'e', 'l', 'a' }; +static const symbol s_2_6[2] = { 'l', 'e' }; +static const symbol s_2_7[4] = { 'c', 'e', 'l', 'e' }; +static const symbol s_2_8[6] = { 'g', 'l', 'i', 'e', 'l', 'e' }; +static const symbol s_2_9[4] = { 'm', 'e', 'l', 'e' }; +static const symbol s_2_10[4] = { 't', 'e', 'l', 'e' }; +static const symbol s_2_11[4] = { 'v', 'e', 'l', 'e' }; +static const symbol s_2_12[2] = { 'n', 'e' }; +static const symbol s_2_13[4] = { 'c', 'e', 'n', 'e' }; +static const symbol s_2_14[6] = { 'g', 'l', 'i', 'e', 'n', 'e' }; +static const symbol s_2_15[4] = { 'm', 'e', 'n', 'e' }; +static const symbol s_2_16[4] = { 's', 'e', 'n', 'e' }; +static const symbol s_2_17[4] = { 't', 'e', 'n', 'e' }; +static const symbol s_2_18[4] = { 'v', 'e', 'n', 'e' }; +static const symbol s_2_19[2] = { 'c', 'i' }; +static const symbol s_2_20[2] = { 'l', 'i' }; +static const symbol s_2_21[4] = { 'c', 'e', 'l', 'i' }; +static const symbol s_2_22[6] = { 'g', 'l', 'i', 'e', 'l', 'i' }; +static const symbol s_2_23[4] = { 'm', 'e', 'l', 'i' }; +static const symbol s_2_24[4] = { 't', 'e', 'l', 'i' }; +static const symbol s_2_25[4] = { 'v', 'e', 'l', 'i' }; +static const symbol s_2_26[3] = { 'g', 'l', 'i' }; +static const symbol s_2_27[2] = { 'm', 'i' }; +static const symbol s_2_28[2] = { 's', 'i' }; +static const symbol s_2_29[2] = { 't', 'i' }; +static const symbol s_2_30[2] = { 'v', 'i' }; +static const symbol s_2_31[2] = { 'l', 'o' }; +static const symbol s_2_32[4] = { 'c', 'e', 'l', 'o' }; +static const symbol s_2_33[6] = { 'g', 'l', 'i', 'e', 'l', 'o' }; +static const symbol s_2_34[4] = { 'm', 'e', 'l', 'o' }; +static const symbol s_2_35[4] = { 't', 'e', 'l', 'o' }; +static const symbol s_2_36[4] = { 'v', 'e', 'l', 'o' }; + +static const struct among a_2[37] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 4, s_2_1, 0, -1, 0}, +/* 2 */ { 6, s_2_2, 0, -1, 0}, +/* 3 */ { 4, s_2_3, 0, -1, 0}, +/* 4 */ { 4, s_2_4, 0, -1, 0}, +/* 5 */ { 4, s_2_5, 0, -1, 0}, +/* 6 */ { 2, s_2_6, -1, -1, 0}, +/* 7 */ { 4, s_2_7, 6, -1, 0}, +/* 8 */ { 6, s_2_8, 6, -1, 0}, +/* 9 */ { 4, s_2_9, 6, -1, 0}, +/* 10 */ { 4, s_2_10, 6, -1, 0}, +/* 11 */ { 4, s_2_11, 6, -1, 0}, +/* 12 */ { 2, s_2_12, -1, -1, 0}, +/* 13 */ { 4, s_2_13, 12, -1, 0}, +/* 14 */ { 6, s_2_14, 12, -1, 0}, +/* 15 */ { 4, s_2_15, 12, -1, 0}, +/* 16 */ { 4, s_2_16, 12, -1, 0}, +/* 17 */ { 4, s_2_17, 12, -1, 0}, +/* 18 */ { 4, s_2_18, 12, -1, 0}, +/* 19 */ { 2, s_2_19, -1, -1, 0}, +/* 20 */ { 2, s_2_20, -1, -1, 0}, +/* 21 */ { 4, s_2_21, 20, -1, 0}, +/* 22 */ { 6, s_2_22, 20, -1, 0}, +/* 23 */ { 4, s_2_23, 20, -1, 0}, +/* 24 */ { 4, s_2_24, 20, -1, 0}, +/* 25 */ { 4, s_2_25, 20, -1, 0}, +/* 26 */ { 3, s_2_26, 20, -1, 0}, +/* 27 */ { 2, s_2_27, -1, -1, 0}, +/* 28 */ { 2, s_2_28, -1, -1, 0}, +/* 29 */ { 2, s_2_29, -1, -1, 0}, +/* 30 */ { 2, s_2_30, -1, -1, 0}, +/* 31 */ { 2, s_2_31, -1, -1, 0}, +/* 32 */ { 4, s_2_32, 31, -1, 0}, +/* 33 */ { 6, s_2_33, 31, -1, 0}, +/* 34 */ { 4, s_2_34, 31, -1, 0}, +/* 35 */ { 4, s_2_35, 31, -1, 0}, +/* 36 */ { 4, s_2_36, 31, -1, 0} +}; + +static const symbol s_3_0[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_3_1[4] = { 'e', 'n', 'd', 'o' }; +static const symbol s_3_2[2] = { 'a', 'r' }; +static const symbol s_3_3[2] = { 'e', 'r' }; +static const symbol s_3_4[2] = { 'i', 'r' }; + +static const struct among a_3[5] = +{ +/* 0 */ { 4, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0}, +/* 2 */ { 2, s_3_2, -1, 2, 0}, +/* 3 */ { 2, s_3_3, -1, 2, 0}, +/* 4 */ { 2, s_3_4, -1, 2, 0} +}; + +static const symbol s_4_0[2] = { 'i', 'c' }; +static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_4_2[2] = { 'o', 's' }; +static const symbol s_4_3[2] = { 'i', 'v' }; + +static const struct among a_4[4] = +{ +/* 0 */ { 2, s_4_0, -1, -1, 0}, +/* 1 */ { 4, s_4_1, -1, -1, 0}, +/* 2 */ { 2, s_4_2, -1, -1, 0}, +/* 3 */ { 2, s_4_3, -1, 1, 0} +}; + +static const symbol s_5_0[2] = { 'i', 'c' }; +static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_5_2[2] = { 'i', 'v' }; + +static const struct among a_5[3] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 2, s_5_2, -1, 1, 0} +}; + +static const symbol s_6_0[3] = { 'i', 'c', 'a' }; +static const symbol s_6_1[5] = { 'l', 'o', 'g', 'i', 'a' }; +static const symbol s_6_2[3] = { 'o', 's', 'a' }; +static const symbol s_6_3[4] = { 'i', 's', 't', 'a' }; +static const symbol s_6_4[3] = { 'i', 'v', 'a' }; +static const symbol s_6_5[4] = { 'a', 'n', 'z', 'a' }; +static const symbol s_6_6[4] = { 'e', 'n', 'z', 'a' }; +static const symbol s_6_7[3] = { 'i', 'c', 'e' }; +static const symbol s_6_8[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; +static const symbol s_6_9[4] = { 'i', 'c', 'h', 'e' }; +static const symbol s_6_10[5] = { 'l', 'o', 'g', 'i', 'e' }; +static const symbol s_6_11[5] = { 'a', 'b', 'i', 'l', 'e' }; +static const symbol s_6_12[5] = { 'i', 'b', 'i', 'l', 'e' }; +static const symbol s_6_13[6] = { 'u', 's', 'i', 'o', 'n', 'e' }; +static const symbol s_6_14[6] = { 'a', 'z', 'i', 'o', 'n', 'e' }; +static const symbol s_6_15[6] = { 'u', 'z', 'i', 'o', 'n', 'e' }; +static const symbol s_6_16[5] = { 'a', 't', 'o', 'r', 'e' }; +static const symbol s_6_17[3] = { 'o', 's', 'e' }; +static const symbol s_6_18[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_6_19[5] = { 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_20[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_21[4] = { 'i', 's', 't', 'e' }; +static const symbol s_6_22[3] = { 'i', 'v', 'e' }; +static const symbol s_6_23[4] = { 'a', 'n', 'z', 'e' }; +static const symbol s_6_24[4] = { 'e', 'n', 'z', 'e' }; +static const symbol s_6_25[3] = { 'i', 'c', 'i' }; +static const symbol s_6_26[6] = { 'a', 't', 'r', 'i', 'c', 'i' }; +static const symbol s_6_27[4] = { 'i', 'c', 'h', 'i' }; +static const symbol s_6_28[5] = { 'a', 'b', 'i', 'l', 'i' }; +static const symbol s_6_29[5] = { 'i', 'b', 'i', 'l', 'i' }; +static const symbol s_6_30[4] = { 'i', 's', 'm', 'i' }; +static const symbol s_6_31[6] = { 'u', 's', 'i', 'o', 'n', 'i' }; +static const symbol s_6_32[6] = { 'a', 'z', 'i', 'o', 'n', 'i' }; +static const symbol s_6_33[6] = { 'u', 'z', 'i', 'o', 'n', 'i' }; +static const symbol s_6_34[5] = { 'a', 't', 'o', 'r', 'i' }; +static const symbol s_6_35[3] = { 'o', 's', 'i' }; +static const symbol s_6_36[4] = { 'a', 'n', 't', 'i' }; +static const symbol s_6_37[6] = { 'a', 'm', 'e', 'n', 't', 'i' }; +static const symbol s_6_38[6] = { 'i', 'm', 'e', 'n', 't', 'i' }; +static const symbol s_6_39[4] = { 'i', 's', 't', 'i' }; +static const symbol s_6_40[3] = { 'i', 'v', 'i' }; +static const symbol s_6_41[3] = { 'i', 'c', 'o' }; +static const symbol s_6_42[4] = { 'i', 's', 'm', 'o' }; +static const symbol s_6_43[3] = { 'o', 's', 'o' }; +static const symbol s_6_44[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_6_45[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_6_46[3] = { 'i', 'v', 'o' }; +static const symbol s_6_47[4] = { 'i', 't', 0xC3, 0xA0 }; +static const symbol s_6_48[5] = { 'i', 's', 't', 0xC3, 0xA0 }; +static const symbol s_6_49[5] = { 'i', 's', 't', 0xC3, 0xA8 }; +static const symbol s_6_50[5] = { 'i', 's', 't', 0xC3, 0xAC }; + +static const struct among a_6[51] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 5, s_6_1, -1, 3, 0}, +/* 2 */ { 3, s_6_2, -1, 1, 0}, +/* 3 */ { 4, s_6_3, -1, 1, 0}, +/* 4 */ { 3, s_6_4, -1, 9, 0}, +/* 5 */ { 4, s_6_5, -1, 1, 0}, +/* 6 */ { 4, s_6_6, -1, 5, 0}, +/* 7 */ { 3, s_6_7, -1, 1, 0}, +/* 8 */ { 6, s_6_8, 7, 1, 0}, +/* 9 */ { 4, s_6_9, -1, 1, 0}, +/* 10 */ { 5, s_6_10, -1, 3, 0}, +/* 11 */ { 5, s_6_11, -1, 1, 0}, +/* 12 */ { 5, s_6_12, -1, 1, 0}, +/* 13 */ { 6, s_6_13, -1, 4, 0}, +/* 14 */ { 6, s_6_14, -1, 2, 0}, +/* 15 */ { 6, s_6_15, -1, 4, 0}, +/* 16 */ { 5, s_6_16, -1, 2, 0}, +/* 17 */ { 3, s_6_17, -1, 1, 0}, +/* 18 */ { 4, s_6_18, -1, 1, 0}, +/* 19 */ { 5, s_6_19, -1, 1, 0}, +/* 20 */ { 6, s_6_20, 19, 7, 0}, +/* 21 */ { 4, s_6_21, -1, 1, 0}, +/* 22 */ { 3, s_6_22, -1, 9, 0}, +/* 23 */ { 4, s_6_23, -1, 1, 0}, +/* 24 */ { 4, s_6_24, -1, 5, 0}, +/* 25 */ { 3, s_6_25, -1, 1, 0}, +/* 26 */ { 6, s_6_26, 25, 1, 0}, +/* 27 */ { 4, s_6_27, -1, 1, 0}, +/* 28 */ { 5, s_6_28, -1, 1, 0}, +/* 29 */ { 5, s_6_29, -1, 1, 0}, +/* 30 */ { 4, s_6_30, -1, 1, 0}, +/* 31 */ { 6, s_6_31, -1, 4, 0}, +/* 32 */ { 6, s_6_32, -1, 2, 0}, +/* 33 */ { 6, s_6_33, -1, 4, 0}, +/* 34 */ { 5, s_6_34, -1, 2, 0}, +/* 35 */ { 3, s_6_35, -1, 1, 0}, +/* 36 */ { 4, s_6_36, -1, 1, 0}, +/* 37 */ { 6, s_6_37, -1, 6, 0}, +/* 38 */ { 6, s_6_38, -1, 6, 0}, +/* 39 */ { 4, s_6_39, -1, 1, 0}, +/* 40 */ { 3, s_6_40, -1, 9, 0}, +/* 41 */ { 3, s_6_41, -1, 1, 0}, +/* 42 */ { 4, s_6_42, -1, 1, 0}, +/* 43 */ { 3, s_6_43, -1, 1, 0}, +/* 44 */ { 6, s_6_44, -1, 6, 0}, +/* 45 */ { 6, s_6_45, -1, 6, 0}, +/* 46 */ { 3, s_6_46, -1, 9, 0}, +/* 47 */ { 4, s_6_47, -1, 8, 0}, +/* 48 */ { 5, s_6_48, -1, 1, 0}, +/* 49 */ { 5, s_6_49, -1, 1, 0}, +/* 50 */ { 5, s_6_50, -1, 1, 0} +}; + +static const symbol s_7_0[4] = { 'i', 's', 'c', 'a' }; +static const symbol s_7_1[4] = { 'e', 'n', 'd', 'a' }; +static const symbol s_7_2[3] = { 'a', 't', 'a' }; +static const symbol s_7_3[3] = { 'i', 't', 'a' }; +static const symbol s_7_4[3] = { 'u', 't', 'a' }; +static const symbol s_7_5[3] = { 'a', 'v', 'a' }; +static const symbol s_7_6[3] = { 'e', 'v', 'a' }; +static const symbol s_7_7[3] = { 'i', 'v', 'a' }; +static const symbol s_7_8[6] = { 'e', 'r', 'e', 'b', 'b', 'e' }; +static const symbol s_7_9[6] = { 'i', 'r', 'e', 'b', 'b', 'e' }; +static const symbol s_7_10[4] = { 'i', 's', 'c', 'e' }; +static const symbol s_7_11[4] = { 'e', 'n', 'd', 'e' }; +static const symbol s_7_12[3] = { 'a', 'r', 'e' }; +static const symbol s_7_13[3] = { 'e', 'r', 'e' }; +static const symbol s_7_14[3] = { 'i', 'r', 'e' }; +static const symbol s_7_15[4] = { 'a', 's', 's', 'e' }; +static const symbol s_7_16[3] = { 'a', 't', 'e' }; +static const symbol s_7_17[5] = { 'a', 'v', 'a', 't', 'e' }; +static const symbol s_7_18[5] = { 'e', 'v', 'a', 't', 'e' }; +static const symbol s_7_19[5] = { 'i', 'v', 'a', 't', 'e' }; +static const symbol s_7_20[3] = { 'e', 't', 'e' }; +static const symbol s_7_21[5] = { 'e', 'r', 'e', 't', 'e' }; +static const symbol s_7_22[5] = { 'i', 'r', 'e', 't', 'e' }; +static const symbol s_7_23[3] = { 'i', 't', 'e' }; +static const symbol s_7_24[6] = { 'e', 'r', 'e', 's', 't', 'e' }; +static const symbol s_7_25[6] = { 'i', 'r', 'e', 's', 't', 'e' }; +static const symbol s_7_26[3] = { 'u', 't', 'e' }; +static const symbol s_7_27[4] = { 'e', 'r', 'a', 'i' }; +static const symbol s_7_28[4] = { 'i', 'r', 'a', 'i' }; +static const symbol s_7_29[4] = { 'i', 's', 'c', 'i' }; +static const symbol s_7_30[4] = { 'e', 'n', 'd', 'i' }; +static const symbol s_7_31[4] = { 'e', 'r', 'e', 'i' }; +static const symbol s_7_32[4] = { 'i', 'r', 'e', 'i' }; +static const symbol s_7_33[4] = { 'a', 's', 's', 'i' }; +static const symbol s_7_34[3] = { 'a', 't', 'i' }; +static const symbol s_7_35[3] = { 'i', 't', 'i' }; +static const symbol s_7_36[6] = { 'e', 'r', 'e', 's', 't', 'i' }; +static const symbol s_7_37[6] = { 'i', 'r', 'e', 's', 't', 'i' }; +static const symbol s_7_38[3] = { 'u', 't', 'i' }; +static const symbol s_7_39[3] = { 'a', 'v', 'i' }; +static const symbol s_7_40[3] = { 'e', 'v', 'i' }; +static const symbol s_7_41[3] = { 'i', 'v', 'i' }; +static const symbol s_7_42[4] = { 'i', 's', 'c', 'o' }; +static const symbol s_7_43[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_7_44[4] = { 'e', 'n', 'd', 'o' }; +static const symbol s_7_45[4] = { 'Y', 'a', 'm', 'o' }; +static const symbol s_7_46[4] = { 'i', 'a', 'm', 'o' }; +static const symbol s_7_47[5] = { 'a', 'v', 'a', 'm', 'o' }; +static const symbol s_7_48[5] = { 'e', 'v', 'a', 'm', 'o' }; +static const symbol s_7_49[5] = { 'i', 'v', 'a', 'm', 'o' }; +static const symbol s_7_50[5] = { 'e', 'r', 'e', 'm', 'o' }; +static const symbol s_7_51[5] = { 'i', 'r', 'e', 'm', 'o' }; +static const symbol s_7_52[6] = { 'a', 's', 's', 'i', 'm', 'o' }; +static const symbol s_7_53[4] = { 'a', 'm', 'm', 'o' }; +static const symbol s_7_54[4] = { 'e', 'm', 'm', 'o' }; +static const symbol s_7_55[6] = { 'e', 'r', 'e', 'm', 'm', 'o' }; +static const symbol s_7_56[6] = { 'i', 'r', 'e', 'm', 'm', 'o' }; +static const symbol s_7_57[4] = { 'i', 'm', 'm', 'o' }; +static const symbol s_7_58[3] = { 'a', 'n', 'o' }; +static const symbol s_7_59[6] = { 'i', 's', 'c', 'a', 'n', 'o' }; +static const symbol s_7_60[5] = { 'a', 'v', 'a', 'n', 'o' }; +static const symbol s_7_61[5] = { 'e', 'v', 'a', 'n', 'o' }; +static const symbol s_7_62[5] = { 'i', 'v', 'a', 'n', 'o' }; +static const symbol s_7_63[6] = { 'e', 'r', 'a', 'n', 'n', 'o' }; +static const symbol s_7_64[6] = { 'i', 'r', 'a', 'n', 'n', 'o' }; +static const symbol s_7_65[3] = { 'o', 'n', 'o' }; +static const symbol s_7_66[6] = { 'i', 's', 'c', 'o', 'n', 'o' }; +static const symbol s_7_67[5] = { 'a', 'r', 'o', 'n', 'o' }; +static const symbol s_7_68[5] = { 'e', 'r', 'o', 'n', 'o' }; +static const symbol s_7_69[5] = { 'i', 'r', 'o', 'n', 'o' }; +static const symbol s_7_70[8] = { 'e', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; +static const symbol s_7_71[8] = { 'i', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; +static const symbol s_7_72[6] = { 'a', 's', 's', 'e', 'r', 'o' }; +static const symbol s_7_73[6] = { 'e', 's', 's', 'e', 'r', 'o' }; +static const symbol s_7_74[6] = { 'i', 's', 's', 'e', 'r', 'o' }; +static const symbol s_7_75[3] = { 'a', 't', 'o' }; +static const symbol s_7_76[3] = { 'i', 't', 'o' }; +static const symbol s_7_77[3] = { 'u', 't', 'o' }; +static const symbol s_7_78[3] = { 'a', 'v', 'o' }; +static const symbol s_7_79[3] = { 'e', 'v', 'o' }; +static const symbol s_7_80[3] = { 'i', 'v', 'o' }; +static const symbol s_7_81[2] = { 'a', 'r' }; +static const symbol s_7_82[2] = { 'i', 'r' }; +static const symbol s_7_83[4] = { 'e', 'r', 0xC3, 0xA0 }; +static const symbol s_7_84[4] = { 'i', 'r', 0xC3, 0xA0 }; +static const symbol s_7_85[4] = { 'e', 'r', 0xC3, 0xB2 }; +static const symbol s_7_86[4] = { 'i', 'r', 0xC3, 0xB2 }; + +static const struct among a_7[87] = +{ +/* 0 */ { 4, s_7_0, -1, 1, 0}, +/* 1 */ { 4, s_7_1, -1, 1, 0}, +/* 2 */ { 3, s_7_2, -1, 1, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 3, s_7_4, -1, 1, 0}, +/* 5 */ { 3, s_7_5, -1, 1, 0}, +/* 6 */ { 3, s_7_6, -1, 1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 6, s_7_8, -1, 1, 0}, +/* 9 */ { 6, s_7_9, -1, 1, 0}, +/* 10 */ { 4, s_7_10, -1, 1, 0}, +/* 11 */ { 4, s_7_11, -1, 1, 0}, +/* 12 */ { 3, s_7_12, -1, 1, 0}, +/* 13 */ { 3, s_7_13, -1, 1, 0}, +/* 14 */ { 3, s_7_14, -1, 1, 0}, +/* 15 */ { 4, s_7_15, -1, 1, 0}, +/* 16 */ { 3, s_7_16, -1, 1, 0}, +/* 17 */ { 5, s_7_17, 16, 1, 0}, +/* 18 */ { 5, s_7_18, 16, 1, 0}, +/* 19 */ { 5, s_7_19, 16, 1, 0}, +/* 20 */ { 3, s_7_20, -1, 1, 0}, +/* 21 */ { 5, s_7_21, 20, 1, 0}, +/* 22 */ { 5, s_7_22, 20, 1, 0}, +/* 23 */ { 3, s_7_23, -1, 1, 0}, +/* 24 */ { 6, s_7_24, -1, 1, 0}, +/* 25 */ { 6, s_7_25, -1, 1, 0}, +/* 26 */ { 3, s_7_26, -1, 1, 0}, +/* 27 */ { 4, s_7_27, -1, 1, 0}, +/* 28 */ { 4, s_7_28, -1, 1, 0}, +/* 29 */ { 4, s_7_29, -1, 1, 0}, +/* 30 */ { 4, s_7_30, -1, 1, 0}, +/* 31 */ { 4, s_7_31, -1, 1, 0}, +/* 32 */ { 4, s_7_32, -1, 1, 0}, +/* 33 */ { 4, s_7_33, -1, 1, 0}, +/* 34 */ { 3, s_7_34, -1, 1, 0}, +/* 35 */ { 3, s_7_35, -1, 1, 0}, +/* 36 */ { 6, s_7_36, -1, 1, 0}, +/* 37 */ { 6, s_7_37, -1, 1, 0}, +/* 38 */ { 3, s_7_38, -1, 1, 0}, +/* 39 */ { 3, s_7_39, -1, 1, 0}, +/* 40 */ { 3, s_7_40, -1, 1, 0}, +/* 41 */ { 3, s_7_41, -1, 1, 0}, +/* 42 */ { 4, s_7_42, -1, 1, 0}, +/* 43 */ { 4, s_7_43, -1, 1, 0}, +/* 44 */ { 4, s_7_44, -1, 1, 0}, +/* 45 */ { 4, s_7_45, -1, 1, 0}, +/* 46 */ { 4, s_7_46, -1, 1, 0}, +/* 47 */ { 5, s_7_47, -1, 1, 0}, +/* 48 */ { 5, s_7_48, -1, 1, 0}, +/* 49 */ { 5, s_7_49, -1, 1, 0}, +/* 50 */ { 5, s_7_50, -1, 1, 0}, +/* 51 */ { 5, s_7_51, -1, 1, 0}, +/* 52 */ { 6, s_7_52, -1, 1, 0}, +/* 53 */ { 4, s_7_53, -1, 1, 0}, +/* 54 */ { 4, s_7_54, -1, 1, 0}, +/* 55 */ { 6, s_7_55, 54, 1, 0}, +/* 56 */ { 6, s_7_56, 54, 1, 0}, +/* 57 */ { 4, s_7_57, -1, 1, 0}, +/* 58 */ { 3, s_7_58, -1, 1, 0}, +/* 59 */ { 6, s_7_59, 58, 1, 0}, +/* 60 */ { 5, s_7_60, 58, 1, 0}, +/* 61 */ { 5, s_7_61, 58, 1, 0}, +/* 62 */ { 5, s_7_62, 58, 1, 0}, +/* 63 */ { 6, s_7_63, -1, 1, 0}, +/* 64 */ { 6, s_7_64, -1, 1, 0}, +/* 65 */ { 3, s_7_65, -1, 1, 0}, +/* 66 */ { 6, s_7_66, 65, 1, 0}, +/* 67 */ { 5, s_7_67, 65, 1, 0}, +/* 68 */ { 5, s_7_68, 65, 1, 0}, +/* 69 */ { 5, s_7_69, 65, 1, 0}, +/* 70 */ { 8, s_7_70, -1, 1, 0}, +/* 71 */ { 8, s_7_71, -1, 1, 0}, +/* 72 */ { 6, s_7_72, -1, 1, 0}, +/* 73 */ { 6, s_7_73, -1, 1, 0}, +/* 74 */ { 6, s_7_74, -1, 1, 0}, +/* 75 */ { 3, s_7_75, -1, 1, 0}, +/* 76 */ { 3, s_7_76, -1, 1, 0}, +/* 77 */ { 3, s_7_77, -1, 1, 0}, +/* 78 */ { 3, s_7_78, -1, 1, 0}, +/* 79 */ { 3, s_7_79, -1, 1, 0}, +/* 80 */ { 3, s_7_80, -1, 1, 0}, +/* 81 */ { 2, s_7_81, -1, 1, 0}, +/* 82 */ { 2, s_7_82, -1, 1, 0}, +/* 83 */ { 4, s_7_83, -1, 1, 0}, +/* 84 */ { 4, s_7_84, -1, 1, 0}, +/* 85 */ { 4, s_7_85, -1, 1, 0}, +/* 86 */ { 4, s_7_86, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; + +static const unsigned char g_AEIO[] = { 17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; + +static const unsigned char g_CG[] = { 17 }; + +static const symbol s_0[] = { 0xC3, 0xA0 }; +static const symbol s_1[] = { 0xC3, 0xA8 }; +static const symbol s_2[] = { 0xC3, 0xAC }; +static const symbol s_3[] = { 0xC3, 0xB2 }; +static const symbol s_4[] = { 0xC3, 0xB9 }; +static const symbol s_5[] = { 'q', 'U' }; +static const symbol s_6[] = { 'u' }; +static const symbol s_7[] = { 'U' }; +static const symbol s_8[] = { 'i' }; +static const symbol s_9[] = { 'I' }; +static const symbol s_10[] = { 'i' }; +static const symbol s_11[] = { 'u' }; +static const symbol s_12[] = { 'e' }; +static const symbol s_13[] = { 'i', 'c' }; +static const symbol s_14[] = { 'l', 'o', 'g' }; +static const symbol s_15[] = { 'u' }; +static const symbol s_16[] = { 'e', 'n', 't', 'e' }; +static const symbol s_17[] = { 'a', 't' }; +static const symbol s_18[] = { 'a', 't' }; +static const symbol s_19[] = { 'i', 'c' }; +static const symbol s_20[] = { 'i' }; +static const symbol s_21[] = { 'h' }; + +static int r_prelude(struct SN_env * z) { + int among_var; + { int c_test = z->c; /* test, line 35 */ + while(1) { /* repeat, line 35 */ + int c1 = z->c; + z->bra = z->c; /* [, line 36 */ + among_var = find_among(z, a_0, 7); /* substring, line 36 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 36 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 2, s_0); /* <-, line 37 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 2, s_1); /* <-, line 38 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_2); /* <-, line 39 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 2, s_3); /* <-, line 40 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 2, s_4); /* <-, line 41 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 2, s_5); /* <-, line 42 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 43 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + z->c = c_test; + } + while(1) { /* repeat, line 46 */ + int c2 = z->c; + while(1) { /* goto, line 46 */ + int c3 = z->c; + if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; + z->bra = z->c; /* [, line 47 */ + { int c4 = z->c; /* or, line 47 */ + if (!(eq_s(z, 1, s_6))) goto lab4; + z->ket = z->c; /* ], line 47 */ + if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab4; + { int ret = slice_from_s(z, 1, s_7); /* <-, line 47 */ + if (ret < 0) return ret; + } + goto lab3; + lab4: + z->c = c4; + if (!(eq_s(z, 1, s_8))) goto lab2; + z->ket = z->c; /* ], line 48 */ + if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; + { int ret = slice_from_s(z, 1, s_9); /* <-, line 48 */ + if (ret < 0) return ret; + } + } + lab3: + z->c = c3; + break; + lab2: + z->c = c3; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab1; + z->c = ret; /* goto, line 46 */ + } + } + continue; + lab1: + z->c = c2; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 58 */ + { int c2 = z->c; /* or, line 60 */ + if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; + { int c3 = z->c; /* or, line 59 */ + if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab4; + { /* gopast */ /* grouping v, line 59 */ + int ret = out_grouping_U(z, g_v, 97, 249, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; + { /* gopast */ /* non v, line 59 */ + int ret = in_grouping_U(z, g_v, 97, 249, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab0; + { int c4 = z->c; /* or, line 61 */ + if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab6; + { /* gopast */ /* grouping v, line 61 */ + int ret = out_grouping_U(z, g_v, 97, 249, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab0; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 61 */ + } + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 62 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 64 */ + { /* gopast */ /* grouping v, line 65 */ + int ret = out_grouping_U(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 65 */ + int ret = in_grouping_U(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 65 */ + { /* gopast */ /* grouping v, line 66 */ + int ret = out_grouping_U(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 66 */ + int ret = in_grouping_U(z, g_v, 97, 249, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 66 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 70 */ + int c1 = z->c; + z->bra = z->c; /* [, line 72 */ + if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else + among_var = find_among(z, a_1, 3); /* substring, line 72 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 72 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_10); /* <-, line 73 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_11); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 75 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_attached_pronoun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 87 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33314 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_2, 37))) return 0; /* substring, line 87 */ + z->bra = z->c; /* ], line 87 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; + among_var = find_among_b(z, a_3, 5); /* among, line 97 */ + if (!(among_var)) return 0; + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 97 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 98 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_12); /* <-, line 99 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 104 */ + among_var = find_among_b(z, a_6, 51); /* substring, line 104 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 104 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 111 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 111 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 113 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 113 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ + z->ket = z->c; /* [, line 114 */ + if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 114 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 114 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 114 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 117 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_14); /* <-, line 117 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 119 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_15); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 121 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_16); /* <-, line 121 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 123 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 123 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 125 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 125 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 126 */ + z->ket = z->c; /* [, line 127 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4722696 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } + among_var = find_among_b(z, a_4, 4); /* substring, line 127 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 127 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 127 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab1; } + case 1: + z->ket = z->c; /* [, line 128 */ + if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 128 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 128 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 128 */ + if (ret < 0) return ret; + } + break; + } + lab1: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 134 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ + z->ket = z->c; /* [, line 136 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } + among_var = find_among_b(z, a_5, 3); /* substring, line 136 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } + z->bra = z->c; /* ], line 136 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab2; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 137 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 137 */ + if (ret < 0) return ret; + } + break; + } + lab2: + ; + } + break; + case 9: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 142 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 142 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 143 */ + z->ket = z->c; /* [, line 143 */ + if (!(eq_s_b(z, 2, s_18))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 143 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 143 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 143 */ + if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 143 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 143 */ + if (ret < 0) return ret; + } + lab3: + ; + } + break; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 148 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 148 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 149 */ + among_var = find_among_b(z, a_7, 87); /* substring, line 149 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 149 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = slice_del(z); /* delete, line 163 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_vowel_suffix(struct SN_env * z) { + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 171 */ + z->ket = z->c; /* [, line 172 */ + if (in_grouping_b_U(z, g_AEIO, 97, 242, 0)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 172 */ + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 172 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 172 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 173 */ + if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 173 */ + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 173 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 173 */ + if (ret < 0) return ret; + } + lab0: + ; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 175 */ + z->ket = z->c; /* [, line 176 */ + if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 176 */ + if (in_grouping_b_U(z, g_CG, 99, 103, 0)) { z->c = z->l - m_keep; goto lab1; } + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call RV, line 176 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 176 */ + if (ret < 0) return ret; + } + lab1: + ; + } + return 1; +} + +extern int italian_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 182 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 182 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 183 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 183 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 184 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 185 */ + { int ret = r_attached_pronoun(z); + if (ret == 0) goto lab2; /* call attached_pronoun, line 185 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 186 */ + { int m5 = z->l - z->c; (void)m5; /* or, line 186 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab5; /* call standard_suffix, line 186 */ + if (ret < 0) return ret; + } + goto lab4; + lab5: + z->c = z->l - m5; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab3; /* call verb_suffix, line 186 */ + if (ret < 0) return ret; + } + } + lab4: + lab3: + z->c = z->l - m4; + } + { int m6 = z->l - z->c; (void)m6; /* do, line 187 */ + { int ret = r_vowel_suffix(z); + if (ret == 0) goto lab6; /* call vowel_suffix, line 187 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m6; + } + z->c = z->lb; + { int c7 = z->c; /* do, line 189 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab7; /* call postlude, line 189 */ + if (ret < 0) return ret; + } + lab7: + z->c = c7; + } + return 1; +} + +extern struct SN_env * italian_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void italian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_norwegian.c b/src/backend/snowball/libstemmer/stem_UTF_8_norwegian.c new file mode 100644 index 00000000000..cbb0cd4601c --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_norwegian.c @@ -0,0 +1,299 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int norwegian_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_other_suffix(struct SN_env * z); +static int r_consonant_pair(struct SN_env * z); +static int r_main_suffix(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * norwegian_UTF_8_create_env(void); +extern void norwegian_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[1] = { 'a' }; +static const symbol s_0_1[1] = { 'e' }; +static const symbol s_0_2[3] = { 'e', 'd', 'e' }; +static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; +static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; +static const symbol s_0_5[3] = { 'a', 'n', 'e' }; +static const symbol s_0_6[3] = { 'e', 'n', 'e' }; +static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; +static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; +static const symbol s_0_9[2] = { 'e', 'n' }; +static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; +static const symbol s_0_11[2] = { 'a', 'r' }; +static const symbol s_0_12[2] = { 'e', 'r' }; +static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; +static const symbol s_0_14[1] = { 's' }; +static const symbol s_0_15[2] = { 'a', 's' }; +static const symbol s_0_16[2] = { 'e', 's' }; +static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; +static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; +static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; +static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; +static const symbol s_0_21[3] = { 'e', 'n', 's' }; +static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; +static const symbol s_0_23[3] = { 'e', 'r', 's' }; +static const symbol s_0_24[3] = { 'e', 't', 's' }; +static const symbol s_0_25[2] = { 'e', 't' }; +static const symbol s_0_26[3] = { 'h', 'e', 't' }; +static const symbol s_0_27[3] = { 'e', 'r', 't' }; +static const symbol s_0_28[3] = { 'a', 's', 't' }; + +static const struct among a_0[29] = +{ +/* 0 */ { 1, s_0_0, -1, 1, 0}, +/* 1 */ { 1, s_0_1, -1, 1, 0}, +/* 2 */ { 3, s_0_2, 1, 1, 0}, +/* 3 */ { 4, s_0_3, 1, 1, 0}, +/* 4 */ { 4, s_0_4, 1, 1, 0}, +/* 5 */ { 3, s_0_5, 1, 1, 0}, +/* 6 */ { 3, s_0_6, 1, 1, 0}, +/* 7 */ { 6, s_0_7, 6, 1, 0}, +/* 8 */ { 4, s_0_8, 1, 3, 0}, +/* 9 */ { 2, s_0_9, -1, 1, 0}, +/* 10 */ { 5, s_0_10, 9, 1, 0}, +/* 11 */ { 2, s_0_11, -1, 1, 0}, +/* 12 */ { 2, s_0_12, -1, 1, 0}, +/* 13 */ { 5, s_0_13, 12, 1, 0}, +/* 14 */ { 1, s_0_14, -1, 2, 0}, +/* 15 */ { 2, s_0_15, 14, 1, 0}, +/* 16 */ { 2, s_0_16, 14, 1, 0}, +/* 17 */ { 4, s_0_17, 16, 1, 0}, +/* 18 */ { 5, s_0_18, 16, 1, 0}, +/* 19 */ { 4, s_0_19, 16, 1, 0}, +/* 20 */ { 7, s_0_20, 19, 1, 0}, +/* 21 */ { 3, s_0_21, 14, 1, 0}, +/* 22 */ { 6, s_0_22, 21, 1, 0}, +/* 23 */ { 3, s_0_23, 14, 1, 0}, +/* 24 */ { 3, s_0_24, 14, 1, 0}, +/* 25 */ { 2, s_0_25, -1, 1, 0}, +/* 26 */ { 3, s_0_26, 25, 1, 0}, +/* 27 */ { 3, s_0_27, -1, 3, 0}, +/* 28 */ { 3, s_0_28, -1, 1, 0} +}; + +static const symbol s_1_0[2] = { 'd', 't' }; +static const symbol s_1_1[2] = { 'v', 't' }; + +static const struct among a_1[2] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0} +}; + +static const symbol s_2_0[3] = { 'l', 'e', 'g' }; +static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; +static const symbol s_2_2[2] = { 'i', 'g' }; +static const symbol s_2_3[3] = { 'e', 'i', 'g' }; +static const symbol s_2_4[3] = { 'l', 'i', 'g' }; +static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; +static const symbol s_2_6[3] = { 'e', 'l', 's' }; +static const symbol s_2_7[3] = { 'l', 'o', 'v' }; +static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; +static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; +static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; + +static const struct among a_2[11] = +{ +/* 0 */ { 3, s_2_0, -1, 1, 0}, +/* 1 */ { 4, s_2_1, 0, 1, 0}, +/* 2 */ { 2, s_2_2, -1, 1, 0}, +/* 3 */ { 3, s_2_3, 2, 1, 0}, +/* 4 */ { 3, s_2_4, 2, 1, 0}, +/* 5 */ { 4, s_2_5, 4, 1, 0}, +/* 6 */ { 3, s_2_6, -1, 1, 0}, +/* 7 */ { 3, s_2_7, -1, 1, 0}, +/* 8 */ { 4, s_2_8, 7, 1, 0}, +/* 9 */ { 4, s_2_9, 7, 1, 0}, +/* 10 */ { 7, s_2_10, 9, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + +static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; + +static const symbol s_0[] = { 'k' }; +static const symbol s_1[] = { 'e', 'r' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c_test = z->c; /* test, line 30 */ + { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); + if (ret < 0) return 0; + z->c = ret; /* hop, line 30 */ + } + z->I[1] = z->c; /* setmark x, line 30 */ + z->c = c_test; + } + if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ + { /* gopast */ /* non v, line 31 */ + int ret = in_grouping_U(z, g_v, 97, 248, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 31 */ + /* try, line 32 */ + if (!(z->I[0] < z->I[1])) goto lab0; + z->I[0] = z->I[1]; +lab0: + return 1; +} + +static int r_main_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 38 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 38 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 38 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 38 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 44 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ + if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) goto lab1; + goto lab0; + lab1: + z->c = z->l - m2; + if (!(eq_s_b(z, 1, s_0))) return 0; + if (out_grouping_b_U(z, g_v, 97, 248, 0)) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 46 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_consonant_pair(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 53 */ + { int mlimit; /* setlimit, line 54 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 54 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 54 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ + z->bra = z->c; /* ], line 54 */ + z->lb = mlimit; + } + z->c = z->l - m_test; + } + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 59 */ + } + z->bra = z->c; /* ], line 59 */ + { int ret = slice_del(z); /* delete, line 59 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_other_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 63 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 63 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 63 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 63 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 67 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int norwegian_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 74 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 74 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 75 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ + { int ret = r_main_suffix(z); + if (ret == 0) goto lab1; /* call main_suffix, line 76 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ + { int ret = r_other_suffix(z); + if (ret == 0) goto lab3; /* call other_suffix, line 78 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * norwegian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } + +extern void norwegian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_porter.c b/src/backend/snowball/libstemmer/stem_UTF_8_porter.c new file mode 100644 index 00000000000..421cc0e74a6 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_porter.c @@ -0,0 +1,755 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int porter_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_Step_5b(struct SN_env * z); +static int r_Step_5a(struct SN_env * z); +static int r_Step_4(struct SN_env * z); +static int r_Step_3(struct SN_env * z); +static int r_Step_2(struct SN_env * z); +static int r_Step_1c(struct SN_env * z); +static int r_Step_1b(struct SN_env * z); +static int r_Step_1a(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_shortv(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * porter_UTF_8_create_env(void); +extern void porter_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[1] = { 's' }; +static const symbol s_0_1[3] = { 'i', 'e', 's' }; +static const symbol s_0_2[4] = { 's', 's', 'e', 's' }; +static const symbol s_0_3[2] = { 's', 's' }; + +static const struct among a_0[4] = +{ +/* 0 */ { 1, s_0_0, -1, 3, 0}, +/* 1 */ { 3, s_0_1, 0, 2, 0}, +/* 2 */ { 4, s_0_2, 0, 1, 0}, +/* 3 */ { 2, s_0_3, 0, -1, 0} +}; + +static const symbol s_1_1[2] = { 'b', 'b' }; +static const symbol s_1_2[2] = { 'd', 'd' }; +static const symbol s_1_3[2] = { 'f', 'f' }; +static const symbol s_1_4[2] = { 'g', 'g' }; +static const symbol s_1_5[2] = { 'b', 'l' }; +static const symbol s_1_6[2] = { 'm', 'm' }; +static const symbol s_1_7[2] = { 'n', 'n' }; +static const symbol s_1_8[2] = { 'p', 'p' }; +static const symbol s_1_9[2] = { 'r', 'r' }; +static const symbol s_1_10[2] = { 'a', 't' }; +static const symbol s_1_11[2] = { 't', 't' }; +static const symbol s_1_12[2] = { 'i', 'z' }; + +static const struct among a_1[13] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_1_1, 0, 2, 0}, +/* 2 */ { 2, s_1_2, 0, 2, 0}, +/* 3 */ { 2, s_1_3, 0, 2, 0}, +/* 4 */ { 2, s_1_4, 0, 2, 0}, +/* 5 */ { 2, s_1_5, 0, 1, 0}, +/* 6 */ { 2, s_1_6, 0, 2, 0}, +/* 7 */ { 2, s_1_7, 0, 2, 0}, +/* 8 */ { 2, s_1_8, 0, 2, 0}, +/* 9 */ { 2, s_1_9, 0, 2, 0}, +/* 10 */ { 2, s_1_10, 0, 1, 0}, +/* 11 */ { 2, s_1_11, 0, 2, 0}, +/* 12 */ { 2, s_1_12, 0, 1, 0} +}; + +static const symbol s_2_0[2] = { 'e', 'd' }; +static const symbol s_2_1[3] = { 'e', 'e', 'd' }; +static const symbol s_2_2[3] = { 'i', 'n', 'g' }; + +static const struct among a_2[3] = +{ +/* 0 */ { 2, s_2_0, -1, 2, 0}, +/* 1 */ { 3, s_2_1, 0, 1, 0}, +/* 2 */ { 3, s_2_2, -1, 2, 0} +}; + +static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' }; +static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' }; +static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' }; +static const symbol s_3_3[3] = { 'e', 'l', 'i' }; +static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' }; +static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' }; +static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' }; +static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' }; +static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; +static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' }; +static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' }; +static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' }; +static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; +static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' }; +static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; +static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; +static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; + +static const struct among a_3[20] = +{ +/* 0 */ { 4, s_3_0, -1, 3, 0}, +/* 1 */ { 4, s_3_1, -1, 2, 0}, +/* 2 */ { 4, s_3_2, -1, 4, 0}, +/* 3 */ { 3, s_3_3, -1, 6, 0}, +/* 4 */ { 4, s_3_4, -1, 9, 0}, +/* 5 */ { 5, s_3_5, -1, 12, 0}, +/* 6 */ { 5, s_3_6, -1, 5, 0}, +/* 7 */ { 5, s_3_7, -1, 10, 0}, +/* 8 */ { 6, s_3_8, -1, 14, 0}, +/* 9 */ { 5, s_3_9, -1, 13, 0}, +/* 10 */ { 6, s_3_10, -1, 1, 0}, +/* 11 */ { 7, s_3_11, 10, 8, 0}, +/* 12 */ { 5, s_3_12, -1, 10, 0}, +/* 13 */ { 5, s_3_13, -1, 8, 0}, +/* 14 */ { 7, s_3_14, 13, 7, 0}, +/* 15 */ { 4, s_3_15, -1, 7, 0}, +/* 16 */ { 4, s_3_16, -1, 8, 0}, +/* 17 */ { 7, s_3_17, -1, 13, 0}, +/* 18 */ { 7, s_3_18, -1, 11, 0}, +/* 19 */ { 7, s_3_19, -1, 12, 0} +}; + +static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' }; +static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' }; +static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' }; +static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' }; +static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' }; +static const symbol s_4_5[3] = { 'f', 'u', 'l' }; +static const symbol s_4_6[4] = { 'n', 'e', 's', 's' }; + +static const struct among a_4[7] = +{ +/* 0 */ { 5, s_4_0, -1, 2, 0}, +/* 1 */ { 5, s_4_1, -1, 3, 0}, +/* 2 */ { 5, s_4_2, -1, 1, 0}, +/* 3 */ { 5, s_4_3, -1, 2, 0}, +/* 4 */ { 4, s_4_4, -1, 2, 0}, +/* 5 */ { 3, s_4_5, -1, 3, 0}, +/* 6 */ { 4, s_4_6, -1, 3, 0} +}; + +static const symbol s_5_0[2] = { 'i', 'c' }; +static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' }; +static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' }; +static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_5_5[3] = { 'a', 't', 'e' }; +static const symbol s_5_6[3] = { 'i', 'v', 'e' }; +static const symbol s_5_7[3] = { 'i', 'z', 'e' }; +static const symbol s_5_8[3] = { 'i', 't', 'i' }; +static const symbol s_5_9[2] = { 'a', 'l' }; +static const symbol s_5_10[3] = { 'i', 's', 'm' }; +static const symbol s_5_11[3] = { 'i', 'o', 'n' }; +static const symbol s_5_12[2] = { 'e', 'r' }; +static const symbol s_5_13[3] = { 'o', 'u', 's' }; +static const symbol s_5_14[3] = { 'a', 'n', 't' }; +static const symbol s_5_15[3] = { 'e', 'n', 't' }; +static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' }; +static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' }; +static const symbol s_5_18[2] = { 'o', 'u' }; + +static const struct among a_5[19] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 4, s_5_2, -1, 1, 0}, +/* 3 */ { 4, s_5_3, -1, 1, 0}, +/* 4 */ { 4, s_5_4, -1, 1, 0}, +/* 5 */ { 3, s_5_5, -1, 1, 0}, +/* 6 */ { 3, s_5_6, -1, 1, 0}, +/* 7 */ { 3, s_5_7, -1, 1, 0}, +/* 8 */ { 3, s_5_8, -1, 1, 0}, +/* 9 */ { 2, s_5_9, -1, 1, 0}, +/* 10 */ { 3, s_5_10, -1, 1, 0}, +/* 11 */ { 3, s_5_11, -1, 2, 0}, +/* 12 */ { 2, s_5_12, -1, 1, 0}, +/* 13 */ { 3, s_5_13, -1, 1, 0}, +/* 14 */ { 3, s_5_14, -1, 1, 0}, +/* 15 */ { 3, s_5_15, -1, 1, 0}, +/* 16 */ { 4, s_5_16, 15, 1, 0}, +/* 17 */ { 5, s_5_17, 16, 1, 0}, +/* 18 */ { 2, s_5_18, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1 }; + +static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; + +static const symbol s_0[] = { 's', 's' }; +static const symbol s_1[] = { 'i' }; +static const symbol s_2[] = { 'e', 'e' }; +static const symbol s_3[] = { 'e' }; +static const symbol s_4[] = { 'e' }; +static const symbol s_5[] = { 'y' }; +static const symbol s_6[] = { 'Y' }; +static const symbol s_7[] = { 'i' }; +static const symbol s_8[] = { 't', 'i', 'o', 'n' }; +static const symbol s_9[] = { 'e', 'n', 'c', 'e' }; +static const symbol s_10[] = { 'a', 'n', 'c', 'e' }; +static const symbol s_11[] = { 'a', 'b', 'l', 'e' }; +static const symbol s_12[] = { 'e', 'n', 't' }; +static const symbol s_13[] = { 'e' }; +static const symbol s_14[] = { 'i', 'z', 'e' }; +static const symbol s_15[] = { 'a', 't', 'e' }; +static const symbol s_16[] = { 'a', 'l' }; +static const symbol s_17[] = { 'a', 'l' }; +static const symbol s_18[] = { 'f', 'u', 'l' }; +static const symbol s_19[] = { 'o', 'u', 's' }; +static const symbol s_20[] = { 'i', 'v', 'e' }; +static const symbol s_21[] = { 'b', 'l', 'e' }; +static const symbol s_22[] = { 'a', 'l' }; +static const symbol s_23[] = { 'i', 'c' }; +static const symbol s_24[] = { 's' }; +static const symbol s_25[] = { 't' }; +static const symbol s_26[] = { 'e' }; +static const symbol s_27[] = { 'l' }; +static const symbol s_28[] = { 'l' }; +static const symbol s_29[] = { 'y' }; +static const symbol s_30[] = { 'Y' }; +static const symbol s_31[] = { 'y' }; +static const symbol s_32[] = { 'Y' }; +static const symbol s_33[] = { 'Y' }; +static const symbol s_34[] = { 'y' }; + +static int r_shortv(struct SN_env * z) { + if (out_grouping_b_U(z, g_v_WXY, 89, 121, 0)) return 0; + if (in_grouping_b_U(z, g_v, 97, 121, 0)) return 0; + if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_Step_1a(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 25 */ + if (z->c <= z->lb || z->p[z->c - 1] != 115) return 0; + among_var = find_among_b(z, a_0, 4); /* substring, line 25 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 25 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 2, s_0); /* <-, line 26 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 27 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 29 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_1b(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 34 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; + among_var = find_among_b(z, a_2, 3); /* substring, line 34 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 34 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 35 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 2, s_2); /* <-, line 35 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m_test = z->l - z->c; /* test, line 38 */ + { /* gopast */ /* grouping v, line 38 */ + int ret = out_grouping_b_U(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + z->c = z->l - m_test; + } + { int ret = slice_del(z); /* delete, line 38 */ + if (ret < 0) return ret; + } + { int m_test = z->l - z->c; /* test, line 39 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else + among_var = find_among_b(z, a_1, 13); /* substring, line 39 */ + if (!(among_var)) return 0; + z->c = z->l - m_test; + } + switch(among_var) { + case 0: return 0; + case 1: + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_3); /* <+, line 41 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + case 2: + z->ket = z->c; /* [, line 44 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 44 */ + } + z->bra = z->c; /* ], line 44 */ + { int ret = slice_del(z); /* delete, line 44 */ + if (ret < 0) return ret; + } + break; + case 3: + if (z->c != z->I[0]) return 0; /* atmark, line 45 */ + { int m_test = z->l - z->c; /* test, line 45 */ + { int ret = r_shortv(z); + if (ret == 0) return 0; /* call shortv, line 45 */ + if (ret < 0) return ret; + } + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_4); /* <+, line 45 */ + z->c = c_keep; + if (ret < 0) return ret; + } + break; + } + break; + } + return 1; +} + +static int r_Step_1c(struct SN_env * z) { + z->ket = z->c; /* [, line 52 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 52 */ + if (!(eq_s_b(z, 1, s_5))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_6))) return 0; + } +lab0: + z->bra = z->c; /* ], line 52 */ + { /* gopast */ /* grouping v, line 53 */ + int ret = out_grouping_b_U(z, g_v, 97, 121, 1); + if (ret < 0) return 0; + z->c -= ret; + } + { int ret = slice_from_s(z, 1, s_7); /* <-, line 54 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_Step_2(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 58 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_3, 20); /* substring, line 58 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 58 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 58 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 4, s_10); /* <-, line 61 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */ + if (ret < 0) return ret; + } + break; + case 8: + { int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */ + if (ret < 0) return ret; + } + break; + case 9: + { int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */ + if (ret < 0) return ret; + } + break; + case 10: + { int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */ + if (ret < 0) return ret; + } + break; + case 11: + { int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */ + if (ret < 0) return ret; + } + break; + case 12: + { int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 13: + { int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */ + if (ret < 0) return ret; + } + break; + case 14: + { int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_3(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 82 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_4, 7); /* substring, line 82 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 82 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 82 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 2, s_22); /* <-, line 83 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 2, s_23); /* <-, line 85 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 87 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_4(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 92 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3961384 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 19); /* substring, line 92 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 92 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 92 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 95 */ + if (ret < 0) return ret; + } + break; + case 2: + { int m1 = z->l - z->c; (void)m1; /* or, line 96 */ + if (!(eq_s_b(z, 1, s_24))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_25))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 96 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_Step_5a(struct SN_env * z) { + z->ket = z->c; /* [, line 101 */ + if (!(eq_s_b(z, 1, s_26))) return 0; + z->bra = z->c; /* ], line 101 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 102 */ + { int ret = r_R2(z); + if (ret == 0) goto lab1; /* call R2, line 102 */ + if (ret < 0) return ret; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 102 */ + if (ret < 0) return ret; + } + { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ + { int ret = r_shortv(z); + if (ret == 0) goto lab2; /* call shortv, line 102 */ + if (ret < 0) return ret; + } + return 0; + lab2: + z->c = z->l - m2; + } + } +lab0: + { int ret = slice_del(z); /* delete, line 103 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_Step_5b(struct SN_env * z) { + z->ket = z->c; /* [, line 107 */ + if (!(eq_s_b(z, 1, s_27))) return 0; + z->bra = z->c; /* ], line 107 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 108 */ + if (ret < 0) return ret; + } + if (!(eq_s_b(z, 1, s_28))) return 0; + { int ret = slice_del(z); /* delete, line 109 */ + if (ret < 0) return ret; + } + return 1; +} + +extern int porter_UTF_8_stem(struct SN_env * z) { + z->B[0] = 0; /* unset Y_found, line 115 */ + { int c1 = z->c; /* do, line 116 */ + z->bra = z->c; /* [, line 116 */ + if (!(eq_s(z, 1, s_29))) goto lab0; + z->ket = z->c; /* ], line 116 */ + { int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 116 */ + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 117 */ + while(1) { /* repeat, line 117 */ + int c3 = z->c; + while(1) { /* goto, line 117 */ + int c4 = z->c; + if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab3; + z->bra = z->c; /* [, line 117 */ + if (!(eq_s(z, 1, s_31))) goto lab3; + z->ket = z->c; /* ], line 117 */ + z->c = c4; + break; + lab3: + z->c = c4; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab2; + z->c = ret; /* goto, line 117 */ + } + } + { int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */ + if (ret < 0) return ret; + } + z->B[0] = 1; /* set Y_found, line 117 */ + continue; + lab2: + z->c = c3; + break; + } + z->c = c2; + } + z->I[0] = z->l; + z->I[1] = z->l; + { int c5 = z->c; /* do, line 121 */ + { /* gopast */ /* grouping v, line 122 */ + int ret = out_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 122 */ + int ret = in_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 122 */ + { /* gopast */ /* grouping v, line 123 */ + int ret = out_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + { /* gopast */ /* non v, line 123 */ + int ret = in_grouping_U(z, g_v, 97, 121, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 123 */ + lab4: + z->c = c5; + } + z->lb = z->c; z->c = z->l; /* backwards, line 126 */ + + { int m6 = z->l - z->c; (void)m6; /* do, line 127 */ + { int ret = r_Step_1a(z); + if (ret == 0) goto lab5; /* call Step_1a, line 127 */ + if (ret < 0) return ret; + } + lab5: + z->c = z->l - m6; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 128 */ + { int ret = r_Step_1b(z); + if (ret == 0) goto lab6; /* call Step_1b, line 128 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m7; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 129 */ + { int ret = r_Step_1c(z); + if (ret == 0) goto lab7; /* call Step_1c, line 129 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m8; + } + { int m9 = z->l - z->c; (void)m9; /* do, line 130 */ + { int ret = r_Step_2(z); + if (ret == 0) goto lab8; /* call Step_2, line 130 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m9; + } + { int m10 = z->l - z->c; (void)m10; /* do, line 131 */ + { int ret = r_Step_3(z); + if (ret == 0) goto lab9; /* call Step_3, line 131 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m10; + } + { int m11 = z->l - z->c; (void)m11; /* do, line 132 */ + { int ret = r_Step_4(z); + if (ret == 0) goto lab10; /* call Step_4, line 132 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m11; + } + { int m12 = z->l - z->c; (void)m12; /* do, line 133 */ + { int ret = r_Step_5a(z); + if (ret == 0) goto lab11; /* call Step_5a, line 133 */ + if (ret < 0) return ret; + } + lab11: + z->c = z->l - m12; + } + { int m13 = z->l - z->c; (void)m13; /* do, line 134 */ + { int ret = r_Step_5b(z); + if (ret == 0) goto lab12; /* call Step_5b, line 134 */ + if (ret < 0) return ret; + } + lab12: + z->c = z->l - m13; + } + z->c = z->lb; + { int c14 = z->c; /* do, line 137 */ + if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */ + while(1) { /* repeat, line 137 */ + int c15 = z->c; + while(1) { /* goto, line 137 */ + int c16 = z->c; + z->bra = z->c; /* [, line 137 */ + if (!(eq_s(z, 1, s_33))) goto lab15; + z->ket = z->c; /* ], line 137 */ + z->c = c16; + break; + lab15: + z->c = c16; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab14; + z->c = ret; /* goto, line 137 */ + } + } + { int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */ + if (ret < 0) return ret; + } + continue; + lab14: + z->c = c15; + break; + } + lab13: + z->c = c14; + } + return 1; +} + +extern struct SN_env * porter_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } + +extern void porter_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_portuguese.c b/src/backend/snowball/libstemmer/stem_UTF_8_portuguese.c new file mode 100644 index 00000000000..8939cfe016a --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_portuguese.c @@ -0,0 +1,1023 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int portuguese_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_residual_form(struct SN_env * z); +static int r_residual_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * portuguese_UTF_8_create_env(void); +extern void portuguese_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[2] = { 0xC3, 0xA3 }; +static const symbol s_0_2[2] = { 0xC3, 0xB5 }; + +static const struct among a_0[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_0_1, 0, 1, 0}, +/* 2 */ { 2, s_0_2, 0, 2, 0} +}; + +static const symbol s_1_1[2] = { 'a', '~' }; +static const symbol s_1_2[2] = { 'o', '~' }; + +static const struct among a_1[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_1_1, 0, 1, 0}, +/* 2 */ { 2, s_1_2, 0, 2, 0} +}; + +static const symbol s_2_0[2] = { 'i', 'c' }; +static const symbol s_2_1[2] = { 'a', 'd' }; +static const symbol s_2_2[2] = { 'o', 's' }; +static const symbol s_2_3[2] = { 'i', 'v' }; + +static const struct among a_2[4] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 2, s_2_2, -1, -1, 0}, +/* 3 */ { 2, s_2_3, -1, 1, 0} +}; + +static const symbol s_3_0[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_3_1[4] = { 'a', 'v', 'e', 'l' }; +static const symbol s_3_2[5] = { 0xC3, 0xAD, 'v', 'e', 'l' }; + +static const struct among a_3[3] = +{ +/* 0 */ { 4, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0}, +/* 2 */ { 5, s_3_2, -1, 1, 0} +}; + +static const symbol s_4_0[2] = { 'i', 'c' }; +static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_4_2[2] = { 'i', 'v' }; + +static const struct among a_4[3] = +{ +/* 0 */ { 2, s_4_0, -1, 1, 0}, +/* 1 */ { 4, s_4_1, -1, 1, 0}, +/* 2 */ { 2, s_4_2, -1, 1, 0} +}; + +static const symbol s_5_0[3] = { 'i', 'c', 'a' }; +static const symbol s_5_1[6] = { 0xC3, 0xA2, 'n', 'c', 'i', 'a' }; +static const symbol s_5_2[6] = { 0xC3, 0xAA, 'n', 'c', 'i', 'a' }; +static const symbol s_5_3[3] = { 'i', 'r', 'a' }; +static const symbol s_5_4[5] = { 'a', 'd', 'o', 'r', 'a' }; +static const symbol s_5_5[3] = { 'o', 's', 'a' }; +static const symbol s_5_6[4] = { 'i', 's', 't', 'a' }; +static const symbol s_5_7[3] = { 'i', 'v', 'a' }; +static const symbol s_5_8[3] = { 'e', 'z', 'a' }; +static const symbol s_5_9[6] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a' }; +static const symbol s_5_10[5] = { 'i', 'd', 'a', 'd', 'e' }; +static const symbol s_5_11[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_5_12[5] = { 'm', 'e', 'n', 't', 'e' }; +static const symbol s_5_13[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; +static const symbol s_5_14[5] = { 0xC3, 0xA1, 'v', 'e', 'l' }; +static const symbol s_5_15[5] = { 0xC3, 0xAD, 'v', 'e', 'l' }; +static const symbol s_5_16[6] = { 'u', 'c', 'i', 0xC3, 0xB3, 'n' }; +static const symbol s_5_17[3] = { 'i', 'c', 'o' }; +static const symbol s_5_18[4] = { 'i', 's', 'm', 'o' }; +static const symbol s_5_19[3] = { 'o', 's', 'o' }; +static const symbol s_5_20[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_5_21[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; +static const symbol s_5_22[3] = { 'i', 'v', 'o' }; +static const symbol s_5_23[6] = { 'a', 0xC3, 0xA7, 'a', '~', 'o' }; +static const symbol s_5_24[4] = { 'a', 'd', 'o', 'r' }; +static const symbol s_5_25[4] = { 'i', 'c', 'a', 's' }; +static const symbol s_5_26[7] = { 0xC3, 0xAA, 'n', 'c', 'i', 'a', 's' }; +static const symbol s_5_27[4] = { 'i', 'r', 'a', 's' }; +static const symbol s_5_28[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; +static const symbol s_5_29[4] = { 'o', 's', 'a', 's' }; +static const symbol s_5_30[5] = { 'i', 's', 't', 'a', 's' }; +static const symbol s_5_31[4] = { 'i', 'v', 'a', 's' }; +static const symbol s_5_32[4] = { 'e', 'z', 'a', 's' }; +static const symbol s_5_33[7] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a', 's' }; +static const symbol s_5_34[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; +static const symbol s_5_35[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; +static const symbol s_5_36[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; +static const symbol s_5_37[5] = { 'a', 'n', 't', 'e', 's' }; +static const symbol s_5_38[7] = { 'a', 0xC3, 0xA7, 'o', '~', 'e', 's' }; +static const symbol s_5_39[4] = { 'i', 'c', 'o', 's' }; +static const symbol s_5_40[5] = { 'i', 's', 'm', 'o', 's' }; +static const symbol s_5_41[4] = { 'o', 's', 'o', 's' }; +static const symbol s_5_42[7] = { 'a', 'm', 'e', 'n', 't', 'o', 's' }; +static const symbol s_5_43[7] = { 'i', 'm', 'e', 'n', 't', 'o', 's' }; +static const symbol s_5_44[4] = { 'i', 'v', 'o', 's' }; + +static const struct among a_5[45] = +{ +/* 0 */ { 3, s_5_0, -1, 1, 0}, +/* 1 */ { 6, s_5_1, -1, 1, 0}, +/* 2 */ { 6, s_5_2, -1, 4, 0}, +/* 3 */ { 3, s_5_3, -1, 9, 0}, +/* 4 */ { 5, s_5_4, -1, 1, 0}, +/* 5 */ { 3, s_5_5, -1, 1, 0}, +/* 6 */ { 4, s_5_6, -1, 1, 0}, +/* 7 */ { 3, s_5_7, -1, 8, 0}, +/* 8 */ { 3, s_5_8, -1, 1, 0}, +/* 9 */ { 6, s_5_9, -1, 2, 0}, +/* 10 */ { 5, s_5_10, -1, 7, 0}, +/* 11 */ { 4, s_5_11, -1, 1, 0}, +/* 12 */ { 5, s_5_12, -1, 6, 0}, +/* 13 */ { 6, s_5_13, 12, 5, 0}, +/* 14 */ { 5, s_5_14, -1, 1, 0}, +/* 15 */ { 5, s_5_15, -1, 1, 0}, +/* 16 */ { 6, s_5_16, -1, 3, 0}, +/* 17 */ { 3, s_5_17, -1, 1, 0}, +/* 18 */ { 4, s_5_18, -1, 1, 0}, +/* 19 */ { 3, s_5_19, -1, 1, 0}, +/* 20 */ { 6, s_5_20, -1, 1, 0}, +/* 21 */ { 6, s_5_21, -1, 1, 0}, +/* 22 */ { 3, s_5_22, -1, 8, 0}, +/* 23 */ { 6, s_5_23, -1, 1, 0}, +/* 24 */ { 4, s_5_24, -1, 1, 0}, +/* 25 */ { 4, s_5_25, -1, 1, 0}, +/* 26 */ { 7, s_5_26, -1, 4, 0}, +/* 27 */ { 4, s_5_27, -1, 9, 0}, +/* 28 */ { 6, s_5_28, -1, 1, 0}, +/* 29 */ { 4, s_5_29, -1, 1, 0}, +/* 30 */ { 5, s_5_30, -1, 1, 0}, +/* 31 */ { 4, s_5_31, -1, 8, 0}, +/* 32 */ { 4, s_5_32, -1, 1, 0}, +/* 33 */ { 7, s_5_33, -1, 2, 0}, +/* 34 */ { 6, s_5_34, -1, 7, 0}, +/* 35 */ { 7, s_5_35, -1, 3, 0}, +/* 36 */ { 6, s_5_36, -1, 1, 0}, +/* 37 */ { 5, s_5_37, -1, 1, 0}, +/* 38 */ { 7, s_5_38, -1, 1, 0}, +/* 39 */ { 4, s_5_39, -1, 1, 0}, +/* 40 */ { 5, s_5_40, -1, 1, 0}, +/* 41 */ { 4, s_5_41, -1, 1, 0}, +/* 42 */ { 7, s_5_42, -1, 1, 0}, +/* 43 */ { 7, s_5_43, -1, 1, 0}, +/* 44 */ { 4, s_5_44, -1, 8, 0} +}; + +static const symbol s_6_0[3] = { 'a', 'd', 'a' }; +static const symbol s_6_1[3] = { 'i', 'd', 'a' }; +static const symbol s_6_2[2] = { 'i', 'a' }; +static const symbol s_6_3[4] = { 'a', 'r', 'i', 'a' }; +static const symbol s_6_4[4] = { 'e', 'r', 'i', 'a' }; +static const symbol s_6_5[4] = { 'i', 'r', 'i', 'a' }; +static const symbol s_6_6[3] = { 'a', 'r', 'a' }; +static const symbol s_6_7[3] = { 'e', 'r', 'a' }; +static const symbol s_6_8[3] = { 'i', 'r', 'a' }; +static const symbol s_6_9[3] = { 'a', 'v', 'a' }; +static const symbol s_6_10[4] = { 'a', 's', 's', 'e' }; +static const symbol s_6_11[4] = { 'e', 's', 's', 'e' }; +static const symbol s_6_12[4] = { 'i', 's', 's', 'e' }; +static const symbol s_6_13[4] = { 'a', 's', 't', 'e' }; +static const symbol s_6_14[4] = { 'e', 's', 't', 'e' }; +static const symbol s_6_15[4] = { 'i', 's', 't', 'e' }; +static const symbol s_6_16[2] = { 'e', 'i' }; +static const symbol s_6_17[4] = { 'a', 'r', 'e', 'i' }; +static const symbol s_6_18[4] = { 'e', 'r', 'e', 'i' }; +static const symbol s_6_19[4] = { 'i', 'r', 'e', 'i' }; +static const symbol s_6_20[2] = { 'a', 'm' }; +static const symbol s_6_21[3] = { 'i', 'a', 'm' }; +static const symbol s_6_22[5] = { 'a', 'r', 'i', 'a', 'm' }; +static const symbol s_6_23[5] = { 'e', 'r', 'i', 'a', 'm' }; +static const symbol s_6_24[5] = { 'i', 'r', 'i', 'a', 'm' }; +static const symbol s_6_25[4] = { 'a', 'r', 'a', 'm' }; +static const symbol s_6_26[4] = { 'e', 'r', 'a', 'm' }; +static const symbol s_6_27[4] = { 'i', 'r', 'a', 'm' }; +static const symbol s_6_28[4] = { 'a', 'v', 'a', 'm' }; +static const symbol s_6_29[2] = { 'e', 'm' }; +static const symbol s_6_30[4] = { 'a', 'r', 'e', 'm' }; +static const symbol s_6_31[4] = { 'e', 'r', 'e', 'm' }; +static const symbol s_6_32[4] = { 'i', 'r', 'e', 'm' }; +static const symbol s_6_33[5] = { 'a', 's', 's', 'e', 'm' }; +static const symbol s_6_34[5] = { 'e', 's', 's', 'e', 'm' }; +static const symbol s_6_35[5] = { 'i', 's', 's', 'e', 'm' }; +static const symbol s_6_36[3] = { 'a', 'd', 'o' }; +static const symbol s_6_37[3] = { 'i', 'd', 'o' }; +static const symbol s_6_38[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_6_39[4] = { 'e', 'n', 'd', 'o' }; +static const symbol s_6_40[4] = { 'i', 'n', 'd', 'o' }; +static const symbol s_6_41[5] = { 'a', 'r', 'a', '~', 'o' }; +static const symbol s_6_42[5] = { 'e', 'r', 'a', '~', 'o' }; +static const symbol s_6_43[5] = { 'i', 'r', 'a', '~', 'o' }; +static const symbol s_6_44[2] = { 'a', 'r' }; +static const symbol s_6_45[2] = { 'e', 'r' }; +static const symbol s_6_46[2] = { 'i', 'r' }; +static const symbol s_6_47[2] = { 'a', 's' }; +static const symbol s_6_48[4] = { 'a', 'd', 'a', 's' }; +static const symbol s_6_49[4] = { 'i', 'd', 'a', 's' }; +static const symbol s_6_50[3] = { 'i', 'a', 's' }; +static const symbol s_6_51[5] = { 'a', 'r', 'i', 'a', 's' }; +static const symbol s_6_52[5] = { 'e', 'r', 'i', 'a', 's' }; +static const symbol s_6_53[5] = { 'i', 'r', 'i', 'a', 's' }; +static const symbol s_6_54[4] = { 'a', 'r', 'a', 's' }; +static const symbol s_6_55[4] = { 'e', 'r', 'a', 's' }; +static const symbol s_6_56[4] = { 'i', 'r', 'a', 's' }; +static const symbol s_6_57[4] = { 'a', 'v', 'a', 's' }; +static const symbol s_6_58[2] = { 'e', 's' }; +static const symbol s_6_59[5] = { 'a', 'r', 'd', 'e', 's' }; +static const symbol s_6_60[5] = { 'e', 'r', 'd', 'e', 's' }; +static const symbol s_6_61[5] = { 'i', 'r', 'd', 'e', 's' }; +static const symbol s_6_62[4] = { 'a', 'r', 'e', 's' }; +static const symbol s_6_63[4] = { 'e', 'r', 'e', 's' }; +static const symbol s_6_64[4] = { 'i', 'r', 'e', 's' }; +static const symbol s_6_65[5] = { 'a', 's', 's', 'e', 's' }; +static const symbol s_6_66[5] = { 'e', 's', 's', 'e', 's' }; +static const symbol s_6_67[5] = { 'i', 's', 's', 'e', 's' }; +static const symbol s_6_68[5] = { 'a', 's', 't', 'e', 's' }; +static const symbol s_6_69[5] = { 'e', 's', 't', 'e', 's' }; +static const symbol s_6_70[5] = { 'i', 's', 't', 'e', 's' }; +static const symbol s_6_71[2] = { 'i', 's' }; +static const symbol s_6_72[3] = { 'a', 'i', 's' }; +static const symbol s_6_73[3] = { 'e', 'i', 's' }; +static const symbol s_6_74[5] = { 'a', 'r', 'e', 'i', 's' }; +static const symbol s_6_75[5] = { 'e', 'r', 'e', 'i', 's' }; +static const symbol s_6_76[5] = { 'i', 'r', 'e', 'i', 's' }; +static const symbol s_6_77[6] = { 0xC3, 0xA1, 'r', 'e', 'i', 's' }; +static const symbol s_6_78[6] = { 0xC3, 0xA9, 'r', 'e', 'i', 's' }; +static const symbol s_6_79[6] = { 0xC3, 0xAD, 'r', 'e', 'i', 's' }; +static const symbol s_6_80[7] = { 0xC3, 0xA1, 's', 's', 'e', 'i', 's' }; +static const symbol s_6_81[7] = { 0xC3, 0xA9, 's', 's', 'e', 'i', 's' }; +static const symbol s_6_82[7] = { 0xC3, 0xAD, 's', 's', 'e', 'i', 's' }; +static const symbol s_6_83[6] = { 0xC3, 0xA1, 'v', 'e', 'i', 's' }; +static const symbol s_6_84[5] = { 0xC3, 0xAD, 'e', 'i', 's' }; +static const symbol s_6_85[7] = { 'a', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; +static const symbol s_6_86[7] = { 'e', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; +static const symbol s_6_87[7] = { 'i', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; +static const symbol s_6_88[4] = { 'a', 'd', 'o', 's' }; +static const symbol s_6_89[4] = { 'i', 'd', 'o', 's' }; +static const symbol s_6_90[4] = { 'a', 'm', 'o', 's' }; +static const symbol s_6_91[7] = { 0xC3, 0xA1, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_6_92[7] = { 0xC3, 0xA9, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_6_93[7] = { 0xC3, 0xAD, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_6_94[7] = { 0xC3, 0xA1, 'v', 'a', 'm', 'o', 's' }; +static const symbol s_6_95[6] = { 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_6_96[8] = { 'a', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_6_97[8] = { 'e', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_6_98[8] = { 'i', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_6_99[4] = { 'e', 'm', 'o', 's' }; +static const symbol s_6_100[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_6_101[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_6_102[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_6_103[8] = { 0xC3, 0xA1, 's', 's', 'e', 'm', 'o', 's' }; +static const symbol s_6_104[8] = { 0xC3, 0xAA, 's', 's', 'e', 'm', 'o', 's' }; +static const symbol s_6_105[8] = { 0xC3, 0xAD, 's', 's', 'e', 'm', 'o', 's' }; +static const symbol s_6_106[4] = { 'i', 'm', 'o', 's' }; +static const symbol s_6_107[5] = { 'a', 'r', 'm', 'o', 's' }; +static const symbol s_6_108[5] = { 'e', 'r', 'm', 'o', 's' }; +static const symbol s_6_109[5] = { 'i', 'r', 'm', 'o', 's' }; +static const symbol s_6_110[5] = { 0xC3, 0xA1, 'm', 'o', 's' }; +static const symbol s_6_111[5] = { 'a', 'r', 0xC3, 0xA1, 's' }; +static const symbol s_6_112[5] = { 'e', 'r', 0xC3, 0xA1, 's' }; +static const symbol s_6_113[5] = { 'i', 'r', 0xC3, 0xA1, 's' }; +static const symbol s_6_114[2] = { 'e', 'u' }; +static const symbol s_6_115[2] = { 'i', 'u' }; +static const symbol s_6_116[2] = { 'o', 'u' }; +static const symbol s_6_117[4] = { 'a', 'r', 0xC3, 0xA1 }; +static const symbol s_6_118[4] = { 'e', 'r', 0xC3, 0xA1 }; +static const symbol s_6_119[4] = { 'i', 'r', 0xC3, 0xA1 }; + +static const struct among a_6[120] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 3, s_6_1, -1, 1, 0}, +/* 2 */ { 2, s_6_2, -1, 1, 0}, +/* 3 */ { 4, s_6_3, 2, 1, 0}, +/* 4 */ { 4, s_6_4, 2, 1, 0}, +/* 5 */ { 4, s_6_5, 2, 1, 0}, +/* 6 */ { 3, s_6_6, -1, 1, 0}, +/* 7 */ { 3, s_6_7, -1, 1, 0}, +/* 8 */ { 3, s_6_8, -1, 1, 0}, +/* 9 */ { 3, s_6_9, -1, 1, 0}, +/* 10 */ { 4, s_6_10, -1, 1, 0}, +/* 11 */ { 4, s_6_11, -1, 1, 0}, +/* 12 */ { 4, s_6_12, -1, 1, 0}, +/* 13 */ { 4, s_6_13, -1, 1, 0}, +/* 14 */ { 4, s_6_14, -1, 1, 0}, +/* 15 */ { 4, s_6_15, -1, 1, 0}, +/* 16 */ { 2, s_6_16, -1, 1, 0}, +/* 17 */ { 4, s_6_17, 16, 1, 0}, +/* 18 */ { 4, s_6_18, 16, 1, 0}, +/* 19 */ { 4, s_6_19, 16, 1, 0}, +/* 20 */ { 2, s_6_20, -1, 1, 0}, +/* 21 */ { 3, s_6_21, 20, 1, 0}, +/* 22 */ { 5, s_6_22, 21, 1, 0}, +/* 23 */ { 5, s_6_23, 21, 1, 0}, +/* 24 */ { 5, s_6_24, 21, 1, 0}, +/* 25 */ { 4, s_6_25, 20, 1, 0}, +/* 26 */ { 4, s_6_26, 20, 1, 0}, +/* 27 */ { 4, s_6_27, 20, 1, 0}, +/* 28 */ { 4, s_6_28, 20, 1, 0}, +/* 29 */ { 2, s_6_29, -1, 1, 0}, +/* 30 */ { 4, s_6_30, 29, 1, 0}, +/* 31 */ { 4, s_6_31, 29, 1, 0}, +/* 32 */ { 4, s_6_32, 29, 1, 0}, +/* 33 */ { 5, s_6_33, 29, 1, 0}, +/* 34 */ { 5, s_6_34, 29, 1, 0}, +/* 35 */ { 5, s_6_35, 29, 1, 0}, +/* 36 */ { 3, s_6_36, -1, 1, 0}, +/* 37 */ { 3, s_6_37, -1, 1, 0}, +/* 38 */ { 4, s_6_38, -1, 1, 0}, +/* 39 */ { 4, s_6_39, -1, 1, 0}, +/* 40 */ { 4, s_6_40, -1, 1, 0}, +/* 41 */ { 5, s_6_41, -1, 1, 0}, +/* 42 */ { 5, s_6_42, -1, 1, 0}, +/* 43 */ { 5, s_6_43, -1, 1, 0}, +/* 44 */ { 2, s_6_44, -1, 1, 0}, +/* 45 */ { 2, s_6_45, -1, 1, 0}, +/* 46 */ { 2, s_6_46, -1, 1, 0}, +/* 47 */ { 2, s_6_47, -1, 1, 0}, +/* 48 */ { 4, s_6_48, 47, 1, 0}, +/* 49 */ { 4, s_6_49, 47, 1, 0}, +/* 50 */ { 3, s_6_50, 47, 1, 0}, +/* 51 */ { 5, s_6_51, 50, 1, 0}, +/* 52 */ { 5, s_6_52, 50, 1, 0}, +/* 53 */ { 5, s_6_53, 50, 1, 0}, +/* 54 */ { 4, s_6_54, 47, 1, 0}, +/* 55 */ { 4, s_6_55, 47, 1, 0}, +/* 56 */ { 4, s_6_56, 47, 1, 0}, +/* 57 */ { 4, s_6_57, 47, 1, 0}, +/* 58 */ { 2, s_6_58, -1, 1, 0}, +/* 59 */ { 5, s_6_59, 58, 1, 0}, +/* 60 */ { 5, s_6_60, 58, 1, 0}, +/* 61 */ { 5, s_6_61, 58, 1, 0}, +/* 62 */ { 4, s_6_62, 58, 1, 0}, +/* 63 */ { 4, s_6_63, 58, 1, 0}, +/* 64 */ { 4, s_6_64, 58, 1, 0}, +/* 65 */ { 5, s_6_65, 58, 1, 0}, +/* 66 */ { 5, s_6_66, 58, 1, 0}, +/* 67 */ { 5, s_6_67, 58, 1, 0}, +/* 68 */ { 5, s_6_68, 58, 1, 0}, +/* 69 */ { 5, s_6_69, 58, 1, 0}, +/* 70 */ { 5, s_6_70, 58, 1, 0}, +/* 71 */ { 2, s_6_71, -1, 1, 0}, +/* 72 */ { 3, s_6_72, 71, 1, 0}, +/* 73 */ { 3, s_6_73, 71, 1, 0}, +/* 74 */ { 5, s_6_74, 73, 1, 0}, +/* 75 */ { 5, s_6_75, 73, 1, 0}, +/* 76 */ { 5, s_6_76, 73, 1, 0}, +/* 77 */ { 6, s_6_77, 73, 1, 0}, +/* 78 */ { 6, s_6_78, 73, 1, 0}, +/* 79 */ { 6, s_6_79, 73, 1, 0}, +/* 80 */ { 7, s_6_80, 73, 1, 0}, +/* 81 */ { 7, s_6_81, 73, 1, 0}, +/* 82 */ { 7, s_6_82, 73, 1, 0}, +/* 83 */ { 6, s_6_83, 73, 1, 0}, +/* 84 */ { 5, s_6_84, 73, 1, 0}, +/* 85 */ { 7, s_6_85, 84, 1, 0}, +/* 86 */ { 7, s_6_86, 84, 1, 0}, +/* 87 */ { 7, s_6_87, 84, 1, 0}, +/* 88 */ { 4, s_6_88, -1, 1, 0}, +/* 89 */ { 4, s_6_89, -1, 1, 0}, +/* 90 */ { 4, s_6_90, -1, 1, 0}, +/* 91 */ { 7, s_6_91, 90, 1, 0}, +/* 92 */ { 7, s_6_92, 90, 1, 0}, +/* 93 */ { 7, s_6_93, 90, 1, 0}, +/* 94 */ { 7, s_6_94, 90, 1, 0}, +/* 95 */ { 6, s_6_95, 90, 1, 0}, +/* 96 */ { 8, s_6_96, 95, 1, 0}, +/* 97 */ { 8, s_6_97, 95, 1, 0}, +/* 98 */ { 8, s_6_98, 95, 1, 0}, +/* 99 */ { 4, s_6_99, -1, 1, 0}, +/*100 */ { 6, s_6_100, 99, 1, 0}, +/*101 */ { 6, s_6_101, 99, 1, 0}, +/*102 */ { 6, s_6_102, 99, 1, 0}, +/*103 */ { 8, s_6_103, 99, 1, 0}, +/*104 */ { 8, s_6_104, 99, 1, 0}, +/*105 */ { 8, s_6_105, 99, 1, 0}, +/*106 */ { 4, s_6_106, -1, 1, 0}, +/*107 */ { 5, s_6_107, -1, 1, 0}, +/*108 */ { 5, s_6_108, -1, 1, 0}, +/*109 */ { 5, s_6_109, -1, 1, 0}, +/*110 */ { 5, s_6_110, -1, 1, 0}, +/*111 */ { 5, s_6_111, -1, 1, 0}, +/*112 */ { 5, s_6_112, -1, 1, 0}, +/*113 */ { 5, s_6_113, -1, 1, 0}, +/*114 */ { 2, s_6_114, -1, 1, 0}, +/*115 */ { 2, s_6_115, -1, 1, 0}, +/*116 */ { 2, s_6_116, -1, 1, 0}, +/*117 */ { 4, s_6_117, -1, 1, 0}, +/*118 */ { 4, s_6_118, -1, 1, 0}, +/*119 */ { 4, s_6_119, -1, 1, 0} +}; + +static const symbol s_7_0[1] = { 'a' }; +static const symbol s_7_1[1] = { 'i' }; +static const symbol s_7_2[1] = { 'o' }; +static const symbol s_7_3[2] = { 'o', 's' }; +static const symbol s_7_4[2] = { 0xC3, 0xA1 }; +static const symbol s_7_5[2] = { 0xC3, 0xAD }; +static const symbol s_7_6[2] = { 0xC3, 0xB3 }; + +static const struct among a_7[7] = +{ +/* 0 */ { 1, s_7_0, -1, 1, 0}, +/* 1 */ { 1, s_7_1, -1, 1, 0}, +/* 2 */ { 1, s_7_2, -1, 1, 0}, +/* 3 */ { 2, s_7_3, -1, 1, 0}, +/* 4 */ { 2, s_7_4, -1, 1, 0}, +/* 5 */ { 2, s_7_5, -1, 1, 0}, +/* 6 */ { 2, s_7_6, -1, 1, 0} +}; + +static const symbol s_8_0[1] = { 'e' }; +static const symbol s_8_1[2] = { 0xC3, 0xA7 }; +static const symbol s_8_2[2] = { 0xC3, 0xA9 }; +static const symbol s_8_3[2] = { 0xC3, 0xAA }; + +static const struct among a_8[4] = +{ +/* 0 */ { 1, s_8_0, -1, 1, 0}, +/* 1 */ { 2, s_8_1, -1, 2, 0}, +/* 2 */ { 2, s_8_2, -1, 1, 0}, +/* 3 */ { 2, s_8_3, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; + +static const symbol s_0[] = { 'a', '~' }; +static const symbol s_1[] = { 'o', '~' }; +static const symbol s_2[] = { 0xC3, 0xA3 }; +static const symbol s_3[] = { 0xC3, 0xB5 }; +static const symbol s_4[] = { 'l', 'o', 'g' }; +static const symbol s_5[] = { 'u' }; +static const symbol s_6[] = { 'e', 'n', 't', 'e' }; +static const symbol s_7[] = { 'a', 't' }; +static const symbol s_8[] = { 'a', 't' }; +static const symbol s_9[] = { 'e' }; +static const symbol s_10[] = { 'i', 'r' }; +static const symbol s_11[] = { 'u' }; +static const symbol s_12[] = { 'g' }; +static const symbol s_13[] = { 'i' }; +static const symbol s_14[] = { 'c' }; +static const symbol s_15[] = { 'c' }; +static const symbol s_16[] = { 'i' }; +static const symbol s_17[] = { 'c' }; + +static int r_prelude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 36 */ + int c1 = z->c; + z->bra = z->c; /* [, line 37 */ + if (z->c + 1 >= z->l || (z->p[z->c + 1] != 163 && z->p[z->c + 1] != 181)) among_var = 3; else + among_var = find_among(z, a_0, 3); /* substring, line 37 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 37 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 2, s_0); /* <-, line 38 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 2, s_1); /* <-, line 39 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 40 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 50 */ + { int c2 = z->c; /* or, line 52 */ + if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab2; + { int c3 = z->c; /* or, line 51 */ + if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab4; + { /* gopast */ /* grouping v, line 51 */ + int ret = out_grouping_U(z, g_v, 97, 250, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab2; + { /* gopast */ /* non v, line 51 */ + int ret = in_grouping_U(z, g_v, 97, 250, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab0; + { int c4 = z->c; /* or, line 53 */ + if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab6; + { /* gopast */ /* grouping v, line 53 */ + int ret = out_grouping_U(z, g_v, 97, 250, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab0; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 53 */ + } + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 54 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 56 */ + { /* gopast */ /* grouping v, line 57 */ + int ret = out_grouping_U(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 57 */ + int ret = in_grouping_U(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 57 */ + { /* gopast */ /* grouping v, line 58 */ + int ret = out_grouping_U(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 58 */ + int ret = in_grouping_U(z, g_v, 97, 250, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 58 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 62 */ + int c1 = z->c; + z->bra = z->c; /* [, line 63 */ + if (z->c + 1 >= z->l || z->p[z->c + 1] != 126) among_var = 3; else + among_var = find_among(z, a_1, 3); /* substring, line 63 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 63 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 2, s_2); /* <-, line 64 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 2, s_3); /* <-, line 65 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 66 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 77 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((839714 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_5, 45); /* substring, line 77 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 77 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 93 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 93 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 98 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_4); /* <-, line 98 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 102 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_5); /* <-, line 102 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 106 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_6); /* <-, line 106 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 110 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 110 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ + z->ket = z->c; /* [, line 112 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } + among_var = find_among_b(z, a_2, 4); /* substring, line 112 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 112 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 112 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 112 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab0; } + case 1: + z->ket = z->c; /* [, line 113 */ + if (!(eq_s_b(z, 2, s_7))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 113 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 113 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 113 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + break; + case 6: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 122 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 123 */ + z->ket = z->c; /* [, line 124 */ + if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) { z->c = z->l - m_keep; goto lab1; } + among_var = find_among_b(z, a_3, 3); /* substring, line 124 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 124 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab1; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 127 */ + if (ret < 0) return ret; + } + break; + } + lab1: + ; + } + break; + case 7: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 134 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 134 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ + z->ket = z->c; /* [, line 136 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } + among_var = find_among_b(z, a_4, 3); /* substring, line 136 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } + z->bra = z->c; /* ], line 136 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab2; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 139 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 139 */ + if (ret < 0) return ret; + } + break; + } + lab2: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 146 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 146 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 147 */ + z->ket = z->c; /* [, line 148 */ + if (!(eq_s_b(z, 2, s_8))) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 148 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 148 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 148 */ + if (ret < 0) return ret; + } + lab3: + ; + } + break; + case 9: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 153 */ + if (ret < 0) return ret; + } + if (!(eq_s_b(z, 1, s_9))) return 0; + { int ret = slice_from_s(z, 2, s_10); /* <-, line 154 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 159 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 159 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 160 */ + among_var = find_among_b(z, a_6, 120); /* substring, line 160 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 160 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_residual_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 184 */ + among_var = find_among_b(z, a_7, 7); /* substring, line 184 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 184 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 187 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 187 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_residual_form(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 192 */ + among_var = find_among_b(z, a_8, 4); /* substring, line 192 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 192 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 194 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 194 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 194 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 194 */ + if (!(eq_s_b(z, 1, s_11))) goto lab1; + z->bra = z->c; /* ], line 194 */ + { int m_test = z->l - z->c; /* test, line 194 */ + if (!(eq_s_b(z, 1, s_12))) goto lab1; + z->c = z->l - m_test; + } + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_13))) return 0; + z->bra = z->c; /* ], line 195 */ + { int m_test = z->l - z->c; /* test, line 195 */ + if (!(eq_s_b(z, 1, s_14))) return 0; + z->c = z->l - m_test; + } + } + lab0: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 195 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 195 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_15); /* <-, line 196 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int portuguese_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 202 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 202 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 203 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 203 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 204 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 209 */ + { int m5 = z->l - z->c; (void)m5; /* and, line 207 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 206 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab6; /* call standard_suffix, line 206 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab4; /* call verb_suffix, line 206 */ + if (ret < 0) return ret; + } + } + lab5: + z->c = z->l - m5; + { int m7 = z->l - z->c; (void)m7; /* do, line 207 */ + z->ket = z->c; /* [, line 207 */ + if (!(eq_s_b(z, 1, s_16))) goto lab7; + z->bra = z->c; /* ], line 207 */ + { int m_test = z->l - z->c; /* test, line 207 */ + if (!(eq_s_b(z, 1, s_17))) goto lab7; + z->c = z->l - m_test; + } + { int ret = r_RV(z); + if (ret == 0) goto lab7; /* call RV, line 207 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 207 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m7; + } + } + goto lab3; + lab4: + z->c = z->l - m4; + { int ret = r_residual_suffix(z); + if (ret == 0) goto lab2; /* call residual_suffix, line 209 */ + if (ret < 0) return ret; + } + } + lab3: + lab2: + z->c = z->l - m3; + } + { int m8 = z->l - z->c; (void)m8; /* do, line 211 */ + { int ret = r_residual_form(z); + if (ret == 0) goto lab8; /* call residual_form, line 211 */ + if (ret < 0) return ret; + } + lab8: + z->c = z->l - m8; + } + z->c = z->lb; + { int c9 = z->c; /* do, line 213 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab9; /* call postlude, line 213 */ + if (ret < 0) return ret; + } + lab9: + z->c = c9; + } + return 1; +} + +extern struct SN_env * portuguese_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void portuguese_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_romanian.c b/src/backend/snowball/libstemmer/stem_UTF_8_romanian.c new file mode 100644 index 00000000000..e82ebfe95f8 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_romanian.c @@ -0,0 +1,1004 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int romanian_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_vowel_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_combo_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_step_0(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * romanian_UTF_8_create_env(void); +extern void romanian_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[1] = { 'I' }; +static const symbol s_0_2[1] = { 'U' }; + +static const struct among a_0[3] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 1, s_0_1, 0, 1, 0}, +/* 2 */ { 1, s_0_2, 0, 2, 0} +}; + +static const symbol s_1_0[2] = { 'e', 'a' }; +static const symbol s_1_1[5] = { 'a', 0xC5, 0xA3, 'i', 'a' }; +static const symbol s_1_2[3] = { 'a', 'u', 'a' }; +static const symbol s_1_3[3] = { 'i', 'u', 'a' }; +static const symbol s_1_4[5] = { 'a', 0xC5, 0xA3, 'i', 'e' }; +static const symbol s_1_5[3] = { 'e', 'l', 'e' }; +static const symbol s_1_6[3] = { 'i', 'l', 'e' }; +static const symbol s_1_7[4] = { 'i', 'i', 'l', 'e' }; +static const symbol s_1_8[3] = { 'i', 'e', 'i' }; +static const symbol s_1_9[4] = { 'a', 't', 'e', 'i' }; +static const symbol s_1_10[2] = { 'i', 'i' }; +static const symbol s_1_11[4] = { 'u', 'l', 'u', 'i' }; +static const symbol s_1_12[2] = { 'u', 'l' }; +static const symbol s_1_13[4] = { 'e', 'l', 'o', 'r' }; +static const symbol s_1_14[4] = { 'i', 'l', 'o', 'r' }; +static const symbol s_1_15[5] = { 'i', 'i', 'l', 'o', 'r' }; + +static const struct among a_1[16] = +{ +/* 0 */ { 2, s_1_0, -1, 3, 0}, +/* 1 */ { 5, s_1_1, -1, 7, 0}, +/* 2 */ { 3, s_1_2, -1, 2, 0}, +/* 3 */ { 3, s_1_3, -1, 4, 0}, +/* 4 */ { 5, s_1_4, -1, 7, 0}, +/* 5 */ { 3, s_1_5, -1, 3, 0}, +/* 6 */ { 3, s_1_6, -1, 5, 0}, +/* 7 */ { 4, s_1_7, 6, 4, 0}, +/* 8 */ { 3, s_1_8, -1, 4, 0}, +/* 9 */ { 4, s_1_9, -1, 6, 0}, +/* 10 */ { 2, s_1_10, -1, 4, 0}, +/* 11 */ { 4, s_1_11, -1, 1, 0}, +/* 12 */ { 2, s_1_12, -1, 1, 0}, +/* 13 */ { 4, s_1_13, -1, 3, 0}, +/* 14 */ { 4, s_1_14, -1, 4, 0}, +/* 15 */ { 5, s_1_15, 14, 4, 0} +}; + +static const symbol s_2_0[5] = { 'i', 'c', 'a', 'l', 'a' }; +static const symbol s_2_1[5] = { 'i', 'c', 'i', 'v', 'a' }; +static const symbol s_2_2[5] = { 'a', 't', 'i', 'v', 'a' }; +static const symbol s_2_3[5] = { 'i', 't', 'i', 'v', 'a' }; +static const symbol s_2_4[5] = { 'i', 'c', 'a', 'l', 'e' }; +static const symbol s_2_5[7] = { 'a', 0xC5, 0xA3, 'i', 'u', 'n', 'e' }; +static const symbol s_2_6[7] = { 'i', 0xC5, 0xA3, 'i', 'u', 'n', 'e' }; +static const symbol s_2_7[6] = { 'a', 't', 'o', 'a', 'r', 'e' }; +static const symbol s_2_8[6] = { 'i', 't', 'o', 'a', 'r', 'e' }; +static const symbol s_2_9[7] = { 0xC4, 0x83, 't', 'o', 'a', 'r', 'e' }; +static const symbol s_2_10[7] = { 'i', 'c', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_11[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_12[9] = { 'i', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_13[7] = { 'i', 'v', 'i', 't', 'a', 't', 'e' }; +static const symbol s_2_14[5] = { 'i', 'c', 'i', 'v', 'e' }; +static const symbol s_2_15[5] = { 'a', 't', 'i', 'v', 'e' }; +static const symbol s_2_16[5] = { 'i', 't', 'i', 'v', 'e' }; +static const symbol s_2_17[5] = { 'i', 'c', 'a', 'l', 'i' }; +static const symbol s_2_18[5] = { 'a', 't', 'o', 'r', 'i' }; +static const symbol s_2_19[7] = { 'i', 'c', 'a', 't', 'o', 'r', 'i' }; +static const symbol s_2_20[5] = { 'i', 't', 'o', 'r', 'i' }; +static const symbol s_2_21[6] = { 0xC4, 0x83, 't', 'o', 'r', 'i' }; +static const symbol s_2_22[7] = { 'i', 'c', 'i', 't', 'a', 't', 'i' }; +static const symbol s_2_23[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'i' }; +static const symbol s_2_24[7] = { 'i', 'v', 'i', 't', 'a', 't', 'i' }; +static const symbol s_2_25[5] = { 'i', 'c', 'i', 'v', 'i' }; +static const symbol s_2_26[5] = { 'a', 't', 'i', 'v', 'i' }; +static const symbol s_2_27[5] = { 'i', 't', 'i', 'v', 'i' }; +static const symbol s_2_28[7] = { 'i', 'c', 'i', 't', 0xC4, 0x83, 'i' }; +static const symbol s_2_29[9] = { 'a', 'b', 'i', 'l', 'i', 't', 0xC4, 0x83, 'i' }; +static const symbol s_2_30[7] = { 'i', 'v', 'i', 't', 0xC4, 0x83, 'i' }; +static const symbol s_2_31[9] = { 'i', 'c', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_2_32[11] = { 'a', 'b', 'i', 'l', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_2_33[9] = { 'i', 'v', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_2_34[4] = { 'i', 'c', 'a', 'l' }; +static const symbol s_2_35[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_2_36[6] = { 'i', 'c', 'a', 't', 'o', 'r' }; +static const symbol s_2_37[4] = { 'i', 't', 'o', 'r' }; +static const symbol s_2_38[5] = { 0xC4, 0x83, 't', 'o', 'r' }; +static const symbol s_2_39[4] = { 'i', 'c', 'i', 'v' }; +static const symbol s_2_40[4] = { 'a', 't', 'i', 'v' }; +static const symbol s_2_41[4] = { 'i', 't', 'i', 'v' }; +static const symbol s_2_42[6] = { 'i', 'c', 'a', 'l', 0xC4, 0x83 }; +static const symbol s_2_43[6] = { 'i', 'c', 'i', 'v', 0xC4, 0x83 }; +static const symbol s_2_44[6] = { 'a', 't', 'i', 'v', 0xC4, 0x83 }; +static const symbol s_2_45[6] = { 'i', 't', 'i', 'v', 0xC4, 0x83 }; + +static const struct among a_2[46] = +{ +/* 0 */ { 5, s_2_0, -1, 4, 0}, +/* 1 */ { 5, s_2_1, -1, 4, 0}, +/* 2 */ { 5, s_2_2, -1, 5, 0}, +/* 3 */ { 5, s_2_3, -1, 6, 0}, +/* 4 */ { 5, s_2_4, -1, 4, 0}, +/* 5 */ { 7, s_2_5, -1, 5, 0}, +/* 6 */ { 7, s_2_6, -1, 6, 0}, +/* 7 */ { 6, s_2_7, -1, 5, 0}, +/* 8 */ { 6, s_2_8, -1, 6, 0}, +/* 9 */ { 7, s_2_9, -1, 5, 0}, +/* 10 */ { 7, s_2_10, -1, 4, 0}, +/* 11 */ { 9, s_2_11, -1, 1, 0}, +/* 12 */ { 9, s_2_12, -1, 2, 0}, +/* 13 */ { 7, s_2_13, -1, 3, 0}, +/* 14 */ { 5, s_2_14, -1, 4, 0}, +/* 15 */ { 5, s_2_15, -1, 5, 0}, +/* 16 */ { 5, s_2_16, -1, 6, 0}, +/* 17 */ { 5, s_2_17, -1, 4, 0}, +/* 18 */ { 5, s_2_18, -1, 5, 0}, +/* 19 */ { 7, s_2_19, 18, 4, 0}, +/* 20 */ { 5, s_2_20, -1, 6, 0}, +/* 21 */ { 6, s_2_21, -1, 5, 0}, +/* 22 */ { 7, s_2_22, -1, 4, 0}, +/* 23 */ { 9, s_2_23, -1, 1, 0}, +/* 24 */ { 7, s_2_24, -1, 3, 0}, +/* 25 */ { 5, s_2_25, -1, 4, 0}, +/* 26 */ { 5, s_2_26, -1, 5, 0}, +/* 27 */ { 5, s_2_27, -1, 6, 0}, +/* 28 */ { 7, s_2_28, -1, 4, 0}, +/* 29 */ { 9, s_2_29, -1, 1, 0}, +/* 30 */ { 7, s_2_30, -1, 3, 0}, +/* 31 */ { 9, s_2_31, -1, 4, 0}, +/* 32 */ { 11, s_2_32, -1, 1, 0}, +/* 33 */ { 9, s_2_33, -1, 3, 0}, +/* 34 */ { 4, s_2_34, -1, 4, 0}, +/* 35 */ { 4, s_2_35, -1, 5, 0}, +/* 36 */ { 6, s_2_36, 35, 4, 0}, +/* 37 */ { 4, s_2_37, -1, 6, 0}, +/* 38 */ { 5, s_2_38, -1, 5, 0}, +/* 39 */ { 4, s_2_39, -1, 4, 0}, +/* 40 */ { 4, s_2_40, -1, 5, 0}, +/* 41 */ { 4, s_2_41, -1, 6, 0}, +/* 42 */ { 6, s_2_42, -1, 4, 0}, +/* 43 */ { 6, s_2_43, -1, 4, 0}, +/* 44 */ { 6, s_2_44, -1, 5, 0}, +/* 45 */ { 6, s_2_45, -1, 6, 0} +}; + +static const symbol s_3_0[3] = { 'i', 'c', 'a' }; +static const symbol s_3_1[5] = { 'a', 'b', 'i', 'l', 'a' }; +static const symbol s_3_2[5] = { 'i', 'b', 'i', 'l', 'a' }; +static const symbol s_3_3[4] = { 'o', 'a', 's', 'a' }; +static const symbol s_3_4[3] = { 'a', 't', 'a' }; +static const symbol s_3_5[3] = { 'i', 't', 'a' }; +static const symbol s_3_6[4] = { 'a', 'n', 't', 'a' }; +static const symbol s_3_7[4] = { 'i', 's', 't', 'a' }; +static const symbol s_3_8[3] = { 'u', 't', 'a' }; +static const symbol s_3_9[3] = { 'i', 'v', 'a' }; +static const symbol s_3_10[2] = { 'i', 'c' }; +static const symbol s_3_11[3] = { 'i', 'c', 'e' }; +static const symbol s_3_12[5] = { 'a', 'b', 'i', 'l', 'e' }; +static const symbol s_3_13[5] = { 'i', 'b', 'i', 'l', 'e' }; +static const symbol s_3_14[4] = { 'i', 's', 'm', 'e' }; +static const symbol s_3_15[4] = { 'i', 'u', 'n', 'e' }; +static const symbol s_3_16[4] = { 'o', 'a', 's', 'e' }; +static const symbol s_3_17[3] = { 'a', 't', 'e' }; +static const symbol s_3_18[5] = { 'i', 't', 'a', 't', 'e' }; +static const symbol s_3_19[3] = { 'i', 't', 'e' }; +static const symbol s_3_20[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_3_21[4] = { 'i', 's', 't', 'e' }; +static const symbol s_3_22[3] = { 'u', 't', 'e' }; +static const symbol s_3_23[3] = { 'i', 'v', 'e' }; +static const symbol s_3_24[3] = { 'i', 'c', 'i' }; +static const symbol s_3_25[5] = { 'a', 'b', 'i', 'l', 'i' }; +static const symbol s_3_26[5] = { 'i', 'b', 'i', 'l', 'i' }; +static const symbol s_3_27[4] = { 'i', 'u', 'n', 'i' }; +static const symbol s_3_28[5] = { 'a', 't', 'o', 'r', 'i' }; +static const symbol s_3_29[3] = { 'o', 's', 'i' }; +static const symbol s_3_30[3] = { 'a', 't', 'i' }; +static const symbol s_3_31[5] = { 'i', 't', 'a', 't', 'i' }; +static const symbol s_3_32[3] = { 'i', 't', 'i' }; +static const symbol s_3_33[4] = { 'a', 'n', 't', 'i' }; +static const symbol s_3_34[4] = { 'i', 's', 't', 'i' }; +static const symbol s_3_35[3] = { 'u', 't', 'i' }; +static const symbol s_3_36[5] = { 'i', 0xC5, 0x9F, 't', 'i' }; +static const symbol s_3_37[3] = { 'i', 'v', 'i' }; +static const symbol s_3_38[5] = { 'i', 't', 0xC4, 0x83, 'i' }; +static const symbol s_3_39[4] = { 'o', 0xC5, 0x9F, 'i' }; +static const symbol s_3_40[7] = { 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_3_41[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_3_42[4] = { 'i', 'b', 'i', 'l' }; +static const symbol s_3_43[3] = { 'i', 's', 'm' }; +static const symbol s_3_44[4] = { 'a', 't', 'o', 'r' }; +static const symbol s_3_45[2] = { 'o', 's' }; +static const symbol s_3_46[2] = { 'a', 't' }; +static const symbol s_3_47[2] = { 'i', 't' }; +static const symbol s_3_48[3] = { 'a', 'n', 't' }; +static const symbol s_3_49[3] = { 'i', 's', 't' }; +static const symbol s_3_50[2] = { 'u', 't' }; +static const symbol s_3_51[2] = { 'i', 'v' }; +static const symbol s_3_52[4] = { 'i', 'c', 0xC4, 0x83 }; +static const symbol s_3_53[6] = { 'a', 'b', 'i', 'l', 0xC4, 0x83 }; +static const symbol s_3_54[6] = { 'i', 'b', 'i', 'l', 0xC4, 0x83 }; +static const symbol s_3_55[5] = { 'o', 'a', 's', 0xC4, 0x83 }; +static const symbol s_3_56[4] = { 'a', 't', 0xC4, 0x83 }; +static const symbol s_3_57[4] = { 'i', 't', 0xC4, 0x83 }; +static const symbol s_3_58[5] = { 'a', 'n', 't', 0xC4, 0x83 }; +static const symbol s_3_59[5] = { 'i', 's', 't', 0xC4, 0x83 }; +static const symbol s_3_60[4] = { 'u', 't', 0xC4, 0x83 }; +static const symbol s_3_61[4] = { 'i', 'v', 0xC4, 0x83 }; + +static const struct among a_3[62] = +{ +/* 0 */ { 3, s_3_0, -1, 1, 0}, +/* 1 */ { 5, s_3_1, -1, 1, 0}, +/* 2 */ { 5, s_3_2, -1, 1, 0}, +/* 3 */ { 4, s_3_3, -1, 1, 0}, +/* 4 */ { 3, s_3_4, -1, 1, 0}, +/* 5 */ { 3, s_3_5, -1, 1, 0}, +/* 6 */ { 4, s_3_6, -1, 1, 0}, +/* 7 */ { 4, s_3_7, -1, 3, 0}, +/* 8 */ { 3, s_3_8, -1, 1, 0}, +/* 9 */ { 3, s_3_9, -1, 1, 0}, +/* 10 */ { 2, s_3_10, -1, 1, 0}, +/* 11 */ { 3, s_3_11, -1, 1, 0}, +/* 12 */ { 5, s_3_12, -1, 1, 0}, +/* 13 */ { 5, s_3_13, -1, 1, 0}, +/* 14 */ { 4, s_3_14, -1, 3, 0}, +/* 15 */ { 4, s_3_15, -1, 2, 0}, +/* 16 */ { 4, s_3_16, -1, 1, 0}, +/* 17 */ { 3, s_3_17, -1, 1, 0}, +/* 18 */ { 5, s_3_18, 17, 1, 0}, +/* 19 */ { 3, s_3_19, -1, 1, 0}, +/* 20 */ { 4, s_3_20, -1, 1, 0}, +/* 21 */ { 4, s_3_21, -1, 3, 0}, +/* 22 */ { 3, s_3_22, -1, 1, 0}, +/* 23 */ { 3, s_3_23, -1, 1, 0}, +/* 24 */ { 3, s_3_24, -1, 1, 0}, +/* 25 */ { 5, s_3_25, -1, 1, 0}, +/* 26 */ { 5, s_3_26, -1, 1, 0}, +/* 27 */ { 4, s_3_27, -1, 2, 0}, +/* 28 */ { 5, s_3_28, -1, 1, 0}, +/* 29 */ { 3, s_3_29, -1, 1, 0}, +/* 30 */ { 3, s_3_30, -1, 1, 0}, +/* 31 */ { 5, s_3_31, 30, 1, 0}, +/* 32 */ { 3, s_3_32, -1, 1, 0}, +/* 33 */ { 4, s_3_33, -1, 1, 0}, +/* 34 */ { 4, s_3_34, -1, 3, 0}, +/* 35 */ { 3, s_3_35, -1, 1, 0}, +/* 36 */ { 5, s_3_36, -1, 3, 0}, +/* 37 */ { 3, s_3_37, -1, 1, 0}, +/* 38 */ { 5, s_3_38, -1, 1, 0}, +/* 39 */ { 4, s_3_39, -1, 1, 0}, +/* 40 */ { 7, s_3_40, -1, 1, 0}, +/* 41 */ { 4, s_3_41, -1, 1, 0}, +/* 42 */ { 4, s_3_42, -1, 1, 0}, +/* 43 */ { 3, s_3_43, -1, 3, 0}, +/* 44 */ { 4, s_3_44, -1, 1, 0}, +/* 45 */ { 2, s_3_45, -1, 1, 0}, +/* 46 */ { 2, s_3_46, -1, 1, 0}, +/* 47 */ { 2, s_3_47, -1, 1, 0}, +/* 48 */ { 3, s_3_48, -1, 1, 0}, +/* 49 */ { 3, s_3_49, -1, 3, 0}, +/* 50 */ { 2, s_3_50, -1, 1, 0}, +/* 51 */ { 2, s_3_51, -1, 1, 0}, +/* 52 */ { 4, s_3_52, -1, 1, 0}, +/* 53 */ { 6, s_3_53, -1, 1, 0}, +/* 54 */ { 6, s_3_54, -1, 1, 0}, +/* 55 */ { 5, s_3_55, -1, 1, 0}, +/* 56 */ { 4, s_3_56, -1, 1, 0}, +/* 57 */ { 4, s_3_57, -1, 1, 0}, +/* 58 */ { 5, s_3_58, -1, 1, 0}, +/* 59 */ { 5, s_3_59, -1, 3, 0}, +/* 60 */ { 4, s_3_60, -1, 1, 0}, +/* 61 */ { 4, s_3_61, -1, 1, 0} +}; + +static const symbol s_4_0[2] = { 'e', 'a' }; +static const symbol s_4_1[2] = { 'i', 'a' }; +static const symbol s_4_2[3] = { 'e', 's', 'c' }; +static const symbol s_4_3[4] = { 0xC4, 0x83, 's', 'c' }; +static const symbol s_4_4[3] = { 'i', 'n', 'd' }; +static const symbol s_4_5[4] = { 0xC3, 0xA2, 'n', 'd' }; +static const symbol s_4_6[3] = { 'a', 'r', 'e' }; +static const symbol s_4_7[3] = { 'e', 'r', 'e' }; +static const symbol s_4_8[3] = { 'i', 'r', 'e' }; +static const symbol s_4_9[4] = { 0xC3, 0xA2, 'r', 'e' }; +static const symbol s_4_10[2] = { 's', 'e' }; +static const symbol s_4_11[3] = { 'a', 's', 'e' }; +static const symbol s_4_12[4] = { 's', 'e', 's', 'e' }; +static const symbol s_4_13[3] = { 'i', 's', 'e' }; +static const symbol s_4_14[3] = { 'u', 's', 'e' }; +static const symbol s_4_15[4] = { 0xC3, 0xA2, 's', 'e' }; +static const symbol s_4_16[5] = { 'e', 0xC5, 0x9F, 't', 'e' }; +static const symbol s_4_17[6] = { 0xC4, 0x83, 0xC5, 0x9F, 't', 'e' }; +static const symbol s_4_18[3] = { 'e', 'z', 'e' }; +static const symbol s_4_19[2] = { 'a', 'i' }; +static const symbol s_4_20[3] = { 'e', 'a', 'i' }; +static const symbol s_4_21[3] = { 'i', 'a', 'i' }; +static const symbol s_4_22[3] = { 's', 'e', 'i' }; +static const symbol s_4_23[5] = { 'e', 0xC5, 0x9F, 't', 'i' }; +static const symbol s_4_24[6] = { 0xC4, 0x83, 0xC5, 0x9F, 't', 'i' }; +static const symbol s_4_25[2] = { 'u', 'i' }; +static const symbol s_4_26[3] = { 'e', 'z', 'i' }; +static const symbol s_4_27[4] = { 'a', 0xC5, 0x9F, 'i' }; +static const symbol s_4_28[5] = { 's', 'e', 0xC5, 0x9F, 'i' }; +static const symbol s_4_29[6] = { 'a', 's', 'e', 0xC5, 0x9F, 'i' }; +static const symbol s_4_30[7] = { 's', 'e', 's', 'e', 0xC5, 0x9F, 'i' }; +static const symbol s_4_31[6] = { 'i', 's', 'e', 0xC5, 0x9F, 'i' }; +static const symbol s_4_32[6] = { 'u', 's', 'e', 0xC5, 0x9F, 'i' }; +static const symbol s_4_33[7] = { 0xC3, 0xA2, 's', 'e', 0xC5, 0x9F, 'i' }; +static const symbol s_4_34[4] = { 'i', 0xC5, 0x9F, 'i' }; +static const symbol s_4_35[4] = { 'u', 0xC5, 0x9F, 'i' }; +static const symbol s_4_36[5] = { 0xC3, 0xA2, 0xC5, 0x9F, 'i' }; +static const symbol s_4_37[3] = { 0xC3, 0xA2, 'i' }; +static const symbol s_4_38[4] = { 'a', 0xC5, 0xA3, 'i' }; +static const symbol s_4_39[5] = { 'e', 'a', 0xC5, 0xA3, 'i' }; +static const symbol s_4_40[5] = { 'i', 'a', 0xC5, 0xA3, 'i' }; +static const symbol s_4_41[4] = { 'e', 0xC5, 0xA3, 'i' }; +static const symbol s_4_42[4] = { 'i', 0xC5, 0xA3, 'i' }; +static const symbol s_4_43[7] = { 'a', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_44[8] = { 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_45[9] = { 'a', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_46[10] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_47[9] = { 'i', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_48[9] = { 'u', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_49[10] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_50[7] = { 'i', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_51[7] = { 'u', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_52[8] = { 0xC3, 0xA2, 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; +static const symbol s_4_53[5] = { 0xC3, 0xA2, 0xC5, 0xA3, 'i' }; +static const symbol s_4_54[2] = { 'a', 'm' }; +static const symbol s_4_55[3] = { 'e', 'a', 'm' }; +static const symbol s_4_56[3] = { 'i', 'a', 'm' }; +static const symbol s_4_57[2] = { 'e', 'm' }; +static const symbol s_4_58[4] = { 'a', 's', 'e', 'm' }; +static const symbol s_4_59[5] = { 's', 'e', 's', 'e', 'm' }; +static const symbol s_4_60[4] = { 'i', 's', 'e', 'm' }; +static const symbol s_4_61[4] = { 'u', 's', 'e', 'm' }; +static const symbol s_4_62[5] = { 0xC3, 0xA2, 's', 'e', 'm' }; +static const symbol s_4_63[2] = { 'i', 'm' }; +static const symbol s_4_64[3] = { 0xC4, 0x83, 'm' }; +static const symbol s_4_65[5] = { 'a', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_66[6] = { 's', 'e', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_67[7] = { 'a', 's', 'e', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_68[8] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_69[7] = { 'i', 's', 'e', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_70[7] = { 'u', 's', 'e', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_71[8] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_72[5] = { 'i', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_73[5] = { 'u', 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_74[6] = { 0xC3, 0xA2, 'r', 0xC4, 0x83, 'm' }; +static const symbol s_4_75[3] = { 0xC3, 0xA2, 'm' }; +static const symbol s_4_76[2] = { 'a', 'u' }; +static const symbol s_4_77[3] = { 'e', 'a', 'u' }; +static const symbol s_4_78[3] = { 'i', 'a', 'u' }; +static const symbol s_4_79[4] = { 'i', 'n', 'd', 'u' }; +static const symbol s_4_80[5] = { 0xC3, 0xA2, 'n', 'd', 'u' }; +static const symbol s_4_81[2] = { 'e', 'z' }; +static const symbol s_4_82[6] = { 'e', 'a', 's', 'c', 0xC4, 0x83 }; +static const symbol s_4_83[4] = { 'a', 'r', 0xC4, 0x83 }; +static const symbol s_4_84[5] = { 's', 'e', 'r', 0xC4, 0x83 }; +static const symbol s_4_85[6] = { 'a', 's', 'e', 'r', 0xC4, 0x83 }; +static const symbol s_4_86[7] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83 }; +static const symbol s_4_87[6] = { 'i', 's', 'e', 'r', 0xC4, 0x83 }; +static const symbol s_4_88[6] = { 'u', 's', 'e', 'r', 0xC4, 0x83 }; +static const symbol s_4_89[7] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83 }; +static const symbol s_4_90[4] = { 'i', 'r', 0xC4, 0x83 }; +static const symbol s_4_91[4] = { 'u', 'r', 0xC4, 0x83 }; +static const symbol s_4_92[5] = { 0xC3, 0xA2, 'r', 0xC4, 0x83 }; +static const symbol s_4_93[5] = { 'e', 'a', 'z', 0xC4, 0x83 }; + +static const struct among a_4[94] = +{ +/* 0 */ { 2, s_4_0, -1, 1, 0}, +/* 1 */ { 2, s_4_1, -1, 1, 0}, +/* 2 */ { 3, s_4_2, -1, 1, 0}, +/* 3 */ { 4, s_4_3, -1, 1, 0}, +/* 4 */ { 3, s_4_4, -1, 1, 0}, +/* 5 */ { 4, s_4_5, -1, 1, 0}, +/* 6 */ { 3, s_4_6, -1, 1, 0}, +/* 7 */ { 3, s_4_7, -1, 1, 0}, +/* 8 */ { 3, s_4_8, -1, 1, 0}, +/* 9 */ { 4, s_4_9, -1, 1, 0}, +/* 10 */ { 2, s_4_10, -1, 2, 0}, +/* 11 */ { 3, s_4_11, 10, 1, 0}, +/* 12 */ { 4, s_4_12, 10, 2, 0}, +/* 13 */ { 3, s_4_13, 10, 1, 0}, +/* 14 */ { 3, s_4_14, 10, 1, 0}, +/* 15 */ { 4, s_4_15, 10, 1, 0}, +/* 16 */ { 5, s_4_16, -1, 1, 0}, +/* 17 */ { 6, s_4_17, -1, 1, 0}, +/* 18 */ { 3, s_4_18, -1, 1, 0}, +/* 19 */ { 2, s_4_19, -1, 1, 0}, +/* 20 */ { 3, s_4_20, 19, 1, 0}, +/* 21 */ { 3, s_4_21, 19, 1, 0}, +/* 22 */ { 3, s_4_22, -1, 2, 0}, +/* 23 */ { 5, s_4_23, -1, 1, 0}, +/* 24 */ { 6, s_4_24, -1, 1, 0}, +/* 25 */ { 2, s_4_25, -1, 1, 0}, +/* 26 */ { 3, s_4_26, -1, 1, 0}, +/* 27 */ { 4, s_4_27, -1, 1, 0}, +/* 28 */ { 5, s_4_28, -1, 2, 0}, +/* 29 */ { 6, s_4_29, 28, 1, 0}, +/* 30 */ { 7, s_4_30, 28, 2, 0}, +/* 31 */ { 6, s_4_31, 28, 1, 0}, +/* 32 */ { 6, s_4_32, 28, 1, 0}, +/* 33 */ { 7, s_4_33, 28, 1, 0}, +/* 34 */ { 4, s_4_34, -1, 1, 0}, +/* 35 */ { 4, s_4_35, -1, 1, 0}, +/* 36 */ { 5, s_4_36, -1, 1, 0}, +/* 37 */ { 3, s_4_37, -1, 1, 0}, +/* 38 */ { 4, s_4_38, -1, 2, 0}, +/* 39 */ { 5, s_4_39, 38, 1, 0}, +/* 40 */ { 5, s_4_40, 38, 1, 0}, +/* 41 */ { 4, s_4_41, -1, 2, 0}, +/* 42 */ { 4, s_4_42, -1, 2, 0}, +/* 43 */ { 7, s_4_43, -1, 1, 0}, +/* 44 */ { 8, s_4_44, -1, 2, 0}, +/* 45 */ { 9, s_4_45, 44, 1, 0}, +/* 46 */ { 10, s_4_46, 44, 2, 0}, +/* 47 */ { 9, s_4_47, 44, 1, 0}, +/* 48 */ { 9, s_4_48, 44, 1, 0}, +/* 49 */ { 10, s_4_49, 44, 1, 0}, +/* 50 */ { 7, s_4_50, -1, 1, 0}, +/* 51 */ { 7, s_4_51, -1, 1, 0}, +/* 52 */ { 8, s_4_52, -1, 1, 0}, +/* 53 */ { 5, s_4_53, -1, 2, 0}, +/* 54 */ { 2, s_4_54, -1, 1, 0}, +/* 55 */ { 3, s_4_55, 54, 1, 0}, +/* 56 */ { 3, s_4_56, 54, 1, 0}, +/* 57 */ { 2, s_4_57, -1, 2, 0}, +/* 58 */ { 4, s_4_58, 57, 1, 0}, +/* 59 */ { 5, s_4_59, 57, 2, 0}, +/* 60 */ { 4, s_4_60, 57, 1, 0}, +/* 61 */ { 4, s_4_61, 57, 1, 0}, +/* 62 */ { 5, s_4_62, 57, 1, 0}, +/* 63 */ { 2, s_4_63, -1, 2, 0}, +/* 64 */ { 3, s_4_64, -1, 2, 0}, +/* 65 */ { 5, s_4_65, 64, 1, 0}, +/* 66 */ { 6, s_4_66, 64, 2, 0}, +/* 67 */ { 7, s_4_67, 66, 1, 0}, +/* 68 */ { 8, s_4_68, 66, 2, 0}, +/* 69 */ { 7, s_4_69, 66, 1, 0}, +/* 70 */ { 7, s_4_70, 66, 1, 0}, +/* 71 */ { 8, s_4_71, 66, 1, 0}, +/* 72 */ { 5, s_4_72, 64, 1, 0}, +/* 73 */ { 5, s_4_73, 64, 1, 0}, +/* 74 */ { 6, s_4_74, 64, 1, 0}, +/* 75 */ { 3, s_4_75, -1, 2, 0}, +/* 76 */ { 2, s_4_76, -1, 1, 0}, +/* 77 */ { 3, s_4_77, 76, 1, 0}, +/* 78 */ { 3, s_4_78, 76, 1, 0}, +/* 79 */ { 4, s_4_79, -1, 1, 0}, +/* 80 */ { 5, s_4_80, -1, 1, 0}, +/* 81 */ { 2, s_4_81, -1, 1, 0}, +/* 82 */ { 6, s_4_82, -1, 1, 0}, +/* 83 */ { 4, s_4_83, -1, 1, 0}, +/* 84 */ { 5, s_4_84, -1, 2, 0}, +/* 85 */ { 6, s_4_85, 84, 1, 0}, +/* 86 */ { 7, s_4_86, 84, 2, 0}, +/* 87 */ { 6, s_4_87, 84, 1, 0}, +/* 88 */ { 6, s_4_88, 84, 1, 0}, +/* 89 */ { 7, s_4_89, 84, 1, 0}, +/* 90 */ { 4, s_4_90, -1, 1, 0}, +/* 91 */ { 4, s_4_91, -1, 1, 0}, +/* 92 */ { 5, s_4_92, -1, 1, 0}, +/* 93 */ { 5, s_4_93, -1, 1, 0} +}; + +static const symbol s_5_0[1] = { 'a' }; +static const symbol s_5_1[1] = { 'e' }; +static const symbol s_5_2[2] = { 'i', 'e' }; +static const symbol s_5_3[1] = { 'i' }; +static const symbol s_5_4[2] = { 0xC4, 0x83 }; + +static const struct among a_5[5] = +{ +/* 0 */ { 1, s_5_0, -1, 1, 0}, +/* 1 */ { 1, s_5_1, -1, 1, 0}, +/* 2 */ { 2, s_5_2, 1, 1, 0}, +/* 3 */ { 1, s_5_3, -1, 1, 0}, +/* 4 */ { 2, s_5_4, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4 }; + +static const symbol s_0[] = { 'u' }; +static const symbol s_1[] = { 'U' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'I' }; +static const symbol s_4[] = { 'i' }; +static const symbol s_5[] = { 'u' }; +static const symbol s_6[] = { 'a' }; +static const symbol s_7[] = { 'e' }; +static const symbol s_8[] = { 'i' }; +static const symbol s_9[] = { 'a', 'b' }; +static const symbol s_10[] = { 'i' }; +static const symbol s_11[] = { 'a', 't' }; +static const symbol s_12[] = { 'a', 0xC5, 0xA3, 'i' }; +static const symbol s_13[] = { 'a', 'b', 'i', 'l' }; +static const symbol s_14[] = { 'i', 'b', 'i', 'l' }; +static const symbol s_15[] = { 'i', 'v' }; +static const symbol s_16[] = { 'i', 'c' }; +static const symbol s_17[] = { 'a', 't' }; +static const symbol s_18[] = { 'i', 't' }; +static const symbol s_19[] = { 0xC5, 0xA3 }; +static const symbol s_20[] = { 't' }; +static const symbol s_21[] = { 'i', 's', 't' }; +static const symbol s_22[] = { 'u' }; + +static int r_prelude(struct SN_env * z) { + while(1) { /* repeat, line 32 */ + int c1 = z->c; + while(1) { /* goto, line 32 */ + int c2 = z->c; + if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab1; + z->bra = z->c; /* [, line 33 */ + { int c3 = z->c; /* or, line 33 */ + if (!(eq_s(z, 1, s_0))) goto lab3; + z->ket = z->c; /* ], line 33 */ + if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab3; + { int ret = slice_from_s(z, 1, s_1); /* <-, line 33 */ + if (ret < 0) return ret; + } + goto lab2; + lab3: + z->c = c3; + if (!(eq_s(z, 1, s_2))) goto lab1; + z->ket = z->c; /* ], line 34 */ + if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab1; + { int ret = slice_from_s(z, 1, s_3); /* <-, line 34 */ + if (ret < 0) return ret; + } + } + lab2: + z->c = c2; + break; + lab1: + z->c = c2; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* goto, line 32 */ + } + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 44 */ + { int c2 = z->c; /* or, line 46 */ + if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab2; + { int c3 = z->c; /* or, line 45 */ + if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab4; + { /* gopast */ /* grouping v, line 45 */ + int ret = out_grouping_U(z, g_v, 97, 259, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab2; + { /* gopast */ /* non v, line 45 */ + int ret = in_grouping_U(z, g_v, 97, 259, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab0; + { int c4 = z->c; /* or, line 47 */ + if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab6; + { /* gopast */ /* grouping v, line 47 */ + int ret = out_grouping_U(z, g_v, 97, 259, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab0; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 47 */ + } + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 48 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 50 */ + { /* gopast */ /* grouping v, line 51 */ + int ret = out_grouping_U(z, g_v, 97, 259, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 51 */ + int ret = in_grouping_U(z, g_v, 97, 259, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 51 */ + { /* gopast */ /* grouping v, line 52 */ + int ret = out_grouping_U(z, g_v, 97, 259, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 52 */ + int ret = in_grouping_U(z, g_v, 97, 259, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 52 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 56 */ + int c1 = z->c; + z->bra = z->c; /* [, line 58 */ + if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else + among_var = find_among(z, a_0, 3); /* substring, line 58 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 58 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_5); /* <-, line 60 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 61 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_step_0(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 73 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((266786 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_1, 16); /* substring, line 73 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 73 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 73 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 75 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_6); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_7); /* <-, line 79 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_8); /* <-, line 81 */ + if (ret < 0) return ret; + } + break; + case 5: + { int m1 = z->l - z->c; (void)m1; /* not, line 83 */ + if (!(eq_s_b(z, 2, s_9))) goto lab0; + return 0; + lab0: + z->c = z->l - m1; + } + { int ret = slice_from_s(z, 1, s_10); /* <-, line 83 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 2, s_11); /* <-, line 85 */ + if (ret < 0) return ret; + } + break; + case 7: + { int ret = slice_from_s(z, 4, s_12); /* <-, line 87 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_combo_suffix(struct SN_env * z) { + int among_var; + { int m_test = z->l - z->c; /* test, line 91 */ + z->ket = z->c; /* [, line 92 */ + among_var = find_among_b(z, a_2, 46); /* substring, line 92 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 92 */ + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 92 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 4, s_13); /* <-, line 101 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_14); /* <-, line 104 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 2, s_15); /* <-, line 107 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 2, s_16); /* <-, line 113 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 2, s_17); /* <-, line 118 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_from_s(z, 2, s_18); /* <-, line 122 */ + if (ret < 0) return ret; + } + break; + } + z->B[0] = 1; /* set standard_suffix_removed, line 125 */ + z->c = z->l - m_test; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->B[0] = 0; /* unset standard_suffix_removed, line 130 */ + while(1) { /* repeat, line 131 */ + int m1 = z->l - z->c; (void)m1; + { int ret = r_combo_suffix(z); + if (ret == 0) goto lab0; /* call combo_suffix, line 131 */ + if (ret < 0) return ret; + } + continue; + lab0: + z->c = z->l - m1; + break; + } + z->ket = z->c; /* [, line 132 */ + among_var = find_among_b(z, a_3, 62); /* substring, line 132 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 132 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 132 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 149 */ + if (ret < 0) return ret; + } + break; + case 2: + if (!(eq_s_b(z, 2, s_19))) return 0; + z->bra = z->c; /* ], line 152 */ + { int ret = slice_from_s(z, 1, s_20); /* <-, line 152 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 3, s_21); /* <-, line 156 */ + if (ret < 0) return ret; + } + break; + } + z->B[0] = 1; /* set standard_suffix_removed, line 160 */ + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 164 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 164 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 165 */ + among_var = find_among_b(z, a_4, 94); /* substring, line 165 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 165 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int m2 = z->l - z->c; (void)m2; /* or, line 200 */ + if (out_grouping_b_U(z, g_v, 97, 259, 0)) goto lab1; + goto lab0; + lab1: + z->c = z->l - m2; + if (!(eq_s_b(z, 1, s_22))) { z->lb = mlimit; return 0; } + } + lab0: + { int ret = slice_del(z); /* delete, line 200 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 214 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +static int r_vowel_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 219 */ + among_var = find_among_b(z, a_5, 5); /* substring, line 219 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 219 */ + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 219 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 220 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int romanian_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 226 */ + { int ret = r_prelude(z); + if (ret == 0) goto lab0; /* call prelude, line 226 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + { int c2 = z->c; /* do, line 227 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab1; /* call mark_regions, line 227 */ + if (ret < 0) return ret; + } + lab1: + z->c = c2; + } + z->lb = z->c; z->c = z->l; /* backwards, line 228 */ + + { int m3 = z->l - z->c; (void)m3; /* do, line 229 */ + { int ret = r_step_0(z); + if (ret == 0) goto lab2; /* call step_0, line 229 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 230 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab3; /* call standard_suffix, line 230 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 231 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 231 */ + if (!(z->B[0])) goto lab6; /* Boolean test standard_suffix_removed, line 231 */ + goto lab5; + lab6: + z->c = z->l - m6; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ + if (ret < 0) return ret; + } + } + lab5: + lab4: + z->c = z->l - m5; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 232 */ + { int ret = r_vowel_suffix(z); + if (ret == 0) goto lab7; /* call vowel_suffix, line 232 */ + if (ret < 0) return ret; + } + lab7: + z->c = z->l - m7; + } + z->c = z->lb; + { int c8 = z->c; /* do, line 234 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab8; /* call postlude, line 234 */ + if (ret < 0) return ret; + } + lab8: + z->c = c8; + } + return 1; +} + +extern struct SN_env * romanian_UTF_8_create_env(void) { return SN_create_env(0, 3, 1); } + +extern void romanian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_russian.c b/src/backend/snowball/libstemmer/stem_UTF_8_russian.c new file mode 100644 index 00000000000..fcbcc6cf464 --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_russian.c @@ -0,0 +1,694 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int russian_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_tidy_up(struct SN_env * z); +static int r_derivational(struct SN_env * z); +static int r_noun(struct SN_env * z); +static int r_verb(struct SN_env * z); +static int r_reflexive(struct SN_env * z); +static int r_adjectival(struct SN_env * z); +static int r_adjective(struct SN_env * z); +static int r_perfective_gerund(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * russian_UTF_8_create_env(void); +extern void russian_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[10] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; +static const symbol s_0_1[12] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; +static const symbol s_0_2[12] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; +static const symbol s_0_3[2] = { 0xD0, 0xB2 }; +static const symbol s_0_4[4] = { 0xD1, 0x8B, 0xD0, 0xB2 }; +static const symbol s_0_5[4] = { 0xD0, 0xB8, 0xD0, 0xB2 }; +static const symbol s_0_6[6] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; +static const symbol s_0_7[8] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; +static const symbol s_0_8[8] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; + +static const struct among a_0[9] = +{ +/* 0 */ { 10, s_0_0, -1, 1, 0}, +/* 1 */ { 12, s_0_1, 0, 2, 0}, +/* 2 */ { 12, s_0_2, 0, 2, 0}, +/* 3 */ { 2, s_0_3, -1, 1, 0}, +/* 4 */ { 4, s_0_4, 3, 2, 0}, +/* 5 */ { 4, s_0_5, 3, 2, 0}, +/* 6 */ { 6, s_0_6, -1, 1, 0}, +/* 7 */ { 8, s_0_7, 6, 2, 0}, +/* 8 */ { 8, s_0_8, 6, 2, 0} +}; + +static const symbol s_1_0[6] = { 0xD0, 0xB5, 0xD0, 0xBC, 0xD1, 0x83 }; +static const symbol s_1_1[6] = { 0xD0, 0xBE, 0xD0, 0xBC, 0xD1, 0x83 }; +static const symbol s_1_2[4] = { 0xD1, 0x8B, 0xD1, 0x85 }; +static const symbol s_1_3[4] = { 0xD0, 0xB8, 0xD1, 0x85 }; +static const symbol s_1_4[4] = { 0xD1, 0x83, 0xD1, 0x8E }; +static const symbol s_1_5[4] = { 0xD1, 0x8E, 0xD1, 0x8E }; +static const symbol s_1_6[4] = { 0xD0, 0xB5, 0xD1, 0x8E }; +static const symbol s_1_7[4] = { 0xD0, 0xBE, 0xD1, 0x8E }; +static const symbol s_1_8[4] = { 0xD1, 0x8F, 0xD1, 0x8F }; +static const symbol s_1_9[4] = { 0xD0, 0xB0, 0xD1, 0x8F }; +static const symbol s_1_10[4] = { 0xD1, 0x8B, 0xD0, 0xB5 }; +static const symbol s_1_11[4] = { 0xD0, 0xB5, 0xD0, 0xB5 }; +static const symbol s_1_12[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; +static const symbol s_1_13[4] = { 0xD0, 0xBE, 0xD0, 0xB5 }; +static const symbol s_1_14[6] = { 0xD1, 0x8B, 0xD0, 0xBC, 0xD0, 0xB8 }; +static const symbol s_1_15[6] = { 0xD0, 0xB8, 0xD0, 0xBC, 0xD0, 0xB8 }; +static const symbol s_1_16[4] = { 0xD1, 0x8B, 0xD0, 0xB9 }; +static const symbol s_1_17[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; +static const symbol s_1_18[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; +static const symbol s_1_19[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; +static const symbol s_1_20[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; +static const symbol s_1_21[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static const symbol s_1_22[4] = { 0xD0, 0xB8, 0xD0, 0xBC }; +static const symbol s_1_23[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; +static const symbol s_1_24[6] = { 0xD0, 0xB5, 0xD0, 0xB3, 0xD0, 0xBE }; +static const symbol s_1_25[6] = { 0xD0, 0xBE, 0xD0, 0xB3, 0xD0, 0xBE }; + +static const struct among a_1[26] = +{ +/* 0 */ { 6, s_1_0, -1, 1, 0}, +/* 1 */ { 6, s_1_1, -1, 1, 0}, +/* 2 */ { 4, s_1_2, -1, 1, 0}, +/* 3 */ { 4, s_1_3, -1, 1, 0}, +/* 4 */ { 4, s_1_4, -1, 1, 0}, +/* 5 */ { 4, s_1_5, -1, 1, 0}, +/* 6 */ { 4, s_1_6, -1, 1, 0}, +/* 7 */ { 4, s_1_7, -1, 1, 0}, +/* 8 */ { 4, s_1_8, -1, 1, 0}, +/* 9 */ { 4, s_1_9, -1, 1, 0}, +/* 10 */ { 4, s_1_10, -1, 1, 0}, +/* 11 */ { 4, s_1_11, -1, 1, 0}, +/* 12 */ { 4, s_1_12, -1, 1, 0}, +/* 13 */ { 4, s_1_13, -1, 1, 0}, +/* 14 */ { 6, s_1_14, -1, 1, 0}, +/* 15 */ { 6, s_1_15, -1, 1, 0}, +/* 16 */ { 4, s_1_16, -1, 1, 0}, +/* 17 */ { 4, s_1_17, -1, 1, 0}, +/* 18 */ { 4, s_1_18, -1, 1, 0}, +/* 19 */ { 4, s_1_19, -1, 1, 0}, +/* 20 */ { 4, s_1_20, -1, 1, 0}, +/* 21 */ { 4, s_1_21, -1, 1, 0}, +/* 22 */ { 4, s_1_22, -1, 1, 0}, +/* 23 */ { 4, s_1_23, -1, 1, 0}, +/* 24 */ { 6, s_1_24, -1, 1, 0}, +/* 25 */ { 6, s_1_25, -1, 1, 0} +}; + +static const symbol s_2_0[4] = { 0xD0, 0xB2, 0xD1, 0x88 }; +static const symbol s_2_1[6] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88 }; +static const symbol s_2_2[6] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88 }; +static const symbol s_2_3[2] = { 0xD1, 0x89 }; +static const symbol s_2_4[4] = { 0xD1, 0x8E, 0xD1, 0x89 }; +static const symbol s_2_5[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x89 }; +static const symbol s_2_6[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static const symbol s_2_7[4] = { 0xD0, 0xBD, 0xD0, 0xBD }; + +static const struct among a_2[8] = +{ +/* 0 */ { 4, s_2_0, -1, 1, 0}, +/* 1 */ { 6, s_2_1, 0, 2, 0}, +/* 2 */ { 6, s_2_2, 0, 2, 0}, +/* 3 */ { 2, s_2_3, -1, 1, 0}, +/* 4 */ { 4, s_2_4, 3, 1, 0}, +/* 5 */ { 6, s_2_5, 4, 2, 0}, +/* 6 */ { 4, s_2_6, -1, 1, 0}, +/* 7 */ { 4, s_2_7, -1, 1, 0} +}; + +static const symbol s_3_0[4] = { 0xD1, 0x81, 0xD1, 0x8C }; +static const symbol s_3_1[4] = { 0xD1, 0x81, 0xD1, 0x8F }; + +static const struct among a_3[2] = +{ +/* 0 */ { 4, s_3_0, -1, 1, 0}, +/* 1 */ { 4, s_3_1, -1, 1, 0} +}; + +static const symbol s_4_0[4] = { 0xD1, 0x8B, 0xD1, 0x82 }; +static const symbol s_4_1[4] = { 0xD1, 0x8E, 0xD1, 0x82 }; +static const symbol s_4_2[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x82 }; +static const symbol s_4_3[4] = { 0xD1, 0x8F, 0xD1, 0x82 }; +static const symbol s_4_4[4] = { 0xD0, 0xB5, 0xD1, 0x82 }; +static const symbol s_4_5[6] = { 0xD1, 0x83, 0xD0, 0xB5, 0xD1, 0x82 }; +static const symbol s_4_6[4] = { 0xD0, 0xB8, 0xD1, 0x82 }; +static const symbol s_4_7[4] = { 0xD0, 0xBD, 0xD1, 0x8B }; +static const symbol s_4_8[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD1, 0x8B }; +static const symbol s_4_9[4] = { 0xD1, 0x82, 0xD1, 0x8C }; +static const symbol s_4_10[6] = { 0xD1, 0x8B, 0xD1, 0x82, 0xD1, 0x8C }; +static const symbol s_4_11[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD1, 0x8C }; +static const symbol s_4_12[6] = { 0xD0, 0xB5, 0xD1, 0x88, 0xD1, 0x8C }; +static const symbol s_4_13[6] = { 0xD0, 0xB8, 0xD1, 0x88, 0xD1, 0x8C }; +static const symbol s_4_14[2] = { 0xD1, 0x8E }; +static const symbol s_4_15[4] = { 0xD1, 0x83, 0xD1, 0x8E }; +static const symbol s_4_16[4] = { 0xD0, 0xBB, 0xD0, 0xB0 }; +static const symbol s_4_17[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB0 }; +static const symbol s_4_18[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB0 }; +static const symbol s_4_19[4] = { 0xD0, 0xBD, 0xD0, 0xB0 }; +static const symbol s_4_20[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xB0 }; +static const symbol s_4_21[6] = { 0xD0, 0xB5, 0xD1, 0x82, 0xD0, 0xB5 }; +static const symbol s_4_22[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD0, 0xB5 }; +static const symbol s_4_23[6] = { 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; +static const symbol s_4_24[8] = { 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; +static const symbol s_4_25[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; +static const symbol s_4_26[4] = { 0xD0, 0xBB, 0xD0, 0xB8 }; +static const symbol s_4_27[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB8 }; +static const symbol s_4_28[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB8 }; +static const symbol s_4_29[2] = { 0xD0, 0xB9 }; +static const symbol s_4_30[4] = { 0xD1, 0x83, 0xD0, 0xB9 }; +static const symbol s_4_31[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; +static const symbol s_4_32[2] = { 0xD0, 0xBB }; +static const symbol s_4_33[4] = { 0xD1, 0x8B, 0xD0, 0xBB }; +static const symbol s_4_34[4] = { 0xD0, 0xB8, 0xD0, 0xBB }; +static const symbol s_4_35[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; +static const symbol s_4_36[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static const symbol s_4_37[4] = { 0xD0, 0xB8, 0xD0, 0xBC }; +static const symbol s_4_38[2] = { 0xD0, 0xBD }; +static const symbol s_4_39[4] = { 0xD0, 0xB5, 0xD0, 0xBD }; +static const symbol s_4_40[4] = { 0xD0, 0xBB, 0xD0, 0xBE }; +static const symbol s_4_41[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xBE }; +static const symbol s_4_42[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xBE }; +static const symbol s_4_43[4] = { 0xD0, 0xBD, 0xD0, 0xBE }; +static const symbol s_4_44[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xBE }; +static const symbol s_4_45[6] = { 0xD0, 0xBD, 0xD0, 0xBD, 0xD0, 0xBE }; + +static const struct among a_4[46] = +{ +/* 0 */ { 4, s_4_0, -1, 2, 0}, +/* 1 */ { 4, s_4_1, -1, 1, 0}, +/* 2 */ { 6, s_4_2, 1, 2, 0}, +/* 3 */ { 4, s_4_3, -1, 2, 0}, +/* 4 */ { 4, s_4_4, -1, 1, 0}, +/* 5 */ { 6, s_4_5, 4, 2, 0}, +/* 6 */ { 4, s_4_6, -1, 2, 0}, +/* 7 */ { 4, s_4_7, -1, 1, 0}, +/* 8 */ { 6, s_4_8, 7, 2, 0}, +/* 9 */ { 4, s_4_9, -1, 1, 0}, +/* 10 */ { 6, s_4_10, 9, 2, 0}, +/* 11 */ { 6, s_4_11, 9, 2, 0}, +/* 12 */ { 6, s_4_12, -1, 1, 0}, +/* 13 */ { 6, s_4_13, -1, 2, 0}, +/* 14 */ { 2, s_4_14, -1, 2, 0}, +/* 15 */ { 4, s_4_15, 14, 2, 0}, +/* 16 */ { 4, s_4_16, -1, 1, 0}, +/* 17 */ { 6, s_4_17, 16, 2, 0}, +/* 18 */ { 6, s_4_18, 16, 2, 0}, +/* 19 */ { 4, s_4_19, -1, 1, 0}, +/* 20 */ { 6, s_4_20, 19, 2, 0}, +/* 21 */ { 6, s_4_21, -1, 1, 0}, +/* 22 */ { 6, s_4_22, -1, 2, 0}, +/* 23 */ { 6, s_4_23, -1, 1, 0}, +/* 24 */ { 8, s_4_24, 23, 2, 0}, +/* 25 */ { 8, s_4_25, 23, 2, 0}, +/* 26 */ { 4, s_4_26, -1, 1, 0}, +/* 27 */ { 6, s_4_27, 26, 2, 0}, +/* 28 */ { 6, s_4_28, 26, 2, 0}, +/* 29 */ { 2, s_4_29, -1, 1, 0}, +/* 30 */ { 4, s_4_30, 29, 2, 0}, +/* 31 */ { 4, s_4_31, 29, 2, 0}, +/* 32 */ { 2, s_4_32, -1, 1, 0}, +/* 33 */ { 4, s_4_33, 32, 2, 0}, +/* 34 */ { 4, s_4_34, 32, 2, 0}, +/* 35 */ { 4, s_4_35, -1, 2, 0}, +/* 36 */ { 4, s_4_36, -1, 1, 0}, +/* 37 */ { 4, s_4_37, -1, 2, 0}, +/* 38 */ { 2, s_4_38, -1, 1, 0}, +/* 39 */ { 4, s_4_39, 38, 2, 0}, +/* 40 */ { 4, s_4_40, -1, 1, 0}, +/* 41 */ { 6, s_4_41, 40, 2, 0}, +/* 42 */ { 6, s_4_42, 40, 2, 0}, +/* 43 */ { 4, s_4_43, -1, 1, 0}, +/* 44 */ { 6, s_4_44, 43, 2, 0}, +/* 45 */ { 6, s_4_45, 43, 1, 0} +}; + +static const symbol s_5_0[2] = { 0xD1, 0x83 }; +static const symbol s_5_1[4] = { 0xD1, 0x8F, 0xD1, 0x85 }; +static const symbol s_5_2[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD1, 0x85 }; +static const symbol s_5_3[4] = { 0xD0, 0xB0, 0xD1, 0x85 }; +static const symbol s_5_4[2] = { 0xD1, 0x8B }; +static const symbol s_5_5[2] = { 0xD1, 0x8C }; +static const symbol s_5_6[2] = { 0xD1, 0x8E }; +static const symbol s_5_7[4] = { 0xD1, 0x8C, 0xD1, 0x8E }; +static const symbol s_5_8[4] = { 0xD0, 0xB8, 0xD1, 0x8E }; +static const symbol s_5_9[2] = { 0xD1, 0x8F }; +static const symbol s_5_10[4] = { 0xD1, 0x8C, 0xD1, 0x8F }; +static const symbol s_5_11[4] = { 0xD0, 0xB8, 0xD1, 0x8F }; +static const symbol s_5_12[2] = { 0xD0, 0xB0 }; +static const symbol s_5_13[4] = { 0xD0, 0xB5, 0xD0, 0xB2 }; +static const symbol s_5_14[4] = { 0xD0, 0xBE, 0xD0, 0xB2 }; +static const symbol s_5_15[2] = { 0xD0, 0xB5 }; +static const symbol s_5_16[4] = { 0xD1, 0x8C, 0xD0, 0xB5 }; +static const symbol s_5_17[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; +static const symbol s_5_18[2] = { 0xD0, 0xB8 }; +static const symbol s_5_19[4] = { 0xD0, 0xB5, 0xD0, 0xB8 }; +static const symbol s_5_20[4] = { 0xD0, 0xB8, 0xD0, 0xB8 }; +static const symbol s_5_21[6] = { 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; +static const symbol s_5_22[8] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; +static const symbol s_5_23[6] = { 0xD0, 0xB0, 0xD0, 0xBC, 0xD0, 0xB8 }; +static const symbol s_5_24[2] = { 0xD0, 0xB9 }; +static const symbol s_5_25[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; +static const symbol s_5_26[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xB9 }; +static const symbol s_5_27[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; +static const symbol s_5_28[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; +static const symbol s_5_29[4] = { 0xD1, 0x8F, 0xD0, 0xBC }; +static const symbol s_5_30[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC }; +static const symbol s_5_31[4] = { 0xD0, 0xB0, 0xD0, 0xBC }; +static const symbol s_5_32[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; +static const symbol s_5_33[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xBC }; +static const symbol s_5_34[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; +static const symbol s_5_35[2] = { 0xD0, 0xBE }; + +static const struct among a_5[36] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 6, s_5_2, 1, 1, 0}, +/* 3 */ { 4, s_5_3, -1, 1, 0}, +/* 4 */ { 2, s_5_4, -1, 1, 0}, +/* 5 */ { 2, s_5_5, -1, 1, 0}, +/* 6 */ { 2, s_5_6, -1, 1, 0}, +/* 7 */ { 4, s_5_7, 6, 1, 0}, +/* 8 */ { 4, s_5_8, 6, 1, 0}, +/* 9 */ { 2, s_5_9, -1, 1, 0}, +/* 10 */ { 4, s_5_10, 9, 1, 0}, +/* 11 */ { 4, s_5_11, 9, 1, 0}, +/* 12 */ { 2, s_5_12, -1, 1, 0}, +/* 13 */ { 4, s_5_13, -1, 1, 0}, +/* 14 */ { 4, s_5_14, -1, 1, 0}, +/* 15 */ { 2, s_5_15, -1, 1, 0}, +/* 16 */ { 4, s_5_16, 15, 1, 0}, +/* 17 */ { 4, s_5_17, 15, 1, 0}, +/* 18 */ { 2, s_5_18, -1, 1, 0}, +/* 19 */ { 4, s_5_19, 18, 1, 0}, +/* 20 */ { 4, s_5_20, 18, 1, 0}, +/* 21 */ { 6, s_5_21, 18, 1, 0}, +/* 22 */ { 8, s_5_22, 21, 1, 0}, +/* 23 */ { 6, s_5_23, 18, 1, 0}, +/* 24 */ { 2, s_5_24, -1, 1, 0}, +/* 25 */ { 4, s_5_25, 24, 1, 0}, +/* 26 */ { 6, s_5_26, 25, 1, 0}, +/* 27 */ { 4, s_5_27, 24, 1, 0}, +/* 28 */ { 4, s_5_28, 24, 1, 0}, +/* 29 */ { 4, s_5_29, -1, 1, 0}, +/* 30 */ { 6, s_5_30, 29, 1, 0}, +/* 31 */ { 4, s_5_31, -1, 1, 0}, +/* 32 */ { 4, s_5_32, -1, 1, 0}, +/* 33 */ { 6, s_5_33, 32, 1, 0}, +/* 34 */ { 4, s_5_34, -1, 1, 0}, +/* 35 */ { 2, s_5_35, -1, 1, 0} +}; + +static const symbol s_6_0[6] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82 }; +static const symbol s_6_1[8] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82, 0xD1, 0x8C }; + +static const struct among a_6[2] = +{ +/* 0 */ { 6, s_6_0, -1, 1, 0}, +/* 1 */ { 8, s_6_1, -1, 1, 0} +}; + +static const symbol s_7_0[6] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88 }; +static const symbol s_7_1[2] = { 0xD1, 0x8C }; +static const symbol s_7_2[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88, 0xD0, 0xB5 }; +static const symbol s_7_3[2] = { 0xD0, 0xBD }; + +static const struct among a_7[4] = +{ +/* 0 */ { 6, s_7_0, -1, 1, 0}, +/* 1 */ { 2, s_7_1, -1, 3, 0}, +/* 2 */ { 8, s_7_2, -1, 1, 0}, +/* 3 */ { 2, s_7_3, -1, 2, 0} +}; + +static const unsigned char g_v[] = { 33, 65, 8, 232 }; + +static const symbol s_0[] = { 0xD0, 0xB0 }; +static const symbol s_1[] = { 0xD1, 0x8F }; +static const symbol s_2[] = { 0xD0, 0xB0 }; +static const symbol s_3[] = { 0xD1, 0x8F }; +static const symbol s_4[] = { 0xD0, 0xB0 }; +static const symbol s_5[] = { 0xD1, 0x8F }; +static const symbol s_6[] = { 0xD0, 0xBD }; +static const symbol s_7[] = { 0xD0, 0xBD }; +static const symbol s_8[] = { 0xD0, 0xBD }; +static const symbol s_9[] = { 0xD0, 0xB8 }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c1 = z->c; /* do, line 61 */ + { /* gopast */ /* grouping v, line 62 */ + int ret = out_grouping_U(z, g_v, 1072, 1103, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + z->I[0] = z->c; /* setmark pV, line 62 */ + { /* gopast */ /* non v, line 62 */ + int ret = in_grouping_U(z, g_v, 1072, 1103, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* grouping v, line 63 */ + int ret = out_grouping_U(z, g_v, 1072, 1103, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + { /* gopast */ /* non v, line 63 */ + int ret = in_grouping_U(z, g_v, 1072, 1103, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + z->I[1] = z->c; /* setmark p2, line 63 */ + lab0: + z->c = c1; + } + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_perfective_gerund(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 72 */ + among_var = find_among_b(z, a_0, 9); /* substring, line 72 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 72 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 76 */ + if (!(eq_s_b(z, 2, s_0))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 2, s_1))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 76 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 83 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_adjective(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 88 */ + among_var = find_among_b(z, a_1, 26); /* substring, line 88 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 88 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 97 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_adjectival(struct SN_env * z) { + int among_var; + { int ret = r_adjective(z); + if (ret == 0) return 0; /* call adjective, line 102 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 109 */ + z->ket = z->c; /* [, line 110 */ + among_var = find_among_b(z, a_2, 8); /* substring, line 110 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 110 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab0; } + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 115 */ + if (!(eq_s_b(z, 2, s_2))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m1; + if (!(eq_s_b(z, 2, s_3))) { z->c = z->l - m_keep; goto lab0; } + } + lab1: + { int ret = slice_del(z); /* delete, line 115 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 122 */ + if (ret < 0) return ret; + } + break; + } + lab0: + ; + } + return 1; +} + +static int r_reflexive(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 129 */ + if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 140 && z->p[z->c - 1] != 143)) return 0; + among_var = find_among_b(z, a_3, 2); /* substring, line 129 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 129 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 132 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_verb(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 137 */ + among_var = find_among_b(z, a_4, 46); /* substring, line 137 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 137 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m1 = z->l - z->c; (void)m1; /* or, line 143 */ + if (!(eq_s_b(z, 2, s_4))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 2, s_5))) return 0; + } + lab0: + { int ret = slice_del(z); /* delete, line 143 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 151 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_noun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 160 */ + among_var = find_among_b(z, a_5, 36); /* substring, line 160 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 160 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 167 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_derivational(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 176 */ + if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 130 && z->p[z->c - 1] != 140)) return 0; + among_var = find_among_b(z, a_6, 2); /* substring, line 176 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 176 */ + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 176 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_tidy_up(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 184 */ + among_var = find_among_b(z, a_7, 4); /* substring, line 184 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 184 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 188 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 189 */ + if (!(eq_s_b(z, 2, s_6))) return 0; + z->bra = z->c; /* ], line 189 */ + if (!(eq_s_b(z, 2, s_7))) return 0; + { int ret = slice_del(z); /* delete, line 189 */ + if (ret < 0) return ret; + } + break; + case 2: + if (!(eq_s_b(z, 2, s_8))) return 0; + { int ret = slice_del(z); /* delete, line 192 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_del(z); /* delete, line 194 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +extern int russian_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 201 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 201 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 202 */ + + { int mlimit; /* setlimit, line 202 */ + int m2 = z->l - z->c; (void)m2; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 202 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m2; + { int m3 = z->l - z->c; (void)m3; /* do, line 203 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 204 */ + { int ret = r_perfective_gerund(z); + if (ret == 0) goto lab3; /* call perfective_gerund, line 204 */ + if (ret < 0) return ret; + } + goto lab2; + lab3: + z->c = z->l - m4; + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 205 */ + { int ret = r_reflexive(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 205 */ + if (ret < 0) return ret; + } + lab4: + ; + } + { int m5 = z->l - z->c; (void)m5; /* or, line 206 */ + { int ret = r_adjectival(z); + if (ret == 0) goto lab6; /* call adjectival, line 206 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m5; + { int ret = r_verb(z); + if (ret == 0) goto lab7; /* call verb, line 206 */ + if (ret < 0) return ret; + } + goto lab5; + lab7: + z->c = z->l - m5; + { int ret = r_noun(z); + if (ret == 0) goto lab1; /* call noun, line 206 */ + if (ret < 0) return ret; + } + } + lab5: + ; + } + lab2: + lab1: + z->c = z->l - m3; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 209 */ + z->ket = z->c; /* [, line 209 */ + if (!(eq_s_b(z, 2, s_9))) { z->c = z->l - m_keep; goto lab8; } + z->bra = z->c; /* ], line 209 */ + { int ret = slice_del(z); /* delete, line 209 */ + if (ret < 0) return ret; + } + lab8: + ; + } + { int m6 = z->l - z->c; (void)m6; /* do, line 212 */ + { int ret = r_derivational(z); + if (ret == 0) goto lab9; /* call derivational, line 212 */ + if (ret < 0) return ret; + } + lab9: + z->c = z->l - m6; + } + { int m7 = z->l - z->c; (void)m7; /* do, line 213 */ + { int ret = r_tidy_up(z); + if (ret == 0) goto lab10; /* call tidy_up, line 213 */ + if (ret < 0) return ret; + } + lab10: + z->c = z->l - m7; + } + z->lb = mlimit; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * russian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } + +extern void russian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_spanish.c b/src/backend/snowball/libstemmer/stem_UTF_8_spanish.c new file mode 100644 index 00000000000..5ac83fdc1df --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_spanish.c @@ -0,0 +1,1097 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int spanish_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_residual_suffix(struct SN_env * z); +static int r_verb_suffix(struct SN_env * z); +static int r_y_verb_suffix(struct SN_env * z); +static int r_standard_suffix(struct SN_env * z); +static int r_attached_pronoun(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int r_RV(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * spanish_UTF_8_create_env(void); +extern void spanish_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_1[2] = { 0xC3, 0xA1 }; +static const symbol s_0_2[2] = { 0xC3, 0xA9 }; +static const symbol s_0_3[2] = { 0xC3, 0xAD }; +static const symbol s_0_4[2] = { 0xC3, 0xB3 }; +static const symbol s_0_5[2] = { 0xC3, 0xBA }; + +static const struct among a_0[6] = +{ +/* 0 */ { 0, 0, -1, 6, 0}, +/* 1 */ { 2, s_0_1, 0, 1, 0}, +/* 2 */ { 2, s_0_2, 0, 2, 0}, +/* 3 */ { 2, s_0_3, 0, 3, 0}, +/* 4 */ { 2, s_0_4, 0, 4, 0}, +/* 5 */ { 2, s_0_5, 0, 5, 0} +}; + +static const symbol s_1_0[2] = { 'l', 'a' }; +static const symbol s_1_1[4] = { 's', 'e', 'l', 'a' }; +static const symbol s_1_2[2] = { 'l', 'e' }; +static const symbol s_1_3[2] = { 'm', 'e' }; +static const symbol s_1_4[2] = { 's', 'e' }; +static const symbol s_1_5[2] = { 'l', 'o' }; +static const symbol s_1_6[4] = { 's', 'e', 'l', 'o' }; +static const symbol s_1_7[3] = { 'l', 'a', 's' }; +static const symbol s_1_8[5] = { 's', 'e', 'l', 'a', 's' }; +static const symbol s_1_9[3] = { 'l', 'e', 's' }; +static const symbol s_1_10[3] = { 'l', 'o', 's' }; +static const symbol s_1_11[5] = { 's', 'e', 'l', 'o', 's' }; +static const symbol s_1_12[3] = { 'n', 'o', 's' }; + +static const struct among a_1[13] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 4, s_1_1, 0, -1, 0}, +/* 2 */ { 2, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0}, +/* 4 */ { 2, s_1_4, -1, -1, 0}, +/* 5 */ { 2, s_1_5, -1, -1, 0}, +/* 6 */ { 4, s_1_6, 5, -1, 0}, +/* 7 */ { 3, s_1_7, -1, -1, 0}, +/* 8 */ { 5, s_1_8, 7, -1, 0}, +/* 9 */ { 3, s_1_9, -1, -1, 0}, +/* 10 */ { 3, s_1_10, -1, -1, 0}, +/* 11 */ { 5, s_1_11, 10, -1, 0}, +/* 12 */ { 3, s_1_12, -1, -1, 0} +}; + +static const symbol s_2_0[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_2_1[5] = { 'i', 'e', 'n', 'd', 'o' }; +static const symbol s_2_2[5] = { 'y', 'e', 'n', 'd', 'o' }; +static const symbol s_2_3[5] = { 0xC3, 0xA1, 'n', 'd', 'o' }; +static const symbol s_2_4[6] = { 'i', 0xC3, 0xA9, 'n', 'd', 'o' }; +static const symbol s_2_5[2] = { 'a', 'r' }; +static const symbol s_2_6[2] = { 'e', 'r' }; +static const symbol s_2_7[2] = { 'i', 'r' }; +static const symbol s_2_8[3] = { 0xC3, 0xA1, 'r' }; +static const symbol s_2_9[3] = { 0xC3, 0xA9, 'r' }; +static const symbol s_2_10[3] = { 0xC3, 0xAD, 'r' }; + +static const struct among a_2[11] = +{ +/* 0 */ { 4, s_2_0, -1, 6, 0}, +/* 1 */ { 5, s_2_1, -1, 6, 0}, +/* 2 */ { 5, s_2_2, -1, 7, 0}, +/* 3 */ { 5, s_2_3, -1, 2, 0}, +/* 4 */ { 6, s_2_4, -1, 1, 0}, +/* 5 */ { 2, s_2_5, -1, 6, 0}, +/* 6 */ { 2, s_2_6, -1, 6, 0}, +/* 7 */ { 2, s_2_7, -1, 6, 0}, +/* 8 */ { 3, s_2_8, -1, 3, 0}, +/* 9 */ { 3, s_2_9, -1, 4, 0}, +/* 10 */ { 3, s_2_10, -1, 5, 0} +}; + +static const symbol s_3_0[2] = { 'i', 'c' }; +static const symbol s_3_1[2] = { 'a', 'd' }; +static const symbol s_3_2[2] = { 'o', 's' }; +static const symbol s_3_3[2] = { 'i', 'v' }; + +static const struct among a_3[4] = +{ +/* 0 */ { 2, s_3_0, -1, -1, 0}, +/* 1 */ { 2, s_3_1, -1, -1, 0}, +/* 2 */ { 2, s_3_2, -1, -1, 0}, +/* 3 */ { 2, s_3_3, -1, 1, 0} +}; + +static const symbol s_4_0[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_4_1[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_4_2[4] = { 'a', 'n', 't', 'e' }; + +static const struct among a_4[3] = +{ +/* 0 */ { 4, s_4_0, -1, 1, 0}, +/* 1 */ { 4, s_4_1, -1, 1, 0}, +/* 2 */ { 4, s_4_2, -1, 1, 0} +}; + +static const symbol s_5_0[2] = { 'i', 'c' }; +static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; +static const symbol s_5_2[2] = { 'i', 'v' }; + +static const struct among a_5[3] = +{ +/* 0 */ { 2, s_5_0, -1, 1, 0}, +/* 1 */ { 4, s_5_1, -1, 1, 0}, +/* 2 */ { 2, s_5_2, -1, 1, 0} +}; + +static const symbol s_6_0[3] = { 'i', 'c', 'a' }; +static const symbol s_6_1[5] = { 'a', 'n', 'c', 'i', 'a' }; +static const symbol s_6_2[5] = { 'e', 'n', 'c', 'i', 'a' }; +static const symbol s_6_3[5] = { 'a', 'd', 'o', 'r', 'a' }; +static const symbol s_6_4[3] = { 'o', 's', 'a' }; +static const symbol s_6_5[4] = { 'i', 's', 't', 'a' }; +static const symbol s_6_6[3] = { 'i', 'v', 'a' }; +static const symbol s_6_7[4] = { 'a', 'n', 'z', 'a' }; +static const symbol s_6_8[6] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a' }; +static const symbol s_6_9[4] = { 'i', 'd', 'a', 'd' }; +static const symbol s_6_10[4] = { 'a', 'b', 'l', 'e' }; +static const symbol s_6_11[4] = { 'i', 'b', 'l', 'e' }; +static const symbol s_6_12[4] = { 'a', 'n', 't', 'e' }; +static const symbol s_6_13[5] = { 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_14[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; +static const symbol s_6_15[6] = { 'a', 'c', 'i', 0xC3, 0xB3, 'n' }; +static const symbol s_6_16[6] = { 'u', 'c', 'i', 0xC3, 0xB3, 'n' }; +static const symbol s_6_17[3] = { 'i', 'c', 'o' }; +static const symbol s_6_18[4] = { 'i', 's', 'm', 'o' }; +static const symbol s_6_19[3] = { 'o', 's', 'o' }; +static const symbol s_6_20[7] = { 'a', 'm', 'i', 'e', 'n', 't', 'o' }; +static const symbol s_6_21[7] = { 'i', 'm', 'i', 'e', 'n', 't', 'o' }; +static const symbol s_6_22[3] = { 'i', 'v', 'o' }; +static const symbol s_6_23[4] = { 'a', 'd', 'o', 'r' }; +static const symbol s_6_24[4] = { 'i', 'c', 'a', 's' }; +static const symbol s_6_25[6] = { 'a', 'n', 'c', 'i', 'a', 's' }; +static const symbol s_6_26[6] = { 'e', 'n', 'c', 'i', 'a', 's' }; +static const symbol s_6_27[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; +static const symbol s_6_28[4] = { 'o', 's', 'a', 's' }; +static const symbol s_6_29[5] = { 'i', 's', 't', 'a', 's' }; +static const symbol s_6_30[4] = { 'i', 'v', 'a', 's' }; +static const symbol s_6_31[5] = { 'a', 'n', 'z', 'a', 's' }; +static const symbol s_6_32[7] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a', 's' }; +static const symbol s_6_33[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; +static const symbol s_6_34[5] = { 'a', 'b', 'l', 'e', 's' }; +static const symbol s_6_35[5] = { 'i', 'b', 'l', 'e', 's' }; +static const symbol s_6_36[7] = { 'a', 'c', 'i', 'o', 'n', 'e', 's' }; +static const symbol s_6_37[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; +static const symbol s_6_38[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; +static const symbol s_6_39[5] = { 'a', 'n', 't', 'e', 's' }; +static const symbol s_6_40[4] = { 'i', 'c', 'o', 's' }; +static const symbol s_6_41[5] = { 'i', 's', 'm', 'o', 's' }; +static const symbol s_6_42[4] = { 'o', 's', 'o', 's' }; +static const symbol s_6_43[8] = { 'a', 'm', 'i', 'e', 'n', 't', 'o', 's' }; +static const symbol s_6_44[8] = { 'i', 'm', 'i', 'e', 'n', 't', 'o', 's' }; +static const symbol s_6_45[4] = { 'i', 'v', 'o', 's' }; + +static const struct among a_6[46] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 5, s_6_1, -1, 2, 0}, +/* 2 */ { 5, s_6_2, -1, 5, 0}, +/* 3 */ { 5, s_6_3, -1, 2, 0}, +/* 4 */ { 3, s_6_4, -1, 1, 0}, +/* 5 */ { 4, s_6_5, -1, 1, 0}, +/* 6 */ { 3, s_6_6, -1, 9, 0}, +/* 7 */ { 4, s_6_7, -1, 1, 0}, +/* 8 */ { 6, s_6_8, -1, 3, 0}, +/* 9 */ { 4, s_6_9, -1, 8, 0}, +/* 10 */ { 4, s_6_10, -1, 1, 0}, +/* 11 */ { 4, s_6_11, -1, 1, 0}, +/* 12 */ { 4, s_6_12, -1, 2, 0}, +/* 13 */ { 5, s_6_13, -1, 7, 0}, +/* 14 */ { 6, s_6_14, 13, 6, 0}, +/* 15 */ { 6, s_6_15, -1, 2, 0}, +/* 16 */ { 6, s_6_16, -1, 4, 0}, +/* 17 */ { 3, s_6_17, -1, 1, 0}, +/* 18 */ { 4, s_6_18, -1, 1, 0}, +/* 19 */ { 3, s_6_19, -1, 1, 0}, +/* 20 */ { 7, s_6_20, -1, 1, 0}, +/* 21 */ { 7, s_6_21, -1, 1, 0}, +/* 22 */ { 3, s_6_22, -1, 9, 0}, +/* 23 */ { 4, s_6_23, -1, 2, 0}, +/* 24 */ { 4, s_6_24, -1, 1, 0}, +/* 25 */ { 6, s_6_25, -1, 2, 0}, +/* 26 */ { 6, s_6_26, -1, 5, 0}, +/* 27 */ { 6, s_6_27, -1, 2, 0}, +/* 28 */ { 4, s_6_28, -1, 1, 0}, +/* 29 */ { 5, s_6_29, -1, 1, 0}, +/* 30 */ { 4, s_6_30, -1, 9, 0}, +/* 31 */ { 5, s_6_31, -1, 1, 0}, +/* 32 */ { 7, s_6_32, -1, 3, 0}, +/* 33 */ { 6, s_6_33, -1, 8, 0}, +/* 34 */ { 5, s_6_34, -1, 1, 0}, +/* 35 */ { 5, s_6_35, -1, 1, 0}, +/* 36 */ { 7, s_6_36, -1, 2, 0}, +/* 37 */ { 7, s_6_37, -1, 4, 0}, +/* 38 */ { 6, s_6_38, -1, 2, 0}, +/* 39 */ { 5, s_6_39, -1, 2, 0}, +/* 40 */ { 4, s_6_40, -1, 1, 0}, +/* 41 */ { 5, s_6_41, -1, 1, 0}, +/* 42 */ { 4, s_6_42, -1, 1, 0}, +/* 43 */ { 8, s_6_43, -1, 1, 0}, +/* 44 */ { 8, s_6_44, -1, 1, 0}, +/* 45 */ { 4, s_6_45, -1, 9, 0} +}; + +static const symbol s_7_0[2] = { 'y', 'a' }; +static const symbol s_7_1[2] = { 'y', 'e' }; +static const symbol s_7_2[3] = { 'y', 'a', 'n' }; +static const symbol s_7_3[3] = { 'y', 'e', 'n' }; +static const symbol s_7_4[5] = { 'y', 'e', 'r', 'o', 'n' }; +static const symbol s_7_5[5] = { 'y', 'e', 'n', 'd', 'o' }; +static const symbol s_7_6[2] = { 'y', 'o' }; +static const symbol s_7_7[3] = { 'y', 'a', 's' }; +static const symbol s_7_8[3] = { 'y', 'e', 's' }; +static const symbol s_7_9[4] = { 'y', 'a', 'i', 's' }; +static const symbol s_7_10[5] = { 'y', 'a', 'm', 'o', 's' }; +static const symbol s_7_11[3] = { 'y', 0xC3, 0xB3 }; + +static const struct among a_7[12] = +{ +/* 0 */ { 2, s_7_0, -1, 1, 0}, +/* 1 */ { 2, s_7_1, -1, 1, 0}, +/* 2 */ { 3, s_7_2, -1, 1, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0}, +/* 4 */ { 5, s_7_4, -1, 1, 0}, +/* 5 */ { 5, s_7_5, -1, 1, 0}, +/* 6 */ { 2, s_7_6, -1, 1, 0}, +/* 7 */ { 3, s_7_7, -1, 1, 0}, +/* 8 */ { 3, s_7_8, -1, 1, 0}, +/* 9 */ { 4, s_7_9, -1, 1, 0}, +/* 10 */ { 5, s_7_10, -1, 1, 0}, +/* 11 */ { 3, s_7_11, -1, 1, 0} +}; + +static const symbol s_8_0[3] = { 'a', 'b', 'a' }; +static const symbol s_8_1[3] = { 'a', 'd', 'a' }; +static const symbol s_8_2[3] = { 'i', 'd', 'a' }; +static const symbol s_8_3[3] = { 'a', 'r', 'a' }; +static const symbol s_8_4[4] = { 'i', 'e', 'r', 'a' }; +static const symbol s_8_5[3] = { 0xC3, 0xAD, 'a' }; +static const symbol s_8_6[5] = { 'a', 'r', 0xC3, 0xAD, 'a' }; +static const symbol s_8_7[5] = { 'e', 'r', 0xC3, 0xAD, 'a' }; +static const symbol s_8_8[5] = { 'i', 'r', 0xC3, 0xAD, 'a' }; +static const symbol s_8_9[2] = { 'a', 'd' }; +static const symbol s_8_10[2] = { 'e', 'd' }; +static const symbol s_8_11[2] = { 'i', 'd' }; +static const symbol s_8_12[3] = { 'a', 's', 'e' }; +static const symbol s_8_13[4] = { 'i', 'e', 's', 'e' }; +static const symbol s_8_14[4] = { 'a', 's', 't', 'e' }; +static const symbol s_8_15[4] = { 'i', 's', 't', 'e' }; +static const symbol s_8_16[2] = { 'a', 'n' }; +static const symbol s_8_17[4] = { 'a', 'b', 'a', 'n' }; +static const symbol s_8_18[4] = { 'a', 'r', 'a', 'n' }; +static const symbol s_8_19[5] = { 'i', 'e', 'r', 'a', 'n' }; +static const symbol s_8_20[4] = { 0xC3, 0xAD, 'a', 'n' }; +static const symbol s_8_21[6] = { 'a', 'r', 0xC3, 0xAD, 'a', 'n' }; +static const symbol s_8_22[6] = { 'e', 'r', 0xC3, 0xAD, 'a', 'n' }; +static const symbol s_8_23[6] = { 'i', 'r', 0xC3, 0xAD, 'a', 'n' }; +static const symbol s_8_24[2] = { 'e', 'n' }; +static const symbol s_8_25[4] = { 'a', 's', 'e', 'n' }; +static const symbol s_8_26[5] = { 'i', 'e', 's', 'e', 'n' }; +static const symbol s_8_27[4] = { 'a', 'r', 'o', 'n' }; +static const symbol s_8_28[5] = { 'i', 'e', 'r', 'o', 'n' }; +static const symbol s_8_29[5] = { 'a', 'r', 0xC3, 0xA1, 'n' }; +static const symbol s_8_30[5] = { 'e', 'r', 0xC3, 0xA1, 'n' }; +static const symbol s_8_31[5] = { 'i', 'r', 0xC3, 0xA1, 'n' }; +static const symbol s_8_32[3] = { 'a', 'd', 'o' }; +static const symbol s_8_33[3] = { 'i', 'd', 'o' }; +static const symbol s_8_34[4] = { 'a', 'n', 'd', 'o' }; +static const symbol s_8_35[5] = { 'i', 'e', 'n', 'd', 'o' }; +static const symbol s_8_36[2] = { 'a', 'r' }; +static const symbol s_8_37[2] = { 'e', 'r' }; +static const symbol s_8_38[2] = { 'i', 'r' }; +static const symbol s_8_39[2] = { 'a', 's' }; +static const symbol s_8_40[4] = { 'a', 'b', 'a', 's' }; +static const symbol s_8_41[4] = { 'a', 'd', 'a', 's' }; +static const symbol s_8_42[4] = { 'i', 'd', 'a', 's' }; +static const symbol s_8_43[4] = { 'a', 'r', 'a', 's' }; +static const symbol s_8_44[5] = { 'i', 'e', 'r', 'a', 's' }; +static const symbol s_8_45[4] = { 0xC3, 0xAD, 'a', 's' }; +static const symbol s_8_46[6] = { 'a', 'r', 0xC3, 0xAD, 'a', 's' }; +static const symbol s_8_47[6] = { 'e', 'r', 0xC3, 0xAD, 'a', 's' }; +static const symbol s_8_48[6] = { 'i', 'r', 0xC3, 0xAD, 'a', 's' }; +static const symbol s_8_49[2] = { 'e', 's' }; +static const symbol s_8_50[4] = { 'a', 's', 'e', 's' }; +static const symbol s_8_51[5] = { 'i', 'e', 's', 'e', 's' }; +static const symbol s_8_52[5] = { 'a', 'b', 'a', 'i', 's' }; +static const symbol s_8_53[5] = { 'a', 'r', 'a', 'i', 's' }; +static const symbol s_8_54[6] = { 'i', 'e', 'r', 'a', 'i', 's' }; +static const symbol s_8_55[5] = { 0xC3, 0xAD, 'a', 'i', 's' }; +static const symbol s_8_56[7] = { 'a', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; +static const symbol s_8_57[7] = { 'e', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; +static const symbol s_8_58[7] = { 'i', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; +static const symbol s_8_59[5] = { 'a', 's', 'e', 'i', 's' }; +static const symbol s_8_60[6] = { 'i', 'e', 's', 'e', 'i', 's' }; +static const symbol s_8_61[6] = { 'a', 's', 't', 'e', 'i', 's' }; +static const symbol s_8_62[6] = { 'i', 's', 't', 'e', 'i', 's' }; +static const symbol s_8_63[4] = { 0xC3, 0xA1, 'i', 's' }; +static const symbol s_8_64[4] = { 0xC3, 0xA9, 'i', 's' }; +static const symbol s_8_65[6] = { 'a', 'r', 0xC3, 0xA9, 'i', 's' }; +static const symbol s_8_66[6] = { 'e', 'r', 0xC3, 0xA9, 'i', 's' }; +static const symbol s_8_67[6] = { 'i', 'r', 0xC3, 0xA9, 'i', 's' }; +static const symbol s_8_68[4] = { 'a', 'd', 'o', 's' }; +static const symbol s_8_69[4] = { 'i', 'd', 'o', 's' }; +static const symbol s_8_70[4] = { 'a', 'm', 'o', 's' }; +static const symbol s_8_71[7] = { 0xC3, 0xA1, 'b', 'a', 'm', 'o', 's' }; +static const symbol s_8_72[7] = { 0xC3, 0xA1, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_8_73[8] = { 'i', 0xC3, 0xA9, 'r', 'a', 'm', 'o', 's' }; +static const symbol s_8_74[6] = { 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_8_75[8] = { 'a', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_8_76[8] = { 'e', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_8_77[8] = { 'i', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; +static const symbol s_8_78[4] = { 'e', 'm', 'o', 's' }; +static const symbol s_8_79[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_8_80[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_8_81[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; +static const symbol s_8_82[7] = { 0xC3, 0xA1, 's', 'e', 'm', 'o', 's' }; +static const symbol s_8_83[8] = { 'i', 0xC3, 0xA9, 's', 'e', 'm', 'o', 's' }; +static const symbol s_8_84[4] = { 'i', 'm', 'o', 's' }; +static const symbol s_8_85[5] = { 'a', 'r', 0xC3, 0xA1, 's' }; +static const symbol s_8_86[5] = { 'e', 'r', 0xC3, 0xA1, 's' }; +static const symbol s_8_87[5] = { 'i', 'r', 0xC3, 0xA1, 's' }; +static const symbol s_8_88[3] = { 0xC3, 0xAD, 's' }; +static const symbol s_8_89[4] = { 'a', 'r', 0xC3, 0xA1 }; +static const symbol s_8_90[4] = { 'e', 'r', 0xC3, 0xA1 }; +static const symbol s_8_91[4] = { 'i', 'r', 0xC3, 0xA1 }; +static const symbol s_8_92[4] = { 'a', 'r', 0xC3, 0xA9 }; +static const symbol s_8_93[4] = { 'e', 'r', 0xC3, 0xA9 }; +static const symbol s_8_94[4] = { 'i', 'r', 0xC3, 0xA9 }; +static const symbol s_8_95[3] = { 'i', 0xC3, 0xB3 }; + +static const struct among a_8[96] = +{ +/* 0 */ { 3, s_8_0, -1, 2, 0}, +/* 1 */ { 3, s_8_1, -1, 2, 0}, +/* 2 */ { 3, s_8_2, -1, 2, 0}, +/* 3 */ { 3, s_8_3, -1, 2, 0}, +/* 4 */ { 4, s_8_4, -1, 2, 0}, +/* 5 */ { 3, s_8_5, -1, 2, 0}, +/* 6 */ { 5, s_8_6, 5, 2, 0}, +/* 7 */ { 5, s_8_7, 5, 2, 0}, +/* 8 */ { 5, s_8_8, 5, 2, 0}, +/* 9 */ { 2, s_8_9, -1, 2, 0}, +/* 10 */ { 2, s_8_10, -1, 2, 0}, +/* 11 */ { 2, s_8_11, -1, 2, 0}, +/* 12 */ { 3, s_8_12, -1, 2, 0}, +/* 13 */ { 4, s_8_13, -1, 2, 0}, +/* 14 */ { 4, s_8_14, -1, 2, 0}, +/* 15 */ { 4, s_8_15, -1, 2, 0}, +/* 16 */ { 2, s_8_16, -1, 2, 0}, +/* 17 */ { 4, s_8_17, 16, 2, 0}, +/* 18 */ { 4, s_8_18, 16, 2, 0}, +/* 19 */ { 5, s_8_19, 16, 2, 0}, +/* 20 */ { 4, s_8_20, 16, 2, 0}, +/* 21 */ { 6, s_8_21, 20, 2, 0}, +/* 22 */ { 6, s_8_22, 20, 2, 0}, +/* 23 */ { 6, s_8_23, 20, 2, 0}, +/* 24 */ { 2, s_8_24, -1, 1, 0}, +/* 25 */ { 4, s_8_25, 24, 2, 0}, +/* 26 */ { 5, s_8_26, 24, 2, 0}, +/* 27 */ { 4, s_8_27, -1, 2, 0}, +/* 28 */ { 5, s_8_28, -1, 2, 0}, +/* 29 */ { 5, s_8_29, -1, 2, 0}, +/* 30 */ { 5, s_8_30, -1, 2, 0}, +/* 31 */ { 5, s_8_31, -1, 2, 0}, +/* 32 */ { 3, s_8_32, -1, 2, 0}, +/* 33 */ { 3, s_8_33, -1, 2, 0}, +/* 34 */ { 4, s_8_34, -1, 2, 0}, +/* 35 */ { 5, s_8_35, -1, 2, 0}, +/* 36 */ { 2, s_8_36, -1, 2, 0}, +/* 37 */ { 2, s_8_37, -1, 2, 0}, +/* 38 */ { 2, s_8_38, -1, 2, 0}, +/* 39 */ { 2, s_8_39, -1, 2, 0}, +/* 40 */ { 4, s_8_40, 39, 2, 0}, +/* 41 */ { 4, s_8_41, 39, 2, 0}, +/* 42 */ { 4, s_8_42, 39, 2, 0}, +/* 43 */ { 4, s_8_43, 39, 2, 0}, +/* 44 */ { 5, s_8_44, 39, 2, 0}, +/* 45 */ { 4, s_8_45, 39, 2, 0}, +/* 46 */ { 6, s_8_46, 45, 2, 0}, +/* 47 */ { 6, s_8_47, 45, 2, 0}, +/* 48 */ { 6, s_8_48, 45, 2, 0}, +/* 49 */ { 2, s_8_49, -1, 1, 0}, +/* 50 */ { 4, s_8_50, 49, 2, 0}, +/* 51 */ { 5, s_8_51, 49, 2, 0}, +/* 52 */ { 5, s_8_52, -1, 2, 0}, +/* 53 */ { 5, s_8_53, -1, 2, 0}, +/* 54 */ { 6, s_8_54, -1, 2, 0}, +/* 55 */ { 5, s_8_55, -1, 2, 0}, +/* 56 */ { 7, s_8_56, 55, 2, 0}, +/* 57 */ { 7, s_8_57, 55, 2, 0}, +/* 58 */ { 7, s_8_58, 55, 2, 0}, +/* 59 */ { 5, s_8_59, -1, 2, 0}, +/* 60 */ { 6, s_8_60, -1, 2, 0}, +/* 61 */ { 6, s_8_61, -1, 2, 0}, +/* 62 */ { 6, s_8_62, -1, 2, 0}, +/* 63 */ { 4, s_8_63, -1, 2, 0}, +/* 64 */ { 4, s_8_64, -1, 1, 0}, +/* 65 */ { 6, s_8_65, 64, 2, 0}, +/* 66 */ { 6, s_8_66, 64, 2, 0}, +/* 67 */ { 6, s_8_67, 64, 2, 0}, +/* 68 */ { 4, s_8_68, -1, 2, 0}, +/* 69 */ { 4, s_8_69, -1, 2, 0}, +/* 70 */ { 4, s_8_70, -1, 2, 0}, +/* 71 */ { 7, s_8_71, 70, 2, 0}, +/* 72 */ { 7, s_8_72, 70, 2, 0}, +/* 73 */ { 8, s_8_73, 70, 2, 0}, +/* 74 */ { 6, s_8_74, 70, 2, 0}, +/* 75 */ { 8, s_8_75, 74, 2, 0}, +/* 76 */ { 8, s_8_76, 74, 2, 0}, +/* 77 */ { 8, s_8_77, 74, 2, 0}, +/* 78 */ { 4, s_8_78, -1, 1, 0}, +/* 79 */ { 6, s_8_79, 78, 2, 0}, +/* 80 */ { 6, s_8_80, 78, 2, 0}, +/* 81 */ { 6, s_8_81, 78, 2, 0}, +/* 82 */ { 7, s_8_82, 78, 2, 0}, +/* 83 */ { 8, s_8_83, 78, 2, 0}, +/* 84 */ { 4, s_8_84, -1, 2, 0}, +/* 85 */ { 5, s_8_85, -1, 2, 0}, +/* 86 */ { 5, s_8_86, -1, 2, 0}, +/* 87 */ { 5, s_8_87, -1, 2, 0}, +/* 88 */ { 3, s_8_88, -1, 2, 0}, +/* 89 */ { 4, s_8_89, -1, 2, 0}, +/* 90 */ { 4, s_8_90, -1, 2, 0}, +/* 91 */ { 4, s_8_91, -1, 2, 0}, +/* 92 */ { 4, s_8_92, -1, 2, 0}, +/* 93 */ { 4, s_8_93, -1, 2, 0}, +/* 94 */ { 4, s_8_94, -1, 2, 0}, +/* 95 */ { 3, s_8_95, -1, 2, 0} +}; + +static const symbol s_9_0[1] = { 'a' }; +static const symbol s_9_1[1] = { 'e' }; +static const symbol s_9_2[1] = { 'o' }; +static const symbol s_9_3[2] = { 'o', 's' }; +static const symbol s_9_4[2] = { 0xC3, 0xA1 }; +static const symbol s_9_5[2] = { 0xC3, 0xA9 }; +static const symbol s_9_6[2] = { 0xC3, 0xAD }; +static const symbol s_9_7[2] = { 0xC3, 0xB3 }; + +static const struct among a_9[8] = +{ +/* 0 */ { 1, s_9_0, -1, 1, 0}, +/* 1 */ { 1, s_9_1, -1, 2, 0}, +/* 2 */ { 1, s_9_2, -1, 1, 0}, +/* 3 */ { 2, s_9_3, -1, 1, 0}, +/* 4 */ { 2, s_9_4, -1, 1, 0}, +/* 5 */ { 2, s_9_5, -1, 2, 0}, +/* 6 */ { 2, s_9_6, -1, 1, 0}, +/* 7 */ { 2, s_9_7, -1, 1, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; + +static const symbol s_0[] = { 'a' }; +static const symbol s_1[] = { 'e' }; +static const symbol s_2[] = { 'i' }; +static const symbol s_3[] = { 'o' }; +static const symbol s_4[] = { 'u' }; +static const symbol s_5[] = { 'i', 'e', 'n', 'd', 'o' }; +static const symbol s_6[] = { 'a', 'n', 'd', 'o' }; +static const symbol s_7[] = { 'a', 'r' }; +static const symbol s_8[] = { 'e', 'r' }; +static const symbol s_9[] = { 'i', 'r' }; +static const symbol s_10[] = { 'u' }; +static const symbol s_11[] = { 'i', 'c' }; +static const symbol s_12[] = { 'l', 'o', 'g' }; +static const symbol s_13[] = { 'u' }; +static const symbol s_14[] = { 'e', 'n', 't', 'e' }; +static const symbol s_15[] = { 'a', 't' }; +static const symbol s_16[] = { 'a', 't' }; +static const symbol s_17[] = { 'u' }; +static const symbol s_18[] = { 'u' }; +static const symbol s_19[] = { 'g' }; +static const symbol s_20[] = { 'u' }; +static const symbol s_21[] = { 'g' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + z->I[2] = z->l; + { int c1 = z->c; /* do, line 37 */ + { int c2 = z->c; /* or, line 39 */ + if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab2; + { int c3 = z->c; /* or, line 38 */ + if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab4; + { /* gopast */ /* grouping v, line 38 */ + int ret = out_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) goto lab4; + z->c += ret; + } + goto lab3; + lab4: + z->c = c3; + if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab2; + { /* gopast */ /* non v, line 38 */ + int ret = in_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) goto lab2; + z->c += ret; + } + } + lab3: + goto lab1; + lab2: + z->c = c2; + if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab0; + { int c4 = z->c; /* or, line 40 */ + if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab6; + { /* gopast */ /* grouping v, line 40 */ + int ret = out_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) goto lab6; + z->c += ret; + } + goto lab5; + lab6: + z->c = c4; + if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab0; + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 40 */ + } + } + lab5: + ; + } + lab1: + z->I[0] = z->c; /* setmark pV, line 41 */ + lab0: + z->c = c1; + } + { int c5 = z->c; /* do, line 43 */ + { /* gopast */ /* grouping v, line 44 */ + int ret = out_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 44 */ + int ret = in_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[1] = z->c; /* setmark p1, line 44 */ + { /* gopast */ /* grouping v, line 45 */ + int ret = out_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + { /* gopast */ /* non v, line 45 */ + int ret = in_grouping_U(z, g_v, 97, 252, 1); + if (ret < 0) goto lab7; + z->c += ret; + } + z->I[2] = z->c; /* setmark p2, line 45 */ + lab7: + z->c = c5; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { + int among_var; + while(1) { /* repeat, line 49 */ + int c1 = z->c; + z->bra = z->c; /* [, line 50 */ + if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((67641858 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else + among_var = find_among(z, a_0, 6); /* substring, line 50 */ + if (!(among_var)) goto lab0; + z->ket = z->c; /* ], line 50 */ + switch(among_var) { + case 0: goto lab0; + case 1: + { int ret = slice_from_s(z, 1, s_0); /* <-, line 51 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 1, s_1); /* <-, line 52 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_2); /* <-, line 53 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_3); /* <-, line 54 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = slice_from_s(z, 1, s_4); /* <-, line 55 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab0; + z->c = ret; /* next, line 57 */ + } + break; + } + continue; + lab0: + z->c = c1; + break; + } + return 1; +} + +static int r_RV(struct SN_env * z) { + if (!(z->I[0] <= z->c)) return 0; + return 1; +} + +static int r_R1(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_R2(struct SN_env * z) { + if (!(z->I[2] <= z->c)) return 0; + return 1; +} + +static int r_attached_pronoun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 68 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((557090 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_1, 13))) return 0; /* substring, line 68 */ + z->bra = z->c; /* ], line 68 */ + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; + among_var = find_among_b(z, a_2, 11); /* substring, line 72 */ + if (!(among_var)) return 0; + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 72 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: return 0; + case 1: + z->bra = z->c; /* ], line 73 */ + { int ret = slice_from_s(z, 5, s_5); /* <-, line 73 */ + if (ret < 0) return ret; + } + break; + case 2: + z->bra = z->c; /* ], line 74 */ + { int ret = slice_from_s(z, 4, s_6); /* <-, line 74 */ + if (ret < 0) return ret; + } + break; + case 3: + z->bra = z->c; /* ], line 75 */ + { int ret = slice_from_s(z, 2, s_7); /* <-, line 75 */ + if (ret < 0) return ret; + } + break; + case 4: + z->bra = z->c; /* ], line 76 */ + { int ret = slice_from_s(z, 2, s_8); /* <-, line 76 */ + if (ret < 0) return ret; + } + break; + case 5: + z->bra = z->c; /* ], line 77 */ + { int ret = slice_from_s(z, 2, s_9); /* <-, line 77 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = slice_del(z); /* delete, line 81 */ + if (ret < 0) return ret; + } + break; + case 7: + if (!(eq_s_b(z, 1, s_10))) return 0; + { int ret = slice_del(z); /* delete, line 82 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_standard_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 87 */ + if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((835634 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + among_var = find_among_b(z, a_6, 46); /* substring, line 87 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 87 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 99 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 99 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 105 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 105 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 106 */ + z->ket = z->c; /* [, line 106 */ + if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 106 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 106 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 106 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + case 3: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 111 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 3, s_12); /* <-, line 111 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 115 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 1, s_13); /* <-, line 115 */ + if (ret < 0) return ret; + } + break; + case 5: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 119 */ + if (ret < 0) return ret; + } + { int ret = slice_from_s(z, 4, s_14); /* <-, line 119 */ + if (ret < 0) return ret; + } + break; + case 6: + { int ret = r_R1(z); + if (ret == 0) return 0; /* call R1, line 123 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 123 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 124 */ + z->ket = z->c; /* [, line 125 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } + among_var = find_among_b(z, a_3, 4); /* substring, line 125 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 125 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 125 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 125 */ + if (ret < 0) return ret; + } + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab1; } + case 1: + z->ket = z->c; /* [, line 126 */ + if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab1; } + z->bra = z->c; /* ], line 126 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 126 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 126 */ + if (ret < 0) return ret; + } + break; + } + lab1: + ; + } + break; + case 7: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 135 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 135 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 136 */ + z->ket = z->c; /* [, line 137 */ + if (z->c - 3 <= z->lb || z->p[z->c - 1] != 101) { z->c = z->l - m_keep; goto lab2; } + among_var = find_among_b(z, a_4, 3); /* substring, line 137 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } + z->bra = z->c; /* ], line 137 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab2; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 140 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 140 */ + if (ret < 0) return ret; + } + break; + } + lab2: + ; + } + break; + case 8: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 147 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 147 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 148 */ + z->ket = z->c; /* [, line 149 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } + among_var = find_among_b(z, a_5, 3); /* substring, line 149 */ + if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } + z->bra = z->c; /* ], line 149 */ + switch(among_var) { + case 0: { z->c = z->l - m_keep; goto lab3; } + case 1: + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 152 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 152 */ + if (ret < 0) return ret; + } + break; + } + lab3: + ; + } + break; + case 9: + { int ret = r_R2(z); + if (ret == 0) return 0; /* call R2, line 159 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 159 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 160 */ + z->ket = z->c; /* [, line 161 */ + if (!(eq_s_b(z, 2, s_16))) { z->c = z->l - m_keep; goto lab4; } + z->bra = z->c; /* ], line 161 */ + { int ret = r_R2(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 161 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 161 */ + if (ret < 0) return ret; + } + lab4: + ; + } + break; + } + return 1; +} + +static int r_y_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 168 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 168 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 168 */ + among_var = find_among_b(z, a_7, 12); /* substring, line 168 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 168 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + if (!(eq_s_b(z, 1, s_17))) return 0; + { int ret = slice_del(z); /* delete, line 171 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_verb_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 176 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 176 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 176 */ + among_var = find_among_b(z, a_8, 96); /* substring, line 176 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 176 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 179 */ + if (!(eq_s_b(z, 1, s_18))) { z->c = z->l - m_keep; goto lab0; } + { int m_test = z->l - z->c; /* test, line 179 */ + if (!(eq_s_b(z, 1, s_19))) { z->c = z->l - m_keep; goto lab0; } + z->c = z->l - m_test; + } + lab0: + ; + } + z->bra = z->c; /* ], line 179 */ + { int ret = slice_del(z); /* delete, line 179 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_del(z); /* delete, line 200 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_residual_suffix(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 205 */ + among_var = find_among_b(z, a_9, 8); /* substring, line 205 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 205 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 208 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 208 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = r_RV(z); + if (ret == 0) return 0; /* call RV, line 210 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 210 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 210 */ + z->ket = z->c; /* [, line 210 */ + if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } + z->bra = z->c; /* ], line 210 */ + { int m_test = z->l - z->c; /* test, line 210 */ + if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab0; } + z->c = z->l - m_test; + } + { int ret = r_RV(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 210 */ + if (ret < 0) return ret; + } + { int ret = slice_del(z); /* delete, line 210 */ + if (ret < 0) return ret; + } + lab0: + ; + } + break; + } + return 1; +} + +extern int spanish_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 216 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 216 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 217 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 218 */ + { int ret = r_attached_pronoun(z); + if (ret == 0) goto lab1; /* call attached_pronoun, line 218 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 219 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 219 */ + { int ret = r_standard_suffix(z); + if (ret == 0) goto lab4; /* call standard_suffix, line 219 */ + if (ret < 0) return ret; + } + goto lab3; + lab4: + z->c = z->l - m4; + { int ret = r_y_verb_suffix(z); + if (ret == 0) goto lab5; /* call y_verb_suffix, line 220 */ + if (ret < 0) return ret; + } + goto lab3; + lab5: + z->c = z->l - m4; + { int ret = r_verb_suffix(z); + if (ret == 0) goto lab2; /* call verb_suffix, line 221 */ + if (ret < 0) return ret; + } + } + lab3: + lab2: + z->c = z->l - m3; + } + { int m5 = z->l - z->c; (void)m5; /* do, line 223 */ + { int ret = r_residual_suffix(z); + if (ret == 0) goto lab6; /* call residual_suffix, line 223 */ + if (ret < 0) return ret; + } + lab6: + z->c = z->l - m5; + } + z->c = z->lb; + { int c6 = z->c; /* do, line 225 */ + { int ret = r_postlude(z); + if (ret == 0) goto lab7; /* call postlude, line 225 */ + if (ret < 0) return ret; + } + lab7: + z->c = c6; + } + return 1; +} + +extern struct SN_env * spanish_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } + +extern void spanish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_swedish.c b/src/backend/snowball/libstemmer/stem_UTF_8_swedish.c new file mode 100644 index 00000000000..1372cec1eeb --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_swedish.c @@ -0,0 +1,309 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int swedish_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_other_suffix(struct SN_env * z); +static int r_consonant_pair(struct SN_env * z); +static int r_main_suffix(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * swedish_UTF_8_create_env(void); +extern void swedish_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[1] = { 'a' }; +static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; +static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; +static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; +static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; +static const symbol s_0_5[2] = { 'a', 'd' }; +static const symbol s_0_6[1] = { 'e' }; +static const symbol s_0_7[3] = { 'a', 'd', 'e' }; +static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; +static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; +static const symbol s_0_10[3] = { 'a', 'r', 'e' }; +static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; +static const symbol s_0_12[2] = { 'e', 'n' }; +static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; +static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; +static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; +static const symbol s_0_16[3] = { 'e', 'r', 'n' }; +static const symbol s_0_17[2] = { 'a', 'r' }; +static const symbol s_0_18[2] = { 'e', 'r' }; +static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; +static const symbol s_0_20[2] = { 'o', 'r' }; +static const symbol s_0_21[1] = { 's' }; +static const symbol s_0_22[2] = { 'a', 's' }; +static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; +static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; +static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; +static const symbol s_0_26[2] = { 'e', 's' }; +static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; +static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; +static const symbol s_0_29[3] = { 'e', 'n', 's' }; +static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; +static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; +static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; +static const symbol s_0_33[2] = { 'a', 't' }; +static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; +static const symbol s_0_35[3] = { 'h', 'e', 't' }; +static const symbol s_0_36[3] = { 'a', 's', 't' }; + +static const struct among a_0[37] = +{ +/* 0 */ { 1, s_0_0, -1, 1, 0}, +/* 1 */ { 4, s_0_1, 0, 1, 0}, +/* 2 */ { 4, s_0_2, 0, 1, 0}, +/* 3 */ { 7, s_0_3, 2, 1, 0}, +/* 4 */ { 4, s_0_4, 0, 1, 0}, +/* 5 */ { 2, s_0_5, -1, 1, 0}, +/* 6 */ { 1, s_0_6, -1, 1, 0}, +/* 7 */ { 3, s_0_7, 6, 1, 0}, +/* 8 */ { 4, s_0_8, 6, 1, 0}, +/* 9 */ { 4, s_0_9, 6, 1, 0}, +/* 10 */ { 3, s_0_10, 6, 1, 0}, +/* 11 */ { 4, s_0_11, 6, 1, 0}, +/* 12 */ { 2, s_0_12, -1, 1, 0}, +/* 13 */ { 5, s_0_13, 12, 1, 0}, +/* 14 */ { 4, s_0_14, 12, 1, 0}, +/* 15 */ { 5, s_0_15, 12, 1, 0}, +/* 16 */ { 3, s_0_16, -1, 1, 0}, +/* 17 */ { 2, s_0_17, -1, 1, 0}, +/* 18 */ { 2, s_0_18, -1, 1, 0}, +/* 19 */ { 5, s_0_19, 18, 1, 0}, +/* 20 */ { 2, s_0_20, -1, 1, 0}, +/* 21 */ { 1, s_0_21, -1, 2, 0}, +/* 22 */ { 2, s_0_22, 21, 1, 0}, +/* 23 */ { 5, s_0_23, 22, 1, 0}, +/* 24 */ { 5, s_0_24, 22, 1, 0}, +/* 25 */ { 5, s_0_25, 22, 1, 0}, +/* 26 */ { 2, s_0_26, 21, 1, 0}, +/* 27 */ { 4, s_0_27, 26, 1, 0}, +/* 28 */ { 5, s_0_28, 26, 1, 0}, +/* 29 */ { 3, s_0_29, 21, 1, 0}, +/* 30 */ { 5, s_0_30, 29, 1, 0}, +/* 31 */ { 6, s_0_31, 29, 1, 0}, +/* 32 */ { 4, s_0_32, 21, 1, 0}, +/* 33 */ { 2, s_0_33, -1, 1, 0}, +/* 34 */ { 5, s_0_34, -1, 1, 0}, +/* 35 */ { 3, s_0_35, -1, 1, 0}, +/* 36 */ { 3, s_0_36, -1, 1, 0} +}; + +static const symbol s_1_0[2] = { 'd', 'd' }; +static const symbol s_1_1[2] = { 'g', 'd' }; +static const symbol s_1_2[2] = { 'n', 'n' }; +static const symbol s_1_3[2] = { 'd', 't' }; +static const symbol s_1_4[2] = { 'g', 't' }; +static const symbol s_1_5[2] = { 'k', 't' }; +static const symbol s_1_6[2] = { 't', 't' }; + +static const struct among a_1[7] = +{ +/* 0 */ { 2, s_1_0, -1, -1, 0}, +/* 1 */ { 2, s_1_1, -1, -1, 0}, +/* 2 */ { 2, s_1_2, -1, -1, 0}, +/* 3 */ { 2, s_1_3, -1, -1, 0}, +/* 4 */ { 2, s_1_4, -1, -1, 0}, +/* 5 */ { 2, s_1_5, -1, -1, 0}, +/* 6 */ { 2, s_1_6, -1, -1, 0} +}; + +static const symbol s_2_0[2] = { 'i', 'g' }; +static const symbol s_2_1[3] = { 'l', 'i', 'g' }; +static const symbol s_2_2[3] = { 'e', 'l', 's' }; +static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; +static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' }; + +static const struct among a_2[5] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 3, s_2_1, 0, 1, 0}, +/* 2 */ { 3, s_2_2, -1, 1, 0}, +/* 3 */ { 5, s_2_3, -1, 3, 0}, +/* 4 */ { 5, s_2_4, -1, 2, 0} +}; + +static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; + +static const unsigned char g_s_ending[] = { 119, 127, 149 }; + +static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' }; +static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; + +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + { int c_test = z->c; /* test, line 29 */ + { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); + if (ret < 0) return 0; + z->c = ret; /* hop, line 29 */ + } + z->I[1] = z->c; /* setmark x, line 29 */ + z->c = c_test; + } + if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ + { /* gopast */ /* non v, line 30 */ + int ret = in_grouping_U(z, g_v, 97, 246, 1); + if (ret < 0) return 0; + z->c += ret; + } + z->I[0] = z->c; /* setmark p1, line 30 */ + /* try, line 31 */ + if (!(z->I[0] < z->I[1])) goto lab0; + z->I[0] = z->I[1]; +lab0: + return 1; +} + +static int r_main_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 37 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 37 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 37 */ + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 37 */ + z->lb = mlimit; + } + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_del(z); /* delete, line 44 */ + if (ret < 0) return ret; + } + break; + case 2: + if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0; + { int ret = slice_del(z); /* delete, line 46 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_consonant_pair(struct SN_env * z) { + { int mlimit; /* setlimit, line 50 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 50 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* and, line 52 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ + z->c = z->l - m2; + z->ket = z->c; /* [, line 52 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) { z->lb = mlimit; return 0; } + z->c = ret; /* next, line 52 */ + } + z->bra = z->c; /* ], line 52 */ + { int ret = slice_del(z); /* delete, line 52 */ + if (ret < 0) return ret; + } + } + z->lb = mlimit; + } + return 1; +} + +static int r_other_suffix(struct SN_env * z) { + int among_var; + { int mlimit; /* setlimit, line 55 */ + int m1 = z->l - z->c; (void)m1; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 55 */ + mlimit = z->lb; z->lb = z->c; + z->c = z->l - m1; + z->ket = z->c; /* [, line 56 */ + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } + among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ + if (!(among_var)) { z->lb = mlimit; return 0; } + z->bra = z->c; /* ], line 56 */ + switch(among_var) { + case 0: { z->lb = mlimit; return 0; } + case 1: + { int ret = slice_del(z); /* delete, line 57 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 4, s_0); /* <-, line 58 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ + if (ret < 0) return ret; + } + break; + } + z->lb = mlimit; + } + return 1; +} + +extern int swedish_UTF_8_stem(struct SN_env * z) { + { int c1 = z->c; /* do, line 66 */ + { int ret = r_mark_regions(z); + if (ret == 0) goto lab0; /* call mark_regions, line 66 */ + if (ret < 0) return ret; + } + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 67 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ + { int ret = r_main_suffix(z); + if (ret == 0) goto lab1; /* call main_suffix, line 68 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ + { int ret = r_consonant_pair(z); + if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ + { int ret = r_other_suffix(z); + if (ret == 0) goto lab3; /* call other_suffix, line 70 */ + if (ret < 0) return ret; + } + lab3: + z->c = z->l - m4; + } + z->c = z->lb; + return 1; +} + +extern struct SN_env * swedish_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } + +extern void swedish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c b/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c new file mode 100644 index 00000000000..587351d126f --- /dev/null +++ b/src/backend/snowball/libstemmer/stem_UTF_8_turkish.c @@ -0,0 +1,2205 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +#ifdef __cplusplus +extern "C" { +#endif +extern int turkish_UTF_8_stem(struct SN_env * z); +#ifdef __cplusplus +} +#endif +static int r_stem_suffix_chain_before_ki(struct SN_env * z); +static int r_stem_noun_suffixes(struct SN_env * z); +static int r_stem_nominal_verb_suffixes(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_post_process_last_consonants(struct SN_env * z); +static int r_more_than_one_syllable_word(struct SN_env * z); +static int r_mark_suffix_with_optional_s_consonant(struct SN_env * z); +static int r_mark_suffix_with_optional_n_consonant(struct SN_env * z); +static int r_mark_suffix_with_optional_U_vowel(struct SN_env * z); +static int r_mark_suffix_with_optional_y_consonant(struct SN_env * z); +static int r_mark_ysA(struct SN_env * z); +static int r_mark_ymUs_(struct SN_env * z); +static int r_mark_yken(struct SN_env * z); +static int r_mark_yDU(struct SN_env * z); +static int r_mark_yUz(struct SN_env * z); +static int r_mark_yUm(struct SN_env * z); +static int r_mark_yU(struct SN_env * z); +static int r_mark_ylA(struct SN_env * z); +static int r_mark_yA(struct SN_env * z); +static int r_mark_possessives(struct SN_env * z); +static int r_mark_sUnUz(struct SN_env * z); +static int r_mark_sUn(struct SN_env * z); +static int r_mark_sU(struct SN_env * z); +static int r_mark_nUz(struct SN_env * z); +static int r_mark_nUn(struct SN_env * z); +static int r_mark_nU(struct SN_env * z); +static int r_mark_ndAn(struct SN_env * z); +static int r_mark_ndA(struct SN_env * z); +static int r_mark_ncA(struct SN_env * z); +static int r_mark_nA(struct SN_env * z); +static int r_mark_lArI(struct SN_env * z); +static int r_mark_lAr(struct SN_env * z); +static int r_mark_ki(struct SN_env * z); +static int r_mark_DUr(struct SN_env * z); +static int r_mark_DAn(struct SN_env * z); +static int r_mark_DA(struct SN_env * z); +static int r_mark_cAsInA(struct SN_env * z); +static int r_is_reserved_word(struct SN_env * z); +static int r_check_vowel_harmony(struct SN_env * z); +static int r_append_U_to_stems_ending_with_d_or_g(struct SN_env * z); +#ifdef __cplusplus +extern "C" { +#endif + + +extern struct SN_env * turkish_UTF_8_create_env(void); +extern void turkish_UTF_8_close_env(struct SN_env * z); + + +#ifdef __cplusplus +} +#endif +static const symbol s_0_0[1] = { 'm' }; +static const symbol s_0_1[1] = { 'n' }; +static const symbol s_0_2[3] = { 'm', 'i', 'z' }; +static const symbol s_0_3[3] = { 'n', 'i', 'z' }; +static const symbol s_0_4[3] = { 'm', 'u', 'z' }; +static const symbol s_0_5[3] = { 'n', 'u', 'z' }; +static const symbol s_0_6[4] = { 'm', 0xC4, 0xB1, 'z' }; +static const symbol s_0_7[4] = { 'n', 0xC4, 0xB1, 'z' }; +static const symbol s_0_8[4] = { 'm', 0xC3, 0xBC, 'z' }; +static const symbol s_0_9[4] = { 'n', 0xC3, 0xBC, 'z' }; + +static const struct among a_0[10] = +{ +/* 0 */ { 1, s_0_0, -1, -1, 0}, +/* 1 */ { 1, s_0_1, -1, -1, 0}, +/* 2 */ { 3, s_0_2, -1, -1, 0}, +/* 3 */ { 3, s_0_3, -1, -1, 0}, +/* 4 */ { 3, s_0_4, -1, -1, 0}, +/* 5 */ { 3, s_0_5, -1, -1, 0}, +/* 6 */ { 4, s_0_6, -1, -1, 0}, +/* 7 */ { 4, s_0_7, -1, -1, 0}, +/* 8 */ { 4, s_0_8, -1, -1, 0}, +/* 9 */ { 4, s_0_9, -1, -1, 0} +}; + +static const symbol s_1_0[4] = { 'l', 'e', 'r', 'i' }; +static const symbol s_1_1[5] = { 'l', 'a', 'r', 0xC4, 0xB1 }; + +static const struct among a_1[2] = +{ +/* 0 */ { 4, s_1_0, -1, -1, 0}, +/* 1 */ { 5, s_1_1, -1, -1, 0} +}; + +static const symbol s_2_0[2] = { 'n', 'i' }; +static const symbol s_2_1[2] = { 'n', 'u' }; +static const symbol s_2_2[3] = { 'n', 0xC4, 0xB1 }; +static const symbol s_2_3[3] = { 'n', 0xC3, 0xBC }; + +static const struct among a_2[4] = +{ +/* 0 */ { 2, s_2_0, -1, -1, 0}, +/* 1 */ { 2, s_2_1, -1, -1, 0}, +/* 2 */ { 3, s_2_2, -1, -1, 0}, +/* 3 */ { 3, s_2_3, -1, -1, 0} +}; + +static const symbol s_3_0[2] = { 'i', 'n' }; +static const symbol s_3_1[2] = { 'u', 'n' }; +static const symbol s_3_2[3] = { 0xC4, 0xB1, 'n' }; +static const symbol s_3_3[3] = { 0xC3, 0xBC, 'n' }; + +static const struct among a_3[4] = +{ +/* 0 */ { 2, s_3_0, -1, -1, 0}, +/* 1 */ { 2, s_3_1, -1, -1, 0}, +/* 2 */ { 3, s_3_2, -1, -1, 0}, +/* 3 */ { 3, s_3_3, -1, -1, 0} +}; + +static const symbol s_4_0[1] = { 'a' }; +static const symbol s_4_1[1] = { 'e' }; + +static const struct among a_4[2] = +{ +/* 0 */ { 1, s_4_0, -1, -1, 0}, +/* 1 */ { 1, s_4_1, -1, -1, 0} +}; + +static const symbol s_5_0[2] = { 'n', 'a' }; +static const symbol s_5_1[2] = { 'n', 'e' }; + +static const struct among a_5[2] = +{ +/* 0 */ { 2, s_5_0, -1, -1, 0}, +/* 1 */ { 2, s_5_1, -1, -1, 0} +}; + +static const symbol s_6_0[2] = { 'd', 'a' }; +static const symbol s_6_1[2] = { 't', 'a' }; +static const symbol s_6_2[2] = { 'd', 'e' }; +static const symbol s_6_3[2] = { 't', 'e' }; + +static const struct among a_6[4] = +{ +/* 0 */ { 2, s_6_0, -1, -1, 0}, +/* 1 */ { 2, s_6_1, -1, -1, 0}, +/* 2 */ { 2, s_6_2, -1, -1, 0}, +/* 3 */ { 2, s_6_3, -1, -1, 0} +}; + +static const symbol s_7_0[3] = { 'n', 'd', 'a' }; +static const symbol s_7_1[3] = { 'n', 'd', 'e' }; + +static const struct among a_7[2] = +{ +/* 0 */ { 3, s_7_0, -1, -1, 0}, +/* 1 */ { 3, s_7_1, -1, -1, 0} +}; + +static const symbol s_8_0[3] = { 'd', 'a', 'n' }; +static const symbol s_8_1[3] = { 't', 'a', 'n' }; +static const symbol s_8_2[3] = { 'd', 'e', 'n' }; +static const symbol s_8_3[3] = { 't', 'e', 'n' }; + +static const struct among a_8[4] = +{ +/* 0 */ { 3, s_8_0, -1, -1, 0}, +/* 1 */ { 3, s_8_1, -1, -1, 0}, +/* 2 */ { 3, s_8_2, -1, -1, 0}, +/* 3 */ { 3, s_8_3, -1, -1, 0} +}; + +static const symbol s_9_0[4] = { 'n', 'd', 'a', 'n' }; +static const symbol s_9_1[4] = { 'n', 'd', 'e', 'n' }; + +static const struct among a_9[2] = +{ +/* 0 */ { 4, s_9_0, -1, -1, 0}, +/* 1 */ { 4, s_9_1, -1, -1, 0} +}; + +static const symbol s_10_0[2] = { 'l', 'a' }; +static const symbol s_10_1[2] = { 'l', 'e' }; + +static const struct among a_10[2] = +{ +/* 0 */ { 2, s_10_0, -1, -1, 0}, +/* 1 */ { 2, s_10_1, -1, -1, 0} +}; + +static const symbol s_11_0[2] = { 'c', 'a' }; +static const symbol s_11_1[2] = { 'c', 'e' }; + +static const struct among a_11[2] = +{ +/* 0 */ { 2, s_11_0, -1, -1, 0}, +/* 1 */ { 2, s_11_1, -1, -1, 0} +}; + +static const symbol s_12_0[2] = { 'i', 'm' }; +static const symbol s_12_1[2] = { 'u', 'm' }; +static const symbol s_12_2[3] = { 0xC4, 0xB1, 'm' }; +static const symbol s_12_3[3] = { 0xC3, 0xBC, 'm' }; + +static const struct among a_12[4] = +{ +/* 0 */ { 2, s_12_0, -1, -1, 0}, +/* 1 */ { 2, s_12_1, -1, -1, 0}, +/* 2 */ { 3, s_12_2, -1, -1, 0}, +/* 3 */ { 3, s_12_3, -1, -1, 0} +}; + +static const symbol s_13_0[3] = { 's', 'i', 'n' }; +static const symbol s_13_1[3] = { 's', 'u', 'n' }; +static const symbol s_13_2[4] = { 's', 0xC4, 0xB1, 'n' }; +static const symbol s_13_3[4] = { 's', 0xC3, 0xBC, 'n' }; + +static const struct among a_13[4] = +{ +/* 0 */ { 3, s_13_0, -1, -1, 0}, +/* 1 */ { 3, s_13_1, -1, -1, 0}, +/* 2 */ { 4, s_13_2, -1, -1, 0}, +/* 3 */ { 4, s_13_3, -1, -1, 0} +}; + +static const symbol s_14_0[2] = { 'i', 'z' }; +static const symbol s_14_1[2] = { 'u', 'z' }; +static const symbol s_14_2[3] = { 0xC4, 0xB1, 'z' }; +static const symbol s_14_3[3] = { 0xC3, 0xBC, 'z' }; + +static const struct among a_14[4] = +{ +/* 0 */ { 2, s_14_0, -1, -1, 0}, +/* 1 */ { 2, s_14_1, -1, -1, 0}, +/* 2 */ { 3, s_14_2, -1, -1, 0}, +/* 3 */ { 3, s_14_3, -1, -1, 0} +}; + +static const symbol s_15_0[5] = { 's', 'i', 'n', 'i', 'z' }; +static const symbol s_15_1[5] = { 's', 'u', 'n', 'u', 'z' }; +static const symbol s_15_2[7] = { 's', 0xC4, 0xB1, 'n', 0xC4, 0xB1, 'z' }; +static const symbol s_15_3[7] = { 's', 0xC3, 0xBC, 'n', 0xC3, 0xBC, 'z' }; + +static const struct among a_15[4] = +{ +/* 0 */ { 5, s_15_0, -1, -1, 0}, +/* 1 */ { 5, s_15_1, -1, -1, 0}, +/* 2 */ { 7, s_15_2, -1, -1, 0}, +/* 3 */ { 7, s_15_3, -1, -1, 0} +}; + +static const symbol s_16_0[3] = { 'l', 'a', 'r' }; +static const symbol s_16_1[3] = { 'l', 'e', 'r' }; + +static const struct among a_16[2] = +{ +/* 0 */ { 3, s_16_0, -1, -1, 0}, +/* 1 */ { 3, s_16_1, -1, -1, 0} +}; + +static const symbol s_17_0[3] = { 'n', 'i', 'z' }; +static const symbol s_17_1[3] = { 'n', 'u', 'z' }; +static const symbol s_17_2[4] = { 'n', 0xC4, 0xB1, 'z' }; +static const symbol s_17_3[4] = { 'n', 0xC3, 0xBC, 'z' }; + +static const struct among a_17[4] = +{ +/* 0 */ { 3, s_17_0, -1, -1, 0}, +/* 1 */ { 3, s_17_1, -1, -1, 0}, +/* 2 */ { 4, s_17_2, -1, -1, 0}, +/* 3 */ { 4, s_17_3, -1, -1, 0} +}; + +static const symbol s_18_0[3] = { 'd', 'i', 'r' }; +static const symbol s_18_1[3] = { 't', 'i', 'r' }; +static const symbol s_18_2[3] = { 'd', 'u', 'r' }; +static const symbol s_18_3[3] = { 't', 'u', 'r' }; +static const symbol s_18_4[4] = { 'd', 0xC4, 0xB1, 'r' }; +static const symbol s_18_5[4] = { 't', 0xC4, 0xB1, 'r' }; +static const symbol s_18_6[4] = { 'd', 0xC3, 0xBC, 'r' }; +static const symbol s_18_7[4] = { 't', 0xC3, 0xBC, 'r' }; + +static const struct among a_18[8] = +{ +/* 0 */ { 3, s_18_0, -1, -1, 0}, +/* 1 */ { 3, s_18_1, -1, -1, 0}, +/* 2 */ { 3, s_18_2, -1, -1, 0}, +/* 3 */ { 3, s_18_3, -1, -1, 0}, +/* 4 */ { 4, s_18_4, -1, -1, 0}, +/* 5 */ { 4, s_18_5, -1, -1, 0}, +/* 6 */ { 4, s_18_6, -1, -1, 0}, +/* 7 */ { 4, s_18_7, -1, -1, 0} +}; + +static const symbol s_19_0[7] = { 'c', 'a', 's', 0xC4, 0xB1, 'n', 'a' }; +static const symbol s_19_1[6] = { 'c', 'e', 's', 'i', 'n', 'e' }; + +static const struct among a_19[2] = +{ +/* 0 */ { 7, s_19_0, -1, -1, 0}, +/* 1 */ { 6, s_19_1, -1, -1, 0} +}; + +static const symbol s_20_0[2] = { 'd', 'i' }; +static const symbol s_20_1[2] = { 't', 'i' }; +static const symbol s_20_2[3] = { 'd', 'i', 'k' }; +static const symbol s_20_3[3] = { 't', 'i', 'k' }; +static const symbol s_20_4[3] = { 'd', 'u', 'k' }; +static const symbol s_20_5[3] = { 't', 'u', 'k' }; +static const symbol s_20_6[4] = { 'd', 0xC4, 0xB1, 'k' }; +static const symbol s_20_7[4] = { 't', 0xC4, 0xB1, 'k' }; +static const symbol s_20_8[4] = { 'd', 0xC3, 0xBC, 'k' }; +static const symbol s_20_9[4] = { 't', 0xC3, 0xBC, 'k' }; +static const symbol s_20_10[3] = { 'd', 'i', 'm' }; +static const symbol s_20_11[3] = { 't', 'i', 'm' }; +static const symbol s_20_12[3] = { 'd', 'u', 'm' }; +static const symbol s_20_13[3] = { 't', 'u', 'm' }; +static const symbol s_20_14[4] = { 'd', 0xC4, 0xB1, 'm' }; +static const symbol s_20_15[4] = { 't', 0xC4, 0xB1, 'm' }; +static const symbol s_20_16[4] = { 'd', 0xC3, 0xBC, 'm' }; +static const symbol s_20_17[4] = { 't', 0xC3, 0xBC, 'm' }; +static const symbol s_20_18[3] = { 'd', 'i', 'n' }; +static const symbol s_20_19[3] = { 't', 'i', 'n' }; +static const symbol s_20_20[3] = { 'd', 'u', 'n' }; +static const symbol s_20_21[3] = { 't', 'u', 'n' }; +static const symbol s_20_22[4] = { 'd', 0xC4, 0xB1, 'n' }; +static const symbol s_20_23[4] = { 't', 0xC4, 0xB1, 'n' }; +static const symbol s_20_24[4] = { 'd', 0xC3, 0xBC, 'n' }; +static const symbol s_20_25[4] = { 't', 0xC3, 0xBC, 'n' }; +static const symbol s_20_26[2] = { 'd', 'u' }; +static const symbol s_20_27[2] = { 't', 'u' }; +static const symbol s_20_28[3] = { 'd', 0xC4, 0xB1 }; +static const symbol s_20_29[3] = { 't', 0xC4, 0xB1 }; +static const symbol s_20_30[3] = { 'd', 0xC3, 0xBC }; +static const symbol s_20_31[3] = { 't', 0xC3, 0xBC }; + +static const struct among a_20[32] = +{ +/* 0 */ { 2, s_20_0, -1, -1, 0}, +/* 1 */ { 2, s_20_1, -1, -1, 0}, +/* 2 */ { 3, s_20_2, -1, -1, 0}, +/* 3 */ { 3, s_20_3, -1, -1, 0}, +/* 4 */ { 3, s_20_4, -1, -1, 0}, +/* 5 */ { 3, s_20_5, -1, -1, 0}, +/* 6 */ { 4, s_20_6, -1, -1, 0}, +/* 7 */ { 4, s_20_7, -1, -1, 0}, +/* 8 */ { 4, s_20_8, -1, -1, 0}, +/* 9 */ { 4, s_20_9, -1, -1, 0}, +/* 10 */ { 3, s_20_10, -1, -1, 0}, +/* 11 */ { 3, s_20_11, -1, -1, 0}, +/* 12 */ { 3, s_20_12, -1, -1, 0}, +/* 13 */ { 3, s_20_13, -1, -1, 0}, +/* 14 */ { 4, s_20_14, -1, -1, 0}, +/* 15 */ { 4, s_20_15, -1, -1, 0}, +/* 16 */ { 4, s_20_16, -1, -1, 0}, +/* 17 */ { 4, s_20_17, -1, -1, 0}, +/* 18 */ { 3, s_20_18, -1, -1, 0}, +/* 19 */ { 3, s_20_19, -1, -1, 0}, +/* 20 */ { 3, s_20_20, -1, -1, 0}, +/* 21 */ { 3, s_20_21, -1, -1, 0}, +/* 22 */ { 4, s_20_22, -1, -1, 0}, +/* 23 */ { 4, s_20_23, -1, -1, 0}, +/* 24 */ { 4, s_20_24, -1, -1, 0}, +/* 25 */ { 4, s_20_25, -1, -1, 0}, +/* 26 */ { 2, s_20_26, -1, -1, 0}, +/* 27 */ { 2, s_20_27, -1, -1, 0}, +/* 28 */ { 3, s_20_28, -1, -1, 0}, +/* 29 */ { 3, s_20_29, -1, -1, 0}, +/* 30 */ { 3, s_20_30, -1, -1, 0}, +/* 31 */ { 3, s_20_31, -1, -1, 0} +}; + +static const symbol s_21_0[2] = { 's', 'a' }; +static const symbol s_21_1[2] = { 's', 'e' }; +static const symbol s_21_2[3] = { 's', 'a', 'k' }; +static const symbol s_21_3[3] = { 's', 'e', 'k' }; +static const symbol s_21_4[3] = { 's', 'a', 'm' }; +static const symbol s_21_5[3] = { 's', 'e', 'm' }; +static const symbol s_21_6[3] = { 's', 'a', 'n' }; +static const symbol s_21_7[3] = { 's', 'e', 'n' }; + +static const struct among a_21[8] = +{ +/* 0 */ { 2, s_21_0, -1, -1, 0}, +/* 1 */ { 2, s_21_1, -1, -1, 0}, +/* 2 */ { 3, s_21_2, -1, -1, 0}, +/* 3 */ { 3, s_21_3, -1, -1, 0}, +/* 4 */ { 3, s_21_4, -1, -1, 0}, +/* 5 */ { 3, s_21_5, -1, -1, 0}, +/* 6 */ { 3, s_21_6, -1, -1, 0}, +/* 7 */ { 3, s_21_7, -1, -1, 0} +}; + +static const symbol s_22_0[4] = { 'm', 'i', 0xC5, 0x9F }; +static const symbol s_22_1[4] = { 'm', 'u', 0xC5, 0x9F }; +static const symbol s_22_2[5] = { 'm', 0xC4, 0xB1, 0xC5, 0x9F }; +static const symbol s_22_3[5] = { 'm', 0xC3, 0xBC, 0xC5, 0x9F }; + +static const struct among a_22[4] = +{ +/* 0 */ { 4, s_22_0, -1, -1, 0}, +/* 1 */ { 4, s_22_1, -1, -1, 0}, +/* 2 */ { 5, s_22_2, -1, -1, 0}, +/* 3 */ { 5, s_22_3, -1, -1, 0} +}; + +static const symbol s_23_0[1] = { 'b' }; +static const symbol s_23_1[1] = { 'c' }; +static const symbol s_23_2[1] = { 'd' }; +static const symbol s_23_3[2] = { 0xC4, 0x9F }; + +static const struct among a_23[4] = +{ +/* 0 */ { 1, s_23_0, -1, 1, 0}, +/* 1 */ { 1, s_23_1, -1, 2, 0}, +/* 2 */ { 1, s_23_2, -1, 3, 0}, +/* 3 */ { 2, s_23_3, -1, 4, 0} +}; + +static const unsigned char g_vowel[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 8, 0, 0, 0, 0, 0, 0, 1 }; + +static const unsigned char g_U[] = { 1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 1 }; + +static const unsigned char g_vowel1[] = { 1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + +static const unsigned char g_vowel2[] = { 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 }; + +static const unsigned char g_vowel3[] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + +static const unsigned char g_vowel4[] = { 17 }; + +static const unsigned char g_vowel5[] = { 65 }; + +static const unsigned char g_vowel6[] = { 65 }; + +static const symbol s_0[] = { 'a' }; +static const symbol s_1[] = { 'e' }; +static const symbol s_2[] = { 0xC4, 0xB1 }; +static const symbol s_3[] = { 'i' }; +static const symbol s_4[] = { 'o' }; +static const symbol s_5[] = { 0xC3, 0xB6 }; +static const symbol s_6[] = { 'u' }; +static const symbol s_7[] = { 0xC3, 0xBC }; +static const symbol s_8[] = { 'n' }; +static const symbol s_9[] = { 'n' }; +static const symbol s_10[] = { 's' }; +static const symbol s_11[] = { 's' }; +static const symbol s_12[] = { 'y' }; +static const symbol s_13[] = { 'y' }; +static const symbol s_14[] = { 'k', 'i' }; +static const symbol s_15[] = { 'k', 'e', 'n' }; +static const symbol s_16[] = { 'p' }; +static const symbol s_17[] = { 0xC3, 0xA7 }; +static const symbol s_18[] = { 't' }; +static const symbol s_19[] = { 'k' }; +static const symbol s_20[] = { 'd' }; +static const symbol s_21[] = { 'g' }; +static const symbol s_22[] = { 'a' }; +static const symbol s_23[] = { 0xC4, 0xB1 }; +static const symbol s_24[] = { 0xC4, 0xB1 }; +static const symbol s_25[] = { 'e' }; +static const symbol s_26[] = { 'i' }; +static const symbol s_27[] = { 'i' }; +static const symbol s_28[] = { 'o' }; +static const symbol s_29[] = { 'u' }; +static const symbol s_30[] = { 'u' }; +static const symbol s_31[] = { 0xC3, 0xB6 }; +static const symbol s_32[] = { 0xC3, 0xBC }; +static const symbol s_33[] = { 0xC3, 0xBC }; +static const symbol s_34[] = { 'a', 'd' }; +static const symbol s_35[] = { 's', 'o', 'y', 'a', 'd' }; + +static int r_check_vowel_harmony(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 112 */ + if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0; /* goto */ /* grouping vowel, line 114 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 116 */ + if (!(eq_s_b(z, 1, s_0))) goto lab1; + if (out_grouping_b_U(z, g_vowel1, 97, 305, 1) < 0) goto lab1; /* goto */ /* grouping vowel1, line 116 */ + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_1))) goto lab2; + if (out_grouping_b_U(z, g_vowel2, 101, 252, 1) < 0) goto lab2; /* goto */ /* grouping vowel2, line 117 */ + goto lab0; + lab2: + z->c = z->l - m1; + if (!(eq_s_b(z, 2, s_2))) goto lab3; + if (out_grouping_b_U(z, g_vowel3, 97, 305, 1) < 0) goto lab3; /* goto */ /* grouping vowel3, line 118 */ + goto lab0; + lab3: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_3))) goto lab4; + if (out_grouping_b_U(z, g_vowel4, 101, 105, 1) < 0) goto lab4; /* goto */ /* grouping vowel4, line 119 */ + goto lab0; + lab4: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_4))) goto lab5; + if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab5; /* goto */ /* grouping vowel5, line 120 */ + goto lab0; + lab5: + z->c = z->l - m1; + if (!(eq_s_b(z, 2, s_5))) goto lab6; + if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) goto lab6; /* goto */ /* grouping vowel6, line 121 */ + goto lab0; + lab6: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_6))) goto lab7; + if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab7; /* goto */ /* grouping vowel5, line 122 */ + goto lab0; + lab7: + z->c = z->l - m1; + if (!(eq_s_b(z, 2, s_7))) return 0; + if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) return 0; /* goto */ /* grouping vowel6, line 123 */ + } + lab0: + z->c = z->l - m_test; + } + return 1; +} + +static int r_mark_suffix_with_optional_n_consonant(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 134 */ + { int m_test = z->l - z->c; /* test, line 133 */ + if (!(eq_s_b(z, 1, s_8))) goto lab1; + z->c = z->l - m_test; + } + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) goto lab1; + z->c = ret; /* next, line 133 */ + } + { int m_test = z->l - z->c; /* test, line 133 */ + if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; + z->c = z->l - m_test; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* not, line 135 */ + { int m_test = z->l - z->c; /* test, line 135 */ + if (!(eq_s_b(z, 1, s_9))) goto lab2; + z->c = z->l - m_test; + } + return 0; + lab2: + z->c = z->l - m2; + } + { int m_test = z->l - z->c; /* test, line 135 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 135 */ + } + { int m_test = z->l - z->c; /* test, line 135 */ + if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; + z->c = z->l - m_test; + } + z->c = z->l - m_test; + } + } +lab0: + return 1; +} + +static int r_mark_suffix_with_optional_s_consonant(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ + { int m_test = z->l - z->c; /* test, line 144 */ + if (!(eq_s_b(z, 1, s_10))) goto lab1; + z->c = z->l - m_test; + } + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) goto lab1; + z->c = ret; /* next, line 144 */ + } + { int m_test = z->l - z->c; /* test, line 144 */ + if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; + z->c = z->l - m_test; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ + { int m_test = z->l - z->c; /* test, line 146 */ + if (!(eq_s_b(z, 1, s_11))) goto lab2; + z->c = z->l - m_test; + } + return 0; + lab2: + z->c = z->l - m2; + } + { int m_test = z->l - z->c; /* test, line 146 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 146 */ + } + { int m_test = z->l - z->c; /* test, line 146 */ + if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; + z->c = z->l - m_test; + } + z->c = z->l - m_test; + } + } +lab0: + return 1; +} + +static int r_mark_suffix_with_optional_y_consonant(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 155 */ + { int m_test = z->l - z->c; /* test, line 154 */ + if (!(eq_s_b(z, 1, s_12))) goto lab1; + z->c = z->l - m_test; + } + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) goto lab1; + z->c = ret; /* next, line 154 */ + } + { int m_test = z->l - z->c; /* test, line 154 */ + if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; + z->c = z->l - m_test; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* not, line 156 */ + { int m_test = z->l - z->c; /* test, line 156 */ + if (!(eq_s_b(z, 1, s_13))) goto lab2; + z->c = z->l - m_test; + } + return 0; + lab2: + z->c = z->l - m2; + } + { int m_test = z->l - z->c; /* test, line 156 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 156 */ + } + { int m_test = z->l - z->c; /* test, line 156 */ + if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; + z->c = z->l - m_test; + } + z->c = z->l - m_test; + } + } +lab0: + return 1; +} + +static int r_mark_suffix_with_optional_U_vowel(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 161 */ + { int m_test = z->l - z->c; /* test, line 160 */ + if (in_grouping_b_U(z, g_U, 105, 305, 0)) goto lab1; + z->c = z->l - m_test; + } + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) goto lab1; + z->c = ret; /* next, line 160 */ + } + { int m_test = z->l - z->c; /* test, line 160 */ + if (out_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; + z->c = z->l - m_test; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int m2 = z->l - z->c; (void)m2; /* not, line 162 */ + { int m_test = z->l - z->c; /* test, line 162 */ + if (in_grouping_b_U(z, g_U, 105, 305, 0)) goto lab2; + z->c = z->l - m_test; + } + return 0; + lab2: + z->c = z->l - m2; + } + { int m_test = z->l - z->c; /* test, line 162 */ + { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); + if (ret < 0) return 0; + z->c = ret; /* next, line 162 */ + } + { int m_test = z->l - z->c; /* test, line 162 */ + if (out_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; + z->c = z->l - m_test; + } + z->c = z->l - m_test; + } + } +lab0: + return 1; +} + +static int r_mark_possessives(struct SN_env * z) { + if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((67133440 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_0, 10))) return 0; /* among, line 167 */ + { int ret = r_mark_suffix_with_optional_U_vowel(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_U_vowel, line 169 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_sU(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 173 */ + if (ret < 0) return ret; + } + if (in_grouping_b_U(z, g_U, 105, 305, 0)) return 0; + { int ret = r_mark_suffix_with_optional_s_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_s_consonant, line 175 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_lArI(struct SN_env * z) { + if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 177)) return 0; + if (!(find_among_b(z, a_1, 2))) return 0; /* among, line 179 */ + return 1; +} + +static int r_mark_yU(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 183 */ + if (ret < 0) return ret; + } + if (in_grouping_b_U(z, g_U, 105, 305, 0)) return 0; + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 185 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_nU(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 189 */ + if (ret < 0) return ret; + } + if (!(find_among_b(z, a_2, 4))) return 0; /* among, line 190 */ + return 1; +} + +static int r_mark_nUn(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 194 */ + if (ret < 0) return ret; + } + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 110) return 0; + if (!(find_among_b(z, a_3, 4))) return 0; /* among, line 195 */ + { int ret = r_mark_suffix_with_optional_n_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_n_consonant, line 196 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_yA(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 200 */ + if (ret < 0) return ret; + } + if (z->c <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; + if (!(find_among_b(z, a_4, 2))) return 0; /* among, line 201 */ + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 202 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_nA(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 206 */ + if (ret < 0) return ret; + } + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; + if (!(find_among_b(z, a_5, 2))) return 0; /* among, line 207 */ + return 1; +} + +static int r_mark_DA(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 211 */ + if (ret < 0) return ret; + } + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; + if (!(find_among_b(z, a_6, 4))) return 0; /* among, line 212 */ + return 1; +} + +static int r_mark_ndA(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 216 */ + if (ret < 0) return ret; + } + if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; + if (!(find_among_b(z, a_7, 2))) return 0; /* among, line 217 */ + return 1; +} + +static int r_mark_DAn(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 221 */ + if (ret < 0) return ret; + } + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0; + if (!(find_among_b(z, a_8, 4))) return 0; /* among, line 222 */ + return 1; +} + +static int r_mark_ndAn(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 226 */ + if (ret < 0) return ret; + } + if (z->c - 3 <= z->lb || z->p[z->c - 1] != 110) return 0; + if (!(find_among_b(z, a_9, 2))) return 0; /* among, line 227 */ + return 1; +} + +static int r_mark_ylA(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 231 */ + if (ret < 0) return ret; + } + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; + if (!(find_among_b(z, a_10, 2))) return 0; /* among, line 232 */ + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 233 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_ki(struct SN_env * z) { + if (!(eq_s_b(z, 2, s_14))) return 0; + return 1; +} + +static int r_mark_ncA(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 241 */ + if (ret < 0) return ret; + } + if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; + if (!(find_among_b(z, a_11, 2))) return 0; /* among, line 242 */ + { int ret = r_mark_suffix_with_optional_n_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_n_consonant, line 243 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_yUm(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 247 */ + if (ret < 0) return ret; + } + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 109) return 0; + if (!(find_among_b(z, a_12, 4))) return 0; /* among, line 248 */ + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 249 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_sUn(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 253 */ + if (ret < 0) return ret; + } + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0; + if (!(find_among_b(z, a_13, 4))) return 0; /* among, line 254 */ + return 1; +} + +static int r_mark_yUz(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 258 */ + if (ret < 0) return ret; + } + if (z->c - 1 <= z->lb || z->p[z->c - 1] != 122) return 0; + if (!(find_among_b(z, a_14, 4))) return 0; /* among, line 259 */ + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 260 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_sUnUz(struct SN_env * z) { + if (z->c - 4 <= z->lb || z->p[z->c - 1] != 122) return 0; + if (!(find_among_b(z, a_15, 4))) return 0; /* among, line 264 */ + return 1; +} + +static int r_mark_lAr(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 268 */ + if (ret < 0) return ret; + } + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0; + if (!(find_among_b(z, a_16, 2))) return 0; /* among, line 269 */ + return 1; +} + +static int r_mark_nUz(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 273 */ + if (ret < 0) return ret; + } + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 122) return 0; + if (!(find_among_b(z, a_17, 4))) return 0; /* among, line 274 */ + return 1; +} + +static int r_mark_DUr(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 278 */ + if (ret < 0) return ret; + } + if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0; + if (!(find_among_b(z, a_18, 8))) return 0; /* among, line 279 */ + return 1; +} + +static int r_mark_cAsInA(struct SN_env * z) { + if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; + if (!(find_among_b(z, a_19, 2))) return 0; /* among, line 283 */ + return 1; +} + +static int r_mark_yDU(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 287 */ + if (ret < 0) return ret; + } + if (!(find_among_b(z, a_20, 32))) return 0; /* among, line 288 */ + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 292 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_ysA(struct SN_env * z) { + if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((26658 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; + if (!(find_among_b(z, a_21, 8))) return 0; /* among, line 297 */ + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 298 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_ymUs_(struct SN_env * z) { + { int ret = r_check_vowel_harmony(z); + if (ret == 0) return 0; /* call check_vowel_harmony, line 302 */ + if (ret < 0) return ret; + } + if (z->c - 3 <= z->lb || z->p[z->c - 1] != 159) return 0; + if (!(find_among_b(z, a_22, 4))) return 0; /* among, line 303 */ + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 304 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_mark_yken(struct SN_env * z) { + if (!(eq_s_b(z, 3, s_15))) return 0; + { int ret = r_mark_suffix_with_optional_y_consonant(z); + if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 308 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_stem_nominal_verb_suffixes(struct SN_env * z) { + z->ket = z->c; /* [, line 312 */ + z->B[0] = 1; /* set continue_stemming_noun_suffixes, line 313 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 315 */ + { int m2 = z->l - z->c; (void)m2; /* or, line 314 */ + { int ret = r_mark_ymUs_(z); + if (ret == 0) goto lab3; /* call mark_ymUs_, line 314 */ + if (ret < 0) return ret; + } + goto lab2; + lab3: + z->c = z->l - m2; + { int ret = r_mark_yDU(z); + if (ret == 0) goto lab4; /* call mark_yDU, line 314 */ + if (ret < 0) return ret; + } + goto lab2; + lab4: + z->c = z->l - m2; + { int ret = r_mark_ysA(z); + if (ret == 0) goto lab5; /* call mark_ysA, line 314 */ + if (ret < 0) return ret; + } + goto lab2; + lab5: + z->c = z->l - m2; + { int ret = r_mark_yken(z); + if (ret == 0) goto lab1; /* call mark_yken, line 314 */ + if (ret < 0) return ret; + } + } + lab2: + goto lab0; + lab1: + z->c = z->l - m1; + { int ret = r_mark_cAsInA(z); + if (ret == 0) goto lab6; /* call mark_cAsInA, line 316 */ + if (ret < 0) return ret; + } + { int m3 = z->l - z->c; (void)m3; /* or, line 316 */ + { int ret = r_mark_sUnUz(z); + if (ret == 0) goto lab8; /* call mark_sUnUz, line 316 */ + if (ret < 0) return ret; + } + goto lab7; + lab8: + z->c = z->l - m3; + { int ret = r_mark_lAr(z); + if (ret == 0) goto lab9; /* call mark_lAr, line 316 */ + if (ret < 0) return ret; + } + goto lab7; + lab9: + z->c = z->l - m3; + { int ret = r_mark_yUm(z); + if (ret == 0) goto lab10; /* call mark_yUm, line 316 */ + if (ret < 0) return ret; + } + goto lab7; + lab10: + z->c = z->l - m3; + { int ret = r_mark_sUn(z); + if (ret == 0) goto lab11; /* call mark_sUn, line 316 */ + if (ret < 0) return ret; + } + goto lab7; + lab11: + z->c = z->l - m3; + { int ret = r_mark_yUz(z); + if (ret == 0) goto lab12; /* call mark_yUz, line 316 */ + if (ret < 0) return ret; + } + goto lab7; + lab12: + z->c = z->l - m3; + } + lab7: + { int ret = r_mark_ymUs_(z); + if (ret == 0) goto lab6; /* call mark_ymUs_, line 316 */ + if (ret < 0) return ret; + } + goto lab0; + lab6: + z->c = z->l - m1; + { int ret = r_mark_lAr(z); + if (ret == 0) goto lab13; /* call mark_lAr, line 319 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 319 */ + { int ret = slice_del(z); /* delete, line 319 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 319 */ + z->ket = z->c; /* [, line 319 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 319 */ + { int ret = r_mark_DUr(z); + if (ret == 0) goto lab16; /* call mark_DUr, line 319 */ + if (ret < 0) return ret; + } + goto lab15; + lab16: + z->c = z->l - m4; + { int ret = r_mark_yDU(z); + if (ret == 0) goto lab17; /* call mark_yDU, line 319 */ + if (ret < 0) return ret; + } + goto lab15; + lab17: + z->c = z->l - m4; + { int ret = r_mark_ysA(z); + if (ret == 0) goto lab18; /* call mark_ysA, line 319 */ + if (ret < 0) return ret; + } + goto lab15; + lab18: + z->c = z->l - m4; + { int ret = r_mark_ymUs_(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call mark_ymUs_, line 319 */ + if (ret < 0) return ret; + } + } + lab15: + lab14: + ; + } + z->B[0] = 0; /* unset continue_stemming_noun_suffixes, line 320 */ + goto lab0; + lab13: + z->c = z->l - m1; + { int ret = r_mark_nUz(z); + if (ret == 0) goto lab19; /* call mark_nUz, line 323 */ + if (ret < 0) return ret; + } + { int m5 = z->l - z->c; (void)m5; /* or, line 323 */ + { int ret = r_mark_yDU(z); + if (ret == 0) goto lab21; /* call mark_yDU, line 323 */ + if (ret < 0) return ret; + } + goto lab20; + lab21: + z->c = z->l - m5; + { int ret = r_mark_ysA(z); + if (ret == 0) goto lab19; /* call mark_ysA, line 323 */ + if (ret < 0) return ret; + } + } + lab20: + goto lab0; + lab19: + z->c = z->l - m1; + { int m6 = z->l - z->c; (void)m6; /* or, line 325 */ + { int ret = r_mark_sUnUz(z); + if (ret == 0) goto lab24; /* call mark_sUnUz, line 325 */ + if (ret < 0) return ret; + } + goto lab23; + lab24: + z->c = z->l - m6; + { int ret = r_mark_yUz(z); + if (ret == 0) goto lab25; /* call mark_yUz, line 325 */ + if (ret < 0) return ret; + } + goto lab23; + lab25: + z->c = z->l - m6; + { int ret = r_mark_sUn(z); + if (ret == 0) goto lab26; /* call mark_sUn, line 325 */ + if (ret < 0) return ret; + } + goto lab23; + lab26: + z->c = z->l - m6; + { int ret = r_mark_yUm(z); + if (ret == 0) goto lab22; /* call mark_yUm, line 325 */ + if (ret < 0) return ret; + } + } + lab23: + z->bra = z->c; /* ], line 325 */ + { int ret = slice_del(z); /* delete, line 325 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 325 */ + z->ket = z->c; /* [, line 325 */ + { int ret = r_mark_ymUs_(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab27; } /* call mark_ymUs_, line 325 */ + if (ret < 0) return ret; + } + lab27: + ; + } + goto lab0; + lab22: + z->c = z->l - m1; + { int ret = r_mark_DUr(z); + if (ret == 0) return 0; /* call mark_DUr, line 327 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 327 */ + { int ret = slice_del(z); /* delete, line 327 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 327 */ + z->ket = z->c; /* [, line 327 */ + { int m7 = z->l - z->c; (void)m7; /* or, line 327 */ + { int ret = r_mark_sUnUz(z); + if (ret == 0) goto lab30; /* call mark_sUnUz, line 327 */ + if (ret < 0) return ret; + } + goto lab29; + lab30: + z->c = z->l - m7; + { int ret = r_mark_lAr(z); + if (ret == 0) goto lab31; /* call mark_lAr, line 327 */ + if (ret < 0) return ret; + } + goto lab29; + lab31: + z->c = z->l - m7; + { int ret = r_mark_yUm(z); + if (ret == 0) goto lab32; /* call mark_yUm, line 327 */ + if (ret < 0) return ret; + } + goto lab29; + lab32: + z->c = z->l - m7; + { int ret = r_mark_sUn(z); + if (ret == 0) goto lab33; /* call mark_sUn, line 327 */ + if (ret < 0) return ret; + } + goto lab29; + lab33: + z->c = z->l - m7; + { int ret = r_mark_yUz(z); + if (ret == 0) goto lab34; /* call mark_yUz, line 327 */ + if (ret < 0) return ret; + } + goto lab29; + lab34: + z->c = z->l - m7; + } + lab29: + { int ret = r_mark_ymUs_(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call mark_ymUs_, line 327 */ + if (ret < 0) return ret; + } + lab28: + ; + } + } +lab0: + z->bra = z->c; /* ], line 328 */ + { int ret = slice_del(z); /* delete, line 328 */ + if (ret < 0) return ret; + } + return 1; +} + +static int r_stem_suffix_chain_before_ki(struct SN_env * z) { + z->ket = z->c; /* [, line 333 */ + { int ret = r_mark_ki(z); + if (ret == 0) return 0; /* call mark_ki, line 334 */ + if (ret < 0) return ret; + } + { int m1 = z->l - z->c; (void)m1; /* or, line 342 */ + { int ret = r_mark_DA(z); + if (ret == 0) goto lab1; /* call mark_DA, line 336 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 336 */ + { int ret = slice_del(z); /* delete, line 336 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 336 */ + z->ket = z->c; /* [, line 336 */ + { int m2 = z->l - z->c; (void)m2; /* or, line 338 */ + { int ret = r_mark_lAr(z); + if (ret == 0) goto lab4; /* call mark_lAr, line 337 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 337 */ + { int ret = slice_del(z); /* delete, line 337 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 337 */ + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab5; } /* call stem_suffix_chain_before_ki, line 337 */ + if (ret < 0) return ret; + } + lab5: + ; + } + goto lab3; + lab4: + z->c = z->l - m2; + { int ret = r_mark_possessives(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call mark_possessives, line 339 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 339 */ + { int ret = slice_del(z); /* delete, line 339 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 339 */ + z->ket = z->c; /* [, line 339 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call mark_lAr, line 339 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 339 */ + { int ret = slice_del(z); /* delete, line 339 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call stem_suffix_chain_before_ki, line 339 */ + if (ret < 0) return ret; + } + lab6: + ; + } + } + lab3: + lab2: + ; + } + goto lab0; + lab1: + z->c = z->l - m1; + { int ret = r_mark_nUn(z); + if (ret == 0) goto lab7; /* call mark_nUn, line 343 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 343 */ + { int ret = slice_del(z); /* delete, line 343 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 343 */ + z->ket = z->c; /* [, line 343 */ + { int m3 = z->l - z->c; (void)m3; /* or, line 345 */ + { int ret = r_mark_lArI(z); + if (ret == 0) goto lab10; /* call mark_lArI, line 344 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 344 */ + { int ret = slice_del(z); /* delete, line 344 */ + if (ret < 0) return ret; + } + goto lab9; + lab10: + z->c = z->l - m3; + z->ket = z->c; /* [, line 346 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 346 */ + { int ret = r_mark_possessives(z); + if (ret == 0) goto lab13; /* call mark_possessives, line 346 */ + if (ret < 0) return ret; + } + goto lab12; + lab13: + z->c = z->l - m4; + { int ret = r_mark_sU(z); + if (ret == 0) goto lab11; /* call mark_sU, line 346 */ + if (ret < 0) return ret; + } + } + lab12: + z->bra = z->c; /* ], line 346 */ + { int ret = slice_del(z); /* delete, line 346 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 346 */ + z->ket = z->c; /* [, line 346 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call mark_lAr, line 346 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 346 */ + { int ret = slice_del(z); /* delete, line 346 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call stem_suffix_chain_before_ki, line 346 */ + if (ret < 0) return ret; + } + lab14: + ; + } + goto lab9; + lab11: + z->c = z->l - m3; + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab8; } /* call stem_suffix_chain_before_ki, line 348 */ + if (ret < 0) return ret; + } + } + lab9: + lab8: + ; + } + goto lab0; + lab7: + z->c = z->l - m1; + { int ret = r_mark_ndA(z); + if (ret == 0) return 0; /* call mark_ndA, line 351 */ + if (ret < 0) return ret; + } + { int m5 = z->l - z->c; (void)m5; /* or, line 353 */ + { int ret = r_mark_lArI(z); + if (ret == 0) goto lab16; /* call mark_lArI, line 352 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 352 */ + { int ret = slice_del(z); /* delete, line 352 */ + if (ret < 0) return ret; + } + goto lab15; + lab16: + z->c = z->l - m5; + { int ret = r_mark_sU(z); + if (ret == 0) goto lab17; /* call mark_sU, line 354 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 354 */ + { int ret = slice_del(z); /* delete, line 354 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 354 */ + z->ket = z->c; /* [, line 354 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab18; } /* call mark_lAr, line 354 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 354 */ + { int ret = slice_del(z); /* delete, line 354 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab18; } /* call stem_suffix_chain_before_ki, line 354 */ + if (ret < 0) return ret; + } + lab18: + ; + } + goto lab15; + lab17: + z->c = z->l - m5; + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) return 0; /* call stem_suffix_chain_before_ki, line 356 */ + if (ret < 0) return ret; + } + } + lab15: + ; + } +lab0: + return 1; +} + +static int r_stem_noun_suffixes(struct SN_env * z) { + { int m1 = z->l - z->c; (void)m1; /* or, line 363 */ + z->ket = z->c; /* [, line 362 */ + { int ret = r_mark_lAr(z); + if (ret == 0) goto lab1; /* call mark_lAr, line 362 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 362 */ + { int ret = slice_del(z); /* delete, line 362 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 362 */ + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call stem_suffix_chain_before_ki, line 362 */ + if (ret < 0) return ret; + } + lab2: + ; + } + goto lab0; + lab1: + z->c = z->l - m1; + z->ket = z->c; /* [, line 364 */ + { int ret = r_mark_ncA(z); + if (ret == 0) goto lab3; /* call mark_ncA, line 364 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 364 */ + { int ret = slice_del(z); /* delete, line 364 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 365 */ + { int m2 = z->l - z->c; (void)m2; /* or, line 367 */ + z->ket = z->c; /* [, line 366 */ + { int ret = r_mark_lArI(z); + if (ret == 0) goto lab6; /* call mark_lArI, line 366 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 366 */ + { int ret = slice_del(z); /* delete, line 366 */ + if (ret < 0) return ret; + } + goto lab5; + lab6: + z->c = z->l - m2; + z->ket = z->c; /* [, line 368 */ + { int m3 = z->l - z->c; (void)m3; /* or, line 368 */ + { int ret = r_mark_possessives(z); + if (ret == 0) goto lab9; /* call mark_possessives, line 368 */ + if (ret < 0) return ret; + } + goto lab8; + lab9: + z->c = z->l - m3; + { int ret = r_mark_sU(z); + if (ret == 0) goto lab7; /* call mark_sU, line 368 */ + if (ret < 0) return ret; + } + } + lab8: + z->bra = z->c; /* ], line 368 */ + { int ret = slice_del(z); /* delete, line 368 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 368 */ + z->ket = z->c; /* [, line 368 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call mark_lAr, line 368 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 368 */ + { int ret = slice_del(z); /* delete, line 368 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call stem_suffix_chain_before_ki, line 368 */ + if (ret < 0) return ret; + } + lab10: + ; + } + goto lab5; + lab7: + z->c = z->l - m2; + z->ket = z->c; /* [, line 370 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call mark_lAr, line 370 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 370 */ + { int ret = slice_del(z); /* delete, line 370 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call stem_suffix_chain_before_ki, line 370 */ + if (ret < 0) return ret; + } + } + lab5: + lab4: + ; + } + goto lab0; + lab3: + z->c = z->l - m1; + z->ket = z->c; /* [, line 374 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 374 */ + { int ret = r_mark_ndA(z); + if (ret == 0) goto lab13; /* call mark_ndA, line 374 */ + if (ret < 0) return ret; + } + goto lab12; + lab13: + z->c = z->l - m4; + { int ret = r_mark_nA(z); + if (ret == 0) goto lab11; /* call mark_nA, line 374 */ + if (ret < 0) return ret; + } + } + lab12: + { int m5 = z->l - z->c; (void)m5; /* or, line 377 */ + { int ret = r_mark_lArI(z); + if (ret == 0) goto lab15; /* call mark_lArI, line 376 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 376 */ + { int ret = slice_del(z); /* delete, line 376 */ + if (ret < 0) return ret; + } + goto lab14; + lab15: + z->c = z->l - m5; + { int ret = r_mark_sU(z); + if (ret == 0) goto lab16; /* call mark_sU, line 378 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 378 */ + { int ret = slice_del(z); /* delete, line 378 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 378 */ + z->ket = z->c; /* [, line 378 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab17; } /* call mark_lAr, line 378 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 378 */ + { int ret = slice_del(z); /* delete, line 378 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab17; } /* call stem_suffix_chain_before_ki, line 378 */ + if (ret < 0) return ret; + } + lab17: + ; + } + goto lab14; + lab16: + z->c = z->l - m5; + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) goto lab11; /* call stem_suffix_chain_before_ki, line 380 */ + if (ret < 0) return ret; + } + } + lab14: + goto lab0; + lab11: + z->c = z->l - m1; + z->ket = z->c; /* [, line 384 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 384 */ + { int ret = r_mark_ndAn(z); + if (ret == 0) goto lab20; /* call mark_ndAn, line 384 */ + if (ret < 0) return ret; + } + goto lab19; + lab20: + z->c = z->l - m6; + { int ret = r_mark_nU(z); + if (ret == 0) goto lab18; /* call mark_nU, line 384 */ + if (ret < 0) return ret; + } + } + lab19: + { int m7 = z->l - z->c; (void)m7; /* or, line 384 */ + { int ret = r_mark_sU(z); + if (ret == 0) goto lab22; /* call mark_sU, line 384 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 384 */ + { int ret = slice_del(z); /* delete, line 384 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 384 */ + z->ket = z->c; /* [, line 384 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab23; } /* call mark_lAr, line 384 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 384 */ + { int ret = slice_del(z); /* delete, line 384 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab23; } /* call stem_suffix_chain_before_ki, line 384 */ + if (ret < 0) return ret; + } + lab23: + ; + } + goto lab21; + lab22: + z->c = z->l - m7; + { int ret = r_mark_lArI(z); + if (ret == 0) goto lab18; /* call mark_lArI, line 384 */ + if (ret < 0) return ret; + } + } + lab21: + goto lab0; + lab18: + z->c = z->l - m1; + z->ket = z->c; /* [, line 386 */ + { int ret = r_mark_DAn(z); + if (ret == 0) goto lab24; /* call mark_DAn, line 386 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 386 */ + { int ret = slice_del(z); /* delete, line 386 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 386 */ + z->ket = z->c; /* [, line 386 */ + { int m8 = z->l - z->c; (void)m8; /* or, line 389 */ + { int ret = r_mark_possessives(z); + if (ret == 0) goto lab27; /* call mark_possessives, line 388 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 388 */ + { int ret = slice_del(z); /* delete, line 388 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 388 */ + z->ket = z->c; /* [, line 388 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call mark_lAr, line 388 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 388 */ + { int ret = slice_del(z); /* delete, line 388 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call stem_suffix_chain_before_ki, line 388 */ + if (ret < 0) return ret; + } + lab28: + ; + } + goto lab26; + lab27: + z->c = z->l - m8; + { int ret = r_mark_lAr(z); + if (ret == 0) goto lab29; /* call mark_lAr, line 390 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 390 */ + { int ret = slice_del(z); /* delete, line 390 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 390 */ + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab30; } /* call stem_suffix_chain_before_ki, line 390 */ + if (ret < 0) return ret; + } + lab30: + ; + } + goto lab26; + lab29: + z->c = z->l - m8; + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab25; } /* call stem_suffix_chain_before_ki, line 392 */ + if (ret < 0) return ret; + } + } + lab26: + lab25: + ; + } + goto lab0; + lab24: + z->c = z->l - m1; + z->ket = z->c; /* [, line 396 */ + { int m9 = z->l - z->c; (void)m9; /* or, line 396 */ + { int ret = r_mark_nUn(z); + if (ret == 0) goto lab33; /* call mark_nUn, line 396 */ + if (ret < 0) return ret; + } + goto lab32; + lab33: + z->c = z->l - m9; + { int ret = r_mark_ylA(z); + if (ret == 0) goto lab31; /* call mark_ylA, line 396 */ + if (ret < 0) return ret; + } + } + lab32: + z->bra = z->c; /* ], line 396 */ + { int ret = slice_del(z); /* delete, line 396 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 397 */ + { int m10 = z->l - z->c; (void)m10; /* or, line 399 */ + z->ket = z->c; /* [, line 398 */ + { int ret = r_mark_lAr(z); + if (ret == 0) goto lab36; /* call mark_lAr, line 398 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 398 */ + { int ret = slice_del(z); /* delete, line 398 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) goto lab36; /* call stem_suffix_chain_before_ki, line 398 */ + if (ret < 0) return ret; + } + goto lab35; + lab36: + z->c = z->l - m10; + z->ket = z->c; /* [, line 400 */ + { int m11 = z->l - z->c; (void)m11; /* or, line 400 */ + { int ret = r_mark_possessives(z); + if (ret == 0) goto lab39; /* call mark_possessives, line 400 */ + if (ret < 0) return ret; + } + goto lab38; + lab39: + z->c = z->l - m11; + { int ret = r_mark_sU(z); + if (ret == 0) goto lab37; /* call mark_sU, line 400 */ + if (ret < 0) return ret; + } + } + lab38: + z->bra = z->c; /* ], line 400 */ + { int ret = slice_del(z); /* delete, line 400 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 400 */ + z->ket = z->c; /* [, line 400 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab40; } /* call mark_lAr, line 400 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 400 */ + { int ret = slice_del(z); /* delete, line 400 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab40; } /* call stem_suffix_chain_before_ki, line 400 */ + if (ret < 0) return ret; + } + lab40: + ; + } + goto lab35; + lab37: + z->c = z->l - m10; + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab34; } /* call stem_suffix_chain_before_ki, line 402 */ + if (ret < 0) return ret; + } + } + lab35: + lab34: + ; + } + goto lab0; + lab31: + z->c = z->l - m1; + z->ket = z->c; /* [, line 406 */ + { int ret = r_mark_lArI(z); + if (ret == 0) goto lab41; /* call mark_lArI, line 406 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 406 */ + { int ret = slice_del(z); /* delete, line 406 */ + if (ret < 0) return ret; + } + goto lab0; + lab41: + z->c = z->l - m1; + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) goto lab42; /* call stem_suffix_chain_before_ki, line 408 */ + if (ret < 0) return ret; + } + goto lab0; + lab42: + z->c = z->l - m1; + z->ket = z->c; /* [, line 410 */ + { int m12 = z->l - z->c; (void)m12; /* or, line 410 */ + { int ret = r_mark_DA(z); + if (ret == 0) goto lab45; /* call mark_DA, line 410 */ + if (ret < 0) return ret; + } + goto lab44; + lab45: + z->c = z->l - m12; + { int ret = r_mark_yU(z); + if (ret == 0) goto lab46; /* call mark_yU, line 410 */ + if (ret < 0) return ret; + } + goto lab44; + lab46: + z->c = z->l - m12; + { int ret = r_mark_yA(z); + if (ret == 0) goto lab43; /* call mark_yA, line 410 */ + if (ret < 0) return ret; + } + } + lab44: + z->bra = z->c; /* ], line 410 */ + { int ret = slice_del(z); /* delete, line 410 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 410 */ + z->ket = z->c; /* [, line 410 */ + { int m13 = z->l - z->c; (void)m13; /* or, line 410 */ + { int ret = r_mark_possessives(z); + if (ret == 0) goto lab49; /* call mark_possessives, line 410 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 410 */ + { int ret = slice_del(z); /* delete, line 410 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 410 */ + z->ket = z->c; /* [, line 410 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab50; } /* call mark_lAr, line 410 */ + if (ret < 0) return ret; + } + lab50: + ; + } + goto lab48; + lab49: + z->c = z->l - m13; + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab47; } /* call mark_lAr, line 410 */ + if (ret < 0) return ret; + } + } + lab48: + z->bra = z->c; /* ], line 410 */ + { int ret = slice_del(z); /* delete, line 410 */ + if (ret < 0) return ret; + } + z->ket = z->c; /* [, line 410 */ + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab47; } /* call stem_suffix_chain_before_ki, line 410 */ + if (ret < 0) return ret; + } + lab47: + ; + } + goto lab0; + lab43: + z->c = z->l - m1; + z->ket = z->c; /* [, line 412 */ + { int m14 = z->l - z->c; (void)m14; /* or, line 412 */ + { int ret = r_mark_possessives(z); + if (ret == 0) goto lab52; /* call mark_possessives, line 412 */ + if (ret < 0) return ret; + } + goto lab51; + lab52: + z->c = z->l - m14; + { int ret = r_mark_sU(z); + if (ret == 0) return 0; /* call mark_sU, line 412 */ + if (ret < 0) return ret; + } + } + lab51: + z->bra = z->c; /* ], line 412 */ + { int ret = slice_del(z); /* delete, line 412 */ + if (ret < 0) return ret; + } + { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 412 */ + z->ket = z->c; /* [, line 412 */ + { int ret = r_mark_lAr(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab53; } /* call mark_lAr, line 412 */ + if (ret < 0) return ret; + } + z->bra = z->c; /* ], line 412 */ + { int ret = slice_del(z); /* delete, line 412 */ + if (ret < 0) return ret; + } + { int ret = r_stem_suffix_chain_before_ki(z); + if (ret == 0) { z->c = z->l - m_keep; goto lab53; } /* call stem_suffix_chain_before_ki, line 412 */ + if (ret < 0) return ret; + } + lab53: + ; + } + } +lab0: + return 1; +} + +static int r_post_process_last_consonants(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 416 */ + among_var = find_among_b(z, a_23, 4); /* substring, line 416 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 416 */ + switch(among_var) { + case 0: return 0; + case 1: + { int ret = slice_from_s(z, 1, s_16); /* <-, line 417 */ + if (ret < 0) return ret; + } + break; + case 2: + { int ret = slice_from_s(z, 2, s_17); /* <-, line 418 */ + if (ret < 0) return ret; + } + break; + case 3: + { int ret = slice_from_s(z, 1, s_18); /* <-, line 419 */ + if (ret < 0) return ret; + } + break; + case 4: + { int ret = slice_from_s(z, 1, s_19); /* <-, line 420 */ + if (ret < 0) return ret; + } + break; + } + return 1; +} + +static int r_append_U_to_stems_ending_with_d_or_g(struct SN_env * z) { + { int m_test = z->l - z->c; /* test, line 431 */ + { int m1 = z->l - z->c; (void)m1; /* or, line 431 */ + if (!(eq_s_b(z, 1, s_20))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m1; + if (!(eq_s_b(z, 1, s_21))) return 0; + } + lab0: + z->c = z->l - m_test; + } + { int m2 = z->l - z->c; (void)m2; /* or, line 433 */ + { int m_test = z->l - z->c; /* test, line 432 */ + if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab3; /* goto */ /* grouping vowel, line 432 */ + { int m3 = z->l - z->c; (void)m3; /* or, line 432 */ + if (!(eq_s_b(z, 1, s_22))) goto lab5; + goto lab4; + lab5: + z->c = z->l - m3; + if (!(eq_s_b(z, 2, s_23))) goto lab3; + } + lab4: + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 2, s_24); /* <+, line 432 */ + z->c = c_keep; + if (ret < 0) return ret; + } + goto lab2; + lab3: + z->c = z->l - m2; + { int m_test = z->l - z->c; /* test, line 434 */ + if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab6; /* goto */ /* grouping vowel, line 434 */ + { int m4 = z->l - z->c; (void)m4; /* or, line 434 */ + if (!(eq_s_b(z, 1, s_25))) goto lab8; + goto lab7; + lab8: + z->c = z->l - m4; + if (!(eq_s_b(z, 1, s_26))) goto lab6; + } + lab7: + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_27); /* <+, line 434 */ + z->c = c_keep; + if (ret < 0) return ret; + } + goto lab2; + lab6: + z->c = z->l - m2; + { int m_test = z->l - z->c; /* test, line 436 */ + if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab9; /* goto */ /* grouping vowel, line 436 */ + { int m5 = z->l - z->c; (void)m5; /* or, line 436 */ + if (!(eq_s_b(z, 1, s_28))) goto lab11; + goto lab10; + lab11: + z->c = z->l - m5; + if (!(eq_s_b(z, 1, s_29))) goto lab9; + } + lab10: + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 1, s_30); /* <+, line 436 */ + z->c = c_keep; + if (ret < 0) return ret; + } + goto lab2; + lab9: + z->c = z->l - m2; + { int m_test = z->l - z->c; /* test, line 438 */ + if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0; /* goto */ /* grouping vowel, line 438 */ + { int m6 = z->l - z->c; (void)m6; /* or, line 438 */ + if (!(eq_s_b(z, 2, s_31))) goto lab13; + goto lab12; + lab13: + z->c = z->l - m6; + if (!(eq_s_b(z, 2, s_32))) return 0; + } + lab12: + z->c = z->l - m_test; + } + { int c_keep = z->c; + int ret = insert_s(z, z->c, z->c, 2, s_33); /* <+, line 438 */ + z->c = c_keep; + if (ret < 0) return ret; + } + } +lab2: + return 1; +} + +static int r_more_than_one_syllable_word(struct SN_env * z) { + { int c_test = z->c; /* test, line 446 */ + { int i = 2; + while(1) { /* atleast, line 446 */ + int c1 = z->c; + { /* gopast */ /* grouping vowel, line 446 */ + int ret = out_grouping_U(z, g_vowel, 97, 305, 1); + if (ret < 0) goto lab0; + z->c += ret; + } + i--; + continue; + lab0: + z->c = c1; + break; + } + if (i > 0) return 0; + } + z->c = c_test; + } + return 1; +} + +static int r_is_reserved_word(struct SN_env * z) { + { int c1 = z->c; /* or, line 451 */ + { int c_test = z->c; /* test, line 450 */ + while(1) { /* gopast, line 450 */ + if (!(eq_s(z, 2, s_34))) goto lab2; + break; + lab2: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) goto lab1; + z->c = ret; /* gopast, line 450 */ + } + } + z->I[0] = 2; + if (!(z->I[0] == z->l)) goto lab1; + z->c = c_test; + } + goto lab0; + lab1: + z->c = c1; + { int c_test = z->c; /* test, line 452 */ + while(1) { /* gopast, line 452 */ + if (!(eq_s(z, 5, s_35))) goto lab3; + break; + lab3: + { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); + if (ret < 0) return 0; + z->c = ret; /* gopast, line 452 */ + } + } + z->I[0] = 5; + if (!(z->I[0] == z->l)) return 0; + z->c = c_test; + } + } +lab0: + return 1; +} + +static int r_postlude(struct SN_env * z) { + { int c1 = z->c; /* not, line 456 */ + { int ret = r_is_reserved_word(z); + if (ret == 0) goto lab0; /* call is_reserved_word, line 456 */ + if (ret < 0) return ret; + } + return 0; + lab0: + z->c = c1; + } + z->lb = z->c; z->c = z->l; /* backwards, line 457 */ + + { int m2 = z->l - z->c; (void)m2; /* do, line 458 */ + { int ret = r_append_U_to_stems_ending_with_d_or_g(z); + if (ret == 0) goto lab1; /* call append_U_to_stems_ending_with_d_or_g, line 458 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + { int m3 = z->l - z->c; (void)m3; /* do, line 459 */ + { int ret = r_post_process_last_consonants(z); + if (ret == 0) goto lab2; /* call post_process_last_consonants, line 459 */ + if (ret < 0) return ret; + } + lab2: + z->c = z->l - m3; + } + z->c = z->lb; + return 1; +} + +extern int turkish_UTF_8_stem(struct SN_env * z) { + { int ret = r_more_than_one_syllable_word(z); + if (ret == 0) return 0; /* call more_than_one_syllable_word, line 465 */ + if (ret < 0) return ret; + } + z->lb = z->c; z->c = z->l; /* backwards, line 467 */ + + { int m1 = z->l - z->c; (void)m1; /* do, line 468 */ + { int ret = r_stem_nominal_verb_suffixes(z); + if (ret == 0) goto lab0; /* call stem_nominal_verb_suffixes, line 468 */ + if (ret < 0) return ret; + } + lab0: + z->c = z->l - m1; + } + if (!(z->B[0])) return 0; /* Boolean test continue_stemming_noun_suffixes, line 469 */ + { int m2 = z->l - z->c; (void)m2; /* do, line 470 */ + { int ret = r_stem_noun_suffixes(z); + if (ret == 0) goto lab1; /* call stem_noun_suffixes, line 470 */ + if (ret < 0) return ret; + } + lab1: + z->c = z->l - m2; + } + z->c = z->lb; + { int ret = r_postlude(z); + if (ret == 0) return 0; /* call postlude, line 473 */ + if (ret < 0) return ret; + } + return 1; +} + +extern struct SN_env * turkish_UTF_8_create_env(void) { return SN_create_env(0, 1, 1); } + +extern void turkish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } + diff --git a/src/backend/snowball/libstemmer/utilities.c b/src/backend/snowball/libstemmer/utilities.c new file mode 100644 index 00000000000..8c89af1cd5c --- /dev/null +++ b/src/backend/snowball/libstemmer/utilities.c @@ -0,0 +1,473 @@ +#include "header.h" + +#define unless(C) if(!(C)) + +#define CREATE_SIZE 1 + +extern symbol * create_s(void) { + symbol * p; + void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); + if (mem == NULL) return NULL; + p = (symbol *) (HEAD + (char *) mem); + CAPACITY(p) = CREATE_SIZE; + SET_SIZE(p, CREATE_SIZE); + return p; +} + +extern void lose_s(symbol * p) { + if (p == NULL) return; + free((char *) p - HEAD); +} + +/* + new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c + if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new + position, or 0 on failure. + + -- used to implement hop and next in the utf8 case. +*/ + +extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) { + int b; + if (n >= 0) { + for (; n > 0; n--) { + if (c >= l) return -1; + b = p[c++]; + if (b >= 0xC0) { /* 1100 0000 */ + while (c < l) { + b = p[c]; + if (b >= 0xC0 || b < 0x80) break; + /* break unless b is 10------ */ + c++; + } + } + } + } else { + for (; n < 0; n++) { + if (c <= lb) return -1; + b = p[--c]; + if (b >= 0x80) { /* 1000 0000 */ + while (c > lb) { + b = p[c]; + if (b >= 0xC0) break; /* 1100 0000 */ + c--; + } + } + } + } + return c; +} + +/* Code for character groupings: utf8 cases */ + +static int get_utf8(const symbol * p, int c, int l, int * slot) { + int b0, b1; + if (c >= l) return 0; + b0 = p[c++]; + if (b0 < 0xC0 || c == l) { /* 1100 0000 */ + * slot = b0; return 1; + } + b1 = p[c++]; + if (b0 < 0xE0 || c == l) { /* 1110 0000 */ + * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2; + } + * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3; +} + +static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { + int b0, b1; + if (c <= lb) return 0; + b0 = p[--c]; + if (b0 < 0x80 || c == lb) { /* 1000 0000 */ + * slot = b0; return 1; + } + b1 = p[--c]; + if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */ + * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2; + } + * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; +} + +extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_utf8(z->p, z->c, z->l, & ch); + unless (w) return -1; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c += w; + } while (repeat); + return 0; +} + +extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_b_utf8(z->p, z->c, z->lb, & ch); + unless (w) return -1; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c -= w; + } while (repeat); + return 0; +} + +extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_utf8(z->p, z->c, z->l, & ch); + unless (w) return -1; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c += w; + } while (repeat); + return 0; +} + +extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + int w = get_b_utf8(z->p, z->c, z->lb, & ch); + unless (w) return -1; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return w; + z->c -= w; + } while (repeat); + return 0; +} + +/* Code for character groupings: non-utf8 cases */ + +extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c >= z->l) return -1; + ch = z->p[z->c]; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c++; + } while (repeat); + return 0; +} + +extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c <= z->lb) return -1; + ch = z->p[z->c - 1]; + if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c--; + } while (repeat); + return 0; +} + +extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c >= z->l) return -1; + ch = z->p[z->c]; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c++; + } while (repeat); + return 0; +} + +extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { + do { + int ch; + if (z->c <= z->lb) return -1; + ch = z->p[z->c - 1]; + unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) + return 1; + z->c--; + } while (repeat); + return 0; +} + +extern int eq_s(struct SN_env * z, int s_size, const symbol * s) { + if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; + z->c += s_size; return 1; +} + +extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) { + if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; + z->c -= s_size; return 1; +} + +extern int eq_v(struct SN_env * z, const symbol * p) { + return eq_s(z, SIZE(p), p); +} + +extern int eq_v_b(struct SN_env * z, const symbol * p) { + return eq_s_b(z, SIZE(p), p); +} + +extern int find_among(struct SN_env * z, const struct among * v, int v_size) { + + int i = 0; + int j = v_size; + + int c = z->c; int l = z->l; + symbol * q = z->p + c; + + const struct among * w; + + int common_i = 0; + int common_j = 0; + + int first_key_inspected = 0; + + while(1) { + int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? common_i : common_j; /* smaller */ + w = v + k; + { + int i2; for (i2 = common; i2 < w->s_size; i2++) { + if (c + common == l) { diff = -1; break; } + diff = q[common] - w->s[i2]; + if (diff != 0) break; + common++; + } + } + if (diff < 0) { j = k; common_j = common; } + else { i = k; common_i = common; } + if (j - i <= 1) { + if (i > 0) break; /* v->s has been inspected */ + if (j == i) break; /* only one item in v */ + + /* - but now we need to go round once more to get + v->s inspected. This looks messy, but is actually + the optimal approach. */ + + if (first_key_inspected) break; + first_key_inspected = 1; + } + } + while(1) { + w = v + i; + if (common_i >= w->s_size) { + z->c = c + w->s_size; + if (w->function == 0) return w->result; + { + int res = w->function(z); + z->c = c + w->s_size; + if (res) return w->result; + } + } + i = w->substring_i; + if (i < 0) return 0; + } +} + +/* find_among_b is for backwards processing. Same comments apply */ + +extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { + + int i = 0; + int j = v_size; + + int c = z->c; int lb = z->lb; + symbol * q = z->p + c - 1; + + const struct among * w; + + int common_i = 0; + int common_j = 0; + + int first_key_inspected = 0; + + while(1) { + int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? common_i : common_j; + w = v + k; + { + int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) { + if (c - common == lb) { diff = -1; break; } + diff = q[- common] - w->s[i2]; + if (diff != 0) break; + common++; + } + } + if (diff < 0) { j = k; common_j = common; } + else { i = k; common_i = common; } + if (j - i <= 1) { + if (i > 0) break; + if (j == i) break; + if (first_key_inspected) break; + first_key_inspected = 1; + } + } + while(1) { + w = v + i; + if (common_i >= w->s_size) { + z->c = c - w->s_size; + if (w->function == 0) return w->result; + { + int res = w->function(z); + z->c = c - w->s_size; + if (res) return w->result; + } + } + i = w->substring_i; + if (i < 0) return 0; + } +} + + +/* Increase the size of the buffer pointed to by p to at least n symbols. + * If insufficient memory, returns NULL and frees the old buffer. + */ +static symbol * increase_size(symbol * p, int n) { + symbol * q; + int new_size = n + 20; + void * mem = realloc((char *) p - HEAD, + HEAD + (new_size + 1) * sizeof(symbol)); + if (mem == NULL) { + lose_s(p); + return NULL; + } + q = (symbol *) (HEAD + (char *)mem); + CAPACITY(q) = new_size; + return q; +} + +/* to replace symbols between c_bra and c_ket in z->p by the + s_size symbols at s. + Returns 0 on success, -1 on error. + Also, frees z->p (and sets it to NULL) on error. +*/ +extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr) +{ + int adjustment; + int len; + if (z->p == NULL) { + z->p = create_s(); + if (z->p == NULL) return -1; + } + adjustment = s_size - (c_ket - c_bra); + len = SIZE(z->p); + if (adjustment != 0) { + if (adjustment + len > CAPACITY(z->p)) { + z->p = increase_size(z->p, adjustment + len); + if (z->p == NULL) return -1; + } + memmove(z->p + c_ket + adjustment, + z->p + c_ket, + (len - c_ket) * sizeof(symbol)); + SET_SIZE(z->p, adjustment + len); + z->l += adjustment; + if (z->c >= c_ket) + z->c += adjustment; + else + if (z->c > c_bra) + z->c = c_bra; + } + unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); + if (adjptr != NULL) + *adjptr = adjustment; + return 0; +} + +static int slice_check(struct SN_env * z) { + + if (z->bra < 0 || + z->bra > z->ket || + z->ket > z->l || + z->p == NULL || + z->l > SIZE(z->p)) /* this line could be removed */ + { +#if 0 + fprintf(stderr, "faulty slice operation:\n"); + debug(z, -1, 0); +#endif + return -1; + } + return 0; +} + +extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) { + if (slice_check(z)) return -1; + return replace_s(z, z->bra, z->ket, s_size, s, NULL); +} + +extern int slice_from_v(struct SN_env * z, const symbol * p) { + return slice_from_s(z, SIZE(p), p); +} + +extern int slice_del(struct SN_env * z) { + return slice_from_s(z, 0, 0); +} + +extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) { + int adjustment; + if (replace_s(z, bra, ket, s_size, s, &adjustment)) + return -1; + if (bra <= z->bra) z->bra += adjustment; + if (bra <= z->ket) z->ket += adjustment; + return 0; +} + +extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { + int adjustment; + if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) + return -1; + if (bra <= z->bra) z->bra += adjustment; + if (bra <= z->ket) z->ket += adjustment; + return 0; +} + +extern symbol * slice_to(struct SN_env * z, symbol * p) { + if (slice_check(z)) { + lose_s(p); + return NULL; + } + { + int len = z->ket - z->bra; + if (CAPACITY(p) < len) { + p = increase_size(p, len); + if (p == NULL) + return NULL; + } + memmove(p, z->p + z->bra, len * sizeof(symbol)); + SET_SIZE(p, len); + } + return p; +} + +extern symbol * assign_to(struct SN_env * z, symbol * p) { + int len = z->l; + if (CAPACITY(p) < len) { + p = increase_size(p, len); + if (p == NULL) + return NULL; + } + memmove(p, z->p, len * sizeof(symbol)); + SET_SIZE(p, len); + return p; +} + +#if 0 +extern void debug(struct SN_env * z, int number, int line_count) { + int i; + int limit = SIZE(z->p); + /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ + if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); + for (i = 0; i <= limit; i++) { + if (z->lb == i) printf("{"); + if (z->bra == i) printf("["); + if (z->c == i) printf("|"); + if (z->ket == i) printf("]"); + if (z->l == i) printf("}"); + if (i < limit) + { int ch = z->p[i]; + if (ch == 0) ch = '#'; + printf("%c", ch); + } + } + printf("'\n"); +} +#endif diff --git a/src/backend/snowball/snowball.sql.in b/src/backend/snowball/snowball.sql.in new file mode 100644 index 00000000000..5f1f3e772e8 --- /dev/null +++ b/src/backend/snowball/snowball.sql.in @@ -0,0 +1,26 @@ +-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.1 2007/08/21 01:11:16 tgl Exp $$ + +-- text search configuration for _CFGNAME_ language +CREATE TEXT SEARCH DICTIONARY _DICTNAME_ + (TEMPLATE = snowball, + OPTION = 'Language=_DICTNAME__STOPWORDS_'); + +COMMENT ON TEXT SEARCH DICTIONARY _DICTNAME_ IS 'Snowball stemmer for _DICTNAME_ language'; + +CREATE TEXT SEARCH CONFIGURATION _CFGNAME_ + (PARSER = default); + +COMMENT ON TEXT SEARCH CONFIGURATION _CFGNAME_ IS 'Configuration for _CFGNAME_ language'; + +ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING + FOR email, url, host, sfloat, version, uri, file, float, int, uint + WITH simple; + +ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING + FOR lhword, lpart_hword, lword + WITH _LATDICTNAME_; + +ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING + FOR hword, nlhword, nlpart_hword, nlword, word, part_hword + WITH _NONLATDICTNAME_; + diff --git a/src/backend/snowball/snowball_func.sql.in b/src/backend/snowball/snowball_func.sql.in new file mode 100644 index 00000000000..db3ca2c9d00 --- /dev/null +++ b/src/backend/snowball/snowball_func.sql.in @@ -0,0 +1,18 @@ +-- $PostgreSQL: pgsql/src/backend/snowball/snowball_func.sql.in,v 1.1 2007/08/21 01:11:16 tgl Exp $$ + +SET search_path = pg_catalog; + +CREATE FUNCTION dsnowball_init(INTERNAL) + RETURNS INTERNAL AS '$libdir/dict_snowball', 'dsnowball_init' +LANGUAGE C STRICT; + +CREATE FUNCTION dsnowball_lexize(INTERNAL, INTERNAL, INTERNAL, INTERNAL) + RETURNS INTERNAL AS '$libdir/dict_snowball', 'dsnowball_lexize' +LANGUAGE C STRICT; + +CREATE TEXT SEARCH TEMPLATE snowball + (INIT = dsnowball_init, + LEXIZE = dsnowball_lexize); + +COMMENT ON TEXT SEARCH TEMPLATE snowball IS 'Snowball stemmer'; + diff --git a/src/backend/snowball/stopwords/danish.stop b/src/backend/snowball/stopwords/danish.stop new file mode 100644 index 00000000000..d3edc675791 --- /dev/null +++ b/src/backend/snowball/stopwords/danish.stop @@ -0,0 +1,94 @@ +og +i +jeg +det +at +en +den +til +er +som +på +de +med +han +af +for +ikke +der +var +mig +sig +men +et +har +om +vi +min +havde +ham +hun +nu +over +da +fra +du +ud +sin +dem +os +op +man +hans +hvor +eller +hvad +skal +selv +her +alle +vil +blev +kunne +ind +når +være +dog +noget +ville +jo +deres +efter +ned +skulle +denne +end +dette +mit +også +under +have +dig +anden +hende +mine +alt +meget +sit +sine +vor +mod +disse +hvis +din +nogle +hos +blive +mange +ad +bliver +hendes +været +thi +jer +sådan diff --git a/src/backend/snowball/stopwords/dutch.stop b/src/backend/snowball/stopwords/dutch.stop new file mode 100644 index 00000000000..cafa0324b53 --- /dev/null +++ b/src/backend/snowball/stopwords/dutch.stop @@ -0,0 +1,101 @@ +de +en +van +ik +te +dat +die +in +een +hij +het +niet +zijn +is +was +op +aan +met +als +voor +had +er +maar +om +hem +dan +zou +of +wat +mijn +men +dit +zo +door +over +ze +zich +bij +ook +tot +je +mij +uit +der +daar +haar +naar +heb +hoe +heeft +hebben +deze +u +want +nog +zal +me +zij +nu +ge +geen +omdat +iets +worden +toch +al +waren +veel +meer +doen +toen +moet +ben +zonder +kan +hun +dus +alles +onder +ja +eens +hier +wie +werd +altijd +doch +wordt +wezen +kunnen +ons +zelf +tegen +na +reeds +wil +kon +niets +uw +iemand +geweest +andere diff --git a/src/backend/snowball/stopwords/english.stop b/src/backend/snowball/stopwords/english.stop new file mode 100644 index 00000000000..a9130116d3e --- /dev/null +++ b/src/backend/snowball/stopwords/english.stop @@ -0,0 +1,128 @@ +i +me +my +myself +we +our +ours +ourselves +you +your +yours +yourself +yourselves +he +him +his +himself +she +her +hers +herself +it +its +itself +they +them +their +theirs +themselves +what +which +who +whom +this +that +these +those +am +is +are +was +were +be +been +being +have +has +had +having +do +does +did +doing +a +an +the +and +but +if +or +because +as +until +while +of +at +by +for +with +about +against +between +into +through +during +before +after +above +below +to +from +up +down +in +out +on +off +over +under +again +further +then +once +here +there +when +where +why +how +all +any +both +each +few +more +most +other +some +such +no +nor +not +only +own +same +so +than +too +very +s +t +can +will +just +don +should +now + diff --git a/src/backend/snowball/stopwords/finnish.stop b/src/backend/snowball/stopwords/finnish.stop new file mode 100644 index 00000000000..47ee200f678 --- /dev/null +++ b/src/backend/snowball/stopwords/finnish.stop @@ -0,0 +1,235 @@ +olla +olen +olet +on +olemme +olette +ovat +ole +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet +en +et +ei +emme +ette +eivät +minä +minun +minut +minua +minussa +minusta +minuun +minulla +minulta +minulle +sinä +sinun +sinut +sinua +sinussa +sinusta +sinuun +sinulla +sinulta +sinulle +hän +hänen +hänet +häntä +hänessä +hänestä +häneen +hänellä +häneltä +hänelle +me +meidän +meidät +meitä +meissä +meistä +meihin +meillä +meiltä +meille +te +teidän +teidät +teitä +teissä +teistä +teihin +teillä +teiltä +teille +he +heidän +heidät +heitä +heissä +heistä +heihin +heillä +heiltä +heille +tämä +tämän +tätä +tässä +tästä +tähän +tallä +tältä +tälle +tänä +täksi +tuo +tuon +tuotä +tuossa +tuosta +tuohon +tuolla +tuolta +tuolle +tuona +tuoksi +se +sen +sitä +siinä +siitä +siihen +sillä +siltä +sille +sinä +siksi +nämä +näiden +näitä +näissä +näistä +näihin +näillä +näiltä +näille +näinä +näiksi +nuo +noiden +noita +noissa +noista +noihin +noilla +noilta +noille +noina +noiksi +ne +niiden +niitä +niissä +niistä +niihin +niillä +niiltä +niille +niinä +niiksi +kuka +kenen +kenet +ketä +kenessä +kenestä +keneen +kenellä +keneltä +kenelle +kenenä +keneksi +ketkä +keiden +ketkä +keitä +keissä +keistä +keihin +keillä +keiltä +keille +keinä +keiksi +mikä +minkä +minkä +mitä +missä +mistä +mihin +millä +miltä +mille +minä +miksi +mitkä +joka +jonka +jota +jossa +josta +johon +jolla +jolta +jolle +jona +joksi +jotka +joiden +joita +joissa +joista +joihin +joilla +joilta +joille +joina +joiksi +että +ja +jos +koska +kuin +mutta +niin +sekä +sillä +tai +vaan +vai +vaikka +kanssa +mukaan +noin +poikki +yli +kun +niin +nyt +itse diff --git a/src/backend/snowball/stopwords/french.stop b/src/backend/snowball/stopwords/french.stop new file mode 100644 index 00000000000..e7cbf4c9750 --- /dev/null +++ b/src/backend/snowball/stopwords/french.stop @@ -0,0 +1,155 @@ +au +aux +avec +ce +ces +dans +de +des +du +elle +en +et +eux +il +je +la +le +leur +lui +ma +mais +me +même +mes +moi +mon +ne +nos +notre +nous +on +ou +par +pas +pour +qu +que +qui +sa +se +ses +son +sur +ta +te +tes +toi +ton +tu +un +une +vos +votre +vous +c +d +j +l +à +m +n +s +t +y +été +étée +étées +étés +étant +étante +étants +étantes +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent +ayant +ayante +ayantes +ayants +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent diff --git a/src/backend/snowball/stopwords/german.stop b/src/backend/snowball/stopwords/german.stop new file mode 100644 index 00000000000..edef220b7a7 --- /dev/null +++ b/src/backend/snowball/stopwords/german.stop @@ -0,0 +1,231 @@ +aber +alle +allem +allen +aller +alles +als +also +am +an +ander +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders +auch +auf +aus +bei +bin +bis +bist +da +damit +dann +der +den +des +dem +die +das +daß +derselbe +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe +dazu +dein +deine +deinem +deinen +deiner +deines +denn +derer +dessen +dich +dir +du +dies +diese +diesem +diesen +dieser +dieses +doch +dort +durch +ein +eine +einem +einen +einer +eines +einig +einige +einigem +einigen +einiger +einiges +einmal +er +ihn +ihm +es +etwas +euer +eure +eurem +euren +eurer +eures +für +gegen +gewesen +hab +habe +haben +hat +hatte +hatten +hier +hin +hinter +ich +mich +mir +ihr +ihre +ihrem +ihren +ihrer +ihres +euch +im +in +indem +ins +ist +jede +jedem +jeden +jeder +jedes +jene +jenem +jenen +jener +jenes +jetzt +kann +kein +keine +keinem +keinen +keiner +keines +können +könnte +machen +man +manche +manchem +manchen +mancher +manches +mein +meine +meinem +meinen +meiner +meines +mit +muss +musste +nach +nicht +nichts +noch +nun +nur +ob +oder +ohne +sehr +sein +seine +seinem +seinen +seiner +seines +selbst +sich +sie +ihnen +sind +so +solche +solchem +solchen +solcher +solches +soll +sollte +sondern +sonst +über +um +und +uns +unse +unsem +unsen +unser +unses +unter +viel +vom +von +vor +während +war +waren +warst +was +weg +weil +weiter +welche +welchem +welchen +welcher +welches +wenn +werde +werden +wie +wieder +will +wir +wird +wirst +wo +wollen +wollte +würde +würden +zu +zum +zur +zwar +zwischen diff --git a/src/backend/snowball/stopwords/hungarian.stop b/src/backend/snowball/stopwords/hungarian.stop new file mode 100644 index 00000000000..94e9f9a0b07 --- /dev/null +++ b/src/backend/snowball/stopwords/hungarian.stop @@ -0,0 +1,199 @@ +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elõ +elõször +elõtt +elsõ +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +õ +õk +õket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/src/backend/snowball/stopwords/italian.stop b/src/backend/snowball/stopwords/italian.stop new file mode 100644 index 00000000000..6ee02b51fb1 --- /dev/null +++ b/src/backend/snowball/stopwords/italian.stop @@ -0,0 +1,279 @@ +ad +al +allo +ai +agli +all +agl +alla +alle +con +col +coi +da +dal +dallo +dai +dagli +dall +dagl +dalla +dalle +di +del +dello +dei +degli +dell +degl +della +delle +in +nel +nello +nei +negli +nell +negl +nella +nelle +su +sul +sullo +sui +sugli +sull +sugl +sulla +sulle +per +tra +contro +io +tu +lui +lei +noi +voi +loro +mio +mia +miei +mie +tuo +tua +tuoi +tue +suo +sua +suoi +sue +nostro +nostra +nostri +nostre +vostro +vostra +vostri +vostre +mi +ti +ci +vi +lo +la +li +le +gli +ne +il +un +uno +una +ma +ed +se +perché +anche +come +dov +dove +che +chi +cui +non +più +quale +quanto +quanti +quanta +quante +quello +quelli +quella +quelle +questo +questi +questa +queste +si +tutto +tutti +a +c +e +i +l +o +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/src/backend/snowball/stopwords/norwegian.stop b/src/backend/snowball/stopwords/norwegian.stop new file mode 100644 index 00000000000..9ac1abbb6cb --- /dev/null +++ b/src/backend/snowball/stopwords/norwegian.stop @@ -0,0 +1,176 @@ +og +i +jeg +det +at +en +et +den +til +er +som +på +de +med +han +av +ikke +ikkje +der +så +var +meg +seg +men +ett +har +om +vi +min +mitt +ha +hadde +hun +nå +over +da +ved +fra +du +ut +sin +dem +oss +opp +man +kan +hans +hvor +eller +hva +skal +selv +sjøl +her +alle +vil +bli +ble +blei +blitt +kunne +inn +når +være +kom +noen +noe +ville +dere +som +deres +kun +ja +etter +ned +skulle +denne +for +deg +si +sine +sitt +mot +å +meget +hvorfor +dette +disse +uten +hvordan +ingen +din +ditt +blir +samme +hvilken +hvilke +sånn +inni +mellom +vår +hver +hvem +vors +hvis +både +bare +enn +fordi +før +mange +også +slik +vært +være +båe +begge +siden +dykk +dykkar +dei +deira +deires +deim +di +då +eg +ein +eit +eitt +elles +honom +hjå +ho +hoe +henne +hennar +hennes +hoss +hossen +ikkje +ingi +inkje +korleis +korso +kva +kvar +kvarhelst +kven +kvi +kvifor +me +medan +mi +mine +mykje +no +nokon +noka +nokor +noko +nokre +si +sia +sidan +so +somt +somme +um +upp +vere +vore +verte +vort +varte +vart diff --git a/src/backend/snowball/stopwords/portuguese.stop b/src/backend/snowball/stopwords/portuguese.stop new file mode 100644 index 00000000000..6b2477863b7 --- /dev/null +++ b/src/backend/snowball/stopwords/portuguese.stop @@ -0,0 +1,203 @@ +de +a +o +que +e +do +da +em +um +para +com +não +uma +os +no +se +na +por +mais +as +dos +como +mas +ao +ele +das +à +seu +sua +ou +quando +muito +nos +já +eu +também +só +pelo +pela +até +isso +ela +entre +depois +sem +mesmo +aos +seus +quem +nas +me +esse +eles +você +essa +num +nem +suas +meu +às +minha +numa +pelos +elas +qual +nós +lhe +deles +essas +esses +pelas +este +dele +tu +te +vocês +vos +lhes +meus +minhas +teu +tua +teus +tuas +nosso +nossa +nossos +nossas +dela +delas +esta +estes +estas +aquele +aquela +aqueles +aquelas +isto +aquilo +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/src/backend/snowball/stopwords/russian.stop b/src/backend/snowball/stopwords/russian.stop new file mode 100644 index 00000000000..ecb83d4a7f3 --- /dev/null +++ b/src/backend/snowball/stopwords/russian.stop @@ -0,0 +1,151 @@ +и +в +во +не +что +он +на +я +с +со +как +а +то +все +она +так +его +но +да +ты +к +у +же +вы +за +бы +по +только +ее +мне +было +вот +от +меня +еще +нет +о +из +ему +теперь +когда +даже +ну +вдруг +ли +если +уже +или +ни +быть +был +него +до +вас +нибудь +опять +уж +вам +ведь +там +потом +себя +ничего +ей +может +они +тут +где +есть +надо +ней +для +мы +тебя +их +чем +была +сам +чтоб +без +будто +чего +раз +тоже +себе +под +будет +ж +тогда +кто +этот +того +потому +этого +какой +совсем +ним +здесь +этом +один +почти +мой +тем +чтобы +нее +сейчас +были +куда +зачем +всех +никогда +можно +при +наконец +два +об +другой +хоть +после +над +больше +тот +через +эти +нас +про +всего +них +какая +много +разве +три +эту +моя +впрочем +хорошо +свою +этой +перед +иногда +лучше +чуть +том +нельзя +такой +им +более +всегда +конечно +всю +между diff --git a/src/backend/snowball/stopwords/spanish.stop b/src/backend/snowball/stopwords/spanish.stop new file mode 100644 index 00000000000..59bc786caa4 --- /dev/null +++ b/src/backend/snowball/stopwords/spanish.stop @@ -0,0 +1,313 @@ +de +la +que +el +en +y +a +los +del +se +las +por +un +para +con +no +una +su +al +lo +como +más +pero +sus +le +ya +o +este +sí +porque +esta +entre +cuando +muy +sin +sobre +también +me +hasta +hay +donde +quien +desde +todo +nos +durante +todos +uno +les +ni +contra +otros +ese +eso +ante +ellos +e +esto +mí +antes +algunos +qué +unos +yo +otro +otras +otra +él +tanto +esa +estos +mucho +quienes +nada +muchos +cual +poco +ella +estar +estas +algunas +algo +nosotros +mi +mis +tú +te +ti +tu +tus +ellas +nosotras +vosostros +vosostras +os +mío +mía +míos +mías +tuyo +tuya +tuyos +tuyas +suyo +suya +suyos +suyas +nuestro +nuestra +nuestros +nuestras +vuestro +vuestra +vuestros +vuestras +esos +esas +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería +serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +sintiendo +sentido +sentida +sentidos +sentidas +siente +sentid +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened diff --git a/src/backend/snowball/stopwords/swedish.stop b/src/backend/snowball/stopwords/swedish.stop new file mode 100644 index 00000000000..742bb6263b9 --- /dev/null +++ b/src/backend/snowball/stopwords/swedish.stop @@ -0,0 +1,114 @@ +och +det +att +i +en +jag +hon +som +han +på +den +med +var +sig +för +så +till +är +men +ett +om +hade +de +av +icke +mig +du +henne +då +sin +nu +har +inte +hans +honom +skulle +hennes +där +min +man +ej +vid +kunde +något +från +ut +när +efter +upp +vi +dem +vara +vad +över +än +dig +kan +sina +här +ha +mot +alla +under +någon +eller +allt +mycket +sedan +ju +denna +själv +detta +åt +utan +varit +hur +ingen +mitt +ni +bli +blev +oss +din +dessa +några +deras +blir +mina +samma +vilken +er +sådan +vår +blivit +dess +inom +mellan +sådant +varför +varje +vilka +ditt +vem +vilket +sitta +sådana +vart +dina +vars +vårt +våra +ert +era +vilkas diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 77e40674df9..c38647db322 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.284 2007/07/17 05:02:02 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.285 2007/08/21 01:11:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -355,6 +355,8 @@ check_xact_readonly(Node *parsetree) case T_TruncateStmt: case T_DropOwnedStmt: case T_ReassignOwnedStmt: + case T_AlterTSDictionaryStmt: + case T_AlterTSConfigurationStmt: ereport(ERROR, (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), errmsg("transaction is read-only"))); @@ -661,6 +663,26 @@ ProcessUtility(Node *parsetree, stmt->missing_ok); break; + case OBJECT_TSPARSER: + RemoveTSParser(names, stmt->behavior, + stmt->missing_ok); + break; + + case OBJECT_TSDICTIONARY: + RemoveTSDictionary(names, stmt->behavior, + stmt->missing_ok); + break; + + case OBJECT_TSTEMPLATE: + RemoveTSTemplate(names, stmt->behavior, + stmt->missing_ok); + break; + + case OBJECT_TSCONFIGURATION: + RemoveTSConfiguration(names, stmt->behavior, + stmt->missing_ok); + break; + default: elog(ERROR, "unrecognized drop object type: %d", (int) stmt->removeType); @@ -832,6 +854,22 @@ ProcessUtility(Node *parsetree, Assert(stmt->args == NIL); DefineType(stmt->defnames, stmt->definition); break; + case OBJECT_TSPARSER: + Assert(stmt->args == NIL); + DefineTSParser(stmt->defnames, stmt->definition); + break; + case OBJECT_TSDICTIONARY: + Assert(stmt->args == NIL); + DefineTSDictionary(stmt->defnames, stmt->definition); + break; + case OBJECT_TSTEMPLATE: + Assert(stmt->args == NIL); + DefineTSTemplate(stmt->defnames, stmt->definition); + break; + case OBJECT_TSCONFIGURATION: + Assert(stmt->args == NIL); + DefineTSConfiguration(stmt->defnames, stmt->definition); + break; default: elog(ERROR, "unrecognized define stmt type: %d", (int) stmt->kind); @@ -1221,6 +1259,14 @@ ProcessUtility(Node *parsetree, RemoveOpFamily((RemoveOpFamilyStmt *) parsetree); break; + case T_AlterTSDictionaryStmt: + AlterTSDictionary((AlterTSDictionaryStmt *) parsetree); + break; + + case T_AlterTSConfigurationStmt: + AlterTSConfiguration((AlterTSConfigurationStmt *) parsetree); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(parsetree)); @@ -1525,6 +1571,18 @@ CreateCommandTag(Node *parsetree) case OBJECT_SCHEMA: tag = "DROP SCHEMA"; break; + case OBJECT_TSPARSER: + tag = "DROP TEXT SEARCH PARSER"; + break; + case OBJECT_TSDICTIONARY: + tag = "DROP TEXT SEARCH DICTIONARY"; + break; + case OBJECT_TSTEMPLATE: + tag = "DROP TEXT SEARCH TEMPLATE"; + break; + case OBJECT_TSCONFIGURATION: + tag = "DROP TEXT SEARCH CONFIGURATION"; + break; default: tag = "???"; } @@ -1591,6 +1649,18 @@ CreateCommandTag(Node *parsetree) case OBJECT_VIEW: tag = "ALTER VIEW"; break; + case OBJECT_TSPARSER: + tag = "ALTER TEXT SEARCH PARSER"; + break; + case OBJECT_TSDICTIONARY: + tag = "ALTER TEXT SEARCH DICTIONARY"; + break; + case OBJECT_TSTEMPLATE: + tag = "ALTER TEXT SEARCH TEMPLATE"; + break; + case OBJECT_TSCONFIGURATION: + tag = "ALTER TEXT SEARCH CONFIGURATION"; + break; default: tag = "???"; break; @@ -1618,6 +1688,18 @@ CreateCommandTag(Node *parsetree) case OBJECT_TYPE: tag = "ALTER TYPE"; break; + case OBJECT_TSPARSER: + tag = "ALTER TEXT SEARCH PARSER"; + break; + case OBJECT_TSDICTIONARY: + tag = "ALTER TEXT SEARCH DICTIONARY"; + break; + case OBJECT_TSTEMPLATE: + tag = "ALTER TEXT SEARCH TEMPLATE"; + break; + case OBJECT_TSCONFIGURATION: + tag = "ALTER TEXT SEARCH CONFIGURATION"; + break; default: tag = "???"; break; @@ -1663,6 +1745,12 @@ CreateCommandTag(Node *parsetree) case OBJECT_TYPE: tag = "ALTER TYPE"; break; + case OBJECT_TSCONFIGURATION: + tag = "ALTER TEXT SEARCH CONFIGURATION"; + break; + case OBJECT_TSDICTIONARY: + tag = "ALTER TEXT SEARCH DICTIONARY"; + break; default: tag = "???"; break; @@ -1722,6 +1810,18 @@ CreateCommandTag(Node *parsetree) case OBJECT_TYPE: tag = "CREATE TYPE"; break; + case OBJECT_TSPARSER: + tag = "CREATE TEXT SEARCH PARSER"; + break; + case OBJECT_TSDICTIONARY: + tag = "CREATE TEXT SEARCH DICTIONARY"; + break; + case OBJECT_TSTEMPLATE: + tag = "CREATE TEXT SEARCH TEMPLATE"; + break; + case OBJECT_TSCONFIGURATION: + tag = "CREATE TEXT SEARCH CONFIGURATION"; + break; default: tag = "???"; } @@ -1949,6 +2049,14 @@ CreateCommandTag(Node *parsetree) tag = "DROP OPERATOR FAMILY"; break; + case T_AlterTSDictionaryStmt: + tag = "ALTER TEXT SEARCH DICTIONARY"; + break; + + case T_AlterTSConfigurationStmt: + tag = "ALTER TEXT SEARCH CONFIGURATION"; + break; + case T_PrepareStmt: tag = "PREPARE"; break; @@ -2386,6 +2494,14 @@ GetCommandLogLevel(Node *parsetree) lev = LOGSTMT_DDL; break; + case T_AlterTSDictionaryStmt: + lev = LOGSTMT_DDL; + break; + + case T_AlterTSConfigurationStmt: + lev = LOGSTMT_DDL; + break; + case T_PrepareStmt: { PrepareStmt *stmt = (PrepareStmt *) parsetree; diff --git a/src/backend/tsearch/Makefile b/src/backend/tsearch/Makefile new file mode 100644 index 00000000000..30bd13fa116 --- /dev/null +++ b/src/backend/tsearch/Makefile @@ -0,0 +1,51 @@ +#------------------------------------------------------------------------- +# +# Makefile for backend/tsearch +# +# Copyright (c) 2006-2007, PostgreSQL Global Development Group +# +# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.1 2007/08/21 01:11:18 tgl Exp $ +# +#------------------------------------------------------------------------- +subdir = src/backend/tsearch +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +DICTDIR=tsearch_data + +DICTFILES=synonym.syn.sample thesaurus.ths.sample + +OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \ + dict_simple.o dict_synonym.o dict_thesaurus.o \ + dict_ispell.o regis.o spell.o \ + to_tsany.o ts_utils.o + +all: SUBSYS.o + +SUBSYS.o: $(OBJS) + $(LD) $(LDREL) $(LDOUT) SUBSYS.o $^ + +depend dep: + $(CC) -MM $(CFLAGS) *.c >depend + +.PHONY: install-data +install-data: $(DICTFILES) installdirs + for i in $(DICTFILES); \ + do $(INSTALL_DATA) $$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \ + done + +installdirs: + $(mkinstalldirs) '$(DESTDIR)$(datadir)' '$(DESTDIR)$(datadir)/$(DICTDIR)' + +.PHONY: uninstall-data +uninstall-data: + for i in $(DICTFILES); \ + do rm -rf '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \ + done + +clean distclean maintainer-clean: + rm -f SUBSYS.o $(OBJS) + +ifeq (depend,$(wildcard depend)) +include depend +endif diff --git a/src/backend/tsearch/dict.c b/src/backend/tsearch/dict.c new file mode 100644 index 00000000000..15deb71af62 --- /dev/null +++ b/src/backend/tsearch/dict.c @@ -0,0 +1,131 @@ +/*------------------------------------------------------------------------- + * + * dict.c + * Standard interface to dictionary + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/dict.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "funcapi.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "access/skey.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_type.h" +#include "tsearch/ts_cache.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +/* + * Lexize one word by dictionary, mostly debug function + */ +static ArrayType * +ts_lexize_workhorse(Oid dictId, text *in) +{ + TSDictionaryCacheEntry *dict; + TSLexeme *res, + *ptr; + Datum *da; + ArrayType *a; + DictSubState dstate = {false, false, NULL}; + + dict = lookup_ts_dictionary_cache(dictId); + + res = (TSLexeme *) DatumGetPointer(FunctionCall4(&dict->lexize, + PointerGetDatum(dict->dictData), + PointerGetDatum(VARDATA(in)), + Int32GetDatum(VARSIZE(in) - VARHDRSZ), + PointerGetDatum(&dstate))); + + if (dstate.getnext) + { + dstate.isend = true; + ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&dict->lexize, + PointerGetDatum(dict->dictData), + PointerGetDatum(VARDATA(in)), + Int32GetDatum(VARSIZE(in) - VARHDRSZ), + PointerGetDatum(&dstate))); + if (ptr != NULL) + res = ptr; + } + + if (!res) + return NULL; + + ptr = res; + while (ptr->lexeme) + ptr++; + da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1)); + ptr = res; + while (ptr->lexeme) + { + da[ptr - res] = DirectFunctionCall1(textin, CStringGetDatum(ptr->lexeme)); + ptr++; + } + + a = construct_array(da, + ptr - res, + TEXTOID, + -1, + false, + 'i'); + + ptr = res; + while (ptr->lexeme) + { + pfree(DatumGetPointer(da[ptr - res])); + pfree(ptr->lexeme); + ptr++; + } + pfree(res); + pfree(da); + + return a; +} + +Datum +ts_lexize_byid(PG_FUNCTION_ARGS) +{ + Oid dictId = PG_GETARG_OID(0); + text *in = PG_GETARG_TEXT_P(1); + ArrayType *a; + + a = ts_lexize_workhorse(dictId, in); + + if (a) + PG_RETURN_POINTER(a); + else + PG_RETURN_NULL(); +} + +Datum +ts_lexize_byname(PG_FUNCTION_ARGS) +{ + text *dictname = PG_GETARG_TEXT_P(0); + text *in = PG_GETARG_TEXT_P(1); + Oid dictId; + ArrayType *a; + + dictId = TSDictionaryGetDictid(textToQualifiedNameList(dictname), false); + a = ts_lexize_workhorse(dictId, in); + + if (a) + PG_RETURN_POINTER(a); + else + PG_RETURN_NULL(); +} diff --git a/src/backend/tsearch/dict_ispell.c b/src/backend/tsearch/dict_ispell.c new file mode 100644 index 00000000000..f7cee107300 --- /dev/null +++ b/src/backend/tsearch/dict_ispell.c @@ -0,0 +1,164 @@ +/*------------------------------------------------------------------------- + * + * dict_ispell.c + * Ispell dictionary interface + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "tsearch/dicts/spell.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/memutils.h" + + +typedef struct +{ + StopList stoplist; + IspellDict obj; +} DictISpell; + +Datum +dispell_init(PG_FUNCTION_ARGS) +{ + DictISpell *d; + Map *cfg, + *pcfg; + bool affloaded = false, + dictloaded = false, + stoploaded = false; + text *in; + + /* init functions must defend against NULLs for themselves */ + if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("NULL config not allowed for ISpell"))); + in = PG_GETARG_TEXT_P(0); + + parse_keyvalpairs(in, &cfg); + PG_FREE_IF_COPY(in, 0); + + d = (DictISpell *) palloc0(sizeof(DictISpell)); + d->stoplist.wordop = recode_and_lowerstr; + + pcfg = cfg; + while (pcfg->key) + { + if (pg_strcasecmp("DictFile", pcfg->key) == 0) + { + if (dictloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple DictFile parameters"))); + NIImportDictionary(&(d->obj), + get_tsearch_config_filename(pcfg->value, + "dict")); + dictloaded = true; + } + else if (pg_strcasecmp("AffFile", pcfg->key) == 0) + { + if (affloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple AffFile parameters"))); + NIImportAffixes(&(d->obj), + get_tsearch_config_filename(pcfg->value, + "affix")); + affloaded = true; + } + else if (pg_strcasecmp("StopWords", pcfg->key) == 0) + { + if (stoploaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple StopWords parameters"))); + readstoplist(pcfg->value, &(d->stoplist)); + sortstoplist(&(d->stoplist)); + stoploaded = true; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized ISpell parameter: \"%s\"", + pcfg->key))); + } + pfree(pcfg->key); + pfree(pcfg->value); + pcfg++; + } + pfree(cfg); + + if (affloaded && dictloaded) + { + NISortDictionary(&(d->obj)); + NISortAffixes(&(d->obj)); + } + else if (!affloaded) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing AffFile parameter"))); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing DictFile parameter"))); + } + + MemoryContextDeleteChildren(CurrentMemoryContext); + + PG_RETURN_POINTER(d); +} + +Datum +dispell_lexize(PG_FUNCTION_ARGS) +{ + DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt; + TSLexeme *res; + TSLexeme *ptr, + *cptr; + + if (len <= 0) + PG_RETURN_POINTER(NULL); + + txt = lowerstr_with_len(in, len); + res = NINormalizeWord(&(d->obj), txt); + + if (res == NULL) + PG_RETURN_POINTER(NULL); + + ptr = cptr = res; + while (ptr->lexeme) + { + if (searchstoplist(&(d->stoplist), ptr->lexeme)) + { + pfree(ptr->lexeme); + ptr->lexeme = NULL; + ptr++; + } + else + { + memcpy(cptr, ptr, sizeof(TSLexeme)); + cptr++; + ptr++; + } + } + cptr->lexeme = NULL; + + PG_RETURN_POINTER(res); +} diff --git a/src/backend/tsearch/dict_simple.c b/src/backend/tsearch/dict_simple.c new file mode 100644 index 00000000000..2c1bc3d017e --- /dev/null +++ b/src/backend/tsearch/dict_simple.c @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------- + * + * dict_simple.c + * Simple dictionary: just lowercase and check for stopword + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + + +typedef struct +{ + StopList stoplist; +} DictExample; + + +Datum +dsimple_init(PG_FUNCTION_ARGS) +{ + DictExample *d = (DictExample *) palloc0(sizeof(DictExample)); + + d->stoplist.wordop = recode_and_lowerstr; + + if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL) + { + text *in = PG_GETARG_TEXT_P(0); + char *filename = TextPGetCString(in); + + readstoplist(filename, &d->stoplist); + sortstoplist(&d->stoplist); + pfree(filename); + } + + PG_RETURN_POINTER(d); +} + +Datum +dsimple_lexize(PG_FUNCTION_ARGS) +{ + DictExample *d = (DictExample *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + char *txt = lowerstr_with_len(in, len); + TSLexeme *res = palloc0(sizeof(TSLexeme) * 2); + + if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) + { + pfree(txt); + } + else + res[0].lexeme = txt; + + PG_RETURN_POINTER(res); +} diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c new file mode 100644 index 00000000000..cffad72b80f --- /dev/null +++ b/src/backend/tsearch/dict_synonym.c @@ -0,0 +1,176 @@ +/*------------------------------------------------------------------------- + * + * dict_synonym.c + * Synonym dictionary: replace word by its synonym + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/fd.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + + +#define SYNBUFLEN 4096 +typedef struct +{ + char *in; + char *out; +} Syn; + +typedef struct +{ + int len; + Syn *syn; +} DictSyn; + +static char * +findwrd(char *in, char **end) +{ + char *start; + + *end = NULL; + while (*in && t_isspace(in)) + in += pg_mblen(in); + + if (*in == '\0') + return NULL; + start = in; + + while (*in && !t_isspace(in)) + in += pg_mblen(in); + + *end = in; + return start; +} + +static int +compareSyn(const void *a, const void *b) +{ + return strcmp(((Syn *) a)->in, ((Syn *) b)->in); +} + + +Datum +dsynonym_init(PG_FUNCTION_ARGS) +{ + text *in; + DictSyn *d; + int cur = 0; + FILE *fin; + char *filename; + char buf[SYNBUFLEN]; + char *starti, + *starto, + *end = NULL; + int slen; + + /* init functions must defend against NULLs for themselves */ + if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("NULL config not allowed for Synonym"))); + in = PG_GETARG_TEXT_P(0); + + filename = get_tsearch_config_filename(TextPGetCString(in), "syn"); + + PG_FREE_IF_COPY(in, 0); + + if ((fin = AllocateFile(filename, "r")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open synonym file \"%s\": %m", + filename))); + + d = (DictSyn *) palloc0(sizeof(DictSyn)); + + while (fgets(buf, SYNBUFLEN, fin)) + { + slen = strlen(buf); + pg_verifymbstr(buf, slen, false); + if (cur == d->len) + { + if (d->len == 0) + { + d->len = 16; + d->syn = (Syn *) palloc(sizeof(Syn) * d->len); + } + else + { + d->len *= 2; + d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); + } + } + + starti = findwrd(buf, &end); + if (!starti) + continue; + *end = '\0'; + if (end >= buf + slen) + continue; + + starto = findwrd(end + 1, &end); + if (!starto) + continue; + *end = '\0'; + + d->syn[cur].in = recode_and_lowerstr(starti); + d->syn[cur].out = recode_and_lowerstr(starto); + if (!(d->syn[cur].in && d->syn[cur].out)) + { + FreeFile(fin); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + } + + cur++; + } + + FreeFile(fin); + + d->len = cur; + if (cur > 1) + qsort(d->syn, d->len, sizeof(Syn), compareSyn); + + pfree(filename); + PG_RETURN_POINTER(d); +} + +Datum +dsynonym_lexize(PG_FUNCTION_ARGS) +{ + DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); + char *in = (char *) PG_GETARG_POINTER(1); + int32 len = PG_GETARG_INT32(2); + Syn key, + *found; + TSLexeme *res; + + if (len <= 0) + PG_RETURN_POINTER(NULL); + + key.in = lowerstr_with_len(in, len); + key.out = NULL; + + found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn); + pfree(key.in); + + if (!found) + PG_RETURN_POINTER(NULL); + + res = palloc(sizeof(TSLexeme) * 2); + memset(res, 0, sizeof(TSLexeme) * 2); + res[0].lexeme = pstrdup(found->out); + + PG_RETURN_POINTER(res); +} diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c new file mode 100644 index 00000000000..8c544ad4f8a --- /dev/null +++ b/src/backend/tsearch/dict_thesaurus.c @@ -0,0 +1,887 @@ +/*------------------------------------------------------------------------- + * + * dict_thesaurus.c + * Thesaurus dictionary: phrase to phrase substitution + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/namespace.h" +#include "storage/fd.h" +#include "tsearch/ts_cache.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + + +/* + * Temporay we use TSLexeme.flags for inner use... + */ +#define DT_USEASIS 0x1000 + +typedef struct LexemeInfo +{ + uint16 idsubst; /* entry's number in DictThesaurus->subst */ + uint16 posinsubst; /* pos info in entry */ + uint16 tnvariant; /* total num lexemes in one variant */ + struct LexemeInfo *nextentry; + struct LexemeInfo *nextvariant; +} LexemeInfo; + +typedef struct +{ + char *lexeme; + LexemeInfo *entries; +} TheLexeme; + +typedef struct +{ + uint16 lastlexeme; /* number lexemes to substitute */ + uint16 reslen; + TSLexeme *res; /* prepared substituted result */ +} TheSubstitute; + +typedef struct +{ + /* subdictionary to normalize lexemes */ + Oid subdictOid; + TSDictionaryCacheEntry *subdict; + + /* Array to search lexeme by exact match */ + TheLexeme *wrds; + int nwrds; + int ntwrds; + + /* + * Storage of substituted result, n-th element is for n-th expression + */ + TheSubstitute *subst; + int nsubst; +} DictThesaurus; + + +static void +newLexeme(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 posinsubst) +{ + TheLexeme *ptr; + + if (d->nwrds >= d->ntwrds) + { + if (d->ntwrds == 0) + { + d->ntwrds = 16; + d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds); + } + else + { + d->ntwrds *= 2; + d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds); + } + } + + ptr = d->wrds + d->nwrds; + d->nwrds++; + + ptr->lexeme = palloc(e - b + 1); + + memcpy(ptr->lexeme, b, e - b); + ptr->lexeme[e - b] = '\0'; + + ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo)); + + ptr->entries->nextentry = NULL; + ptr->entries->idsubst = idsubst; + ptr->entries->posinsubst = posinsubst; +} + +static void +addWrd(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis) +{ + static int nres = 0; + static int ntres = 0; + TheSubstitute *ptr; + + if (nwrd == 0) + { + nres = ntres = 0; + + if (idsubst >= d->nsubst) + { + if (d->nsubst == 0) + { + d->nsubst = 16; + d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst); + } + else + { + d->nsubst *= 2; + d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst); + } + } + } + + ptr = d->subst + idsubst; + + ptr->lastlexeme = posinsubst - 1; + + if (nres + 1 >= ntres) + { + if (ntres == 0) + { + ntres = 2; + ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres); + } + else + { + ntres *= 2; + ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres); + } + + } + + ptr->res[nres].lexeme = palloc(e - b + 1); + memcpy(ptr->res[nres].lexeme, b, e - b); + ptr->res[nres].lexeme[e - b] = '\0'; + + ptr->res[nres].nvariant = nwrd; + if (useasis) + ptr->res[nres].flags = DT_USEASIS; + else + ptr->res[nres].flags = 0; + + ptr->res[++nres].lexeme = NULL; +} + +#define TR_WAITLEX 1 +#define TR_INLEX 2 +#define TR_WAITSUBS 3 +#define TR_INSUBS 4 + +static void +thesaurusRead(char *filename, DictThesaurus * d) +{ + FILE *fh; + char str[BUFSIZ]; + int lineno = 0; + uint16 idsubst = 0; + bool useasis = false; + + filename = get_tsearch_config_filename(filename, "ths"); + fh = AllocateFile(filename, "r"); + if (!fh) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open thesaurus file \"%s\": %m", + filename))); + + while (fgets(str, sizeof(str), fh)) + { + char *ptr, + *recoded; + int state = TR_WAITLEX; + char *beginwrd = NULL; + uint16 posinsubst = 0; + uint16 nwrd = 0; + + ptr = recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), + GetDatabaseEncoding(), PG_UTF8); + if (recoded == NULL) + elog(ERROR, "encoding conversion failed"); + + lineno++; + + /* is it comment ? */ + while (t_isspace(ptr)) + ptr += pg_mblen(ptr); + if (t_iseq(recoded, '#') || *recoded == '\0' || t_iseq(recoded, '\n') || t_iseq(recoded, '\r')) + continue; + + while (*ptr) + { + if (state == TR_WAITLEX) + { + if (t_iseq(ptr, ':')) + { + if (posinsubst == 0) + { + FreeFile(fh); + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("unexpected delimiter at line %d of thesaurus file \"%s\"", + lineno, filename))); + } + state = TR_WAITSUBS; + } + else if (!t_isspace(ptr)) + { + beginwrd = ptr; + state = TR_INLEX; + } + } + else if (state == TR_INLEX) + { + if (t_iseq(ptr, ':')) + { + newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); + state = TR_WAITSUBS; + } + else if (t_isspace(ptr)) + { + newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); + state = TR_WAITLEX; + } + } + else if (state == TR_WAITSUBS) + { + if (t_iseq(ptr, '*')) + { + useasis = true; + state = TR_INSUBS; + beginwrd = ptr + pg_mblen(ptr); + } + else if (t_iseq(ptr, '\\')) + { + useasis = false; + state = TR_INSUBS; + beginwrd = ptr + pg_mblen(ptr); + } + else if (!t_isspace(ptr)) + { + useasis = false; + beginwrd = ptr; + state = TR_INSUBS; + } + } + else if (state == TR_INSUBS) + { + if (t_isspace(ptr)) + { + if (ptr == beginwrd) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("unexpected end of line or lexeme at line %d of thesaurus file \"%s\"", + lineno, filename))); + addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis); + state = TR_WAITSUBS; + } + } + else + elog(ERROR, "unrecognized thesaurus state: %d", state); + + ptr += pg_mblen(ptr); + } + + if (state == TR_INSUBS) + { + if (ptr == beginwrd) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("unexpected end of line or lexeme at line %d of thesaurus file \"%s\"", + lineno, filename))); + addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis); + } + + idsubst++; + + if (!(nwrd && posinsubst)) + { + FreeFile(fh); + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("unexpected end of line at line %d of thesaurus file \"%s\"", + lineno, filename))); + } + + if (recoded != str) + pfree(recoded); + } + + d->nsubst = idsubst; + + FreeFile(fh); +} + +static TheLexeme * +addCompiledLexeme(TheLexeme * newwrds, int *nnw, int *tnm, TSLexeme * lexeme, LexemeInfo * src, uint16 tnvariant) +{ + + if (*nnw >= *tnm) + { + *tnm *= 2; + newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm); + } + + newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo)); + + if (lexeme && lexeme->lexeme) + { + newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme); + newwrds[*nnw].entries->tnvariant = tnvariant; + } + else + { + newwrds[*nnw].lexeme = NULL; + newwrds[*nnw].entries->tnvariant = 1; + } + + newwrds[*nnw].entries->idsubst = src->idsubst; + newwrds[*nnw].entries->posinsubst = src->posinsubst; + + newwrds[*nnw].entries->nextentry = NULL; + + (*nnw)++; + return newwrds; +} + +static int +cmpLexemeInfo(LexemeInfo * a, LexemeInfo * b) +{ + if (a == NULL || b == NULL) + return 0; + + if (a->idsubst == b->idsubst) + { + if (a->posinsubst == b->posinsubst) + { + if (a->tnvariant == b->tnvariant) + return 0; + + return (a->tnvariant > b->tnvariant) ? 1 : -1; + } + + return (a->posinsubst > b->posinsubst) ? 1 : -1; + } + + return (a->idsubst > b->idsubst) ? 1 : -1; +} + +static int +cmpLexeme(TheLexeme * a, TheLexeme * b) +{ + if (a->lexeme == NULL) + { + if (b->lexeme == NULL) + return 0; + else + return 1; + } + else if (b->lexeme == NULL) + return -1; + + return strcmp(a->lexeme, b->lexeme); +} + +static int +cmpLexemeQ(const void *a, const void *b) +{ + return cmpLexeme((TheLexeme *) a, (TheLexeme *) b); +} + +static int +cmpTheLexeme(const void *a, const void *b) +{ + TheLexeme *la = (TheLexeme *) a; + TheLexeme *lb = (TheLexeme *) b; + int res; + + if ((res = cmpLexeme(la, lb)) != 0) + return res; + + return -cmpLexemeInfo(la->entries, lb->entries); +} + +static void +compileTheLexeme(DictThesaurus * d) +{ + int i, + nnw = 0, + tnm = 16; + TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm), + *ptrwrds; + + for (i = 0; i < d->nwrds; i++) + { + TSLexeme *ptr; + + ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize), + PointerGetDatum(d->subdict->dictData), + PointerGetDatum(d->wrds[i].lexeme), + Int32GetDatum(strlen(d->wrds[i].lexeme)), + PointerGetDatum(NULL))); + + if (!(ptr && ptr->lexeme)) + { + if (!ptr) + elog(ERROR, "thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)", + d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); + else + elog(NOTICE, "thesaurus word-sample \"%s\" is recognized as stop-word, assign any stop-word (rule %d)", + d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1); + + newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0); + } + else + { + while (ptr->lexeme) + { + TSLexeme *remptr = ptr + 1; + int tnvar = 1; + int curvar = ptr->nvariant; + + /* compute n words in one variant */ + while (remptr->lexeme) + { + if (remptr->nvariant != (remptr - 1)->nvariant) + break; + tnvar++; + remptr++; + } + + remptr = ptr; + while (remptr->lexeme && remptr->nvariant == curvar) + { + newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar); + remptr++; + } + + ptr = remptr; + } + } + + pfree(d->wrds[i].lexeme); + pfree(d->wrds[i].entries); + } + + pfree(d->wrds); + d->wrds = newwrds; + d->nwrds = nnw; + d->ntwrds = tnm; + + if (d->nwrds > 1) + { + qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme); + + /* uniq */ + newwrds = d->wrds; + ptrwrds = d->wrds + 1; + while (ptrwrds - d->wrds < d->nwrds) + { + if (cmpLexeme(ptrwrds, newwrds) == 0) + { + if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries)) + { + ptrwrds->entries->nextentry = newwrds->entries; + newwrds->entries = ptrwrds->entries; + } + else + pfree(ptrwrds->entries); + + if (ptrwrds->lexeme) + pfree(ptrwrds->lexeme); + } + else + { + newwrds++; + *newwrds = *ptrwrds; + } + + ptrwrds++; + } + + d->nwrds = newwrds - d->wrds + 1; + d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds); + } +} + +static void +compileTheSubstitute(DictThesaurus * d) +{ + int i; + + for (i = 0; i < d->nsubst; i++) + { + TSLexeme *rem = d->subst[i].res, + *outptr, + *inptr; + int n = 2; + + outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n); + outptr->lexeme = NULL; + inptr = rem; + + while (inptr && inptr->lexeme) + { + TSLexeme *lexized, + tmplex[2]; + + if (inptr->flags & DT_USEASIS) + { /* do not lexize */ + tmplex[0] = *inptr; + tmplex[0].flags = 0; + tmplex[1].lexeme = NULL; + lexized = tmplex; + } + else + { + lexized = (TSLexeme *) DatumGetPointer( + FunctionCall4( + &(d->subdict->lexize), + PointerGetDatum(d->subdict->dictData), + PointerGetDatum(inptr->lexeme), + Int32GetDatum(strlen(inptr->lexeme)), + PointerGetDatum(NULL) + ) + ); + } + + if (lexized && lexized->lexeme) + { + int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1; + + while (lexized->lexeme) + { + if (outptr - d->subst[i].res + 1 >= n) + { + int diff = outptr - d->subst[i].res; + + n *= 2; + d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n); + outptr = d->subst[i].res + diff; + } + + *outptr = *lexized; + outptr->lexeme = pstrdup(lexized->lexeme); + + outptr++; + lexized++; + } + + if (toset > 0) + d->subst[i].res[toset].flags |= TSL_ADDPOS; + } + else if (lexized) + { + elog(NOTICE, "thesaurus word \"%s\" in substitution is a stop-word, ignored (rule %d)", inptr->lexeme, i + 1); + } + else + { + elog(ERROR, "thesaurus word \"%s\" in substitution isn't recognized (rule %d)", inptr->lexeme, i + 1); + } + + if (inptr->lexeme) + pfree(inptr->lexeme); + inptr++; + } + + if (outptr == d->subst[i].res) + elog(ERROR, "all words in thesaurus substitution are stop words (rule %d)", i + 1); + + d->subst[i].reslen = outptr - d->subst[i].res; + + pfree(rem); + } +} + +Datum +thesaurus_init(PG_FUNCTION_ARGS) +{ + DictThesaurus *d; + Map *cfg, + *pcfg; + text *in; + char *subdictname = NULL; + bool fileloaded = false; + + /* init functions must defend against NULLs for themselves */ + if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("NULL config not allowed for Thesaurus"))); + in = PG_GETARG_TEXT_P(0); + + parse_keyvalpairs(in, &cfg); + PG_FREE_IF_COPY(in, 0); + + d = (DictThesaurus *) palloc0(sizeof(DictThesaurus)); + + pcfg = cfg; + while (pcfg->key) + { + if (pg_strcasecmp("DictFile", pcfg->key) == 0) + { + if (fileloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple DictFile parameters"))); + thesaurusRead(pcfg->value, d); + fileloaded = true; + } + else if (pg_strcasecmp("Dictionary", pcfg->key) == 0) + { + if (subdictname) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple Dictionary parameters"))); + subdictname = pstrdup(pcfg->value); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unrecognized Thesaurus parameter: \"%s\"", + pcfg->key))); + } + pfree(pcfg->key); + pfree(pcfg->value); + pcfg++; + } + pfree(cfg); + + if (!fileloaded) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing DictFile parameter"))); + if (!subdictname) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing Dictionary parameter"))); + + d->subdictOid = TSDictionaryGetDictid(stringToQualifiedNameList(subdictname), false); + d->subdict = lookup_ts_dictionary_cache(d->subdictOid); + + compileTheLexeme(d); + compileTheSubstitute(d); + + PG_RETURN_POINTER(d); +} + +static LexemeInfo * +findTheLexeme(DictThesaurus * d, char *lexeme) +{ + TheLexeme key = {lexeme, NULL}, *res; + + if (d->nwrds == 0) + return NULL; + + res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ); + + if (res == NULL) + return NULL; + return res->entries; +} + +static bool +matchIdSubst(LexemeInfo * stored, uint16 idsubst) +{ + bool res = true; + + if (stored) + { + res = false; + + for (; stored; stored = stored->nextvariant) + if (stored->idsubst == idsubst) + { + res = true; + break; + } + } + + return res; +} + +static LexemeInfo * +findVariant(LexemeInfo * in, LexemeInfo * stored, uint16 curpos, LexemeInfo ** newin, int newn) +{ + for (;;) + { + int i; + LexemeInfo *ptr = newin[0]; + + for (i = 0; i < newn; i++) + { + while (newin[i] && newin[i]->idsubst < ptr->idsubst) + newin[i] = newin[i]->nextentry; + + if (newin[i] == NULL) + return in; + + if (newin[i]->idsubst > ptr->idsubst) + { + ptr = newin[i]; + i = -1; + continue; + } + + while (newin[i]->idsubst == ptr->idsubst) + { + if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn) + { + ptr = newin[i]; + break; + } + + newin[i] = newin[i]->nextentry; + if (newin[i] == NULL) + return in; + } + + if (newin[i]->idsubst != ptr->idsubst) + { + ptr = newin[i]; + i = -1; + continue; + } + } + + if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst))) + { /* found */ + + ptr->nextvariant = in; + in = ptr; + } + + /* step forward */ + for (i = 0; i < newn; i++) + newin[i] = newin[i]->nextentry; + } + + return NULL; +} + +static TSLexeme * +copyTSLexeme(TheSubstitute * ts) +{ + TSLexeme *res; + uint16 i; + + res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1)); + for (i = 0; i < ts->reslen; i++) + { + res[i] = ts->res[i]; + res[i].lexeme = pstrdup(ts->res[i].lexeme); + } + + res[ts->reslen].lexeme = NULL; + + return res; +} + +static TSLexeme * +checkMatch(DictThesaurus * d, LexemeInfo * info, uint16 curpos, bool *moreres) +{ + *moreres = false; + while (info) + { + Assert(info->idsubst < d->nsubst); + if (info->nextvariant) + *moreres = true; + if (d->subst[info->idsubst].lastlexeme == curpos) + return copyTSLexeme(d->subst + info->idsubst); + info = info->nextvariant; + } + + return NULL; +} + +Datum +thesaurus_lexize(PG_FUNCTION_ARGS) +{ + DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0); + DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3); + TSLexeme *res = NULL; + LexemeInfo *stored, + *info = NULL; + uint16 curpos = 0; + bool moreres = false; + + if (PG_NARGS() < 4 || dstate == NULL) + elog(ERROR, "forbidden call of thesaurus or nested call"); + + if (dstate->isend) + PG_RETURN_POINTER(NULL); + stored = (LexemeInfo *) dstate->private; + + if (stored) + curpos = stored->posinsubst + 1; + + if (!d->subdict->isvalid) + d->subdict = lookup_ts_dictionary_cache(d->subdictOid); + + res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize), + PointerGetDatum(d->subdict->dictData), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2), + PointerGetDatum(NULL))); + + if (res && res->lexeme) + { + TSLexeme *ptr = res, + *basevar; + + while (ptr->lexeme) + { + uint16 nv = ptr->nvariant; + uint16 i, + nlex = 0; + LexemeInfo **infos; + + basevar = ptr; + while (ptr->lexeme && nv == ptr->nvariant) + { + nlex++; + ptr++; + } + + infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex); + for (i = 0; i < nlex; i++) + if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL) + break; + + if (i < nlex) + { + /* no chance to find */ + pfree(infos); + continue; + } + + info = findVariant(info, stored, curpos, infos, nlex); + } + } + else if (res) + { /* stop-word */ + LexemeInfo *infos = findTheLexeme(d, NULL); + + info = findVariant(NULL, stored, curpos, &infos, 1); + } + else + { + info = NULL; /* word isn't recognized */ + } + + dstate->private = (void *) info; + + if (!info) + { + dstate->getnext = false; + PG_RETURN_POINTER(NULL); + } + + if ((res = checkMatch(d, info, curpos, &moreres)) != NULL) + { + dstate->getnext = moreres; + PG_RETURN_POINTER(res); + } + + dstate->getnext = true; + + PG_RETURN_POINTER(NULL); +} diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c new file mode 100644 index 00000000000..705dd96b4ac --- /dev/null +++ b/src/backend/tsearch/regis.c @@ -0,0 +1,236 @@ +/*------------------------------------------------------------------------- + * + * regis.c + * Fast regex subset + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/regis.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/dicts/regis.h" +#include "tsearch/ts_locale.h" + +bool +RS_isRegis(const char *str) +{ + while (str && *str) + { + if (t_isalpha(str) || + t_iseq(str, '[') || + t_iseq(str, ']') || + t_iseq(str, '^')) + str += pg_mblen(str); + else + return false; + } + return true; +} + +#define RS_IN_ONEOF 1 +#define RS_IN_ONEOF_IN 2 +#define RS_IN_NONEOF 3 +#define RS_IN_WAIT 4 + +static RegisNode * +newRegisNode(RegisNode * prev, int len) +{ + RegisNode *ptr; + + ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1); + if (prev) + prev->next = ptr; + return ptr; +} + +void +RS_compile(Regis * r, bool issuffix, char *str) +{ + int len = strlen(str); + int state = RS_IN_WAIT; + char *c = (char *) str; + RegisNode *ptr = NULL; + + memset(r, 0, sizeof(Regis)); + r->issuffix = (issuffix) ? 1 : 0; + + while (*c) + { + if (state == RS_IN_WAIT) + { + if (t_isalpha(c)) + { + if (ptr) + ptr = newRegisNode(ptr, len); + else + ptr = r->node = newRegisNode(NULL, len); + COPYCHAR(ptr->data, c); + ptr->type = RSF_ONEOF; + ptr->len = pg_mblen(c); + } + else if (t_iseq(c, '[')) + { + if (ptr) + ptr = newRegisNode(ptr, len); + else + ptr = r->node = newRegisNode(NULL, len); + ptr->type = RSF_ONEOF; + state = RS_IN_ONEOF; + } + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regis pattern: \"%s\"", + str))); + } + else if (state == RS_IN_ONEOF) + { + if (t_iseq(c, '^')) + { + ptr->type = RSF_NONEOF; + state = RS_IN_NONEOF; + } + else if (t_isalpha(c)) + { + COPYCHAR(ptr->data, c); + ptr->len = pg_mblen(c); + state = RS_IN_ONEOF_IN; + } + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regis pattern: \"%s\"", + str))); + } + else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) + { + if (t_isalpha(c)) + { + COPYCHAR(ptr->data + ptr->len, c); + ptr->len += pg_mblen(c); + } + else if (t_iseq(c, ']')) + state = RS_IN_WAIT; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regis pattern: \"%s\"", + str))); + } + else + elog(ERROR, "internal error in RS_compile: state %d", state); + c += pg_mblen(c); + } + + ptr = r->node; + while (ptr) + { + r->nchar++; + ptr = ptr->next; + } +} + +void +RS_free(Regis * r) +{ + RegisNode *ptr = r->node, + *tmp; + + while (ptr) + { + tmp = ptr->next; + pfree(ptr); + ptr = tmp; + } + + r->node = NULL; +} + +#ifdef TS_USE_WIDE +static bool +mb_strchr(char *str, char *c) +{ + int clen = pg_mblen(c), + plen, + i; + char *ptr = str; + bool res = false; + + clen = pg_mblen(c); + while (*ptr && !res) + { + plen = pg_mblen(ptr); + if (plen == clen) + { + i = plen; + res = true; + while (i--) + if (*(ptr + i) != *(c + i)) + { + res = false; + break; + } + } + + ptr += plen; + } + + return res; +} +#else +#define mb_strchr(s,c) ( (strchr((s),*(c)) == NULL) ? false : true ) +#endif + + +bool +RS_execute(Regis * r, char *str) +{ + RegisNode *ptr = r->node; + char *c = str; + int len = 0; + + while (*c) + { + len++; + c += pg_mblen(c); + } + + if (len < r->nchar) + return 0; + + c = str; + if (r->issuffix) + { + len -= r->nchar; + while (len-- > 0) + c += pg_mblen(c); + } + + + while (ptr) + { + switch (ptr->type) + { + case RSF_ONEOF: + if (mb_strchr((char *) ptr->data, c) != true) + return false; + break; + case RSF_NONEOF: + if (mb_strchr((char *) ptr->data, c) == true) + return false; + break; + default: + elog(ERROR, "unrecognized regis node type: %d", ptr->type); + } + ptr = ptr->next; + c += pg_mblen(c); + } + + return true; +} diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c new file mode 100644 index 00000000000..d09208649f7 --- /dev/null +++ b/src/backend/tsearch/spell.c @@ -0,0 +1,1747 @@ +/*------------------------------------------------------------------------- + * + * spell.c + * Normalizing word with ISpell + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/spell.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "storage/fd.h" +#include "tsearch/dicts/spell.h" +#include "tsearch/ts_locale.h" +#include "utils/memutils.h" + + +/* + * during initialization dictionary requires a lot + * of memory, so it will use temporary context + */ +static MemoryContext tmpCtx = NULL; + +#define tmpalloc(sz) MemoryContextAlloc(tmpCtx, (sz)) +#define tmpalloc0(sz) MemoryContextAllocZero(tmpCtx, (sz)) + +static void +checkTmpCtx(void) +{ + if (CurrentMemoryContext->firstchild == NULL) + { + tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "Ispell dictionary init context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + } + else + tmpCtx = CurrentMemoryContext->firstchild; +} + +static char * +lowerstr_ctx(char *src) +{ + MemoryContext saveCtx; + char *dst; + + saveCtx = MemoryContextSwitchTo(tmpCtx); + dst = lowerstr(src); + MemoryContextSwitchTo(saveCtx); + + return dst; +} + +#define MAX_NORM 1024 +#define MAXNORMLEN 256 + +#define STRNCMP(s,p) strncmp( (s), (p), strlen(p) ) +#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] ) +#define GETCHAR(A,N,T) GETWCHAR( (A)->repl, (A)->replen, N, T ) + +static char *VoidString = ""; + +static int +cmpspell(const void *s1, const void *s2) +{ + return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word)); +} +static int +cmpspellaffix(const void *s1, const void *s2) +{ + return (strcmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag)); +} + +static char * +strnduplicate(char *s, int len) +{ + char *d = (char *) palloc(len + 1); + + memcpy(d, s, len); + d[len] = '\0'; + return d; +} + +static char * +findchar(char *str, int c) +{ + while (*str) + { + if (t_iseq(str, c)) + return str; + str += pg_mblen(str); + } + + return NULL; +} + + +/* backward string compare for suffix tree operations */ +static int +strbcmp(const unsigned char *s1, const unsigned char *s2) +{ + int l1 = strlen((const char *) s1) - 1, + l2 = strlen((const char *) s2) - 1; + + while (l1 >= 0 && l2 >= 0) + { + if (s1[l1] < s2[l2]) + return -1; + if (s1[l1] > s2[l2]) + return 1; + l1--; + l2--; + } + if (l1 < l2) + return -1; + if (l1 > l2) + return 1; + + return 0; +} +static int +strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count) +{ + int l1 = strlen((const char *) s1) - 1, + l2 = strlen((const char *) s2) - 1, + l = count; + + while (l1 >= 0 && l2 >= 0 && l > 0) + { + if (s1[l1] < s2[l2]) + return -1; + if (s1[l1] > s2[l2]) + return 1; + l1--; + l2--; + l--; + } + if (l == 0) + return 0; + if (l1 < l2) + return -1; + if (l1 > l2) + return 1; + return 0; +} + +static int +cmpaffix(const void *s1, const void *s2) +{ + const AFFIX *a1 = (const AFFIX *) s1; + const AFFIX *a2 = (const AFFIX *) s2; + + if (a1->type < a2->type) + return -1; + if (a1->type > a2->type) + return 1; + if (a1->type == FF_PREFIX) + return strcmp(a1->repl, a2->repl); + else + return strbcmp((const unsigned char *) a1->repl, + (const unsigned char *) a2->repl); +} + +static void +NIAddSpell(IspellDict * Conf, const char *word, const char *flag) +{ + if (Conf->nspell >= Conf->mspell) + { + if (Conf->mspell) + { + Conf->mspell += 1024 * 20; + Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); + } + else + { + Conf->mspell = 1024 * 20; + Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *)); + } + } + Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1); + strcpy(Conf->Spell[Conf->nspell]->word, word); + strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, 16); + Conf->nspell++; +} + +/* + * import dictionary + * + * Note caller must already have applied get_tsearch_config_filename + */ +void +NIImportDictionary(IspellDict * Conf, const char *filename) +{ + char str[BUFSIZ], + *pstr; + FILE *dict; + + checkTmpCtx(); + + if (!(dict = AllocateFile(filename, "r"))) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open dictionary file \"%s\": %m", + filename))); + + while (fgets(str, sizeof(str), dict)) + { + char *s, + *recoded; + const char *flag; + + recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), + PG_UTF8, GetDatabaseEncoding()); + if (recoded == NULL) + elog(ERROR, "encoding conversion failed"); + + flag = NULL; + if ((s = findchar(recoded, '/'))) + { + *s++ = '\0'; + flag = s; + while (*s) + { + /* we allow only single encoded flags for faster works */ + if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s)) + s++; + else + { + *s = '\0'; + break; + } + } + } + else + flag = ""; + + + s = recoded; + while (*s) + { + if (t_isspace(s)) + { + *s = '\0'; + break; + } + s += pg_mblen(s); + } + pstr = lowerstr_ctx(recoded); + + NIAddSpell(Conf, pstr, flag); + pfree(pstr); + + if (recoded != str) + pfree(recoded); + } + FreeFile(dict); +} + + +static int +FindWord(IspellDict * Conf, const char *word, int affixflag, int flag) +{ + SPNode *node = Conf->Dictionary; + SPNodeData *StopLow, + *StopHigh, + *StopMiddle; + uint8 *ptr = (uint8 *) word; + + flag &= FF_DICTFLAGMASK; + + while (node && *ptr) + { + StopLow = node->data; + StopHigh = node->data + node->length; + while (StopLow < StopHigh) + { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + if (StopMiddle->val == *ptr) + { + if (*(ptr + 1) == '\0' && StopMiddle->isword) + { + if (flag == 0) + { + if (StopMiddle->compoundflag & FF_COMPOUNDONLY) + return 0; + } + else if ((flag & StopMiddle->compoundflag) == 0) + return 0; + + if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL)) + return 1; + } + node = StopMiddle->node; + ptr++; + break; + } + else if (StopMiddle->val < *ptr) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + if (StopLow >= StopHigh) + break; + } + return 0; +} + +static void +NIAddAffix(IspellDict * Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type) +{ + AFFIX *Affix; + + if (Conf->naffixes >= Conf->maffixes) + { + if (Conf->maffixes) + { + Conf->maffixes += 16; + Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); + } + else + { + Conf->maffixes = 16; + Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX)); + } + } + + Affix = Conf->Affix + Conf->naffixes; + + if (strcmp(mask, ".") == 0) + { + Affix->issimple = 1; + Affix->isregis = 0; + } + else if (RS_isRegis(mask)) + { + Affix->issimple = 0; + Affix->isregis = 1; + RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX) ? true : false, + (char *) ((mask && *mask) ? mask : VoidString)); + } + else + { + int masklen; + int wmasklen; + int err; + pg_wchar *wmask; + char *tmask; + + Affix->issimple = 0; + Affix->isregis = 0; + tmask = (char *) tmpalloc(strlen(mask) + 3); + if (type == FF_SUFFIX) + sprintf(tmask, "%s$", mask); + else + sprintf(tmask, "^%s", mask); + + masklen = strlen(tmask); + wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar)); + wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen); + + err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, REG_ADVANCED | REG_NOSUB); + if (err) + { + char errstr[100]; + + pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr)); + ereport(ERROR, + (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), + errmsg("invalid regular expression: %s", errstr))); + } + } + + Affix->flagflags = flagflags; + if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG)) + { + if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0) + Affix->flagflags |= FF_COMPOUNDFLAG; + } + Affix->flag = flag; + Affix->type = type; + + Affix->find = (find && *find) ? pstrdup(find) : VoidString; + if ((Affix->replen = strlen(repl)) > 0) + Affix->repl = pstrdup(repl); + else + Affix->repl = VoidString; + Conf->naffixes++; +} + +#define PAE_WAIT_MASK 0 +#define PAE_INMASK 1 +#define PAE_WAIT_FIND 2 +#define PAE_INFIND 3 +#define PAE_WAIT_REPL 4 +#define PAE_INREPL 5 + +static bool +parse_affentry(char *str, char *mask, char *find, char *repl, + const char *filename, int line) +{ + int state = PAE_WAIT_MASK; + char *pmask = mask, + *pfind = find, + *prepl = repl; + + *mask = *find = *repl = '\0'; + + while (*str) + { + if (state == PAE_WAIT_MASK) + { + if (t_iseq(str, '#')) + return false; + else if (!t_isspace(str)) + { + COPYCHAR(pmask, str); + pmask += pg_mblen(str); + state = PAE_INMASK; + } + } + else if (state == PAE_INMASK) + { + if (t_iseq(str, '>')) + { + *pmask = '\0'; + state = PAE_WAIT_FIND; + } + else if (!t_isspace(str)) + { + COPYCHAR(pmask, str); + pmask += pg_mblen(str); + } + } + else if (state == PAE_WAIT_FIND) + { + if (t_iseq(str, '-')) + { + state = PAE_INFIND; + } + else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ ) + { + COPYCHAR(prepl, str); + prepl += pg_mblen(str); + state = PAE_INREPL; + } + else if (!t_isspace(str)) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("syntax error at line %d of affix file \"%s\"", + line, filename))); + } + else if (state == PAE_INFIND) + { + if (t_iseq(str, ',')) + { + *pfind = '\0'; + state = PAE_WAIT_REPL; + } + else if (t_isalpha(str)) + { + COPYCHAR(pfind, str); + pfind += pg_mblen(str); + } + else if (!t_isspace(str)) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("syntax error at line %d of affix file \"%s\"", + line, filename))); + } + else if (state == PAE_WAIT_REPL) + { + if (t_iseq(str, '-')) + { + break; /* void repl */ + } + else if (t_isalpha(str)) + { + COPYCHAR(prepl, str); + prepl += pg_mblen(str); + state = PAE_INREPL; + } + else if (!t_isspace(str)) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("syntax error at line %d of affix file \"%s\"", + line, filename))); + } + else if (state == PAE_INREPL) + { + if (t_iseq(str, '#')) + { + *prepl = '\0'; + break; + } + else if (t_isalpha(str)) + { + COPYCHAR(prepl, str); + prepl += pg_mblen(str); + } + else if (!t_isspace(str)) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("syntax error at line %d of affix file \"%s\"", + line, filename))); + } + else + elog(ERROR, "unknown state in parse_affentry: %d", state); + + str += pg_mblen(str); + } + + *pmask = *pfind = *prepl = '\0'; + + return (*mask && (*find || *repl)) ? true : false; +} + +static void +addFlagValue(IspellDict * Conf, char *s, uint32 val, + const char *filename, int line) +{ + while (*s && t_isspace(s)) + s++; + + if (!*s) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("syntax error at line %d of affix file \"%s\"", + line, filename))); + + if (pg_mblen(s) != 1) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"", + line, filename))); + + Conf->flagval[(unsigned int) *s] = (unsigned char) val; + Conf->usecompound = true; +} + +static void +NIImportOOAffixes(IspellDict * Conf, const char *filename) +{ + char str[BUFSIZ]; + char type[BUFSIZ], + *ptype = NULL; + char sflag[BUFSIZ]; + char mask[BUFSIZ], + *pmask; + char find[BUFSIZ], + *pfind; + char repl[BUFSIZ], + *prepl; + bool isSuffix = false; + int flag = 0; + char flagflags = 0; + FILE *affix; + int line = 0; + int scanread = 0; + char scanbuf[BUFSIZ]; + + checkTmpCtx(); + + /* read file to find any flag */ + memset(Conf->flagval, 0, sizeof(Conf->flagval)); + Conf->usecompound = false; + + if (!(affix = AllocateFile(filename, "r"))) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open affix file \"%s\": %m", + filename))); + + while (fgets(str, sizeof(str), affix)) + { + char *recoded; + + recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), + PG_UTF8, GetDatabaseEncoding()); + if (recoded == NULL) + elog(ERROR, "encoding conversion failed"); + + line++; + + if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + continue; + + if (STRNCMP(recoded, "COMPOUNDFLAG") == 0) + addFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"), + FF_COMPOUNDFLAG, filename, line); + else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0) + addFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"), + FF_COMPOUNDBEGIN, filename, line); + else if (STRNCMP(recoded, "COMPOUNDLAST") == 0) + addFlagValue(Conf, recoded + strlen("COMPOUNDLAST"), + FF_COMPOUNDLAST, filename, line); + /* COMPOUNDLAST and COMPOUNDEND are synonyms */ + else if (STRNCMP(recoded, "COMPOUNDEND") == 0) + addFlagValue(Conf, recoded + strlen("COMPOUNDEND"), + FF_COMPOUNDLAST, filename, line); + else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0) + addFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"), + FF_COMPOUNDMIDDLE, filename, line); + else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0) + addFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"), + FF_COMPOUNDONLY, filename, line); + else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0) + addFlagValue(Conf, recoded + strlen("COMPOUNDPERMITFLAG"), + FF_COMPOUNDPERMITFLAG, filename, line); + else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0) + addFlagValue(Conf, recoded + strlen("COMPOUNDFORBIDFLAG"), + FF_COMPOUNDFORBIDFLAG, filename, line); + else if (STRNCMP(recoded, "FLAG") == 0) + { + char *s = recoded + strlen("FLAG"); + + while (*s && t_isspace(s)) + s++; + + if (*s && STRNCMP(s, "default") != 0) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("Ispell dictionary supports only default flag value at line %d of affix file \"%s\"", + line, filename))); + } + + if (recoded != str) + pfree(recoded); + } + FreeFile(affix); + line = 0; + + sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5); + + if (!(affix = AllocateFile(filename, "r"))) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open affix file \"%s\": %m", + filename))); + + while (fgets(str, sizeof(str), affix)) + { + char *recoded; + + recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), + PG_UTF8, GetDatabaseEncoding()); + if (recoded == NULL) + elog(ERROR, "encoding conversion failed"); + + line++; + if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + continue; + + scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask); + + if (ptype) + pfree(ptype); + ptype = lowerstr_ctx(type); + if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx"))) + continue; + + if (scanread == 4) + { + if (strlen(sflag) != 1) + continue; + flag = *sflag; + isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false; + pfind = lowerstr_ctx(find); + if (t_iseq(find, 'y')) + flagflags = FF_CROSSPRODUCT; + else + flagflags = 0; + pfree(pfind); + } + else + { + char *ptr; + int aflg = 0; + + if (strlen(sflag) != 1 || flag != *sflag || flag == 0) + continue; + prepl = lowerstr_ctx(repl); + /* affix flag */ + if ((ptr = strchr(prepl, '/')) != NULL) + { + *ptr = '\0'; + ptr++; + while (*ptr) + { + aflg |= Conf->flagval[(unsigned int) *ptr]; + ptr++; + } + } + pfind = lowerstr_ctx(find); + pmask = lowerstr_ctx(mask); + if (t_iseq(find, '0')) + *pfind = '\0'; + if (t_iseq(repl, '0')) + *prepl = '\0'; + + NIAddAffix(Conf, flag, flagflags | aflg, pmask, pfind, prepl, + isSuffix ? FF_SUFFIX : FF_PREFIX); + pfree(prepl); + pfree(pfind); + pfree(pmask); + } + + if (recoded != str) + pfree(recoded); + } + + if (ptype) + pfree(ptype); + FreeFile(affix); +} + +/* + * import affixes + * + * Note caller must already have applied get_tsearch_config_filename + */ +void +NIImportAffixes(IspellDict * Conf, const char *filename) +{ + char str[BUFSIZ], + *pstr = NULL; + char mask[BUFSIZ]; + char find[BUFSIZ]; + char repl[BUFSIZ]; + char *s; + int suffixes = 0; + int prefixes = 0; + int flag = 0; + char flagflags = 0; + FILE *affix; + int line = 0; + int oldformat = 0; + + checkTmpCtx(); + + if (!(affix = AllocateFile(filename, "r"))) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open affix file \"%s\": %m", + filename))); + + memset(Conf->flagval, 0, sizeof(Conf->flagval)); + Conf->usecompound = false; + + while (fgets(str, sizeof(str), affix)) + { + if (pstr) + pfree(pstr); + + pstr = recode_and_lowerstr(str); + + line++; + if (*pstr == '#' || *pstr == '\n') + continue; + + if (STRNCMP(pstr, "compoundwords") == 0) + { + s = findchar(str, 'l'); + if (s) + { + while (*s && !t_isspace(s)) + s++; + while (*s && t_isspace(s)) + s++; + if (*s && pg_mblen(s) == 1) + { + Conf->flagval[(unsigned int) *s] = FF_COMPOUNDFLAG; + Conf->usecompound = true; + } + oldformat++; + continue; + } + } + if (STRNCMP(pstr, "suffixes") == 0) + { + suffixes = 1; + prefixes = 0; + oldformat++; + continue; + } + if (STRNCMP(pstr, "prefixes") == 0) + { + suffixes = 0; + prefixes = 1; + oldformat++; + continue; + } + if (STRNCMP(pstr, "flag") == 0) + { + s = str + 4; + flagflags = 0; + + while (*s && t_isspace(s)) + s++; + oldformat++; + + /* allow only single-encoded flags */ + if (pg_mblen(s) != 1) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"", + line, filename))); + + if (*s == '*') + { + flagflags |= FF_CROSSPRODUCT; + s++; + } + else if (*s == '~') + { + flagflags |= FF_COMPOUNDONLY; + s++; + } + + if (*s == '\\') + s++; + + /* allow only single-encoded flags */ + if (pg_mblen(s) != 1) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"", + line, filename))); + + flag = (unsigned char) *s; + continue; + } + if (STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 || + STRNCMP(str, "PFX") == 0 || STRNCMP(str, "SFX") == 0) + { + if (oldformat) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("wrong affix file format for flag at line %d of affix file \"%s\"", + line, filename))); + FreeFile(affix); + NIImportOOAffixes(Conf, filename); + return; + } + if ((!suffixes) && (!prefixes)) + continue; + + if (!parse_affentry(pstr, mask, find, repl, filename, line)) + continue; + + NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX); + } + FreeFile(affix); + + if (pstr) + pfree(pstr); +} + +static int +MergeAffix(IspellDict * Conf, int a1, int a2) +{ + char **ptr; + + while (Conf->nAffixData + 1 >= Conf->lenAffixData) + { + Conf->lenAffixData *= 2; + Conf->AffixData = (char **) repalloc(Conf->AffixData, + sizeof(char *) * Conf->lenAffixData); + } + + ptr = Conf->AffixData + Conf->nAffixData; + *ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + + 1 /* space */ + 1 /* \0 */ ); + sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]); + ptr++; + *ptr = NULL; + Conf->nAffixData++; + + return Conf->nAffixData - 1; +} + +static uint32 +makeCompoundFlags(IspellDict * Conf, int affix) +{ + uint32 flag = 0; + char *str = Conf->AffixData[affix]; + + while (str && *str) + { + flag |= Conf->flagval[(unsigned int) *str]; + str++; + } + + return (flag & FF_DICTFLAGMASK); +} + +static SPNode * +mkSPNode(IspellDict * Conf, int low, int high, int level) +{ + int i; + int nchar = 0; + char lastchar = '\0'; + SPNode *rs; + SPNodeData *data; + int lownew = low; + + for (i = low; i < high; i++) + if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level]) + { + nchar++; + lastchar = Conf->Spell[i]->word[level]; + } + + if (!nchar) + return NULL; + + rs = (SPNode *) palloc0(SPNHRDSZ + nchar * sizeof(SPNodeData)); + rs->length = nchar; + data = rs->data; + + lastchar = '\0'; + for (i = low; i < high; i++) + if (Conf->Spell[i]->p.d.len > level) + { + if (lastchar != Conf->Spell[i]->word[level]) + { + if (lastchar) + { + data->node = mkSPNode(Conf, lownew, i, level + 1); + lownew = i; + data++; + } + lastchar = Conf->Spell[i]->word[level]; + } + data->val = ((uint8 *) (Conf->Spell[i]->word))[level]; + if (Conf->Spell[i]->p.d.len == level + 1) + { + bool clearCompoundOnly = false; + + if (data->isword && data->affix != Conf->Spell[i]->p.d.affix) + { + /* + * MergeAffix called a few times. If one of word is + * allowed to be in compound word and another isn't, then + * clear FF_COMPOUNDONLY flag. + */ + + clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag + & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix)) + ? false : true; + data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix); + } + else + data->affix = Conf->Spell[i]->p.d.affix; + data->isword = 1; + + data->compoundflag = makeCompoundFlags(Conf, data->affix); + + if ((data->compoundflag & FF_COMPOUNDONLY) && + (data->compoundflag & FF_COMPOUNDFLAG) == 0) + data->compoundflag |= FF_COMPOUNDFLAG; + + if (clearCompoundOnly) + data->compoundflag &= ~FF_COMPOUNDONLY; + } + } + + data->node = mkSPNode(Conf, lownew, high, level + 1); + + return rs; +} + +void +NISortDictionary(IspellDict * Conf) +{ + size_t i; + int naffix = 3; + + checkTmpCtx(); + + /* compress affixes */ + qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix); + for (i = 1; i < Conf->nspell; i++) + if (strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag)) + naffix++; + + Conf->AffixData = (char **) palloc0(naffix * sizeof(char *)); + naffix = 1; + Conf->AffixData[0] = pstrdup(""); + Conf->AffixData[1] = pstrdup(Conf->Spell[0]->p.flag); + Conf->Spell[0]->p.d.affix = 1; + Conf->Spell[0]->p.d.len = strlen(Conf->Spell[0]->word); + for (i = 1; i < Conf->nspell; i++) + { + if (strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[naffix])) + { + naffix++; + Conf->AffixData[naffix] = pstrdup(Conf->Spell[i]->p.flag); + } + Conf->Spell[i]->p.d.affix = naffix; + Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word); + } + + Conf->lenAffixData = Conf->nAffixData = naffix; + qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell); + Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0); + + Conf->Spell = NULL; +} + +static AffixNode * +mkANode(IspellDict * Conf, int low, int high, int level, int type) +{ + int i; + int nchar = 0; + uint8 lastchar = '\0'; + AffixNode *rs; + AffixNodeData *data; + int lownew = low; + int naff; + AFFIX **aff; + + for (i = low; i < high; i++) + if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type)) + { + nchar++; + lastchar = GETCHAR(Conf->Affix + i, level, type); + } + + if (!nchar) + return NULL; + + aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1)); + naff = 0; + + rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); + rs->length = nchar; + data = rs->data; + + lastchar = '\0'; + for (i = low; i < high; i++) + if (Conf->Affix[i].replen > level) + { + if (lastchar != GETCHAR(Conf->Affix + i, level, type)) + { + if (lastchar) + { + data->node = mkANode(Conf, lownew, i, level + 1, type); + if (naff) + { + data->naff = naff; + data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff); + memcpy(data->aff, aff, sizeof(AFFIX *) * naff); + naff = 0; + } + data++; + lownew = i; + } + lastchar = GETCHAR(Conf->Affix + i, level, type); + } + data->val = GETCHAR(Conf->Affix + i, level, type); + if (Conf->Affix[i].replen == level + 1) + { /* affix stopped */ + aff[naff++] = Conf->Affix + i; + } + } + + data->node = mkANode(Conf, lownew, high, level + 1, type); + if (naff) + { + data->naff = naff; + data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff); + memcpy(data->aff, aff, sizeof(AFFIX *) * naff); + naff = 0; + } + + pfree(aff); + + return rs; +} + +static void +mkVoidAffix(IspellDict * Conf, int issuffix, int startsuffix) +{ + int i, + cnt = 0; + int start = (issuffix) ? startsuffix : 0; + int end = (issuffix) ? Conf->naffixes : startsuffix; + AffixNode *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData)); + + Affix->length = 1; + Affix->isvoid = 1; + + if (issuffix) + { + Affix->data->node = Conf->Suffix; + Conf->Suffix = Affix; + } + else + { + Affix->data->node = Conf->Prefix; + Conf->Prefix = Affix; + } + + + for (i = start; i < end; i++) + if (Conf->Affix[i].replen == 0) + cnt++; + + if (cnt == 0) + return; + + Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt); + Affix->data->naff = (uint32) cnt; + + cnt = 0; + for (i = start; i < end; i++) + if (Conf->Affix[i].replen == 0) + { + Affix->data->aff[cnt] = Conf->Affix + i; + cnt++; + } +} + +static bool +isAffixInUse(IspellDict * Conf, char flag) +{ + int i; + + for (i = 0; i < Conf->nAffixData; i++) + if (strchr(Conf->AffixData[i], flag) != NULL) + return true; + + return false; +} + +void +NISortAffixes(IspellDict * Conf) +{ + AFFIX *Affix; + size_t i; + CMPDAffix *ptr; + int firstsuffix = -1; + + checkTmpCtx(); + + if (Conf->naffixes == 0) + return; + + if (Conf->naffixes > 1) + qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix); + Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes); + ptr->affix = NULL; + + for (i = 0; i < Conf->naffixes; i++) + { + Affix = &(((AFFIX *) Conf->Affix)[i]); + if (Affix->type == FF_SUFFIX && firstsuffix < 0) + firstsuffix = i; + + if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 && + isAffixInUse(Conf, (char) Affix->flag)) + { + if (ptr == Conf->CompoundAffix || + ptr->issuffix != (ptr - 1)->issuffix || + strbncmp((const unsigned char *) (ptr - 1)->affix, + (const unsigned char *) Affix->repl, + (ptr - 1)->len)) + { + /* leave only unique and minimals suffixes */ + ptr->affix = Affix->repl; + ptr->len = Affix->replen; + ptr->issuffix = (Affix->type == FF_SUFFIX) ? true : false; + ptr++; + } + } + } + ptr->affix = NULL; + Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1)); + + Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX); + Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX); + mkVoidAffix(Conf, 1, firstsuffix); + mkVoidAffix(Conf, 0, firstsuffix); +} + +static AffixNodeData * +FinfAffixes(AffixNode * node, const char *word, int wrdlen, int *level, int type) +{ + AffixNodeData *StopLow, + *StopHigh, + *StopMiddle; + uint8 symbol; + + if (node->isvoid) + { /* search void affixes */ + if (node->data->naff) + return node->data; + node = node->data->node; + } + + while (node && *level < wrdlen) + { + StopLow = node->data; + StopHigh = node->data + node->length; + while (StopLow < StopHigh) + { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + symbol = GETWCHAR(word, wrdlen, *level, type); + if (StopMiddle->val == symbol) + { + (*level)++; + if (StopMiddle->naff) + return StopMiddle; + node = StopMiddle->node; + break; + } + else if (StopMiddle->val < symbol) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + if (StopLow >= StopHigh) + break; + } + return NULL; +} + +static char * +CheckAffix(const char *word, size_t len, AFFIX * Affix, int flagflags, char *newword, int *baselen) +{ + /* + * Check compound allow flags + */ + + if (flagflags == 0) + { + if (Affix->flagflags & FF_COMPOUNDONLY) + return NULL; + } + else if (flagflags & FF_COMPOUNDBEGIN) + { + if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG) + return NULL; + if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0) + if (Affix->type == FF_SUFFIX) + return NULL; + } + else if (flagflags & FF_COMPOUNDMIDDLE) + { + if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 || + (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)) + return NULL; + } + else if (flagflags & FF_COMPOUNDLAST) + { + if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG) + return NULL; + if ((Affix->flagflags & FF_COMPOUNDLAST) == 0) + if (Affix->type == FF_PREFIX) + return NULL; + } + + /* + * make replace pattern of affix + */ + if (Affix->type == FF_SUFFIX) + { + strcpy(newword, word); + strcpy(newword + len - Affix->replen, Affix->find); + if (baselen) /* store length of non-changed part of word */ + *baselen = len - Affix->replen; + } + else + { + /* + * if prefix is a all non-chaged part's length then all word contains + * only prefix and suffix, so out + */ + if (baselen && *baselen + strlen(Affix->find) <= Affix->replen) + return NULL; + strcpy(newword, Affix->find); + strcat(newword, word + Affix->replen); + } + + /* + * check resulting word + */ + if (Affix->issimple) + return newword; + else if (Affix->isregis) + { + if (RS_execute(&(Affix->reg.regis), newword)) + return newword; + } + else + { + int err; + pg_wchar *data; + size_t data_len; + int newword_len; + + /* Convert data string to wide characters */ + newword_len = strlen(newword); + data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar)); + data_len = pg_mb2wchar_with_len(newword, data, newword_len); + + if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0))) + { + pfree(data); + return newword; + } + pfree(data); + } + + return NULL; +} + +static int +addToResult(char **forms, char **cur, char *word) +{ + if (cur - forms >= MAX_NORM - 1) + return 0; + if (forms == cur || strcmp(word, *(cur - 1)) != 0) + { + *cur = pstrdup(word); + cur++; + *cur = NULL; + return 1; + } + + return 0; +} + +static char ** +NormalizeSubWord(IspellDict * Conf, char *word, char flag) +{ + AffixNodeData *suffix = NULL, + *prefix = NULL; + int slevel = 0, + plevel = 0; + int wrdlen = strlen(word), + swrdlen; + char **forms; + char **cur; + char newword[2 * MAXNORMLEN] = ""; + char pnewword[2 * MAXNORMLEN] = ""; + AffixNode *snode = Conf->Suffix, + *pnode; + int i, + j; + + if (wrdlen > MAXNORMLEN) + return NULL; + cur = forms = (char **) palloc(MAX_NORM * sizeof(char *)); + *cur = NULL; + + + /* Check that the word itself is normal form */ + if (FindWord(Conf, word, 0, flag)) + { + *cur = pstrdup(word); + cur++; + *cur = NULL; + } + + /* Find all other NORMAL forms of the 'word' (check only prefix) */ + pnode = Conf->Prefix; + plevel = 0; + while (pnode) + { + prefix = FinfAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX); + if (!prefix) + break; + for (j = 0; j < prefix->naff; j++) + { + if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL)) + { + /* prefix success */ + if (FindWord(Conf, newword, prefix->aff[j]->flag, flag)) + cur += addToResult(forms, cur, newword); + } + } + pnode = prefix->node; + } + + /* + * Find all other NORMAL forms of the 'word' (check suffix and then + * prefix) + */ + while (snode) + { + int baselen = 0; + + /* find possible suffix */ + suffix = FinfAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX); + if (!suffix) + break; + /* foreach suffix check affix */ + for (i = 0; i < suffix->naff; i++) + { + if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen)) + { + /* suffix success */ + if (FindWord(Conf, newword, suffix->aff[i]->flag, flag)) + cur += addToResult(forms, cur, newword); + + /* now we will look changed word with prefixes */ + pnode = Conf->Prefix; + plevel = 0; + swrdlen = strlen(newword); + while (pnode) + { + prefix = FinfAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX); + if (!prefix) + break; + for (j = 0; j < prefix->naff; j++) + { + if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen)) + { + /* prefix success */ + int ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ? + 0 : prefix->aff[j]->flag; + + if (FindWord(Conf, pnewword, ff, flag)) + cur += addToResult(forms, cur, pnewword); + } + } + pnode = prefix->node; + } + } + } + + snode = suffix->node; + } + + if (cur == forms) + { + pfree(forms); + return (NULL); + } + return (forms); +} + +typedef struct SplitVar +{ + int nstem; + char **stem; + struct SplitVar *next; +} SplitVar; + +static int +CheckCompoundAffixes(CMPDAffix ** ptr, char *word, int len, bool CheckInPlace) +{ + bool issuffix; + + if (CheckInPlace) + { + while ((*ptr)->affix) + { + if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0) + { + len = (*ptr)->len; + issuffix = (*ptr)->issuffix; + (*ptr)++; + return (issuffix) ? len : 0; + } + (*ptr)++; + } + } + else + { + char *affbegin; + + while ((*ptr)->affix) + { + if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL) + { + len = (*ptr)->len + (affbegin - word); + issuffix = (*ptr)->issuffix; + (*ptr)++; + return (issuffix) ? len : 0; + } + (*ptr)++; + } + } + return -1; +} + +static SplitVar * +CopyVar(SplitVar * s, int makedup) +{ + SplitVar *v = (SplitVar *) palloc(sizeof(SplitVar)); + + v->stem = (char **) palloc(sizeof(char *) * (MAX_NORM)); + v->next = NULL; + if (s) + { + int i; + + v->nstem = s->nstem; + for (i = 0; i < s->nstem; i++) + v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i]; + } + else + v->nstem = 0; + return v; +} + + +static SplitVar * +SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, int wordlen, int startpos, int minpos) +{ + SplitVar *var = NULL; + SPNodeData *StopLow, + *StopHigh, + *StopMiddle = NULL; + SPNode *node = (snode) ? snode : Conf->Dictionary; + int level = (snode) ? minpos : startpos; /* recursive + * minpos==level */ + int lenaff; + CMPDAffix *caff; + char *notprobed; + int compoundflag = 0; + + notprobed = (char *) palloc(wordlen); + memset(notprobed, 1, wordlen); + var = CopyVar(orig, 1); + + while (level < wordlen) + { + /* find word with epenthetic or/and compound affix */ + caff = Conf->CompoundAffix; + while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0) + { + /* + * there is one of compound affixes, so check word for existings + */ + char buf[MAXNORMLEN]; + char **subres; + + lenaff = level - startpos + lenaff; + + if (!notprobed[startpos + lenaff - 1]) + continue; + + if (level + lenaff - 1 <= minpos) + continue; + + if (lenaff > 0) + memcpy(buf, word + startpos, lenaff); + buf[lenaff] = '\0'; + + if (level == FF_COMPOUNDBEGIN) + compoundflag = FF_COMPOUNDBEGIN; + else if (level == wordlen - 1) + compoundflag = FF_COMPOUNDLAST; + else + compoundflag = FF_COMPOUNDMIDDLE; + subres = NormalizeSubWord(Conf, buf, compoundflag); + if (subres) + { + /* Yes, it was a word from dictionary */ + SplitVar *new = CopyVar(var, 0); + SplitVar *ptr = var; + char **sptr = subres; + + notprobed[startpos + lenaff - 1] = 0; + + while (*sptr) + { + new->stem[new->nstem] = *sptr; + new->nstem++; + sptr++; + } + pfree(subres); + + while (ptr->next) + ptr = ptr->next; + ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff); + + pfree(new->stem); + pfree(new); + } + } + + if (!node) + break; + + StopLow = node->data; + StopHigh = node->data + node->length; + while (StopLow < StopHigh) + { + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); + if (StopMiddle->val == ((uint8 *) (word))[level]) + break; + else if (StopMiddle->val < ((uint8 *) (word))[level]) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + if (StopLow < StopHigh) + { + if (level == FF_COMPOUNDBEGIN) + compoundflag = FF_COMPOUNDBEGIN; + else if (level == wordlen - 1) + compoundflag = FF_COMPOUNDLAST; + else + compoundflag = FF_COMPOUNDMIDDLE; + + /* find infinitive */ + if (StopMiddle->isword && + (StopMiddle->compoundflag & compoundflag) && + notprobed[level]) + { + /* ok, we found full compoundallowed word */ + if (level > minpos) + { + /* and its length more than minimal */ + if (wordlen == level + 1) + { + /* well, it was last word */ + var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); + var->nstem++; + pfree(notprobed); + return var; + } + else + { + /* then we will search more big word at the same point */ + SplitVar *ptr = var; + + while (ptr->next) + ptr = ptr->next; + ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level); + /* we can find next word */ + level++; + var->stem[var->nstem] = strnduplicate(word + startpos, level - startpos); + var->nstem++; + node = Conf->Dictionary; + startpos = level; + continue; + } + } + } + node = StopMiddle->node; + } + else + node = NULL; + level++; + } + + var->stem[var->nstem] = strnduplicate(word + startpos, wordlen - startpos); + var->nstem++; + pfree(notprobed); + return var; +} + +TSLexeme * +NINormalizeWord(IspellDict * Conf, char *word) +{ + char **res; + TSLexeme *lcur = NULL, + *lres = NULL; + uint16 NVariant = 1; + + res = NormalizeSubWord(Conf, word, 0); + + if (res) + { + char **ptr = res; + + lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); + while (*ptr) + { + lcur->lexeme = *ptr; + lcur->flags = 0; + lcur->nvariant = NVariant++; + lcur++; + ptr++; + } + lcur->lexeme = NULL; + pfree(res); + } + + if (Conf->usecompound) + { + int wordlen = strlen(word); + SplitVar *ptr, + *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1); + int i; + + while (var) + { + if (var->nstem > 1) + { + char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST); + + if (subres) + { + char **subptr = subres; + + if (!lcur) + lcur = lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme)); + + while (*subptr) + { + for (i = 0; i < var->nstem - 1; i++) + { + lcur->lexeme = (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]); + lcur->flags = 0; + lcur->nvariant = NVariant; + lcur++; + } + + lcur->lexeme = *subptr; + lcur->flags = 0; + lcur->nvariant = NVariant; + lcur++; + subptr++; + NVariant++; + } + + lcur->lexeme = NULL; + pfree(subres); + var->stem[0] = NULL; + pfree(var->stem[var->nstem - 1]); + } + } + + for (i = 0; i < var->nstem && var->stem[i]; i++) + pfree(var->stem[i]); + ptr = var->next; + pfree(var->stem); + pfree(var); + var = ptr; + } + } + + return lres; +} diff --git a/src/backend/tsearch/synonym.syn.sample b/src/backend/tsearch/synonym.syn.sample new file mode 100644 index 00000000000..fdccca102b4 --- /dev/null +++ b/src/backend/tsearch/synonym.syn.sample @@ -0,0 +1,3 @@ +skies sky +booking book +bookings book diff --git a/src/backend/tsearch/thesaurus.ths.sample b/src/backend/tsearch/thesaurus.ths.sample new file mode 100644 index 00000000000..7e7702e2ae4 --- /dev/null +++ b/src/backend/tsearch/thesaurus.ths.sample @@ -0,0 +1,20 @@ +# +# Theasurus config file. Character ':' separates string from replacement, eg +# sample-words : substitute-words +# +# Any substitute-word can be marked by preceding '*' character, +# which means do not lexize this word +# Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary + +one two three : *123 +one two : *12 +one : *1 +two : *2 + +#foo bar : blah blah +#f bar : fbar +#e bar : ebar +#g bar bar : gbarbar +#asd:sdffff +#qwerty:qwer wert erty + diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c new file mode 100644 index 00000000000..ee4b61d44bf --- /dev/null +++ b/src/backend/tsearch/to_tsany.c @@ -0,0 +1,363 @@ +/*------------------------------------------------------------------------- + * + * to_tsany.c + * to_ts* function definitions + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/namespace.h" +#include "tsearch/ts_cache.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/syscache.h" + + +Datum +get_current_ts_config(PG_FUNCTION_ARGS) +{ + PG_RETURN_OID(getTSCurrentConfig(true)); +} + +/* + * to_tsvector + */ +static int +compareWORD(const void *a, const void *b) +{ + if (((ParsedWord *) a)->len == ((ParsedWord *) b)->len) + { + int res = strncmp( + ((ParsedWord *) a)->word, + ((ParsedWord *) b)->word, + ((ParsedWord *) b)->len); + + if (res == 0) + return (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1; + return res; + } + return (((ParsedWord *) a)->len > ((ParsedWord *) b)->len) ? 1 : -1; +} + +static int +uniqueWORD(ParsedWord * a, int4 l) +{ + ParsedWord *ptr, + *res; + int tmppos; + + if (l == 1) + { + tmppos = LIMITPOS(a->pos.pos); + a->alen = 2; + a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); + a->pos.apos[0] = 1; + a->pos.apos[1] = tmppos; + return l; + } + + res = a; + ptr = a + 1; + + qsort((void *) a, l, sizeof(ParsedWord), compareWORD); + tmppos = LIMITPOS(a->pos.pos); + a->alen = 2; + a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); + a->pos.apos[0] = 1; + a->pos.apos[1] = tmppos; + + while (ptr - a < l) + { + if (!(ptr->len == res->len && + strncmp(ptr->word, res->word, res->len) == 0)) + { + res++; + res->len = ptr->len; + res->word = ptr->word; + tmppos = LIMITPOS(ptr->pos.pos); + res->alen = 2; + res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen); + res->pos.apos[0] = 1; + res->pos.apos[1] = tmppos; + } + else + { + pfree(ptr->word); + if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1) + { + if (res->pos.apos[0] + 1 >= res->alen) + { + res->alen *= 2; + res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen); + } + if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos)) + { + res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos); + res->pos.apos[0]++; + } + } + } + ptr++; + } + + return res + 1 - a; +} + +/* + * make value of tsvector, given parsed text + */ +TSVector +make_tsvector(ParsedText *prs) +{ + int4 i, + j, + lenstr = 0, + totallen; + TSVector in; + WordEntry *ptr; + char *str, + *cur; + + prs->curwords = uniqueWORD(prs->words, prs->curwords); + for (i = 0; i < prs->curwords; i++) + { + lenstr += SHORTALIGN(prs->words[i].len); + + if (prs->words[i].alen) + lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); + } + + totallen = CALCDATASIZE(prs->curwords, lenstr); + in = (TSVector) palloc0(totallen); + SET_VARSIZE(in, totallen); + in->size = prs->curwords; + + ptr = ARRPTR(in); + cur = str = STRPTR(in); + for (i = 0; i < prs->curwords; i++) + { + ptr->len = prs->words[i].len; + if (cur - str > MAXSTRPOS) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("string is too long for tsvector"))); + ptr->pos = cur - str; + memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len); + pfree(prs->words[i].word); + cur += SHORTALIGN(prs->words[i].len); + if (prs->words[i].alen) + { + WordEntryPos *wptr; + + ptr->haspos = 1; + *(uint16 *) cur = prs->words[i].pos.apos[0]; + wptr = POSDATAPTR(in, ptr); + for (j = 0; j < *(uint16 *) cur; j++) + { + WEP_SETWEIGHT(wptr[j], 0); + WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]); + } + cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); + pfree(prs->words[i].pos.apos); + } + else + ptr->haspos = 0; + ptr++; + } + pfree(prs->words); + return in; +} + +Datum +to_tsvector_byid(PG_FUNCTION_ARGS) +{ + Oid cfgId = PG_GETARG_OID(0); + text *in = PG_GETARG_TEXT_P(1); + ParsedText prs; + TSVector out; + + prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6; /* just estimation of + * word's number */ + if (prs.lenwords == 0) + prs.lenwords = 2; + prs.curwords = 0; + prs.pos = 0; + prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); + + parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); + PG_FREE_IF_COPY(in, 1); + + if (prs.curwords) + out = make_tsvector(&prs); + else + { + pfree(prs.words); + out = palloc(CALCDATASIZE(0, 0)); + SET_VARSIZE(out, CALCDATASIZE(0, 0)); + out->size = 0; + } + + PG_RETURN_POINTER(out); +} + +Datum +to_tsvector(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(in))); +} + +/* + * to_tsquery + */ + + +/* + * This function is used for morph parsing + */ +static void +pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight) +{ + int4 count = 0; + ParsedText prs; + uint32 variant, + pos, + cntvar = 0, + cntpos = 0, + cnt = 0; + + prs.lenwords = 4; + prs.curwords = 0; + prs.pos = 0; + prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); + + parsetext(state->cfg_id, &prs, strval, lenval); + + if (prs.curwords > 0) + { + + while (count < prs.curwords) + { + pos = prs.words[count].pos.pos; + cntvar = 0; + while (count < prs.curwords && pos == prs.words[count].pos.pos) + { + variant = prs.words[count].nvariant; + + cnt = 0; + while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant) + { + + pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); + pfree(prs.words[count].word); + if (cnt) + pushquery(state, OPR, (int4) '&', 0, 0, 0); + cnt++; + count++; + } + + if (cntvar) + pushquery(state, OPR, (int4) '|', 0, 0, 0); + cntvar++; + } + + if (cntpos) + pushquery(state, OPR, (int4) '&', 0, 0, 0); + + cntpos++; + } + + pfree(prs.words); + + } + else + pushval_asis(state, VALSTOP, NULL, 0, 0); +} + +Datum +to_tsquery_byid(PG_FUNCTION_ARGS) +{ + Oid cfgid = PG_GETARG_OID(0); + text *in = PG_GETARG_TEXT_P(1); + TSQuery query; + QueryItem *res; + int4 len; + + query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false); + + if (query->size == 0) + PG_RETURN_TSQUERY(query); + + res = clean_fakeval(GETQUERY(query), &len); + if (!res) + { + SET_VARSIZE(query, HDRSIZETQ); + query->size = 0; + PG_RETURN_POINTER(query); + } + memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem)); + pfree(res); + PG_RETURN_TSQUERY(query); +} + +Datum +to_tsquery(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(in))); +} + +Datum +plainto_tsquery_byid(PG_FUNCTION_ARGS) +{ + Oid cfgid = PG_GETARG_OID(0); + text *in = PG_GETARG_TEXT_P(1); + TSQuery query; + QueryItem *res; + int4 len; + + query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true); + + if (query->size == 0) + PG_RETURN_TSQUERY(query); + + res = clean_fakeval(GETQUERY(query), &len); + if (!res) + { + SET_VARSIZE(query, HDRSIZETQ); + query->size = 0; + PG_RETURN_POINTER(query); + } + memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem)); + pfree(res); + PG_RETURN_POINTER(query); +} + +Datum +plainto_tsquery(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(0); + Oid cfgId; + + cfgId = getTSCurrentConfig(true); + PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid, + ObjectIdGetDatum(cfgId), + PointerGetDatum(in))); +} diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c new file mode 100644 index 00000000000..c822f086e0a --- /dev/null +++ b/src/backend/tsearch/ts_locale.c @@ -0,0 +1,241 @@ +/*------------------------------------------------------------------------- + * + * ts_locale.c + * locale compatiblility layer for tsearch + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" + +#ifdef TS_USE_WIDE + +#ifdef WIN32 + +size_t +wchar2char(char *to, const wchar_t *from, size_t len) +{ + if (len == 0) + return 0; + + if (GetDatabaseEncoding() == PG_UTF8) + { + int r; + + r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len, + NULL, NULL); + + if (r == 0) + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("UTF-16 to UTF-8 translation failed: %lu", + GetLastError()))); + Assert(r <= len); + + return r; + } + + return wcstombs(to, from, len); +} +#endif /* WIN32 */ + +size_t +char2wchar(wchar_t *to, const char *from, size_t len) +{ + if (len == 0) + return 0; + +#ifdef WIN32 + if (GetDatabaseEncoding() == PG_UTF8) + { + int r; + + r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len); + + if (!r) + { + pg_verifymbstr(from, len, false); + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("invalid multibyte character for locale"), + errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); + } + + Assert(r <= len); + + return r; + } + else +#endif /* WIN32 */ + if (lc_ctype_is_c()) + { + /* + * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be + * allocated with sufficient space + */ + return pg_mb2wchar_with_len(from, (pg_wchar *) to, len); + } + else + { + /* + * mbstowcs require ending '\0' + */ + char *str = pnstrdup(from, len); + size_t tolen; + + tolen = mbstowcs(to, str, len); + pfree(str); + + return tolen; + } +} + +int +_t_isalpha(const char *ptr) +{ + wchar_t character[2]; + + if (lc_ctype_is_c()) + return isalpha(TOUCHAR(ptr)); + + char2wchar(character, ptr, 1); + + return iswalpha((wint_t) *character); +} + +int +_t_isprint(const char *ptr) +{ + wchar_t character[2]; + + if (lc_ctype_is_c()) + return isprint(TOUCHAR(ptr)); + + char2wchar(character, ptr, 1); + + return iswprint((wint_t) *character); +} +#endif /* TS_USE_WIDE */ + +/* + * Convert C-string from UTF8 to server encoding and + * lower it + */ +char * +recode_and_lowerstr(char *str) +{ + char *recoded; + char *ret; + + recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), + PG_UTF8, GetDatabaseEncoding()); + + if (recoded == NULL) + elog(ERROR, "encoding conversion failed"); + + ret = lowerstr(recoded); + + if (recoded != str) + pfree(recoded); + + return ret; +} + +char * +lowerstr(char *str) +{ + return lowerstr_with_len(str, strlen(str)); +} + +char * +lowerstr_with_len(char *str, int len) +{ + char *ptr = str; + char *out; + + if (len == 0) + return pstrdup(""); + +#ifdef TS_USE_WIDE + + /* + * Use wide char code only when max encoding length > 1 and ctype != C. + * Some operating systems fail with multi-byte encodings and a C locale. + * Also, for a C locale there is no need to process as multibyte. From + * backend/utils/adt/oracle_compat.c Teodor + */ + if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) + { + wchar_t *wstr, + *wptr; + int wlen; + + /* + * alloc number of wchar_t for worst case, len contains number of + * bytes <= number of characters and alloc 1 wchar_t for 0, because + * wchar2char(wcstombs in really) wants zero-terminated string + */ + wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1)); + + /* + * str SHOULD be cstring, so wlen contains number of converted + * character + */ + wlen = char2wchar(wstr, str, len); + if (wlen < 0) + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("translation failed from server encoding to wchar_t"))); + + Assert(wlen <= len); + wstr[wlen] = 0; + + while (*wptr) + { + *wptr = towlower((wint_t) *wptr); + wptr++; + } + + /* + * Alloc result string for worst case + '\0' + */ + len = sizeof(char) * pg_database_encoding_max_length() *(wlen + 1); + out = (char *) palloc(len); + + /* + * wlen now is number of bytes which is always >= number of characters + */ + wlen = wchar2char(out, wstr, len); + pfree(wstr); + + if (wlen < 0) + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("translation failed from wchar_t to server encoding %d", errno))); + Assert(wlen <= len); + out[wlen] = '\0'; + } + else +#endif + { + char *outptr; + + outptr = out = (char *) palloc(sizeof(char) * (len + 1)); + while (*ptr && ptr - str < len) + { + *outptr++ = tolower(*(unsigned char *) ptr); + ptr++; + } + *outptr = '\0'; + } + + return out; +} diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c new file mode 100644 index 00000000000..f286a61fb0d --- /dev/null +++ b/src/backend/tsearch/ts_parse.c @@ -0,0 +1,626 @@ +/*------------------------------------------------------------------------- + * + * ts_parse.c + * main parse functions for tsearch + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/ts_cache.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" + +#define IGNORE_LONGLEXEME 1 + +/* + * Lexize subsystem + */ + +typedef struct ParsedLex +{ + int type; + char *lemm; + int lenlemm; + bool resfollow; + struct ParsedLex *next; +} ParsedLex; + +typedef struct ListParsedLex +{ + ParsedLex *head; + ParsedLex *tail; +} ListParsedLex; + +typedef struct +{ + TSConfigCacheEntry *cfg; + Oid curDictId; + int posDict; + DictSubState dictState; + ParsedLex *curSub; + ListParsedLex towork; /* current list to work */ + ListParsedLex waste; /* list of lexemes that already lexized */ + + /* + * fields to store last variant to lexize (basically, thesaurus or similar + * to, which wants several lexemes + */ + + ParsedLex *lastRes; + TSLexeme *tmpRes; +} LexizeData; + +static void +LexizeInit(LexizeData * ld, TSConfigCacheEntry * cfg) +{ + ld->cfg = cfg; + ld->curDictId = InvalidOid; + ld->posDict = 0; + ld->towork.head = ld->towork.tail = ld->curSub = NULL; + ld->waste.head = ld->waste.tail = NULL; + ld->lastRes = NULL; + ld->tmpRes = NULL; +} + +static void +LPLAddTail(ListParsedLex * list, ParsedLex * newpl) +{ + if (list->tail) + { + list->tail->next = newpl; + list->tail = newpl; + } + else + list->head = list->tail = newpl; + newpl->next = NULL; +} + +static ParsedLex * +LPLRemoveHead(ListParsedLex * list) +{ + ParsedLex *res = list->head; + + if (list->head) + list->head = list->head->next; + + if (list->head == NULL) + list->tail = NULL; + + return res; +} + +static void +LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm) +{ + ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex)); + + newpl = (ParsedLex *) palloc(sizeof(ParsedLex)); + newpl->type = type; + newpl->lemm = lemm; + newpl->lenlemm = lenlemm; + LPLAddTail(&ld->towork, newpl); + ld->curSub = ld->towork.tail; +} + +static void +RemoveHead(LexizeData * ld) +{ + LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork)); + + ld->posDict = 0; +} + +static void +setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem) +{ + if (correspondLexem) + { + *correspondLexem = ld->waste.head; + } + else + { + ParsedLex *tmp, + *ptr = ld->waste.head; + + while (ptr) + { + tmp = ptr->next; + pfree(ptr); + ptr = tmp; + } + } + ld->waste.head = ld->waste.tail = NULL; +} + +static void +moveToWaste(LexizeData * ld, ParsedLex * stop) +{ + bool go = true; + + while (ld->towork.head && go) + { + if (ld->towork.head == stop) + { + ld->curSub = stop->next; + go = false; + } + RemoveHead(ld); + } +} + +static void +setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res) +{ + if (ld->tmpRes) + { + TSLexeme *ptr; + + for (ptr = ld->tmpRes; ptr->lexeme; ptr++) + pfree(ptr->lexeme); + pfree(ld->tmpRes); + } + ld->tmpRes = res; + ld->lastRes = lex; +} + +static TSLexeme * +LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem) +{ + int i; + ListDictionary *map; + TSDictionaryCacheEntry *dict; + TSLexeme *res; + + if (ld->curDictId == InvalidOid) + { + /* + * usial mode: dictionary wants only one word, but we should keep in + * mind that we should go through all stack + */ + + while (ld->towork.head) + { + ParsedLex *curVal = ld->towork.head; + + map = ld->cfg->map + curVal->type; + + if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0) + { + /* skip this type of lexeme */ + RemoveHead(ld); + continue; + } + + for (i = ld->posDict; i < map->len; i++) + { + dict = lookup_ts_dictionary_cache(map->dictIds[i]); + + ld->dictState.isend = ld->dictState.getnext = false; + ld->dictState.private = NULL; + res = (TSLexeme *) DatumGetPointer(FunctionCall4( + &(dict->lexize), + PointerGetDatum(dict->dictData), + PointerGetDatum(curVal->lemm), + Int32GetDatum(curVal->lenlemm), + PointerGetDatum(&ld->dictState) + )); + + if (ld->dictState.getnext) + { + /* + * dictionary wants next word, so setup and store current + * position and go to multiword mode + */ + + ld->curDictId = DatumGetObjectId(map->dictIds[i]); + ld->posDict = i + 1; + ld->curSub = curVal->next; + if (res) + setNewTmpRes(ld, curVal, res); + return LexizeExec(ld, correspondLexem); + } + + if (!res) /* dictionary doesn't know this lexeme */ + continue; + + RemoveHead(ld); + setCorrLex(ld, correspondLexem); + return res; + } + + RemoveHead(ld); + } + } + else + { /* curDictId is valid */ + dict = lookup_ts_dictionary_cache(ld->curDictId); + + /* + * Dictionary ld->curDictId asks us about following words + */ + + while (ld->curSub) + { + ParsedLex *curVal = ld->curSub; + + map = ld->cfg->map + curVal->type; + + if (curVal->type != 0) + { + bool dictExists = false; + + if (curVal->type >= ld->cfg->lenmap || map->len == 0) + { + /* skip this type of lexeme */ + ld->curSub = curVal->next; + continue; + } + + /* + * We should be sure that current type of lexeme is recognized + * by our dictinonary: we just check is it exist in list of + * dictionaries ? + */ + for (i = 0; i < map->len && !dictExists; i++) + if (ld->curDictId == DatumGetObjectId(map->dictIds[i])) + dictExists = true; + + if (!dictExists) + { + /* + * Dictionary can't work with current tpe of lexeme, + * return to basic mode and redo all stored lexemes + */ + ld->curDictId = InvalidOid; + return LexizeExec(ld, correspondLexem); + } + } + + ld->dictState.isend = (curVal->type == 0) ? true : false; + ld->dictState.getnext = false; + + res = (TSLexeme *) DatumGetPointer(FunctionCall4( + &(dict->lexize), + PointerGetDatum(dict->dictData), + PointerGetDatum(curVal->lemm), + Int32GetDatum(curVal->lenlemm), + PointerGetDatum(&ld->dictState) + )); + + if (ld->dictState.getnext) + { + /* Dictionary wants one more */ + ld->curSub = curVal->next; + if (res) + setNewTmpRes(ld, curVal, res); + continue; + } + + if (res || ld->tmpRes) + { + /* + * Dictionary normalizes lexemes, so we remove from stack all + * used lexemes , return to basic mode and redo end of stack + * (if it exists) + */ + if (res) + { + moveToWaste(ld, ld->curSub); + } + else + { + res = ld->tmpRes; + moveToWaste(ld, ld->lastRes); + } + + /* reset to initial state */ + ld->curDictId = InvalidOid; + ld->posDict = 0; + ld->lastRes = NULL; + ld->tmpRes = NULL; + setCorrLex(ld, correspondLexem); + return res; + } + + /* + * Dict don't want next lexem and didn't recognize anything, redo + * from ld->towork.head + */ + ld->curDictId = InvalidOid; + return LexizeExec(ld, correspondLexem); + } + } + + setCorrLex(ld, correspondLexem); + return NULL; +} + +/* + * Parse string and lexize words + */ +void +parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen) +{ + int type, + lenlemm; + char *lemm = NULL; + LexizeData ldata; + TSLexeme *norms; + TSConfigCacheEntry *cfg; + TSParserCacheEntry *prsobj; + void *prsdata; + + cfg = lookup_ts_config_cache(cfgId); + prsobj = lookup_ts_parser_cache(cfg->prsId); + + prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart, + PointerGetDatum(buf), + Int32GetDatum(buflen))); + + LexizeInit(&ldata, cfg); + + do + { + type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), + PointerGetDatum(prsdata), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))); + + if (type > 0 && lenlemm >= MAXSTRLEN) + { +#ifdef IGNORE_LONGLEXEME + ereport(NOTICE, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are ignored.", + MAXSTRLEN))); + continue; +#else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("word is too long to be indexed"))); +#endif + } + + LexizeAddLemm(&ldata, type, lemm, lenlemm); + + while ((norms = LexizeExec(&ldata, NULL)) != NULL) + { + TSLexeme *ptr = norms; + + prs->pos++; /* set pos */ + + while (ptr->lexeme) + { + if (prs->curwords == prs->lenwords) + { + prs->lenwords *= 2; + prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord)); + } + + if (ptr->flags & TSL_ADDPOS) + prs->pos++; + prs->words[prs->curwords].len = strlen(ptr->lexeme); + prs->words[prs->curwords].word = ptr->lexeme; + prs->words[prs->curwords].nvariant = ptr->nvariant; + prs->words[prs->curwords].alen = 0; + prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); + ptr++; + prs->curwords++; + } + pfree(norms); + } + } while (type > 0); + + FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata)); +} + +/* + * Headline framework + */ +static void +hladdword(HeadlineText * prs, char *buf, int4 buflen, int type) +{ + while (prs->curwords >= prs->lenwords) + { + prs->lenwords *= 2; + prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord)); + } + memset(&(prs->words[prs->curwords]), 0, sizeof(HeadlineWord)); + prs->words[prs->curwords].type = (uint8) type; + prs->words[prs->curwords].len = buflen; + prs->words[prs->curwords].word = palloc(buflen); + memcpy(prs->words[prs->curwords].word, buf, buflen); + prs->curwords++; +} + +static void +hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen) +{ + int i; + QueryItem *item = GETQUERY(query); + HeadlineWord *word; + + while (prs->curwords + query->size >= prs->lenwords) + { + prs->lenwords *= 2; + prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord)); + } + + word = &(prs->words[prs->curwords - 1]); + for (i = 0; i < query->size; i++) + { + if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0) + { + if (word->item) + { + memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWord)); + prs->words[prs->curwords].item = item; + prs->words[prs->curwords].repeated = 1; + prs->curwords++; + } + else + word->item = item; + } + item++; + } +} + +static void +addHLParsedLex(HeadlineText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * norms) +{ + ParsedLex *tmplexs; + TSLexeme *ptr; + + while (lexs) + { + + if (lexs->type > 0) + hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type); + + ptr = norms; + while (ptr && ptr->lexeme) + { + hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme)); + ptr++; + } + + tmplexs = lexs->next; + pfree(lexs); + lexs = tmplexs; + } + + if (norms) + { + ptr = norms; + while (ptr->lexeme) + { + pfree(ptr->lexeme); + ptr++; + } + pfree(norms); + } +} + +void +hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen) +{ + int type, + lenlemm; + char *lemm = NULL; + LexizeData ldata; + TSLexeme *norms; + ParsedLex *lexs; + TSConfigCacheEntry *cfg; + TSParserCacheEntry *prsobj; + void *prsdata; + + cfg = lookup_ts_config_cache(cfgId); + prsobj = lookup_ts_parser_cache(cfg->prsId); + + prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart), + PointerGetDatum(buf), + Int32GetDatum(buflen))); + + LexizeInit(&ldata, cfg); + + do + { + type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken), + PointerGetDatum(prsdata), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))); + + if (type > 0 && lenlemm >= MAXSTRLEN) + { +#ifdef IGNORE_LONGLEXEME + ereport(NOTICE, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("word is too long to be indexed"), + errdetail("Words longer than %d characters are ignored.", + MAXSTRLEN))); + continue; +#else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("word is too long to be indexed"))); +#endif + } + + LexizeAddLemm(&ldata, type, lemm, lenlemm); + + do + { + if ((norms = LexizeExec(&ldata, &lexs)) != NULL) + addHLParsedLex(prs, query, lexs, norms); + else + addHLParsedLex(prs, query, lexs, NULL); + } while (norms); + + } while (type > 0); + + FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata)); +} + +text * +generatHeadline(HeadlineText * prs) +{ + text *out; + int len = 128; + char *ptr; + HeadlineWord *wrd = prs->words; + + out = (text *) palloc(len); + ptr = ((char *) out) + VARHDRSZ; + + while (wrd - prs->words < prs->curwords) + { + while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len) + { + int dist = ptr - ((char *) out); + + len *= 2; + out = (text *) repalloc(out, len); + ptr = ((char *) out) + dist; + } + + if (wrd->in && !wrd->repeated) + { + if (wrd->replace) + { + *ptr = ' '; + ptr++; + } + else + { + if (wrd->selected) + { + memcpy(ptr, prs->startsel, prs->startsellen); + ptr += prs->startsellen; + } + memcpy(ptr, wrd->word, wrd->len); + ptr += wrd->len; + if (wrd->selected) + { + memcpy(ptr, prs->stopsel, prs->stopsellen); + ptr += prs->stopsellen; + } + } + } + else if (!wrd->repeated) + pfree(wrd->word); + + wrd++; + } + + SET_VARSIZE(out, ptr - ((char *) out)); + return out; +} diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c new file mode 100644 index 00000000000..bb0a75ca85a --- /dev/null +++ b/src/backend/tsearch/ts_utils.c @@ -0,0 +1,330 @@ +/*------------------------------------------------------------------------- + * + * ts_utils.c + * various support functions + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <ctype.h> + +#include "miscadmin.h" +#include "storage/fd.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + + +#define CS_WAITKEY 0 +#define CS_INKEY 1 +#define CS_WAITEQ 2 +#define CS_WAITVALUE 3 +#define CS_INVALUE 4 +#define CS_IN2VALUE 5 +#define CS_WAITDELIM 6 +#define CS_INESC 7 +#define CS_IN2ESC 8 + +static char * +nstrdup(char *ptr, int len) +{ + char *res = palloc(len + 1), + *cptr; + + memcpy(res, ptr, len); + res[len] = '\0'; + cptr = ptr = res; + while (*ptr) + { + if (t_iseq(ptr, '\\')) + ptr++; + COPYCHAR(cptr, ptr); + cptr += pg_mblen(ptr); + ptr += pg_mblen(ptr); + } + *cptr = '\0'; + + return res; +} + +/* + * Parse a parameter string consisting of key = value clauses + */ +void +parse_keyvalpairs(text *in, Map ** m) +{ + Map *mptr; + char *ptr = VARDATA(in), + *begin = NULL; + char num = 0; + int state = CS_WAITKEY; + + while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ) + { + if (t_iseq(ptr, ',')) + num++; + ptr += pg_mblen(ptr); + } + + *m = mptr = (Map *) palloc(sizeof(Map) * (num + 2)); + memset(mptr, 0, sizeof(Map) * (num + 2)); + ptr = VARDATA(in); + while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ) + { + if (state == CS_WAITKEY) + { + if (t_isalpha(ptr)) + { + begin = ptr; + state = CS_INKEY; + } + else if (!t_isspace(ptr)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + TextPGetCString(in)))); + } + else if (state == CS_INKEY) + { + if (t_isspace(ptr)) + { + mptr->key = nstrdup(begin, ptr - begin); + state = CS_WAITEQ; + } + else if (t_iseq(ptr, '=')) + { + mptr->key = nstrdup(begin, ptr - begin); + state = CS_WAITVALUE; + } + else if (!t_isalpha(ptr)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + TextPGetCString(in)))); + } + else if (state == CS_WAITEQ) + { + if (t_iseq(ptr, '=')) + state = CS_WAITVALUE; + else if (!t_isspace(ptr)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + TextPGetCString(in)))); + } + else if (state == CS_WAITVALUE) + { + if (t_iseq(ptr, '"')) + { + begin = ptr + 1; + state = CS_INVALUE; + } + else if (!t_isspace(ptr)) + { + begin = ptr; + state = CS_IN2VALUE; + } + } + else if (state == CS_INVALUE) + { + if (t_iseq(ptr, '"')) + { + mptr->value = nstrdup(begin, ptr - begin); + mptr++; + state = CS_WAITDELIM; + } + else if (t_iseq(ptr, '\\')) + state = CS_INESC; + } + else if (state == CS_IN2VALUE) + { + if (t_isspace(ptr) || t_iseq(ptr, ',')) + { + mptr->value = nstrdup(begin, ptr - begin); + mptr++; + state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM; + } + else if (t_iseq(ptr, '\\')) + state = CS_INESC; + } + else if (state == CS_WAITDELIM) + { + if (t_iseq(ptr, ',')) + state = CS_WAITKEY; + else if (!t_isspace(ptr)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + TextPGetCString(in)))); + } + else if (state == CS_INESC) + state = CS_INVALUE; + else if (state == CS_IN2ESC) + state = CS_IN2VALUE; + else + elog(ERROR, "unrecognized parse_keyvalpairs state: %d", state); + ptr += pg_mblen(ptr); + } + + if (state == CS_IN2VALUE) + { + mptr->value = nstrdup(begin, ptr - begin); + mptr++; + } + else if (!(state == CS_WAITDELIM || state == CS_WAITKEY)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid parameter list format: \"%s\"", + TextPGetCString(in)))); +} + +/* + * Given the base name and extension of a tsearch config file, return + * its full path name. The base name is assumed to be user-supplied, + * and is checked to prevent pathname attacks. The extension is assumed + * to be safe. + * + * The result is a palloc'd string. + */ +char * +get_tsearch_config_filename(const char *basename, + const char *extension) +{ + char sharepath[MAXPGPATH]; + char *result; + const char *p; + + /* + * We enforce that the basename is all alpha characters. This may be + * overly restrictive, but we don't want to allow access to anything + * outside the tsearch_data directory, so for instance '/' *must* be + * rejected. This is the same test used for timezonesets names. + */ + for (p = basename; *p; p++) + { + if (!isalpha((unsigned char) *p)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid text search configuration file name \"%s\"", + basename))); + } + + get_share_path(my_exec_path, sharepath); + result = palloc(MAXPGPATH); + snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s", + sharepath, basename, extension); + + return result; +} + +#define STOPBUFLEN 4096 + +void +readstoplist(char *in, StopList * s) +{ + char **stop = NULL; + + s->len = 0; + if (in && *in) + { + char *filename = get_tsearch_config_filename(in, "stop"); + FILE *hin; + char buf[STOPBUFLEN]; + int reallen = 0; + int line = 0; + + if ((hin = AllocateFile(filename, "r")) == NULL) + ereport(ERROR, + (errcode(ERRCODE_CONFIG_FILE_ERROR), + errmsg("could not open stopword file \"%s\": %m", + filename))); + + while (fgets(buf, STOPBUFLEN, hin)) + { + char *pbuf = buf; + + line++; + while (*pbuf && !isspace(*pbuf)) + pbuf++; + *pbuf = '\0'; + + if (*buf == '\0') + continue; + + if (!pg_verifymbstr(buf, strlen(buf), true)) + { + FreeFile(hin); + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("invalid multibyte encoding at line %d in file \"%s\"", + line, filename))); + } + + if (s->len >= reallen) + { + if (reallen == 0) + { + reallen = 16; + stop = (char **) palloc(sizeof(char *) * reallen); + } + else + { + reallen *= 2; + stop = (char **) repalloc((void *) stop, sizeof(char *) * reallen); + } + } + + + if (s->wordop) + stop[s->len] = s->wordop(buf); + else + stop[s->len] = pstrdup(buf); + + (s->len)++; + } + FreeFile(hin); + pfree(filename); + } + + s->stop = stop; +} + +static int +comparestr(const void *a, const void *b) +{ + return strcmp(*(char **) a, *(char **) b); +} + +void +sortstoplist(StopList * s) +{ + if (s->stop && s->len > 0) + qsort(s->stop, s->len, sizeof(char *), comparestr); +} + +bool +searchstoplist(StopList * s, char *key) +{ + return (s->stop && s->len > 0 && + bsearch(&key, s->stop, s->len, + sizeof(char *), comparestr)) ? true : false; +} + +char * +pnstrdup(const char *in, int len) +{ + char *out = palloc(len + 1); + + memcpy(out, in, len); + out[len] = '\0'; + return out; +} diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c new file mode 100644 index 00000000000..0b374e8159e --- /dev/null +++ b/src/backend/tsearch/wparser.c @@ -0,0 +1,360 @@ +/*------------------------------------------------------------------------- + * + * wparser.c + * Standard interface to word parser + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/wparser.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "funcapi.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "access/skey.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_type.h" +#include "tsearch/ts_cache.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +/******sql-level interface******/ + +typedef struct +{ + int cur; + LexDescr *list; +} TSTokenTypeStorage; + +static void +tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid) +{ + TupleDesc tupdesc; + MemoryContext oldcontext; + TSTokenTypeStorage *st; + TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid); + + if (!OidIsValid(prs->lextypeOid)) + elog(ERROR, "method lextype isn't defined for text search parser %u", + prsid); + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage)); + st->cur = 0; + /* OidFunctionCall0 is absent */ + st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid, + (Datum) 0)); + funcctx->user_fctx = (void *) st; + + tupdesc = CreateTemplateTupleDesc(3, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description", + TEXTOID, -1, 0); + + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + MemoryContextSwitchTo(oldcontext); +} + +static Datum +tt_process_call(FuncCallContext *funcctx) +{ + TSTokenTypeStorage *st; + + st = (TSTokenTypeStorage *) funcctx->user_fctx; + if (st->list && st->list[st->cur].lexid) + { + Datum result; + char *values[3]; + char txtid[16]; + HeapTuple tuple; + + sprintf(txtid, "%d", st->list[st->cur].lexid); + values[0] = txtid; + values[1] = st->list[st->cur].alias; + values[2] = st->list[st->cur].descr; + + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + result = HeapTupleGetDatum(tuple); + + pfree(values[1]); + pfree(values[2]); + st->cur++; + return result; + } + if (st->list) + pfree(st->list); + pfree(st); + return (Datum) 0; +} + +Datum +ts_token_type_byid(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + tt_setup_firstcall(funcctx, PG_GETARG_OID(0)); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ((result = tt_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +Datum +ts_token_type_byname(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + text *prsname = PG_GETARG_TEXT_P(0); + Oid prsId; + + funcctx = SRF_FIRSTCALL_INIT(); + prsId = TSParserGetPrsid(textToQualifiedNameList(prsname), false); + tt_setup_firstcall(funcctx, prsId); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ((result = tt_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +typedef struct +{ + int type; + char *lexeme; +} LexemeEntry; + +typedef struct +{ + int cur; + int len; + LexemeEntry *list; +} PrsStorage; + + +static void +prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt) +{ + TupleDesc tupdesc; + MemoryContext oldcontext; + PrsStorage *st; + TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid); + char *lex = NULL; + int llen = 0, + type = 0; + void *prsdata; + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + st = (PrsStorage *) palloc(sizeof(PrsStorage)); + st->cur = 0; + st->len = 16; + st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len); + + prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart, + PointerGetDatum(VARDATA(txt)), + Int32GetDatum(VARSIZE(txt) - VARHDRSZ))); + + while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken, + PointerGetDatum(prsdata), + PointerGetDatum(&lex), + PointerGetDatum(&llen)))) != 0) + { + if (st->cur >= st->len) + { + st->len = 2 * st->len; + st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len); + } + st->list[st->cur].lexeme = palloc(llen + 1); + memcpy(st->list[st->cur].lexeme, lex, llen); + st->list[st->cur].lexeme[llen] = '\0'; + st->list[st->cur].type = type; + st->cur++; + } + + FunctionCall1(&prs->prsend, PointerGetDatum(prsdata)); + + st->len = st->cur; + st->cur = 0; + + funcctx->user_fctx = (void *) st; + tupdesc = CreateTemplateTupleDesc(2, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token", + TEXTOID, -1, 0); + + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + MemoryContextSwitchTo(oldcontext); +} + +static Datum +prs_process_call(FuncCallContext *funcctx) +{ + PrsStorage *st; + + st = (PrsStorage *) funcctx->user_fctx; + if (st->cur < st->len) + { + Datum result; + char *values[2]; + char tid[16]; + HeapTuple tuple; + + values[0] = tid; + sprintf(tid, "%d", st->list[st->cur].type); + values[1] = st->list[st->cur].lexeme; + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + result = HeapTupleGetDatum(tuple); + + pfree(values[1]); + st->cur++; + return result; + } + else + { + if (st->list) + pfree(st->list); + pfree(st); + } + return (Datum) 0; +} + +Datum +ts_parse_byid(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + text *txt = PG_GETARG_TEXT_P(1); + + funcctx = SRF_FIRSTCALL_INIT(); + prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt); + PG_FREE_IF_COPY(txt, 1); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ((result = prs_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +Datum +ts_parse_byname(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + text *prsname = PG_GETARG_TEXT_P(0); + text *txt = PG_GETARG_TEXT_P(1); + Oid prsId; + + funcctx = SRF_FIRSTCALL_INIT(); + prsId = TSParserGetPrsid(textToQualifiedNameList(prsname), false); + prs_setup_firstcall(funcctx, prsId, txt); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ((result = prs_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +Datum +ts_headline_byid_opt(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(1); + TSQuery query = PG_GETARG_TSQUERY(2); + text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL; + HeadlineText prs; + text *out; + TSConfigCacheEntry *cfg; + TSParserCacheEntry *prsobj; + + cfg = lookup_ts_config_cache(PG_GETARG_OID(0)); + prsobj = lookup_ts_parser_cache(cfg->prsId); + + memset(&prs, 0, sizeof(HeadlineText)); + prs.lenwords = 32; + prs.words = (HeadlineWord *) palloc(sizeof(HeadlineWord) * prs.lenwords); + + hlparsetext(cfg->cfgId, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ); + + FunctionCall3(&(prsobj->prsheadline), + PointerGetDatum(&prs), + PointerGetDatum(opt), + PointerGetDatum(query)); + + out = generatHeadline(&prs); + + PG_FREE_IF_COPY(in, 1); + PG_FREE_IF_COPY(query, 2); + if (opt) + PG_FREE_IF_COPY(opt, 3); + pfree(prs.words); + pfree(prs.startsel); + pfree(prs.stopsel); + + PG_RETURN_POINTER(out); +} + +Datum +ts_headline_byid(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} + +Datum +ts_headline(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1))); +} + +Datum +ts_headline_opt(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt, + ObjectIdGetDatum(getTSCurrentConfig(true)), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2))); +} diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c new file mode 100644 index 00000000000..8d71e3e914e --- /dev/null +++ b/src/backend/tsearch/wparser_def.c @@ -0,0 +1,1873 @@ +/*------------------------------------------------------------------------- + * + * wparser_def.c + * Standard word parser + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.1 2007/08/21 01:11:18 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/ts_locale.h" +#include "tsearch/ts_public.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + + +/* rememder !!!! */ +#define LASTNUM 23 + +#define LATWORD 1 +#define CYRWORD 2 +#define UWORD 3 +#define EMAIL 4 +#define FURL 5 +#define HOST 6 +#define SCIENTIFIC 7 +#define VERSIONNUMBER 8 +#define PARTHYPHENWORD 9 +#define CYRPARTHYPHENWORD 10 +#define LATPARTHYPHENWORD 11 +#define SPACE 12 +#define TAG 13 +#define PROTOCOL 14 +#define HYPHENWORD 15 +#define LATHYPHENWORD 16 +#define CYRHYPHENWORD 17 +#define URI 18 +#define FILEPATH 19 +#define DECIMAL 20 +#define SIGNEDINT 21 +#define UNSIGNEDINT 22 +#define HTMLENTITY 23 + +static const char *lex_descr[] = { + "", + "Latin word", + "Non-latin word", + "Word", + "Email", + "URL", + "Host", + "Scientific notation", + "VERSION", + "Part of hyphenated word", + "Non-latin part of hyphenated word", + "Latin part of hyphenated word", + "Space symbols", + "HTML Tag", + "Protocol head", + "Hyphenated word", + "Latin hyphenated word", + "Non-latin hyphenated word", + "URI", + "File or path name", + "Decimal notation", + "Signed integer", + "Unsigned integer", + "HTML Entity" +}; + +static const char *tok_alias[] = { + "", + "lword", + "nlword", + "word", + "email", + "url", + "host", + "sfloat", + "version", + "part_hword", + "nlpart_hword", + "lpart_hword", + "blank", + "tag", + "protocol", + "hword", + "lhword", + "nlhword", + "uri", + "file", + "float", + "int", + "uint", + "entity" +}; + +typedef enum +{ + TPS_Base = 0, + TPS_InUWord, + TPS_InLatWord, + TPS_InCyrWord, + TPS_InUnsignedInt, + TPS_InSignedIntFirst, + TPS_InSignedInt, + TPS_InSpace, + TPS_InUDecimalFirst, + TPS_InUDecimal, + TPS_InDecimalFirst, + TPS_InDecimal, + TPS_InVerVersion, + TPS_InSVerVersion, + TPS_InVersionFirst, + TPS_InVersion, + TPS_InMantissaFirst, + TPS_InMantissaSign, + TPS_InMantissa, + TPS_InHTMLEntityFirst, + TPS_InHTMLEntity, + TPS_InHTMLEntityNumFirst, + TPS_InHTMLEntityNum, + TPS_InHTMLEntityEnd, + TPS_InTagFirst, + TPS_InXMLBegin, + TPS_InTagCloseFirst, + TPS_InTagName, + TPS_InTagBeginEnd, + TPS_InTag, + TPS_InTagEscapeK, + TPS_InTagEscapeKK, + TPS_InTagBackSleshed, + TPS_InTagEnd, + TPS_InCommentFirst, + TPS_InCommentLast, + TPS_InComment, + TPS_InCloseCommentFirst, + TPS_InCloseCommentLast, + TPS_InCommentEnd, + TPS_InHostFirstDomain, + TPS_InHostDomainSecond, + TPS_InHostDomain, + TPS_InPortFirst, + TPS_InPort, + TPS_InHostFirstAN, + TPS_InHost, + TPS_InEmail, + TPS_InFileFirst, + TPS_InFileTwiddle, + TPS_InPathFirst, + TPS_InPathFirstFirst, + TPS_InPathSecond, + TPS_InFile, + TPS_InFileNext, + TPS_InURIFirst, + TPS_InURIStart, + TPS_InURI, + TPS_InFURL, + TPS_InProtocolFirst, + TPS_InProtocolSecond, + TPS_InProtocolEnd, + TPS_InHyphenLatWordFirst, + TPS_InHyphenLatWord, + TPS_InHyphenCyrWordFirst, + TPS_InHyphenCyrWord, + TPS_InHyphenUWordFirst, + TPS_InHyphenUWord, + TPS_InHyphenValueFirst, + TPS_InHyphenValue, + TPS_InHyphenValueExact, + TPS_InParseHyphen, + TPS_InParseHyphenHyphen, + TPS_InHyphenCyrWordPart, + TPS_InHyphenLatWordPart, + TPS_InHyphenUWordPart, + TPS_InHyphenUnsignedInt, + TPS_InHDecimalPartFirst, + TPS_InHDecimalPart, + TPS_InHVersionPartFirst, + TPS_InHVersionPart, + TPS_Null /* last state (fake value) */ +} TParserState; + +/* forward declaration */ +struct TParser; + + +typedef int (*TParserCharTest) (struct TParser *); /* any p_is* functions + * except p_iseq */ +typedef void (*TParserSpecial) (struct TParser *); /* special handler for + * special cases... */ + +typedef struct +{ + TParserCharTest isclass; + char c; + uint16 flags; + TParserState tostate; + int type; + TParserSpecial special; +} TParserStateActionItem; + +typedef struct +{ + TParserState state; + TParserStateActionItem *action; +} TParserStateAction; + +typedef struct TParserPosition +{ + int posbyte; /* position of parser in bytes */ + int poschar; /* osition of parser in characters */ + int charlen; /* length of current char */ + int lenbytelexeme; + int lencharlexeme; + TParserState state; + struct TParserPosition *prev; + int flags; + TParserStateActionItem *pushedAtAction; +} TParserPosition; + +typedef struct TParser +{ + /* string and position information */ + char *str; /* multibyte string */ + int lenstr; /* length of mbstring */ +#ifdef TS_USE_WIDE + wchar_t *wstr; /* wide character string */ + int lenwstr; /* length of wsting */ +#endif + + /* State of parse */ + int charmaxlen; + bool usewide; + TParserPosition *state; + bool ignore; + bool wanthost; + + /* silly char */ + char c; + + /* out */ + char *lexeme; + int lenbytelexeme; + int lencharlexeme; + int type; + +} TParser; + +static TParserPosition * +newTParserPosition(TParserPosition * prev) +{ + TParserPosition *res = (TParserPosition *) palloc(sizeof(TParserPosition)); + + if (prev) + memcpy(res, prev, sizeof(TParserPosition)); + else + memset(res, 0, sizeof(TParserPosition)); + + res->prev = prev; + + res->pushedAtAction = NULL; + + return res; +} + +static TParser * +TParserInit(char *str, int len) +{ + TParser *prs = (TParser *) palloc0(sizeof(TParser)); + + prs->charmaxlen = pg_database_encoding_max_length(); + prs->str = str; + prs->lenstr = len; + +#ifdef TS_USE_WIDE + + /* + * Use wide char code only when max encoding length > 1. + */ + + if (prs->charmaxlen > 1) + { + prs->usewide = true; + prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1)); + prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr); + } + else +#endif + prs->usewide = false; + + prs->state = newTParserPosition(NULL); + prs->state->state = TPS_Base; + + return prs; +} + +static bool TParserGet(TParser * prs); + +static void +TParserClose(TParser * prs) +{ + while (prs->state) + { + TParserPosition *ptr = prs->state->prev; + + pfree(prs->state); + prs->state = ptr; + } + +#ifdef TS_USE_WIDE + if (prs->wstr) + pfree(prs->wstr); +#endif + + pfree(prs); +} + +/* + * defining support function, equvalent is* macroses, but + * working with any possible encodings and locales. Note, + * that with multibyte encoding and C-locale isw* function may fail + * or give wrong result. Note 2: multibyte encoding and C-local + * often are used for Asian languages + */ + +#ifdef TS_USE_WIDE + +#define p_iswhat(type) \ +static int \ +p_is##type(TParser *prs) { \ + Assert( prs->state ); \ + if ( prs->usewide ) \ + { \ + if ( lc_ctype_is_c() ) \ + return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \ + \ + return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \ + } \ + \ + return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \ +} \ + \ +static int \ +p_isnot##type(TParser *prs) { \ + return !p_is##type(prs); \ +} + +static int +p_isalnum(TParser * prs) +{ + Assert(prs->state); + + if (prs->usewide) + { + if (lc_ctype_is_c()) + { + unsigned int c = *(prs->wstr + prs->state->poschar); + + /* + * any non-ascii symbol with multibyte encoding with C-locale is + * an alpha character + */ + if (c > 0x7f) + return 1; + + return isalnum(0xff & c); + } + + return iswalnum((wint_t) *(prs->wstr + prs->state->poschar)); + } + + return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte)); +} +static int +p_isnotalnum(TParser * prs) +{ + return !p_isalnum(prs); +} + +static int +p_isalpha(TParser * prs) +{ + Assert(prs->state); + + if (prs->usewide) + { + if (lc_ctype_is_c()) + { + unsigned int c = *(prs->wstr + prs->state->poschar); + + /* + * any non-ascii symbol with multibyte encoding with C-locale is + * an alpha character + */ + if (c > 0x7f) + return 1; + + return isalpha(0xff & c); + } + + return iswalpha((wint_t) *(prs->wstr + prs->state->poschar)); + } + + return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte)); +} + +static int +p_isnotalpha(TParser * prs) +{ + return !p_isalpha(prs); +} + +/* p_iseq should be used only for ascii symbols */ + +static int +p_iseq(TParser * prs, char c) +{ + Assert(prs->state); + return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0; +} +#else /* TS_USE_WIDE */ + +#define p_iswhat(type) \ +static int \ +p_is##type(TParser *prs) { \ + Assert( prs->state ); \ + return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \ +} \ + \ +static int \ +p_isnot##type(TParser *prs) { \ + return !p_is##type(prs); \ +} + + +static int +p_iseq(TParser * prs, char c) +{ + Assert(prs->state); + return (*(prs->str + prs->state->posbyte) == c) ? 1 : 0; +} + +p_iswhat(alnum) +p_iswhat(alpha) +#endif /* TS_USE_WIDE */ + +p_iswhat(digit) +p_iswhat(lower) +p_iswhat(print) +p_iswhat(punct) +p_iswhat(space) +p_iswhat(upper) +p_iswhat(xdigit) + +static int +p_isEOF(TParser * prs) +{ + Assert(prs->state); + return (prs->state->posbyte == prs->lenstr || prs->state->charlen == 0) ? 1 : 0; +} + +static int +p_iseqC(TParser * prs) +{ + return p_iseq(prs, prs->c); +} + +static int +p_isneC(TParser * prs) +{ + return !p_iseq(prs, prs->c); +} + +static int +p_isascii(TParser * prs) +{ + return (prs->state->charlen == 1 && isascii((unsigned char) *(prs->str + prs->state->posbyte))) ? 1 : 0; +} + +static int +p_islatin(TParser * prs) +{ + return (p_isalpha(prs) && p_isascii(prs)) ? 1 : 0; +} + +static int +p_isnonlatin(TParser * prs) +{ + return (p_isalpha(prs) && !p_isascii(prs)) ? 1 : 0; +} + +void _make_compiler_happy(void); +void +_make_compiler_happy(void) +{ + p_isalnum(NULL); + p_isnotalnum(NULL); + p_isalpha(NULL); + p_isnotalpha(NULL); + p_isdigit(NULL); + p_isnotdigit(NULL); + p_islower(NULL); + p_isnotlower(NULL); + p_isprint(NULL); + p_isnotprint(NULL); + p_ispunct(NULL); + p_isnotpunct(NULL); + p_isspace(NULL); + p_isnotspace(NULL); + p_isupper(NULL); + p_isnotupper(NULL); + p_isxdigit(NULL); + p_isnotxdigit(NULL); + p_isEOF(NULL); + p_iseqC(NULL); + p_isneC(NULL); +} + + +static void +SpecialTags(TParser * prs) +{ + switch (prs->state->lencharlexeme) + { + case 8: /* </script */ + if (pg_strncasecmp(prs->lexeme, "</script", 8) == 0) + prs->ignore = false; + break; + case 7: /* <script || </style */ + if (pg_strncasecmp(prs->lexeme, "</style", 7) == 0) + prs->ignore = false; + else if (pg_strncasecmp(prs->lexeme, "<script", 7) == 0) + prs->ignore = true; + break; + case 6: /* <style */ + if (pg_strncasecmp(prs->lexeme, "<style", 6) == 0) + prs->ignore = true; + break; + default: + break; + } +} + +static void +SpecialFURL(TParser * prs) +{ + prs->wanthost = true; + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; +} + +static void +SpecialHyphen(TParser * prs) +{ + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; +} + +static void +SpecialVerVersion(TParser * prs) +{ + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; + prs->state->lenbytelexeme = 0; + prs->state->lencharlexeme = 0; +} + +static int +p_isstophost(TParser * prs) +{ + if (prs->wanthost) + { + prs->wanthost = false; + return 1; + } + return 0; +} + +static int +p_isignore(TParser * prs) +{ + return (prs->ignore) ? 1 : 0; +} + +static int +p_ishost(TParser * prs) +{ + TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte); + int res = 0; + + if (TParserGet(tmpprs) && tmpprs->type == HOST) + { + prs->state->posbyte += tmpprs->lenbytelexeme; + prs->state->poschar += tmpprs->lencharlexeme; + prs->state->lenbytelexeme += tmpprs->lenbytelexeme; + prs->state->lencharlexeme += tmpprs->lencharlexeme; + prs->state->charlen = tmpprs->state->charlen; + res = 1; + } + TParserClose(tmpprs); + + return res; +} + +static int +p_isURI(TParser * prs) +{ + TParser *tmpprs = TParserInit(prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte); + int res = 0; + + tmpprs->state = newTParserPosition(tmpprs->state); + tmpprs->state->state = TPS_InFileFirst; + + if (TParserGet(tmpprs) && (tmpprs->type == URI || tmpprs->type == FILEPATH)) + { + prs->state->posbyte += tmpprs->lenbytelexeme; + prs->state->poschar += tmpprs->lencharlexeme; + prs->state->lenbytelexeme += tmpprs->lenbytelexeme; + prs->state->lencharlexeme += tmpprs->lencharlexeme; + prs->state->charlen = tmpprs->state->charlen; + res = 1; + } + TParserClose(tmpprs); + + return res; +} + +/* + * Table of state/action of parser + */ + +#define A_NEXT 0x0000 +#define A_BINGO 0x0001 +#define A_POP 0x0002 +#define A_PUSH 0x0004 +#define A_RERUN 0x0008 +#define A_CLEAR 0x0010 +#define A_MERGE 0x0020 +#define A_CLRALL 0x0040 + +static TParserStateActionItem actionTPS_Base[] = { + {p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL}, + {p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InCyrWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, + {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, + {p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL}, + {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL} +}; + + +static TParserStateActionItem actionTPS_InUWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UWORD, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, UWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InLatWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, LATWORD, NULL}, + {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, LATWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InCyrWord[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, CYRWORD, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, CYRWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InUnsignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_islatin, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InUWord, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InSignedIntFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT | A_CLEAR, TPS_InSignedInt, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InSignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InSpace[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL}, + {p_isignore, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL}, + {p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL}, + {p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SPACE, NULL} +}; + +static TParserStateActionItem actionTPS_InUDecimalFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InUDecimal[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InDecimalFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InDecimal[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL}, + {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InVerVersion[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InSVerVersion[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL}, + {NULL, 0, A_NEXT, TPS_Null, 0, NULL} +}; + + +static TParserStateActionItem actionTPS_InVersionFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InVersion[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissaFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, + {p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissaSign[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InMantissa[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntity[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL}, + {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityNum[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL}, + {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = { + {NULL, 0, A_BINGO | A_CLEAR, TPS_Base, HTMLENTITY, NULL} +}; + +static TParserStateActionItem actionTPS_InTagFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL}, + {p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InXMLBegin, 0, NULL}, + {p_islatin, 0, A_PUSH, TPS_InTagName, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InXMLBegin[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + /* <?xml ... */ + {p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL}, + {p_iseqC, 'X', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagCloseFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InTagName, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagName[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + /* <br/> case */ + {p_iseqC, '/', A_NEXT, TPS_InTagBeginEnd, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags}, + {p_isspace, 0, A_NEXT, TPS_InTag, 0, SpecialTags}, + {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagBeginEnd[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTag[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags}, + {p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL}, + {p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL}, + {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEscapeK[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, + {p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEscapeKK[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL}, + {p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagBackSleshed[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {NULL, 0, A_MERGE, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InTagEnd[] = { + {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL}, + /* <!DOCTYPE ...> */ + {p_iseqC, 'D', A_NEXT, TPS_InTag, 0, NULL}, + {p_iseqC, 'd', A_NEXT, TPS_InTag, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentLast[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InComment[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst, 0, NULL}, + {NULL, 0, A_NEXT, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCloseCommentFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCloseCommentLast[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL}, + {NULL, 0, A_NEXT, TPS_InComment, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InCommentEnd[] = { + {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG, NULL} +}; + +static TParserStateActionItem actionTPS_InHostFirstDomain[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomainSecond, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHostDomainSecond[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomain, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHostDomain[] = { + {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHostDomain, 0, NULL}, + {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL}, + {p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_isdigit, 0, A_POP, TPS_Null, 0, NULL}, + {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, + {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL} +}; + +static TParserStateActionItem actionTPS_InPortFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InPort[] = { + {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL}, + {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURIStart, HOST, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL}, + {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL} +}; + +static TParserStateActionItem actionTPS_InHostFirstAN[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHost[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL}, + {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InEmail[] = { + {p_ishost, 0, A_BINGO | A_CLRALL, TPS_Base, EMAIL, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFileFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFileTwiddle[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InPathFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InPathFirstFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InPathSecond[] = { + {p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL}, + {p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL}, + {p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFile[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL}, + {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, + {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL} +}; + +static TParserStateActionItem actionTPS_InFileNext[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InURIFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '"', A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '\'', A_POP, TPS_Null, 0, NULL}, + {p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL}, +}; + +static TParserStateActionItem actionTPS_InURIStart[] = { + {NULL, 0, A_NEXT, TPS_InURI, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InURI[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL}, + {p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL}, + {p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL}, + {p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL}, + {NULL, 0, A_BINGO, TPS_Base, URI, NULL} +}; + +static TParserStateActionItem actionTPS_InFURL[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isURI, 0, A_BINGO | A_CLRALL, TPS_Base, FURL, SpecialFURL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolSecond[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InProtocolEnd[] = { + {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, PROTOCOL, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWord[] = { + {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst, 0, NULL}, + {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWord[] = { + {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst, 0, NULL}, + {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenUWordFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUWord[] = { + {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenValueFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenValue[] = { + {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL}, + {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InHyphenValueExact[] = { + {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL}, + {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HYPHENWORD, SpecialHyphen} +}; + +static TParserStateActionItem actionTPS_InParseHyphen[] = { + {p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt, 0, NULL}, + {p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen, 0, NULL}, + {NULL, 0, A_RERUN, TPS_Base, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenCyrWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, CYRPARTHYPHENWORD, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, CYRPARTHYPHENWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenLatWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, LATPARTHYPHENWORD, NULL}, + {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart, 0, NULL}, + {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, LATPARTHYPHENWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUWordPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, PARTHYPHENWORD, NULL}, + {p_isalnum, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHYPHENWORD, NULL} +}; + +static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt, 0, NULL}, + {p_isalpha, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHDecimalPartFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL} +}; + +static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHDecimalPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL} +}; + +static TParserStateActionItem actionTPS_InHVersionPartFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InHVersionPart[] = { + {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst, 0, NULL}, + {NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL} +}; + +/* + * order should be the same as in typedef enum {} TParserState!! + */ + +static const TParserStateAction Actions[] = { + {TPS_Base, actionTPS_Base}, + {TPS_InUWord, actionTPS_InUWord}, + {TPS_InLatWord, actionTPS_InLatWord}, + {TPS_InCyrWord, actionTPS_InCyrWord}, + {TPS_InUnsignedInt, actionTPS_InUnsignedInt}, + {TPS_InSignedIntFirst, actionTPS_InSignedIntFirst}, + {TPS_InSignedInt, actionTPS_InSignedInt}, + {TPS_InSpace, actionTPS_InSpace}, + {TPS_InUDecimalFirst, actionTPS_InUDecimalFirst}, + {TPS_InUDecimal, actionTPS_InUDecimal}, + {TPS_InDecimalFirst, actionTPS_InDecimalFirst}, + {TPS_InDecimal, actionTPS_InDecimal}, + {TPS_InVerVersion, actionTPS_InVerVersion}, + {TPS_InSVerVersion, actionTPS_InSVerVersion}, + {TPS_InVersionFirst, actionTPS_InVersionFirst}, + {TPS_InVersion, actionTPS_InVersion}, + {TPS_InMantissaFirst, actionTPS_InMantissaFirst}, + {TPS_InMantissaSign, actionTPS_InMantissaSign}, + {TPS_InMantissa, actionTPS_InMantissa}, + {TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst}, + {TPS_InHTMLEntity, actionTPS_InHTMLEntity}, + {TPS_InHTMLEntityNumFirst, actionTPS_InHTMLEntityNumFirst}, + {TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum}, + {TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd}, + {TPS_InTagFirst, actionTPS_InTagFirst}, + {TPS_InXMLBegin, actionTPS_InXMLBegin}, + {TPS_InTagCloseFirst, actionTPS_InTagCloseFirst}, + {TPS_InTagName, actionTPS_InTagName}, + {TPS_InTagBeginEnd, actionTPS_InTagBeginEnd}, + {TPS_InTag, actionTPS_InTag}, + {TPS_InTagEscapeK, actionTPS_InTagEscapeK}, + {TPS_InTagEscapeKK, actionTPS_InTagEscapeKK}, + {TPS_InTagBackSleshed, actionTPS_InTagBackSleshed}, + {TPS_InTagEnd, actionTPS_InTagEnd}, + {TPS_InCommentFirst, actionTPS_InCommentFirst}, + {TPS_InCommentLast, actionTPS_InCommentLast}, + {TPS_InComment, actionTPS_InComment}, + {TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst}, + {TPS_InCloseCommentLast, actionTPS_InCloseCommentLast}, + {TPS_InCommentEnd, actionTPS_InCommentEnd}, + {TPS_InHostFirstDomain, actionTPS_InHostFirstDomain}, + {TPS_InHostDomainSecond, actionTPS_InHostDomainSecond}, + {TPS_InHostDomain, actionTPS_InHostDomain}, + {TPS_InPortFirst, actionTPS_InPortFirst}, + {TPS_InPort, actionTPS_InPort}, + {TPS_InHostFirstAN, actionTPS_InHostFirstAN}, + {TPS_InHost, actionTPS_InHost}, + {TPS_InEmail, actionTPS_InEmail}, + {TPS_InFileFirst, actionTPS_InFileFirst}, + {TPS_InFileTwiddle, actionTPS_InFileTwiddle}, + {TPS_InPathFirst, actionTPS_InPathFirst}, + {TPS_InPathFirstFirst, actionTPS_InPathFirstFirst}, + {TPS_InPathSecond, actionTPS_InPathSecond}, + {TPS_InFile, actionTPS_InFile}, + {TPS_InFileNext, actionTPS_InFileNext}, + {TPS_InURIFirst, actionTPS_InURIFirst}, + {TPS_InURIStart, actionTPS_InURIStart}, + {TPS_InURI, actionTPS_InURI}, + {TPS_InFURL, actionTPS_InFURL}, + {TPS_InProtocolFirst, actionTPS_InProtocolFirst}, + {TPS_InProtocolSecond, actionTPS_InProtocolSecond}, + {TPS_InProtocolEnd, actionTPS_InProtocolEnd}, + {TPS_InHyphenLatWordFirst, actionTPS_InHyphenLatWordFirst}, + {TPS_InHyphenLatWord, actionTPS_InHyphenLatWord}, + {TPS_InHyphenCyrWordFirst, actionTPS_InHyphenCyrWordFirst}, + {TPS_InHyphenCyrWord, actionTPS_InHyphenCyrWord}, + {TPS_InHyphenUWordFirst, actionTPS_InHyphenUWordFirst}, + {TPS_InHyphenUWord, actionTPS_InHyphenUWord}, + {TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst}, + {TPS_InHyphenValue, actionTPS_InHyphenValue}, + {TPS_InHyphenValueExact, actionTPS_InHyphenValueExact}, + {TPS_InParseHyphen, actionTPS_InParseHyphen}, + {TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen}, + {TPS_InHyphenCyrWordPart, actionTPS_InHyphenCyrWordPart}, + {TPS_InHyphenLatWordPart, actionTPS_InHyphenLatWordPart}, + {TPS_InHyphenUWordPart, actionTPS_InHyphenUWordPart}, + {TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt}, + {TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst}, + {TPS_InHDecimalPart, actionTPS_InHDecimalPart}, + {TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst}, + {TPS_InHVersionPart, actionTPS_InHVersionPart}, + {TPS_Null, NULL} +}; + + +static bool +TParserGet(TParser * prs) +{ + TParserStateActionItem *item = NULL; + + if (prs->state->posbyte >= prs->lenstr) + return false; + + Assert(prs->state); + prs->lexeme = prs->str + prs->state->posbyte; + prs->state->pushedAtAction = NULL; + + /* look at string */ + while (prs->state->posbyte <= prs->lenstr) + { + if (prs->state->posbyte == prs->lenstr) + prs->state->charlen = 0; + else + prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen : + pg_mblen(prs->str + prs->state->posbyte); + + Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr); + Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null); + Assert(Actions[prs->state->state].state == prs->state->state); + + item = Actions[prs->state->state].action; + Assert(item != NULL); + + if (item < prs->state->pushedAtAction) + item = prs->state->pushedAtAction; + + /* find action by character class */ + while (item->isclass) + { + prs->c = item->c; + if (item->isclass(prs) != 0) + { + if (item > prs->state->pushedAtAction) /* remember: after + * pushing we were by + * false way */ + break; + } + item++; + } + + prs->state->pushedAtAction = NULL; + + /* call special handler if exists */ + if (item->special) + item->special(prs); + + /* BINGO, lexeme is found */ + if (item->flags & A_BINGO) + { + Assert(item->type > 0); + prs->lenbytelexeme = prs->state->lenbytelexeme; + prs->lencharlexeme = prs->state->lencharlexeme; + prs->state->lenbytelexeme = prs->state->lencharlexeme = 0; + prs->type = item->type; + } + + /* do various actions by flags */ + if (item->flags & A_POP) + { /* pop stored state in stack */ + TParserPosition *ptr = prs->state->prev; + + pfree(prs->state); + prs->state = ptr; + Assert(prs->state); + } + else if (item->flags & A_PUSH) + { /* push (store) state in stack */ + prs->state->pushedAtAction = item; /* remember where we push */ + prs->state = newTParserPosition(prs->state); + } + else if (item->flags & A_CLEAR) + { /* clear previous pushed state */ + TParserPosition *ptr; + + Assert(prs->state->prev); + ptr = prs->state->prev->prev; + pfree(prs->state->prev); + prs->state->prev = ptr; + } + else if (item->flags & A_CLRALL) + { /* clear all previous pushed state */ + TParserPosition *ptr; + + while (prs->state->prev) + { + ptr = prs->state->prev->prev; + pfree(prs->state->prev); + prs->state->prev = ptr; + } + } + else if (item->flags & A_MERGE) + { /* merge posinfo with current and pushed state */ + TParserPosition *ptr = prs->state; + + Assert(prs->state->prev); + prs->state = prs->state->prev; + + prs->state->posbyte = ptr->posbyte; + prs->state->poschar = ptr->poschar; + prs->state->charlen = ptr->charlen; + prs->state->lenbytelexeme = ptr->lenbytelexeme; + prs->state->lencharlexeme = ptr->lencharlexeme; + pfree(ptr); + } + + /* set new state if pointed */ + if (item->tostate != TPS_Null) + prs->state->state = item->tostate; + + /* check for go away */ + if ((item->flags & A_BINGO) || (prs->state->posbyte >= prs->lenstr && (item->flags & A_RERUN) == 0)) + break; + + /* go to begining of loop if we should rerun or we just restore state */ + if (item->flags & (A_RERUN | A_POP)) + continue; + + /* move forward */ + if (prs->state->charlen) + { + prs->state->posbyte += prs->state->charlen; + prs->state->lenbytelexeme += prs->state->charlen; + prs->state->poschar++; + prs->state->lencharlexeme++; + } + } + + return (item && (item->flags & A_BINGO)) ? true : false; +} + +Datum +prsd_lextype(PG_FUNCTION_ARGS) +{ + LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1)); + int i; + + for (i = 1; i <= LASTNUM; i++) + { + descr[i - 1].lexid = i; + descr[i - 1].alias = pstrdup(tok_alias[i]); + descr[i - 1].descr = pstrdup(lex_descr[i]); + } + + descr[LASTNUM].lexid = 0; + + PG_RETURN_POINTER(descr); +} + +Datum +prsd_start(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(TParserInit((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1))); +} + +Datum +prsd_nexttoken(PG_FUNCTION_ARGS) +{ + TParser *p = (TParser *) PG_GETARG_POINTER(0); + char **t = (char **) PG_GETARG_POINTER(1); + int *tlen = (int *) PG_GETARG_POINTER(2); + + if (!TParserGet(p)) + PG_RETURN_INT32(0); + + *t = p->lexeme; + *tlen = p->lenbytelexeme; + + PG_RETURN_INT32(p->type); +} + +Datum +prsd_end(PG_FUNCTION_ARGS) +{ + TParser *p = (TParser *) PG_GETARG_POINTER(0); + + TParserClose(p); + PG_RETURN_VOID(); +} + +#define LEAVETOKEN(x) ( (x)==12 ) +#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) +#define ENDPUNCTOKEN(x) ( (x)==12 ) + + +#define TS_IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) +#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) +#define HTMLHLIDIGNORE(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) +#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) ) +#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || TS_IDIGNORE(x) ) + +typedef struct +{ + HeadlineWordEntry *words; + int len; +} hlCheck; + +static bool +checkcondition_HL(void *checkval, QueryItem * val) +{ + int i; + + for (i = 0; i < ((hlCheck *) checkval)->len; i++) + { + if (((hlCheck *) checkval)->words[i].item == val) + return true; + } + return false; +} + + +static bool +hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q) +{ + int i, + j; + QueryItem *item = GETQUERY(query); + int pos = *p; + + *q = 0; + *p = 0x7fffffff; + + for (j = 0; j < query->size; j++) + { + if (item->type != VAL) + { + item++; + continue; + } + for (i = pos; i < prs->curwords; i++) + { + if (prs->words[i].item == item) + { + if (i > *q) + *q = i; + break; + } + } + item++; + } + + if (*q == 0) + return false; + + item = GETQUERY(query); + for (j = 0; j < query->size; j++) + { + if (item->type != VAL) + { + item++; + continue; + } + for (i = *q; i >= pos; i--) + { + if (prs->words[i].item == item) + { + if (i < *p) + *p = i; + break; + } + } + item++; + } + + if (*p <= *q) + { + hlCheck ch; + + ch.words = &(prs->words[*p]); + ch.len = *q - *p + 1; + if (TS_execute(GETQUERY(query), &ch, false, checkcondition_HL)) + return true; + else + { + (*p)++; + return hlCover(prs, query, p, q); + } + } + + return false; +} + +Datum +prsd_headline(PG_FUNCTION_ARGS) +{ + HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0); + text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */ + TSQuery query = PG_GETARG_TSQUERY(2); + + /* from opt + start and and tag */ + int min_words = 15; + int max_words = 35; + int shortword = 3; + + int p = 0, + q = 0; + int bestb = -1, + beste = -1; + int bestlen = -1; + int pose = 0, + posb, + poslen, + curlen; + + int i; + int highlight = 0; + + /* config */ + prs->startsel = NULL; + prs->stopsel = NULL; + if (opt) + { + Map *map, + *mptr; + + parse_keyvalpairs(opt, &map); + mptr = map; + + while (mptr && mptr->key) + { + if (pg_strcasecmp(mptr->key, "MaxWords") == 0) + max_words = pg_atoi(mptr->value, 4, 1); + else if (pg_strcasecmp(mptr->key, "MinWords") == 0) + min_words = pg_atoi(mptr->value, 4, 1); + else if (pg_strcasecmp(mptr->key, "ShortWord") == 0) + shortword = pg_atoi(mptr->value, 4, 1); + else if (pg_strcasecmp(mptr->key, "StartSel") == 0) + prs->startsel = pstrdup(mptr->value); + else if (pg_strcasecmp(mptr->key, "StopSel") == 0) + prs->stopsel = pstrdup(mptr->value); + else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0) + highlight = ( + pg_strcasecmp(mptr->value, "1") == 0 || + pg_strcasecmp(mptr->value, "on") == 0 || + pg_strcasecmp(mptr->value, "true") == 0 || + pg_strcasecmp(mptr->value, "t") == 0 || + pg_strcasecmp(mptr->value, "y") == 0 || + pg_strcasecmp(mptr->value, "yes") == 0) ? + 1 : 0; + + pfree(mptr->key); + pfree(mptr->value); + + mptr++; + } + pfree(map); + + if (highlight == 0) + { + if (min_words >= max_words) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MinWords should be less than MaxWords"))); + if (min_words <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MinWords should be positive"))); + if (shortword < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ShortWord should be >= 0"))); + } + } + + if (highlight == 0) + { + while (hlCover(prs, query, &p, &q)) + { + /* find cover len in words */ + curlen = 0; + poslen = 0; + for (i = p; i <= q && curlen < max_words; i++) + { + if (!NONWORDTOKEN(prs->words[i].type)) + curlen++; + if (prs->words[i].item && !prs->words[i].repeated) + poslen++; + pose = i; + } + + if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword)) + { + /* best already finded, so try one more cover */ + p++; + continue; + } + + posb = p; + if (curlen < max_words) + { /* find good end */ + for (i = i - 1; i < prs->curwords && curlen < max_words; i++) + { + if (i != q) + { + if (!NONWORDTOKEN(prs->words[i].type)) + curlen++; + if (prs->words[i].item && !prs->words[i].repeated) + poslen++; + } + pose = i; + if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) + continue; + if (curlen >= min_words) + break; + } + if (curlen < min_words && i >= prs->curwords) + { /* got end of text and our cover is shoter + * than min_words */ + for (i = p; i >= 0; i--) + { + if (!NONWORDTOKEN(prs->words[i].type)) + curlen++; + if (prs->words[i].item && !prs->words[i].repeated) + poslen++; + if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) + continue; + if (curlen >= min_words) + break; + } + posb = (i >= 0) ? i : 0; + } + } + else + { /* shorter cover :((( */ + for (; curlen > min_words; i--) + { + if (!NONWORDTOKEN(prs->words[i].type)) + curlen--; + if (prs->words[i].item && !prs->words[i].repeated) + poslen--; + pose = i; + if (NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword) + continue; + break; + } + } + + if (bestlen < 0 || (poslen > bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || + (bestlen >= 0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) && + (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))) + { + bestb = posb; + beste = pose; + bestlen = poslen; + } + + p++; + } + + if (bestlen < 0) + { + curlen = 0; + for (i = 0; i < prs->curwords && curlen < min_words; i++) + { + if (!NONWORDTOKEN(prs->words[i].type)) + curlen++; + pose = i; + } + bestb = 0; + beste = pose; + } + } + else + { + bestb = 0; + beste = prs->curwords - 1; + } + + for (i = bestb; i <= beste; i++) + { + if (prs->words[i].item) + prs->words[i].selected = 1; + if (highlight == 0) + { + if (HLIDIGNORE(prs->words[i].type)) + prs->words[i].replace = 1; + } + else + { + if (HTMLHLIDIGNORE(prs->words[i].type)) + prs->words[i].replace = 1; + } + + prs->words[i].in = (prs->words[i].repeated) ? 0 : 1; + } + + if (!prs->startsel) + prs->startsel = pstrdup("<b>"); + if (!prs->stopsel) + prs->stopsel = pstrdup("</b>"); + prs->startsellen = strlen(prs->startsel); + prs->stopsellen = strlen(prs->stopsel); + + PG_RETURN_POINTER(prs); +} diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 12d158a49bd..73fcebb496d 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -1,7 +1,7 @@ # # Makefile for utils/adt # -# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.64 2007/04/02 03:49:39 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.65 2007/08/21 01:11:18 tgl Exp $ # subdir = src/backend/utils/adt @@ -25,8 +25,11 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \ tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ network.o mac.o inet_net_ntop.o inet_net_pton.o \ ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \ - ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o xml.o \ - uuid.o + ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \ + tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \ + tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \ + tsvector.o tsvector_op.o \ + uuid.o xml.o like.o: like.c like_match.c diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 7ec9db3ec9b..e49f323daa8 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.102 2007/06/26 16:48:09 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.103 2007/08/21 01:11:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,6 +27,8 @@ #include "catalog/namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_dict.h" #include "catalog/pg_type.h" #include "miscadmin.h" #include "parser/parse_type.h" @@ -1066,6 +1068,231 @@ regtypesend(PG_FUNCTION_ARGS) /* + * regconfigin - converts "tsconfigname" to tsconfig OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_ts_config entry. + * + * This function is not needed in bootstrap mode, so we don't worry about + * making it work then. + */ +Datum +regconfigin(PG_FUNCTION_ARGS) +{ + char *cfg_name_or_oid = PG_GETARG_CSTRING(0); + Oid result; + List *names; + + /* '-' ? */ + if (strcmp(cfg_name_or_oid, "-") == 0) + PG_RETURN_OID(InvalidOid); + + /* Numeric OID? */ + if (cfg_name_or_oid[0] >= '0' && + cfg_name_or_oid[0] <= '9' && + strspn(cfg_name_or_oid, "0123456789") == strlen(cfg_name_or_oid)) + { + result = DatumGetObjectId(DirectFunctionCall1(oidin, + CStringGetDatum(cfg_name_or_oid))); + PG_RETURN_OID(result); + } + + /* + * Normal case: parse the name into components and see if it matches any + * pg_ts_config entries in the current search path. + */ + names = stringToQualifiedNameList(cfg_name_or_oid); + + result = TSConfigGetCfgid(names, false); + + PG_RETURN_OID(result); +} + +/* + * regconfigout - converts tsconfig OID to "tsconfigname" + */ +Datum +regconfigout(PG_FUNCTION_ARGS) +{ + Oid cfgid = PG_GETARG_OID(0); + char *result; + HeapTuple cfgtup; + + if (cfgid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + cfgtup = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(cfgid), + 0, 0, 0); + + if (HeapTupleIsValid(cfgtup)) + { + Form_pg_ts_config cfgform = (Form_pg_ts_config) GETSTRUCT(cfgtup); + char *cfgname = NameStr(cfgform->cfgname); + char *nspname; + + /* + * Would this config be found by regconfigin? If not, qualify it. + */ + if (TSConfigIsVisible(cfgid)) + nspname = NULL; + else + nspname = get_namespace_name(cfgform->cfgnamespace); + + result = quote_qualified_identifier(nspname, cfgname); + + ReleaseSysCache(cfgtup); + } + else + { + /* If OID doesn't match any pg_ts_config row, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", cfgid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regconfigrecv - converts external binary format to regconfig + */ +Datum +regconfigrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regconfigsend - converts regconfig to binary format + */ +Datum +regconfigsend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* + * regdictionaryin - converts "tsdictionaryname" to tsdictionary OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_ts_dict entry. + * + * This function is not needed in bootstrap mode, so we don't worry about + * making it work then. + */ +Datum +regdictionaryin(PG_FUNCTION_ARGS) +{ + char *dict_name_or_oid = PG_GETARG_CSTRING(0); + Oid result; + List *names; + + /* '-' ? */ + if (strcmp(dict_name_or_oid, "-") == 0) + PG_RETURN_OID(InvalidOid); + + /* Numeric OID? */ + if (dict_name_or_oid[0] >= '0' && + dict_name_or_oid[0] <= '9' && + strspn(dict_name_or_oid, "0123456789") == strlen(dict_name_or_oid)) + { + result = DatumGetObjectId(DirectFunctionCall1(oidin, + CStringGetDatum(dict_name_or_oid))); + PG_RETURN_OID(result); + } + + /* + * Normal case: parse the name into components and see if it matches any + * pg_ts_dict entries in the current search path. + */ + names = stringToQualifiedNameList(dict_name_or_oid); + + result = TSDictionaryGetDictid(names, false); + + PG_RETURN_OID(result); +} + +/* + * regdictionaryout - converts tsdictionary OID to "tsdictionaryname" + */ +Datum +regdictionaryout(PG_FUNCTION_ARGS) +{ + Oid dictid = PG_GETARG_OID(0); + char *result; + HeapTuple dicttup; + + if (dictid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + dicttup = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(dictid), + 0, 0, 0); + + if (HeapTupleIsValid(dicttup)) + { + Form_pg_ts_dict dictform = (Form_pg_ts_dict) GETSTRUCT(dicttup); + char *dictname = NameStr(dictform->dictname); + char *nspname; + + /* + * Would this dictionary be found by regdictionaryin? + * If not, qualify it. + */ + if (TSDictionaryIsVisible(dictid)) + nspname = NULL; + else + nspname = get_namespace_name(dictform->dictnamespace); + + result = quote_qualified_identifier(nspname, dictname); + + ReleaseSysCache(dicttup); + } + else + { + /* If OID doesn't match any pg_ts_dict row, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", dictid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regdictionaryrecv - converts external binary format to regdictionary + */ +Datum +regdictionaryrecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regdictionarysend - converts regdictionary to binary format + */ +Datum +regdictionarysend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + + +/* * text_regclass: convert text to regclass * * This could be replaced by CoerceViaIO, except that we need to treat diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1ba0b4eac48..60da08b1ec4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.234 2007/05/05 17:05:48 mha Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.235 2007/08/21 01:11:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2822,6 +2822,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, case REGOPERATOROID: case REGCLASSOID: case REGTYPEOID: + case REGCONFIGOID: + case REGDICTIONARYOID: *scaledvalue = convert_numeric_to_scalar(value, valuetypid); *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid); *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid); @@ -2925,6 +2927,8 @@ convert_numeric_to_scalar(Datum value, Oid typid) case REGOPERATOROID: case REGCLASSOID: case REGTYPEOID: + case REGCONFIGOID: + case REGDICTIONARYOID: /* we can treat OIDs as integers... */ return (double) DatumGetObjectId(value); } diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c new file mode 100644 index 00000000000..491dd21aa81 --- /dev/null +++ b/src/backend/utils/adt/tsginidx.c @@ -0,0 +1,157 @@ +/*------------------------------------------------------------------------- + * + * tsginidx.c + * GIN support functions for tsvector_ops + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/skey.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" + + +Datum +gin_extract_tsvector(PG_FUNCTION_ARGS) +{ + TSVector vector = PG_GETARG_TSVECTOR(0); + uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1); + Datum *entries = NULL; + + *nentries = 0; + if (vector->size > 0) + { + int i; + WordEntry *we = ARRPTR(vector); + + *nentries = (uint32) vector->size; + entries = (Datum *) palloc(sizeof(Datum) * vector->size); + + for (i = 0; i < vector->size; i++) + { + text *txt = (text *) palloc(VARHDRSZ + we->len); + + SET_VARSIZE(txt, VARHDRSZ + we->len); + memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len); + + entries[i] = PointerGetDatum(txt); + + we++; + } + } + + PG_FREE_IF_COPY(vector, 0); + PG_RETURN_POINTER(entries); +} + +Datum +gin_extract_query(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + Datum *entries = NULL; + + *nentries = 0; + + if (query->size > 0) + { + int4 i, + j = 0, + len; + QueryItem *item; + + item = clean_NOT(GETQUERY(query), &len); + if (!item) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("query requires full scan, which is not supported by GIN indexes"))); + + item = GETQUERY(query); + + for (i = 0; i < query->size; i++) + if (item[i].type == VAL) + (*nentries)++; + + entries = (Datum *) palloc(sizeof(Datum) * (*nentries)); + + for (i = 0; i < query->size; i++) + if (item[i].type == VAL) + { + text *txt; + + txt = (text *) palloc(VARHDRSZ + item[i].length); + + SET_VARSIZE(txt, VARHDRSZ + item[i].length); + memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length); + + entries[j++] = PointerGetDatum(txt); + + if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("@@ operator does not support lexeme class restrictions"), + errhint("Use the @@@ operator instead."))); + } + } + else + *nentries = -1; /* nothing can be found */ + + PG_FREE_IF_COPY(query, 0); + + PG_RETURN_POINTER(entries); +} + +typedef struct +{ + QueryItem *frst; + bool *mapped_check; +} GinChkVal; + +static bool +checkcondition_gin(void *checkval, QueryItem * val) +{ + GinChkVal *gcv = (GinChkVal *) checkval; + + return gcv->mapped_check[val - gcv->frst]; +} + +Datum +gin_ts_consistent(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ + TSQuery query = PG_GETARG_TSQUERY(2); + bool res = FALSE; + + if (query->size > 0) + { + int4 i, + j = 0; + QueryItem *item; + GinChkVal gcv; + + gcv.frst = item = GETQUERY(query); + gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size); + + for (i = 0; i < query->size; i++) + if (item[i].type == VAL) + gcv.mapped_check[i] = check[j++]; + + res = TS_execute( + GETQUERY(query), + &gcv, + true, + checkcondition_gin + ); + } + + PG_RETURN_BOOL(res); +} diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c new file mode 100644 index 00000000000..04c2b4c442f --- /dev/null +++ b/src/backend/utils/adt/tsgistidx.c @@ -0,0 +1,784 @@ +/*------------------------------------------------------------------------- + * + * tsgistidx.c + * GiST support functions for tsvector_ops + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gist.h" +#include "access/tuptoaster.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" +#include "utils/pg_crc.h" + + +#define SIGLENINT 31 /* >121 => key will toast, so it will not work + * !!! */ + +#define SIGLEN ( sizeof(int4) * SIGLENINT ) +#define SIGLENBIT (SIGLEN * BITS_PER_BYTE) + +typedef char BITVEC[SIGLEN]; +typedef char *BITVECP; + +#define LOOPBYTE(a) \ + for(i=0;i<SIGLEN;i++) {\ + a;\ +} + +#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) ) +#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 ) +#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) ) +#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) ) +#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 ) + +#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) +#define HASH(sign, val) SETBIT((sign), HASHVAL(val)) + +#define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key)) + +/* + * type of GiST index key + */ + +typedef struct +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ ; + int4 flag; + char data[1]; +} SignTSVector; + +#define ARRKEY 0x01 +#define SIGNKEY 0x02 +#define ALLISTRUE 0x04 + +#define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY ) +#define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY ) +#define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE ) + +#define GTHDRSIZE ( VARHDRSZ + sizeof(int4) ) +#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) ) + +#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) ) +#define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) ) +#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) ) + +/* Number of one-bits in an unsigned byte */ +static const uint8 number_of_ones[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +static int4 sizebitvec(BITVECP sign); + +Datum +gtsvectorin(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("gtsvector_in not implemented"))); + PG_RETURN_DATUM(0); +} + +#define SINGOUTSTR "%d true bits, %d false bits" +#define ARROUTSTR "%d unique words" +#define EXTRALEN ( 2*13 ) + +static int outbuf_maxlen = 0; + +Datum +gtsvectorout(PG_FUNCTION_ARGS) +{ + SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0))); + char *outbuf; + + if (outbuf_maxlen == 0) + outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1; + outbuf = palloc(outbuf_maxlen); + + if (ISARRKEY(key)) + sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key)); + else + { + int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT : sizebitvec(GETSIGN(key)); + + sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue); + } + + PG_FREE_IF_COPY(key, 0); + PG_RETURN_POINTER(outbuf); +} + +static int +compareint(const void *a, const void *b) +{ + if (*((int4 *) a) == *((int4 *) b)) + return 0; + return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1; +} + +static int +uniqueint(int4 *a, int4 l) +{ + int4 *ptr, + *res; + + if (l == 1) + return l; + + ptr = res = a; + + qsort((void *) a, l, sizeof(int4), compareint); + + while (ptr - a < l) + if (*ptr != *res) + *(++res) = *ptr++; + else + ptr++; + return res + 1 - a; +} + +static void +makesign(BITVECP sign, SignTSVector * a) +{ + int4 k, + len = ARRNELEM(a); + int4 *ptr = GETARR(a); + + MemSet((void *) sign, 0, sizeof(BITVEC)); + for (k = 0; k < len; k++) + HASH(sign, ptr[k]); +} + +Datum +gtsvector_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval = entry; + + if (entry->leafkey) + { /* tsvector */ + SignTSVector *res; + TSVector val = DatumGetTSVector(entry->key); + int4 len; + int4 *arr; + WordEntry *ptr = ARRPTR(val); + char *words = STRPTR(val); + + len = CALCGTSIZE(ARRKEY, val->size); + res = (SignTSVector *) palloc(len); + SET_VARSIZE(res, len); + res->flag = ARRKEY; + arr = GETARR(res); + len = val->size; + while (len--) + { + pg_crc32 c; + + INIT_CRC32(c); + COMP_CRC32(c, words + ptr->pos, ptr->len); + FIN_CRC32(c); + + *arr = *(int4 *) &c; + arr++; + ptr++; + } + + len = uniqueint(GETARR(res), val->size); + if (len != val->size) + { + /* + * there is a collision of hash-function; len is always less than + * val->size + */ + len = CALCGTSIZE(ARRKEY, len); + res = (SignTSVector *) repalloc((void *) res, len); + SET_VARSIZE(res, len); + } + + /* make signature, if array is too long */ + if (VARSIZE(res) > TOAST_INDEX_TARGET) + { + SignTSVector *ressign; + + len = CALCGTSIZE(SIGNKEY, 0); + ressign = (SignTSVector *) palloc(len); + SET_VARSIZE(ressign, len); + ressign->flag = SIGNKEY; + makesign(GETSIGN(ressign), res); + res = ressign; + } + + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + gistentryinit(*retval, PointerGetDatum(res), + entry->rel, entry->page, + entry->offset, FALSE); + } + else if (ISSIGNKEY(DatumGetPointer(entry->key)) && + !ISALLTRUE(DatumGetPointer(entry->key))) + { + int4 i, + len; + SignTSVector *res; + BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); + + LOOPBYTE( + if ((sign[i] & 0xff) != 0xff) + PG_RETURN_POINTER(retval); + ); + + len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); + res = (SignTSVector *) palloc(len); + SET_VARSIZE(res, len); + res->flag = SIGNKEY | ALLISTRUE; + + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + gistentryinit(*retval, PointerGetDatum(res), + entry->rel, entry->page, + entry->offset, FALSE); + } + PG_RETURN_POINTER(retval); +} + +Datum +gtsvector_decompress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key)); + + if (key != (SignTSVector *) DatumGetPointer(entry->key)) + { + GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + + gistentryinit(*retval, PointerGetDatum(key), + entry->rel, entry->page, + entry->offset, FALSE); + + PG_RETURN_POINTER(retval); + } + + PG_RETURN_POINTER(entry); +} + +typedef struct +{ + int4 *arrb; + int4 *arre; +} CHKVAL; + +/* + * is there value 'val' in array or not ? + */ +static bool +checkcondition_arr(void *checkval, QueryItem * val) +{ + int4 *StopLow = ((CHKVAL *) checkval)->arrb; + int4 *StopHigh = ((CHKVAL *) checkval)->arre; + int4 *StopMiddle; + + /* Loop invariant: StopLow <= val < StopHigh */ + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + if (*StopMiddle == val->val) + return (true); + else if (*StopMiddle < val->val) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return (false); +} + +static bool +checkcondition_bit(void *checkval, QueryItem * val) +{ + return GETBIT(checkval, HASHVAL(val->val)); +} + +Datum +gtsvector_consistent(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(1); + SignTSVector *key = (SignTSVector *) DatumGetPointer( + ((GISTENTRY *) PG_GETARG_POINTER(0))->key + ); + + if (!query->size) + PG_RETURN_BOOL(false); + + if (ISSIGNKEY(key)) + { + if (ISALLTRUE(key)) + PG_RETURN_BOOL(true); + + PG_RETURN_BOOL(TS_execute( + GETQUERY(query), + (void *) GETSIGN(key), false, + checkcondition_bit + )); + } + else + { /* only leaf pages */ + CHKVAL chkval; + + chkval.arrb = GETARR(key); + chkval.arre = chkval.arrb + ARRNELEM(key); + PG_RETURN_BOOL(TS_execute( + GETQUERY(query), + (void *) &chkval, true, + checkcondition_arr + )); + } +} + +static int4 +unionkey(BITVECP sbase, SignTSVector * add) +{ + int4 i; + + if (ISSIGNKEY(add)) + { + BITVECP sadd = GETSIGN(add); + + if (ISALLTRUE(add)) + return 1; + + LOOPBYTE( + sbase[i] |= sadd[i]; + ); + } + else + { + int4 *ptr = GETARR(add); + + for (i = 0; i < ARRNELEM(add); i++) + HASH(sbase, ptr[i]); + } + return 0; +} + + +Datum +gtsvector_union(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + int *size = (int *) PG_GETARG_POINTER(1); + BITVEC base; + int4 i, + len; + int4 flag = 0; + SignTSVector *result; + + MemSet((void *) base, 0, sizeof(BITVEC)); + for (i = 0; i < entryvec->n; i++) + { + if (unionkey(base, GETENTRY(entryvec, i))) + { + flag = ALLISTRUE; + break; + } + } + + flag |= SIGNKEY; + len = CALCGTSIZE(flag, 0); + result = (SignTSVector *) palloc(len); + *size = len; + SET_VARSIZE(result, len); + result->flag = flag; + if (!ISALLTRUE(result)) + memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC)); + + PG_RETURN_POINTER(result); +} + +Datum +gtsvector_same(PG_FUNCTION_ARGS) +{ + SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0); + SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1); + bool *result = (bool *) PG_GETARG_POINTER(2); + + if (ISSIGNKEY(a)) + { /* then b also ISSIGNKEY */ + if (ISALLTRUE(a) && ISALLTRUE(b)) + *result = true; + else if (ISALLTRUE(a)) + *result = false; + else if (ISALLTRUE(b)) + *result = false; + else + { + int4 i; + BITVECP sa = GETSIGN(a), + sb = GETSIGN(b); + + *result = true; + LOOPBYTE( + if (sa[i] != sb[i]) + { + *result = false; + break; + } + ); + } + } + else + { /* a and b ISARRKEY */ + int4 lena = ARRNELEM(a), + lenb = ARRNELEM(b); + + if (lena != lenb) + *result = false; + else + { + int4 *ptra = GETARR(a), + *ptrb = GETARR(b); + int4 i; + + *result = true; + for (i = 0; i < lena; i++) + if (ptra[i] != ptrb[i]) + { + *result = false; + break; + } + } + } + + PG_RETURN_POINTER(result); +} + +static int4 +sizebitvec(BITVECP sign) +{ + int4 size = 0, + i; + + LOOPBYTE( + size += number_of_ones[(unsigned char) sign[i]]; + ); + return size; +} + +static int +hemdistsign(BITVECP a, BITVECP b) +{ + int i, + diff, + dist = 0; + + LOOPBYTE( + diff = (unsigned char) (a[i] ^ b[i]); + dist += number_of_ones[diff]; + ); + return dist; +} + +static int +hemdist(SignTSVector * a, SignTSVector * b) +{ + if (ISALLTRUE(a)) + { + if (ISALLTRUE(b)) + return 0; + else + return SIGLENBIT - sizebitvec(GETSIGN(b)); + } + else if (ISALLTRUE(b)) + return SIGLENBIT - sizebitvec(GETSIGN(a)); + + return hemdistsign(GETSIGN(a), GETSIGN(b)); +} + +Datum +gtsvector_penalty(PG_FUNCTION_ARGS) +{ + GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ + GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1); + float *penalty = (float *) PG_GETARG_POINTER(2); + SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key); + SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key); + BITVECP orig = GETSIGN(origval); + + *penalty = 0.0; + + if (ISARRKEY(newval)) + { + BITVEC sign; + + makesign(sign, newval); + + if (ISALLTRUE(origval)) + *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1); + else + *penalty = hemdistsign(sign, orig); + } + else + *penalty = hemdist(origval, newval); + PG_RETURN_POINTER(penalty); +} + +typedef struct +{ + bool allistrue; + BITVEC sign; +} CACHESIGN; + +static void +fillcache(CACHESIGN * item, SignTSVector * key) +{ + item->allistrue = false; + if (ISARRKEY(key)) + makesign(item->sign, key); + else if (ISALLTRUE(key)) + item->allistrue = true; + else + memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC)); +} + +#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) +typedef struct +{ + OffsetNumber pos; + int4 cost; +} SPLITCOST; + +static int +comparecost(const void *a, const void *b) +{ + if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) + return 0; + else + return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1; +} + + +static int +hemdistcache(CACHESIGN * a, CACHESIGN * b) +{ + if (a->allistrue) + { + if (b->allistrue) + return 0; + else + return SIGLENBIT - sizebitvec(b->sign); + } + else if (b->allistrue) + return SIGLENBIT - sizebitvec(a->sign); + + return hemdistsign(a->sign, b->sign); +} + +Datum +gtsvector_picksplit(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + OffsetNumber k, + j; + SignTSVector *datum_l, + *datum_r; + BITVECP union_l, + union_r; + int4 size_alpha, + size_beta; + int4 size_waste, + waste = -1; + int4 nbytes; + OffsetNumber seed_1 = 0, + seed_2 = 0; + OffsetNumber *left, + *right; + OffsetNumber maxoff; + BITVECP ptr; + int i; + CACHESIGN *cache; + SPLITCOST *costvector; + + maxoff = entryvec->n - 2; + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + + cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); + fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber)); + + for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) + { + for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) + { + if (k == FirstOffsetNumber) + fillcache(&cache[j], GETENTRY(entryvec, j)); + + size_waste = hemdistcache(&(cache[j]), &(cache[k])); + if (size_waste > waste) + { + waste = size_waste; + seed_1 = k; + seed_2 = j; + } + } + } + + left = v->spl_left; + v->spl_nleft = 0; + right = v->spl_right; + v->spl_nright = 0; + + if (seed_1 == 0 || seed_2 == 0) + { + seed_1 = 1; + seed_2 = 2; + } + + /* form initial .. */ + if (cache[seed_1].allistrue) + { + datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); + SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); + datum_l->flag = SIGNKEY | ALLISTRUE; + } + else + { + datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0)); + SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0)); + datum_l->flag = SIGNKEY; + memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC)); + } + if (cache[seed_2].allistrue) + { + datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); + SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); + datum_r->flag = SIGNKEY | ALLISTRUE; + } + else + { + datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0)); + SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0)); + datum_r->flag = SIGNKEY; + memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC)); + } + + union_l = GETSIGN(datum_l); + union_r = GETSIGN(datum_r); + maxoff = OffsetNumberNext(maxoff); + fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff)); + /* sort before ... */ + costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); + for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) + { + costvector[j - 1].pos = j; + size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j])); + size_beta = hemdistcache(&(cache[seed_2]), &(cache[j])); + costvector[j - 1].cost = Abs(size_alpha - size_beta); + } + qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); + + for (k = 0; k < maxoff; k++) + { + j = costvector[k].pos; + if (j == seed_1) + { + *left++ = j; + v->spl_nleft++; + continue; + } + else if (j == seed_2) + { + *right++ = j; + v->spl_nright++; + continue; + } + + if (ISALLTRUE(datum_l) || cache[j].allistrue) + { + if (ISALLTRUE(datum_l) && cache[j].allistrue) + size_alpha = 0; + else + size_alpha = SIGLENBIT - sizebitvec( + (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign) + ); + } + else + size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l)); + + if (ISALLTRUE(datum_r) || cache[j].allistrue) + { + if (ISALLTRUE(datum_r) && cache[j].allistrue) + size_beta = 0; + else + size_beta = SIGLENBIT - sizebitvec( + (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign) + ); + } + else + size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r)); + + if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) + { + if (ISALLTRUE(datum_l) || cache[j].allistrue) + { + if (!ISALLTRUE(datum_l)) + MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC)); + } + else + { + ptr = cache[j].sign; + LOOPBYTE( + union_l[i] |= ptr[i]; + ); + } + *left++ = j; + v->spl_nleft++; + } + else + { + if (ISALLTRUE(datum_r) || cache[j].allistrue) + { + if (!ISALLTRUE(datum_r)) + MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC)); + } + else + { + ptr = cache[j].sign; + LOOPBYTE( + union_r[i] |= ptr[i]; + ); + } + *right++ = j; + v->spl_nright++; + } + } + + *right = *left = FirstOffsetNumber; + v->spl_ldatum = PointerGetDatum(datum_l); + v->spl_rdatum = PointerGetDatum(datum_r); + + PG_RETURN_POINTER(v); +} diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c new file mode 100644 index 00000000000..1f8abb3298e --- /dev/null +++ b/src/backend/utils/adt/tsquery.c @@ -0,0 +1,767 @@ +/*------------------------------------------------------------------------- + * + * tsquery.c + * I/O functions for tsquery + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "libpq/pqformat.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" +#include "utils/memutils.h" +#include "utils/pg_crc.h" + +/* parser's states */ +#define WAITOPERAND 1 +#define WAITOPERATOR 2 +#define WAITFIRSTOPERAND 3 +#define WAITSINGLEOPERAND 4 + +/* + * node of query tree, also used + * for storing polish notation in parser + */ +typedef struct ParseQueryNode +{ + int2 weight; + int2 type; + int4 val; + int2 distance; + int2 length; + struct ParseQueryNode *next; +} ParseQueryNode; + +static char * +get_weight(char *buf, int2 *weight) +{ + *weight = 0; + + if (!t_iseq(buf, ':')) + return buf; + + buf++; + while (*buf && pg_mblen(buf) == 1) + { + switch (*buf) + { + case 'a': + case 'A': + *weight |= 1 << 3; + break; + case 'b': + case 'B': + *weight |= 1 << 2; + break; + case 'c': + case 'C': + *weight |= 1 << 1; + break; + case 'd': + case 'D': + *weight |= 1; + break; + default: + return buf; + } + buf++; + } + + return buf; +} + +/* + * get token from query string + */ +static int4 +gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight) +{ + while (1) + { + switch (state->state) + { + case WAITFIRSTOPERAND: + case WAITOPERAND: + if (t_iseq(state->buf, '!')) + { + (state->buf)++; /* can safely ++, t_iseq guarantee + * that pg_mblen()==1 */ + *val = (int4) '!'; + state->state = WAITOPERAND; + return OPR; + } + else if (t_iseq(state->buf, '(')) + { + state->count++; + (state->buf)++; + state->state = WAITOPERAND; + return OPEN; + } + else if (t_iseq(state->buf, ':')) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error at start of operand in tsearch query: \"%s\"", + state->buffer))); + } + else if (!t_isspace(state->buf)) + { + state->valstate.prsbuf = state->buf; + if (gettoken_tsvector(&(state->valstate))) + { + *strval = state->valstate.word; + *lenval = state->valstate.curpos - state->valstate.word; + state->buf = get_weight(state->valstate.prsbuf, weight); + state->state = WAITOPERATOR; + return VAL; + } + else if (state->state == WAITFIRSTOPERAND) + return END; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("no operand in tsearch query: \"%s\"", + state->buffer))); + } + break; + case WAITOPERATOR: + if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')) + { + state->state = WAITOPERAND; + *val = (int4) *(state->buf); + (state->buf)++; + return OPR; + } + else if (t_iseq(state->buf, ')')) + { + (state->buf)++; + state->count--; + return (state->count < 0) ? ERR : CLOSE; + } + else if (*(state->buf) == '\0') + return (state->count) ? ERR : END; + else if (!t_isspace(state->buf)) + return ERR; + break; + case WAITSINGLEOPERAND: + if (*(state->buf) == '\0') + return END; + *strval = state->buf; + *lenval = strlen(state->buf); + state->buf += strlen(state->buf); + state->count++; + return VAL; + default: + return ERR; + break; + } + state->buf += pg_mblen(state->buf); + } + return END; +} + +/* + * push new one in polish notation reverse view + */ +void +pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight) +{ + ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode)); + + tmp->weight = weight; + tmp->type = type; + tmp->val = val; + if (distance >= MAXSTRPOS) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("value is too big in tsearch query: \"%s\"", + state->buffer))); + if (lenval >= MAXSTRLEN) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("operand is too long in tsearch query: \"%s\"", + state->buffer))); + tmp->distance = distance; + tmp->length = lenval; + tmp->next = state->str; + state->str = tmp; + state->num++; +} + +/* + * This function is used for tsquery parsing + */ +void +pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight) +{ + pg_crc32 c; + + if (lenval >= MAXSTRLEN) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("word is too long in tsearch query: \"%s\"", + state->buffer))); + + INIT_CRC32(c); + COMP_CRC32(c, strval, lenval); + FIN_CRC32(c); + pushquery(state, type, *(int4 *) &c, + state->curop - state->op, lenval, weight); + + while (state->curop - state->op + lenval + 1 >= state->lenop) + { + int4 tmp = state->curop - state->op; + + state->lenop *= 2; + state->op = (char *) repalloc((void *) state->op, state->lenop); + state->curop = state->op + tmp; + } + memcpy((void *) state->curop, (void *) strval, lenval); + state->curop += lenval; + *(state->curop) = '\0'; + state->curop++; + state->sumlen += lenval + 1 /* \0 */ ; + return; +} + +#define STACKDEPTH 32 +/* + * make polish notation of query + */ +static int4 +makepol(TSQueryParserState * state, void (*pushval) (TSQueryParserState *, int, char *, int, int2)) +{ + int4 val = 0, + type; + int4 lenval = 0; + char *strval = NULL; + int4 stack[STACKDEPTH]; + int4 lenstack = 0; + int2 weight = 0; + + while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END) + { + switch (type) + { + case VAL: + pushval(state, VAL, strval, lenval, weight); + while (lenstack && (stack[lenstack - 1] == (int4) '&' || + stack[lenstack - 1] == (int4) '!')) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + } + break; + case OPR: + if (lenstack && val == (int4) '|') + pushquery(state, OPR, val, 0, 0, 0); + else + { + if (lenstack == STACKDEPTH) /* internal error */ + elog(ERROR, "tsquery stack too small"); + stack[lenstack] = val; + lenstack++; + } + break; + case OPEN: + if (makepol(state, pushval) == ERR) + return ERR; + if (lenstack && (stack[lenstack - 1] == (int4) '&' || + stack[lenstack - 1] == (int4) '!')) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + } + break; + case CLOSE: + while (lenstack) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + }; + return END; + break; + case ERR: + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsearch query: \"%s\"", + state->buffer))); + return ERR; + + } + } + while (lenstack) + { + lenstack--; + pushquery(state, OPR, stack[lenstack], 0, 0, 0); + }; + return END; +} + +static void +findoprnd(QueryItem * ptr, int4 *pos) +{ + if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP) + { + ptr[*pos].left = 0; + (*pos)++; + } + else if (ptr[*pos].val == (int4) '!') + { + ptr[*pos].left = 1; + (*pos)++; + findoprnd(ptr, pos); + } + else + { + QueryItem *curitem = &ptr[*pos]; + int4 tmp = *pos; + + (*pos)++; + findoprnd(ptr, pos); + curitem->left = *pos - tmp; + findoprnd(ptr, pos); + } +} + + +/* + * input + */ +TSQuery +parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain) +{ + TSQueryParserState state; + int4 i; + TSQuery query; + int4 commonlen; + QueryItem *ptr; + ParseQueryNode *tmp; + int4 pos = 0; + + /* init state */ + state.buffer = buf; + state.buf = buf; + state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND; + state.count = 0; + state.num = 0; + state.str = NULL; + state.cfg_id = cfg_id; + + /* init value parser's state */ + state.valstate.oprisdelim = true; + state.valstate.len = 32; + state.valstate.word = (char *) palloc(state.valstate.len); + + /* init list of operand */ + state.sumlen = 0; + state.lenop = 64; + state.curop = state.op = (char *) palloc(state.lenop); + *(state.curop) = '\0'; + + /* parse query & make polish notation (postfix, but in reverse order) */ + makepol(&state, pushval); + pfree(state.valstate.word); + if (!state.num) + { + ereport(NOTICE, + (errmsg("tsearch query doesn't contain lexeme(s): \"%s\"", + state.buffer))); + query = (TSQuery) palloc(HDRSIZETQ); + SET_VARSIZE(query, HDRSIZETQ); + query->size = 0; + return query; + } + + /* make finish struct */ + commonlen = COMPUTESIZE(state.num, state.sumlen); + query = (TSQuery) palloc(commonlen); + SET_VARSIZE(query, commonlen); + query->size = state.num; + ptr = GETQUERY(query); + + /* set item in polish notation */ + for (i = 0; i < state.num; i++) + { + ptr[i].weight = state.str->weight; + ptr[i].type = state.str->type; + ptr[i].val = state.str->val; + ptr[i].distance = state.str->distance; + ptr[i].length = state.str->length; + tmp = state.str->next; + pfree(state.str); + state.str = tmp; + } + + /* set user friendly-operand view */ + memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); + pfree(state.op); + + /* set left operand's position for every operator */ + pos = 0; + findoprnd(ptr, &pos); + + return query; +} + +/* + * in without morphology + */ +Datum +tsqueryin(PG_FUNCTION_ARGS) +{ + char *in = PG_GETARG_CSTRING(0); + + pg_verifymbstr(in, strlen(in), false); + + PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false)); +} + +/* + * out function + */ +typedef struct +{ + QueryItem *curpol; + char *buf; + char *cur; + char *op; + int4 buflen; +} INFIX; + +#define RESIZEBUF(inf,addsize) \ +while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \ +{ \ + int4 len = (inf)->cur - (inf)->buf; \ + (inf)->buflen *= 2; \ + (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \ + (inf)->cur = (inf)->buf + len; \ +} + +/* + * recursive walk on tree and print it in + * infix (human-readable) view + */ +static void +infix(INFIX * in, bool first) +{ + if (in->curpol->type == VAL) + { + char *op = in->op + in->curpol->distance; + int clen; + + RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5); + *(in->cur) = '\''; + in->cur++; + while (*op) + { + if (t_iseq(op, '\'')) + { + *(in->cur) = '\''; + in->cur++; + } + COPYCHAR(in->cur, op); + + clen = pg_mblen(op); + op += clen; + in->cur += clen; + } + *(in->cur) = '\''; + in->cur++; + if (in->curpol->weight) + { + *(in->cur) = ':'; + in->cur++; + if (in->curpol->weight & (1 << 3)) + { + *(in->cur) = 'A'; + in->cur++; + } + if (in->curpol->weight & (1 << 2)) + { + *(in->cur) = 'B'; + in->cur++; + } + if (in->curpol->weight & (1 << 1)) + { + *(in->cur) = 'C'; + in->cur++; + } + if (in->curpol->weight & 1) + { + *(in->cur) = 'D'; + in->cur++; + } + } + *(in->cur) = '\0'; + in->curpol++; + } + else if (in->curpol->val == (int4) '!') + { + bool isopr = false; + + RESIZEBUF(in, 1); + *(in->cur) = '!'; + in->cur++; + *(in->cur) = '\0'; + in->curpol++; + if (in->curpol->type == OPR) + { + isopr = true; + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + infix(in, isopr); + if (isopr) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } + else + { + int4 op = in->curpol->val; + INFIX nrm; + + in->curpol++; + if (op == (int4) '|' && !first) + { + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + + nrm.curpol = in->curpol; + nrm.op = in->op; + nrm.buflen = 16; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + + /* get right operand */ + infix(&nrm, false); + + /* get & print left operand */ + in->curpol = nrm.curpol; + infix(in, false); + + /* print operator & right operand */ + RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); + sprintf(in->cur, " %c %s", op, nrm.buf); + in->cur = strchr(in->cur, '\0'); + pfree(nrm.buf); + + if (op == (int4) '|' && !first) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } +} + + +Datum +tsqueryout(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + INFIX nrm; + + if (query->size == 0) + { + char *b = palloc(1); + + *b = '\0'; + PG_RETURN_POINTER(b); + } + nrm.curpol = GETQUERY(query); + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, true); + + PG_FREE_IF_COPY(query, 0); + PG_RETURN_CSTRING(nrm.buf); +} + +Datum +tsquerysend(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + StringInfoData buf; + int i; + QueryItem *item = GETQUERY(query); + + pq_begintypsend(&buf); + + pq_sendint(&buf, query->size, sizeof(int32)); + for (i = 0; i < query->size; i++) + { + int tmp; + + pq_sendint(&buf, item->type, sizeof(item->type)); + pq_sendint(&buf, item->weight, sizeof(item->weight)); + pq_sendint(&buf, item->left, sizeof(item->left)); + pq_sendint(&buf, item->val, sizeof(item->val)); + + /* + * We are sure that sizeof(WordEntry) == sizeof(int32), and about + * layout of QueryItem + */ + tmp = *(int32 *) (((char *) item) + HDRSIZEQI); + pq_sendint(&buf, tmp, sizeof(tmp)); + + item++; + } + + item = GETQUERY(query); + for (i = 0; i < query->size; i++) + { + if (item->type == VAL) + pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length); + item++; + } + + PG_FREE_IF_COPY(query, 0); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +tsqueryrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + TSQuery query; + int i, + size, + tmp, + len = HDRSIZETQ; + QueryItem *item; + int datalen = 0; + char *ptr; + + size = pq_getmsgint(buf, sizeof(uint32)); + if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem))) + elog(ERROR, "invalid size of tsquery"); + len += sizeof(QueryItem) * size; + + query = (TSQuery) palloc(len); + query->size = size; + item = GETQUERY(query); + + for (i = 0; i < size; i++) + { + item->type = (int8) pq_getmsgint(buf, sizeof(int8)); + item->weight = (int8) pq_getmsgint(buf, sizeof(int8)); + item->left = (int16) pq_getmsgint(buf, sizeof(int16)); + item->val = (int32) pq_getmsgint(buf, sizeof(int32)); + tmp = pq_getmsgint(buf, sizeof(int32)); + memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32)); + + /* + * Sanity checks + */ + if (item->type == VAL) + { + datalen += item->length + 1; /* \0 */ + } + else if (item->type == OPR) + { + if (item->val == '|' || item->val == '&') + { + if (item->left <= 0 || i + item->left >= size) + elog(ERROR, "invalid pointer to left operand"); + } + + if (i == size - 1) + elog(ERROR, "invalid pointer to right operand"); + } + else + elog(ERROR, "unknown tsquery node type"); + + item++; + } + + query = (TSQuery) repalloc(query, len + datalen); + + item = GETQUERY(query); + ptr = GETOPERAND(query); + for (i = 0; i < size; i++) + { + if (item->type == VAL) + { + item->distance = ptr - GETOPERAND(query); + memcpy(ptr, + pq_getmsgbytes(buf, item->length), + item->length); + ptr += item->length; + *ptr++ = '\0'; + } + item++; + } + + Assert(ptr - GETOPERAND(query) == datalen); + + SET_VARSIZE(query, len + datalen); + + PG_RETURN_TSVECTOR(query); +} + +/* + * debug function, used only for view query + * which will be executed in non-leaf pages in index + */ +Datum +tsquerytree(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + INFIX nrm; + text *res; + QueryItem *q; + int4 len; + + if (query->size == 0) + { + res = (text *) palloc(VARHDRSZ); + SET_VARSIZE(res, VARHDRSZ); + PG_RETURN_POINTER(res); + } + + q = clean_NOT(GETQUERY(query), &len); + + if (!q) + { + res = (text *) palloc(1 + VARHDRSZ); + SET_VARSIZE(res, 1 + VARHDRSZ); + *((char *) VARDATA(res)) = 'T'; + } + else + { + nrm.curpol = q; + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, true); + + res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ); + SET_VARSIZE(res, nrm.cur - nrm.buf + VARHDRSZ); + strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf); + pfree(q); + } + + PG_FREE_IF_COPY(query, 0); + + PG_RETURN_POINTER(res); +} diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c new file mode 100644 index 00000000000..7991a4ad198 --- /dev/null +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -0,0 +1,261 @@ +/*------------------------------------------------------------------------- + * + * tsquery_cleanup.c + * Cleanup query from NOT values and/or stopword + * Utility functions to correct work. + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" + +typedef struct NODE +{ + struct NODE *left; + struct NODE *right; + QueryItem *valnode; +} NODE; + +/* + * make query tree from plain view of query + */ +static NODE * +maketree(QueryItem * in) +{ + NODE *node = (NODE *) palloc(sizeof(NODE)); + + node->valnode = in; + node->right = node->left = NULL; + if (in->type == OPR) + { + node->right = maketree(in + 1); + if (in->val != (int4) '!') + node->left = maketree(in + in->left); + } + return node; +} + +typedef struct +{ + QueryItem *ptr; + int4 len; + int4 cur; +} PLAINTREE; + +static void +plainnode(PLAINTREE * state, NODE * node) +{ + if (state->cur == state->len) + { + state->len *= 2; + state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem)); + } + memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem)); + if (node->valnode->type == VAL) + state->cur++; + else if (node->valnode->val == (int4) '!') + { + state->ptr[state->cur].left = 1; + state->cur++; + plainnode(state, node->right); + } + else + { + int4 cur = state->cur; + + state->cur++; + plainnode(state, node->right); + state->ptr[cur].left = state->cur - cur; + plainnode(state, node->left); + } + pfree(node); +} + +/* + * make plain view of tree from 'normal' view of tree + */ +static QueryItem * +plaintree(NODE * root, int4 *len) +{ + PLAINTREE pl; + + pl.cur = 0; + pl.len = 16; + if (root && (root->valnode->type == VAL || root->valnode->type == OPR)) + { + pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem)); + plainnode(&pl, root); + } + else + pl.ptr = NULL; + *len = pl.cur; + return pl.ptr; +} + +static void +freetree(NODE * node) +{ + if (!node) + return; + if (node->left) + freetree(node->left); + if (node->right) + freetree(node->right); + pfree(node); +} + +/* + * clean tree for ! operator. + * It's usefull for debug, but in + * other case, such view is used with search in index. + * Operator ! always return TRUE + */ +static NODE * +clean_NOT_intree(NODE * node) +{ + if (node->valnode->type == VAL) + return node; + + if (node->valnode->val == (int4) '!') + { + freetree(node); + return NULL; + } + + /* operator & or | */ + if (node->valnode->val == (int4) '|') + { + if ((node->left = clean_NOT_intree(node->left)) == NULL || + (node->right = clean_NOT_intree(node->right)) == NULL) + { + freetree(node); + return NULL; + } + } + else + { + NODE *res = node; + + node->left = clean_NOT_intree(node->left); + node->right = clean_NOT_intree(node->right); + if (node->left == NULL && node->right == NULL) + { + pfree(node); + res = NULL; + } + else if (node->left == NULL) + { + res = node->right; + pfree(node); + } + else if (node->right == NULL) + { + res = node->left; + pfree(node); + } + return res; + } + return node; +} + +QueryItem * +clean_NOT(QueryItem * ptr, int4 *len) +{ + NODE *root = maketree(ptr); + + return plaintree(clean_NOT_intree(root), len); +} + + +#ifdef V_UNKNOWN /* exists in Windows headers */ +#undef V_UNKNOWN +#endif + +#define V_UNKNOWN 0 +#define V_TRUE 1 +#define V_FALSE 2 +#define V_STOP 3 + +/* + * Clean query tree from values which is always in + * text (stopword) + */ +static NODE * +clean_fakeval_intree(NODE * node, char *result) +{ + char lresult = V_UNKNOWN, + rresult = V_UNKNOWN; + + if (node->valnode->type == VAL) + return node; + else if (node->valnode->type == VALSTOP) + { + pfree(node); + *result = V_STOP; + return NULL; + } + + + if (node->valnode->val == (int4) '!') + { + node->right = clean_fakeval_intree(node->right, &rresult); + if (!node->right) + { + *result = V_STOP; + freetree(node); + return NULL; + } + } + else + { + NODE *res = node; + + node->left = clean_fakeval_intree(node->left, &lresult); + node->right = clean_fakeval_intree(node->right, &rresult); + if (lresult == V_STOP && rresult == V_STOP) + { + freetree(node); + *result = V_STOP; + return NULL; + } + else if (lresult == V_STOP) + { + res = node->right; + pfree(node); + } + else if (rresult == V_STOP) + { + res = node->left; + pfree(node); + } + return res; + } + return node; +} + +QueryItem * +clean_fakeval(QueryItem * ptr, int4 *len) +{ + NODE *root = maketree(ptr); + char result = V_UNKNOWN; + NODE *resroot; + + resroot = clean_fakeval_intree(root, &result); + if (result != V_UNKNOWN) + { + elog(NOTICE, "query contains only stopword(s) or doesn't contain lexeme(s), ignored"); + *len = 0; + return NULL; + } + + return plaintree(resroot, len); +} diff --git a/src/backend/utils/adt/tsquery_gist.c b/src/backend/utils/adt/tsquery_gist.c new file mode 100644 index 00000000000..0deca10075c --- /dev/null +++ b/src/backend/utils/adt/tsquery_gist.c @@ -0,0 +1,259 @@ +/*------------------------------------------------------------------------- + * + * tsquery_gist.c + * GiST index support for tsquery + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_gist.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/skey.h" +#include "access/gist.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" + +#define GETENTRY(vec,pos) ((TSQuerySign *) DatumGetPointer((vec)->vector[(pos)].key)) + +Datum +gtsquery_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval = entry; + + if (entry->leafkey) + { + TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign)); + + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + *sign = makeTSQuerySign(DatumGetTSQuery(entry->key)); + + gistentryinit(*retval, PointerGetDatum(sign), + entry->rel, entry->page, + entry->offset, FALSE); + } + + PG_RETURN_POINTER(retval); +} + +Datum +gtsquery_decompress(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); +} + +Datum +gtsquery_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + TSQuerySign *key = (TSQuerySign *) DatumGetPointer(entry->key); + TSQuery query = PG_GETARG_TSQUERY(1); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + TSQuerySign sq = makeTSQuerySign(query); + bool retval; + + switch (strategy) + { + case RTContainsStrategyNumber: + if (GIST_LEAF(entry)) + retval = (*key & sq) == sq; + else + retval = (*key & sq) != 0; + break; + case RTContainedByStrategyNumber: + if (GIST_LEAF(entry)) + retval = (*key & sq) == *key; + else + retval = (*key & sq) != 0; + break; + default: + retval = FALSE; + } + PG_RETURN_BOOL(retval); +} + +Datum +gtsquery_union(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + int *size = (int *) PG_GETARG_POINTER(1); + TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign)); + int i; + + memset(sign, 0, sizeof(TSQuerySign)); + + for (i = 0; i < entryvec->n; i++) + *sign |= *GETENTRY(entryvec, i); + + *size = sizeof(TSQuerySign); + + PG_RETURN_POINTER(sign); +} + +Datum +gtsquery_same(PG_FUNCTION_ARGS) +{ + TSQuerySign *a = (TSQuerySign *) PG_GETARG_POINTER(0); + TSQuerySign *b = (TSQuerySign *) PG_GETARG_POINTER(1); + + PG_RETURN_POINTER(*a == *b); +} + +static int +sizebitvec(TSQuerySign sign) +{ + int size = 0, + i; + + for (i = 0; i < TSQS_SIGLEN; i++) + size += 0x01 & (sign >> i); + + return size; +} + +static int +hemdist(TSQuerySign a, TSQuerySign b) +{ + TSQuerySign res = a ^ b; + + return sizebitvec(res); +} + +Datum +gtsquery_penalty(PG_FUNCTION_ARGS) +{ + TSQuerySign *origval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key); + TSQuerySign *newval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key); + float *penalty = (float *) PG_GETARG_POINTER(2); + + *penalty = hemdist(*origval, *newval); + + PG_RETURN_POINTER(penalty); +} + + +typedef struct +{ + OffsetNumber pos; + int4 cost; +} SPLITCOST; + +static int +comparecost(const void *a, const void *b) +{ + if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) + return 0; + else + return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1; +} + +#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) + +Datum +gtsquery_picksplit(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + OffsetNumber maxoff = entryvec->n - 2; + OffsetNumber k, + j; + + TSQuerySign *datum_l, + *datum_r; + int4 size_alpha, + size_beta; + int4 size_waste, + waste = -1; + int4 nbytes; + OffsetNumber seed_1 = 0, + seed_2 = 0; + OffsetNumber *left, + *right; + + SPLITCOST *costvector; + + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + left = v->spl_left = (OffsetNumber *) palloc(nbytes); + right = v->spl_right = (OffsetNumber *) palloc(nbytes); + v->spl_nleft = v->spl_nright = 0; + + for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) + for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) + { + size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k)); + if (size_waste > waste) + { + waste = size_waste; + seed_1 = k; + seed_2 = j; + } + } + + + if (seed_1 == 0 || seed_2 == 0) + { + seed_1 = 1; + seed_2 = 2; + } + + datum_l = (TSQuerySign *) palloc(sizeof(TSQuerySign)); + *datum_l = *GETENTRY(entryvec, seed_1); + datum_r = (TSQuerySign *) palloc(sizeof(TSQuerySign)); + *datum_r = *GETENTRY(entryvec, seed_2); + + + maxoff = OffsetNumberNext(maxoff); + costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); + for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) + { + costvector[j - 1].pos = j; + size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j)); + size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j)); + costvector[j - 1].cost = abs(size_alpha - size_beta); + } + qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); + + for (k = 0; k < maxoff; k++) + { + j = costvector[k].pos; + if (j == seed_1) + { + *left++ = j; + v->spl_nleft++; + continue; + } + else if (j == seed_2) + { + *right++ = j; + v->spl_nright++; + continue; + } + size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j)); + size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j)); + + if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) + { + *datum_l |= *GETENTRY(entryvec, j); + *left++ = j; + v->spl_nleft++; + } + else + { + *datum_r |= *GETENTRY(entryvec, j); + *right++ = j; + v->spl_nright++; + } + } + + *right = *left = FirstOffsetNumber; + v->spl_ldatum = PointerGetDatum(datum_l); + v->spl_rdatum = PointerGetDatum(datum_r); + + PG_RETURN_POINTER(v); +} diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c new file mode 100644 index 00000000000..fd97c2796df --- /dev/null +++ b/src/backend/utils/adt/tsquery_op.c @@ -0,0 +1,289 @@ +/*------------------------------------------------------------------------- + * + * tsquery_op.c + * Various operations with tsquery + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/ts_type.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_utils.h" +#include "utils/pg_crc.h" + +Datum +tsquery_numnode(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + int nnode = query->size; + + PG_FREE_IF_COPY(query, 0); + PG_RETURN_INT32(nnode); +} + +static QTNode * +join_tsqueries(TSQuery a, TSQuery b) +{ + QTNode *res = (QTNode *) palloc0(sizeof(QTNode)); + + res->flags |= QTN_NEEDFREE; + + res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); + res->valnode->type = OPR; + + res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); + res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b)); + res->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a)); + res->nchild = 2; + + return res; +} + +Datum +tsquery_and(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + QTNode *res; + TSQuery query; + + if (a->size == 0) + { + PG_FREE_IF_COPY(a, 1); + PG_RETURN_POINTER(b); + } + else if (b->size == 0) + { + PG_FREE_IF_COPY(b, 1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b); + + res->valnode->val = '&'; + + query = QTN2QT(res); + + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_TSQUERY(query); +} + +Datum +tsquery_or(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + QTNode *res; + TSQuery query; + + if (a->size == 0) + { + PG_FREE_IF_COPY(a, 1); + PG_RETURN_POINTER(b); + } + else if (b->size == 0) + { + PG_FREE_IF_COPY(b, 1); + PG_RETURN_POINTER(a); + } + + res = join_tsqueries(a, b); + + res->valnode->val = '|'; + + query = QTN2QT(res); + + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_POINTER(query); +} + +Datum +tsquery_not(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + QTNode *res; + TSQuery query; + + if (a->size == 0) + PG_RETURN_POINTER(a); + + res = (QTNode *) palloc0(sizeof(QTNode)); + + res->flags |= QTN_NEEDFREE; + + res->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); + res->valnode->type = OPR; + res->valnode->val = '!'; + + res->child = (QTNode **) palloc0(sizeof(QTNode *)); + res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a)); + res->nchild = 1; + + query = QTN2QT(res); + + QTNFree(res); + PG_FREE_IF_COPY(a, 0); + + PG_RETURN_POINTER(query); +} + +static int +CompareTSQ(TSQuery a, TSQuery b) +{ + if (a->size != b->size) + { + return (a->size < b->size) ? -1 : 1; + } + else if (VARSIZE(a) != VARSIZE(b)) + { + return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1; + } + else + { + QTNode *an = QT2QTN(GETQUERY(a), GETOPERAND(a)); + QTNode *bn = QT2QTN(GETQUERY(b), GETOPERAND(b)); + int res = QTNodeCompare(an, bn); + + QTNFree(an); + QTNFree(bn); + + return res; + } + + return 0; +} + +Datum +tsquery_cmp(PG_FUNCTION_ARGS) +{ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); + TSQuery b = PG_GETARG_TSQUERY_COPY(1); + int res = CompareTSQ(a, b); + + PG_FREE_IF_COPY(a, 0); + PG_FREE_IF_COPY(b, 1); + + PG_RETURN_INT32(res); +} + +#define CMPFUNC( NAME, CONDITION ) \ +Datum \ +NAME(PG_FUNCTION_ARGS) { \ + TSQuery a = PG_GETARG_TSQUERY_COPY(0); \ + TSQuery b = PG_GETARG_TSQUERY_COPY(1); \ + int res = CompareTSQ(a,b); \ + \ + PG_FREE_IF_COPY(a,0); \ + PG_FREE_IF_COPY(b,1); \ + \ + PG_RETURN_BOOL( CONDITION ); \ +} + +CMPFUNC(tsquery_lt, res < 0); +CMPFUNC(tsquery_le, res <= 0); +CMPFUNC(tsquery_eq, res == 0); +CMPFUNC(tsquery_ge, res >= 0); +CMPFUNC(tsquery_gt, res > 0); +CMPFUNC(tsquery_ne, res != 0); + +TSQuerySign +makeTSQuerySign(TSQuery a) +{ + int i; + QueryItem *ptr = GETQUERY(a); + TSQuerySign sign = 0; + + for (i = 0; i < a->size; i++) + { + if (ptr->type == VAL) + sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN); + ptr++; + } + + return sign; +} + +Datum +tsq_mcontains(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY(0); + TSQuery ex = PG_GETARG_TSQUERY(1); + TSQuerySign sq, + se; + int i, + j; + QueryItem *iq, + *ie; + + if (query->size < ex->size) + { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL(false); + } + + sq = makeTSQuerySign(query); + se = makeTSQuerySign(ex); + + if ((sq & se) != se) + { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL(false); + } + + ie = GETQUERY(ex); + + for (i = 0; i < ex->size; i++) + { + iq = GETQUERY(query); + if (ie[i].type != VAL) + continue; + for (j = 0; j < query->size; j++) + if (iq[j].type == VAL && ie[i].val == iq[j].val) + { + j = query->size + 1; + break; + } + if (j == query->size) + { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL(false); + } + } + + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL(true); +} + +Datum +tsq_mcontained(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM( + DirectFunctionCall2( + tsq_mcontains, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0) + ) + ); +} diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c new file mode 100644 index 00000000000..f0d22c644ae --- /dev/null +++ b/src/backend/utils/adt/tsquery_rewrite.c @@ -0,0 +1,524 @@ +/*------------------------------------------------------------------------- + * + * tsquery_rewrite.c + * Utilities for reconstructing tsquery + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "executor/spi.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" + + +static int +addone(int *counters, int last, int total) +{ + counters[last]++; + if (counters[last] >= total) + { + if (last == 0) + return 0; + if (addone(counters, last - 1, total - 1) == 0) + return 0; + counters[last] = counters[last - 1] + 1; + } + return 1; +} + +static QTNode * +findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) +{ + + if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val) + return node; + + if (node->flags & QTN_NOCHANGE) + return node; + + if (node->valnode->type == OPR) + { + if (node->nchild == ex->nchild) + { + if (QTNEq(node, ex)) + { + QTNFree(node); + if (subs) + { + node = QTNCopy(subs); + node->flags |= QTN_NOCHANGE; + } + else + node = NULL; + *isfind = true; + } + } + else if (node->nchild > ex->nchild) + { + int *counters = (int *) palloc(sizeof(int) * node->nchild); + int i; + QTNode *tnode = (QTNode *) palloc(sizeof(QTNode)); + + memset(tnode, 0, sizeof(QTNode)); + tnode->child = (QTNode **) palloc(sizeof(QTNode *) * ex->nchild); + tnode->nchild = ex->nchild; + tnode->valnode = (QueryItem *) palloc(sizeof(QueryItem)); + *(tnode->valnode) = *(ex->valnode); + + for (i = 0; i < ex->nchild; i++) + counters[i] = i; + + do + { + tnode->sign = 0; + for (i = 0; i < ex->nchild; i++) + { + tnode->child[i] = node->child[counters[i]]; + tnode->sign |= tnode->child[i]->sign; + } + + if (QTNEq(tnode, ex)) + { + int j = 0; + + pfree(tnode->valnode); + pfree(tnode->child); + pfree(tnode); + if (subs) + { + tnode = QTNCopy(subs); + tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE; + } + else + tnode = NULL; + + node->child[counters[0]] = tnode; + + for (i = 1; i < ex->nchild; i++) + node->child[counters[i]] = NULL; + for (i = 0; i < node->nchild; i++) + { + if (node->child[i]) + { + node->child[j] = node->child[i]; + j++; + } + } + + node->nchild = j; + + *isfind = true; + + break; + } + } while (addone(counters, ex->nchild - 1, node->nchild)); + if (tnode && (tnode->flags & QTN_NOCHANGE) == 0) + { + pfree(tnode->valnode); + pfree(tnode->child); + pfree(tnode); + } + else + QTNSort(node); + pfree(counters); + } + } + else if (QTNEq(node, ex)) + { + QTNFree(node); + if (subs) + { + node = QTNCopy(subs); + node->flags |= QTN_NOCHANGE; + } + else + { + node = NULL; + } + *isfind = true; + } + + return node; +} + +static QTNode * +dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) +{ + root = findeq(root, ex, subs, isfind); + + if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR) + { + int i; + + for (i = 0; i < root->nchild; i++) + root->child[i] = dofindsubquery(root->child[i], ex, subs, isfind); + } + + return root; +} + +static QTNode * +dropvoidsubtree(QTNode * root) +{ + + if (!root) + return NULL; + + if (root->valnode->type == OPR) + { + int i, + j = 0; + + for (i = 0; i < root->nchild; i++) + { + if (root->child[i]) + { + root->child[j] = root->child[i]; + j++; + } + } + + root->nchild = j; + + if (root->valnode->val == (int4) '!' && root->nchild == 0) + { + QTNFree(root); + root = NULL; + } + else if (root->nchild == 1) + { + QTNode *nroot = root->child[0]; + + pfree(root); + root = nroot; + } + } + + return root; +} + +static QTNode * +findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) +{ + bool DidFind = false; + + root = dofindsubquery(root, ex, subs, &DidFind); + + if (!subs && DidFind) + root = dropvoidsubtree(root); + + if (isfind) + *isfind = DidFind; + + return root; +} + +Datum +ts_rewrite_accum(PG_FUNCTION_ARGS) +{ + TSQuery acc; + ArrayType *qa; + TSQuery q; + QTNode *qex = NULL, + *subs = NULL, + *acctree = NULL; + bool isfind = false; + Datum *elemsp; + int nelemsp; + MemoryContext aggcontext; + MemoryContext oldcontext; + + aggcontext = ((AggState *) fcinfo->context)->aggcontext; + + if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) + { + acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ); + SET_VARSIZE(acc, HDRSIZETQ); + acc->size = 0; + } + else + acc = PG_GETARG_TSQUERY(0); + + if (PG_ARGISNULL(1) || PG_GETARG_POINTER(1) == NULL) + PG_RETURN_TSQUERY(acc); + else + qa = PG_GETARG_ARRAYTYPE_P_COPY(1); + + if (ARR_NDIM(qa) != 1) + elog(ERROR, "array must be one-dimensional, not %d dimensions", + ARR_NDIM(qa)); + if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3) + elog(ERROR, "array should have only three elements"); + if (ARR_ELEMTYPE(qa) != TSQUERYOID) + elog(ERROR, "array should contain tsquery type"); + + deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp); + + q = DatumGetTSQuery(elemsp[0]); + if (q->size == 0) + { + pfree(elemsp); + PG_RETURN_POINTER(acc); + } + + if (!acc->size) + { + if (VARSIZE(acc) > HDRSIZETQ) + { + pfree(elemsp); + PG_RETURN_POINTER(acc); + } + else + acctree = QT2QTN(GETQUERY(q), GETOPERAND(q)); + } + else + acctree = QT2QTN(GETQUERY(acc), GETOPERAND(acc)); + + QTNTernary(acctree); + QTNSort(acctree); + + q = DatumGetTSQuery(elemsp[1]); + if (q->size == 0) + { + pfree(elemsp); + PG_RETURN_POINTER(acc); + } + qex = QT2QTN(GETQUERY(q), GETOPERAND(q)); + QTNTernary(qex); + QTNSort(qex); + + q = DatumGetTSQuery(elemsp[2]); + if (q->size) + subs = QT2QTN(GETQUERY(q), GETOPERAND(q)); + + acctree = findsubquery(acctree, qex, subs, &isfind); + + if (isfind || !acc->size) + { + /* pfree( acc ); do not pfree(p), because nodeAgg.c will */ + if (acctree) + { + QTNBinary(acctree); + oldcontext = MemoryContextSwitchTo(aggcontext); + acc = QTN2QT(acctree); + MemoryContextSwitchTo(oldcontext); + } + else + { + acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ); + SET_VARSIZE(acc, HDRSIZETQ); + acc->size = 0; + } + } + + pfree(elemsp); + QTNFree(qex); + QTNFree(subs); + QTNFree(acctree); + + PG_RETURN_TSQUERY(acc); +} + +Datum +ts_rewrite_finish(PG_FUNCTION_ARGS) +{ + TSQuery acc = PG_GETARG_TSQUERY(0); + TSQuery rewrited; + + if (acc == NULL || PG_ARGISNULL(0) || acc->size == 0) + { + rewrited = (TSQuery) palloc(HDRSIZETQ); + SET_VARSIZE(rewrited, HDRSIZETQ); + rewrited->size = 0; + } + else + { + rewrited = (TSQuery) palloc(VARSIZE(acc)); + memcpy(rewrited, acc, VARSIZE(acc)); + pfree(acc); + } + + PG_RETURN_POINTER(rewrited); +} + +Datum +tsquery_rewrite(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY_COPY(0); + text *in = PG_GETARG_TEXT_P(1); + TSQuery rewrited = query; + MemoryContext outercontext = CurrentMemoryContext; + MemoryContext oldcontext; + QTNode *tree; + char *buf; + void *plan; + Portal portal; + bool isnull; + int i; + + if (query->size == 0) + { + PG_FREE_IF_COPY(in, 1); + PG_RETURN_POINTER(rewrited); + } + + tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); + QTNTernary(tree); + QTNSort(tree); + + buf = TextPGetCString(in); + + SPI_connect(); + + if ((plan = SPI_prepare(buf, 0, NULL)) == NULL) + elog(ERROR, "SPI_prepare(\"%s\") failed", buf); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL) + elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf); + + SPI_cursor_fetch(portal, true, 100); + + if (SPI_tuptable->tupdesc->natts != 2 || + SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID || + SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ts_rewrite query must return two tsquery columns"))); + + while (SPI_processed > 0 && tree) + { + for (i = 0; i < SPI_processed && tree; i++) + { + Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); + Datum sdata; + + if (isnull) + continue; + + sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull); + + if (!isnull) + { + TSQuery qtex = DatumGetTSQuery(qdata); + TSQuery qtsubs = DatumGetTSQuery(sdata); + QTNode *qex, + *qsubs = NULL; + + if (qtex->size == 0) + { + if (qtex != (TSQuery) DatumGetPointer(qdata)) + pfree(qtex); + if (qtsubs != (TSQuery) DatumGetPointer(sdata)) + pfree(qtsubs); + continue; + } + + qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex)); + + QTNTernary(qex); + QTNSort(qex); + + if (qtsubs->size) + qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs)); + + oldcontext = MemoryContextSwitchTo(outercontext); + tree = findsubquery(tree, qex, qsubs, NULL); + MemoryContextSwitchTo(oldcontext); + + QTNFree(qex); + if (qtex != (TSQuery) DatumGetPointer(qdata)) + pfree(qtex); + QTNFree(qsubs); + if (qtsubs != (TSQuery) DatumGetPointer(sdata)) + pfree(qtsubs); + } + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_fetch(portal, true, 100); + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_close(portal); + SPI_freeplan(plan); + SPI_finish(); + + if (tree) + { + QTNBinary(tree); + rewrited = QTN2QT(tree); + QTNFree(tree); + PG_FREE_IF_COPY(query, 0); + } + else + { + SET_VARSIZE(rewrited, HDRSIZETQ); + rewrited->size = 0; + } + + pfree(buf); + PG_FREE_IF_COPY(in, 1); + PG_RETURN_POINTER(rewrited); +} + +Datum +tsquery_rewrite_query(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY_COPY(0); + TSQuery ex = PG_GETARG_TSQUERY(1); + TSQuery subst = PG_GETARG_TSQUERY(2); + TSQuery rewrited = query; + QTNode *tree, + *qex, + *subs = NULL; + + if (query->size == 0 || ex->size == 0) + { + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); + } + + tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); + QTNTernary(tree); + QTNSort(tree); + + qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex)); + QTNTernary(qex); + QTNSort(qex); + + if (subst->size) + subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); + + tree = findsubquery(tree, qex, subs, NULL); + QTNFree(qex); + QTNFree(subs); + + if (!tree) + { + SET_VARSIZE(rewrited, HDRSIZETQ); + rewrited->size = 0; + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); + } + else + { + QTNBinary(tree); + rewrited = QTN2QT(tree); + QTNFree(tree); + } + + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); +} diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c new file mode 100644 index 00000000000..ae8cc318da9 --- /dev/null +++ b/src/backend/utils/adt/tsquery_util.c @@ -0,0 +1,317 @@ +/*------------------------------------------------------------------------- + * + * tsquery_util.c + * Utilities for tsquery datatype + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" + + +QTNode * +QT2QTN(QueryItem * in, char *operand) +{ + QTNode *node = (QTNode *) palloc0(sizeof(QTNode)); + + node->valnode = in; + + if (in->type == OPR) + { + node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); + node->child[0] = QT2QTN(in + 1, operand); + node->sign = node->child[0]->sign; + if (in->val == (int4) '!') + node->nchild = 1; + else + { + node->nchild = 2; + node->child[1] = QT2QTN(in + in->left, operand); + node->sign |= node->child[1]->sign; + } + } + else if (operand) + { + node->word = operand + in->distance; + node->sign = 1 << (in->val % 32); + } + + return node; +} + +void +QTNFree(QTNode * in) +{ + if (!in) + return; + + if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0) + pfree(in->word); + + if (in->child) + { + if (in->valnode) + { + if (in->valnode->type == OPR && in->nchild > 0) + { + int i; + + for (i = 0; i < in->nchild; i++) + QTNFree(in->child[i]); + } + if (in->flags & QTN_NEEDFREE) + pfree(in->valnode); + } + pfree(in->child); + } + + pfree(in); +} + +int +QTNodeCompare(QTNode * an, QTNode * bn) +{ + if (an->valnode->type != bn->valnode->type) + return (an->valnode->type > bn->valnode->type) ? -1 : 1; + else if (an->valnode->val != bn->valnode->val) + return (an->valnode->val > bn->valnode->val) ? -1 : 1; + else if (an->valnode->type == VAL) + { + if (an->valnode->length == bn->valnode->length) + return strncmp(an->word, bn->word, an->valnode->length); + else + return (an->valnode->length > bn->valnode->length) ? -1 : 1; + } + else if (an->nchild != bn->nchild) + { + return (an->nchild > bn->nchild) ? -1 : 1; + } + else + { + int i, + res; + + for (i = 0; i < an->nchild; i++) + if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0) + return res; + } + + return 0; +} + +static int +cmpQTN(const void *a, const void *b) +{ + return QTNodeCompare(*(QTNode **) a, *(QTNode **) b); +} + +void +QTNSort(QTNode * in) +{ + int i; + + if (in->valnode->type != OPR) + return; + + for (i = 0; i < in->nchild; i++) + QTNSort(in->child[i]); + if (in->nchild > 1) + qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN); +} + +bool +QTNEq(QTNode * a, QTNode * b) +{ + uint32 sign = a->sign & b->sign; + + if (!(sign == a->sign && sign == b->sign)) + return 0; + + return (QTNodeCompare(a, b) == 0) ? true : false; +} + +void +QTNTernary(QTNode * in) +{ + int i; + + if (in->valnode->type != OPR) + return; + + for (i = 0; i < in->nchild; i++) + QTNTernary(in->child[i]); + + for (i = 0; i < in->nchild; i++) + { + if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val) + { + QTNode *cc = in->child[i]; + int oldnchild = in->nchild; + + in->nchild += cc->nchild - 1; + in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *)); + + if (i + 1 != oldnchild) + memmove(in->child + i + cc->nchild, in->child + i + 1, + (oldnchild - i - 1) * sizeof(QTNode *)); + + memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *)); + i += cc->nchild - 1; + + pfree(cc); + } + } +} + +void +QTNBinary(QTNode * in) +{ + int i; + + if (in->valnode->type != OPR) + return; + + for (i = 0; i < in->nchild; i++) + QTNBinary(in->child[i]); + + if (in->nchild <= 2) + return; + + while (in->nchild > 2) + { + QTNode *nn = (QTNode *) palloc0(sizeof(QTNode)); + + nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem)); + nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2); + + nn->nchild = 2; + nn->flags = QTN_NEEDFREE; + + nn->child[0] = in->child[0]; + nn->child[1] = in->child[1]; + nn->sign = nn->child[0]->sign | nn->child[1]->sign; + + nn->valnode->type = in->valnode->type; + nn->valnode->val = in->valnode->val; + + in->child[0] = nn; + in->child[1] = in->child[in->nchild - 1]; + in->nchild--; + } +} + +static void +cntsize(QTNode * in, int4 *sumlen, int4 *nnode) +{ + *nnode += 1; + if (in->valnode->type == OPR) + { + int i; + + for (i = 0; i < in->nchild; i++) + cntsize(in->child[i], sumlen, nnode); + } + else + { + *sumlen += in->valnode->length + 1; + } +} + +typedef struct +{ + QueryItem *curitem; + char *operand; + char *curoperand; +} QTN2QTState; + +static void +fillQT(QTN2QTState * state, QTNode * in) +{ + *(state->curitem) = *(in->valnode); + + if (in->valnode->type == VAL) + { + memcpy(state->curoperand, in->word, in->valnode->length); + state->curitem->distance = state->curoperand - state->operand; + state->curoperand[in->valnode->length] = '\0'; + state->curoperand += in->valnode->length + 1; + state->curitem++; + } + else + { + QueryItem *curitem = state->curitem; + + Assert(in->nchild <= 2); + state->curitem++; + + fillQT(state, in->child[0]); + + if (in->nchild == 2) + { + curitem->left = state->curitem - curitem; + fillQT(state, in->child[1]); + } + } +} + +TSQuery +QTN2QT(QTNode *in) +{ + TSQuery out; + int len; + int sumlen = 0, + nnode = 0; + QTN2QTState state; + + cntsize(in, &sumlen, &nnode); + len = COMPUTESIZE(nnode, sumlen); + + out = (TSQuery) palloc(len); + SET_VARSIZE(out, len); + out->size = nnode; + + state.curitem = GETQUERY(out); + state.operand = state.curoperand = GETOPERAND(out); + + fillQT(&state, in); + return out; +} + +QTNode * +QTNCopy(QTNode *in) +{ + QTNode *out = (QTNode *) palloc(sizeof(QTNode)); + + *out = *in; + out->valnode = (QueryItem *) palloc(sizeof(QueryItem)); + *(out->valnode) = *(in->valnode); + out->flags |= QTN_NEEDFREE; + + if (in->valnode->type == VAL) + { + out->word = palloc(in->valnode->length + 1); + memcpy(out->word, in->word, in->valnode->length); + out->word[in->valnode->length] = '\0'; + out->flags |= QTN_WORDFREE; + } + else + { + int i; + + out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild); + + for (i = 0; i < in->nchild; i++) + out->child[i] = QTNCopy(in->child[i]); + } + + return out; +} diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c new file mode 100644 index 00000000000..8b2ab884c8c --- /dev/null +++ b/src/backend/utils/adt/tsrank.c @@ -0,0 +1,804 @@ +/*------------------------------------------------------------------------- + * + * tsrank.c + * rank tsvector by tsquery + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" +#include "utils/array.h" + + +static float weights[] = {0.1, 0.2, 0.4, 1.0}; + +#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] ) + +#define RANK_NO_NORM 0x00 +#define RANK_NORM_LOGLENGTH 0x01 +#define RANK_NORM_LENGTH 0x02 +#define RANK_NORM_EXTDIST 0x04 +#define RANK_NORM_UNIQ 0x08 +#define RANK_NORM_LOGUNIQ 0x10 +#define DEF_NORM_METHOD RANK_NO_NORM + +static float calc_rank_or(float *w, TSVector t, TSQuery q); +static float calc_rank_and(float *w, TSVector t, TSQuery q); + +/* + * Returns a weight of a word collocation + */ +static float4 +word_distance(int4 w) +{ + if (w > 100) + return 1e-30; + + return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2)); +} + +static int +cnt_length(TSVector t) +{ + WordEntry *ptr = ARRPTR(t), + *end = (WordEntry *) STRPTR(t); + int len = 0, + clen; + + while (ptr < end) + { + if ((clen = POSDATALEN(t, ptr)) == 0) + len += 1; + else + len += clen; + ptr++; + } + + return len; +} + +static int4 +WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item) +{ + if (ptr->len == item->length) + return strncmp( + eval + ptr->pos, + qval + item->distance, + item->length); + + return (ptr->len > item->length) ? 1 : -1; +} + +static WordEntry * +find_wordentry(TSVector t, TSQuery q, QueryItem * item) +{ + WordEntry *StopLow = ARRPTR(t); + WordEntry *StopHigh = (WordEntry *) STRPTR(t); + WordEntry *StopMiddle; + int difference; + + /* Loop invariant: StopLow <= item < StopHigh */ + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item); + if (difference == 0) + return StopMiddle; + else if (difference < 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return NULL; +} + + +static int +compareQueryItem(const void *a, const void *b, void *arg) +{ + char *operand = (char *) arg; + + if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length) + return strncmp(operand + (*(QueryItem **) a)->distance, + operand + (*(QueryItem **) b)->distance, + (*(QueryItem **) b)->length); + + return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1; +} + +static QueryItem ** +SortAndUniqItems(char *operand, QueryItem * item, int *size) +{ + QueryItem **res, + **ptr, + **prevptr; + + ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size); + + while ((*size)--) + { + if (item->type == VAL) + { + *ptr = item; + ptr++; + } + item++; + } + + *size = ptr - res; + if (*size < 2) + return res; + + qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand); + + ptr = res + 1; + prevptr = res; + + while (ptr - res < *size) + { + if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0) + { + prevptr++; + *prevptr = *ptr; + } + ptr++; + } + + *size = prevptr + 1 - res; + return res; +} + +static WordEntryPos POSNULL[] = { + 0, + 0 +}; + +static float +calc_rank_and(float *w, TSVector t, TSQuery q) +{ + uint16 **pos; + int i, + k, + l, + p; + WordEntry *entry; + WordEntryPos *post, + *ct; + int4 dimt, + lenct, + dist; + float res = -1.0; + QueryItem **item; + int size = q->size; + + item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); + if (size < 2) + { + pfree(item); + return calc_rank_or(w, t, q); + } + pos = (uint16 **) palloc(sizeof(uint16 *) * q->size); + memset(pos, 0, sizeof(uint16 *) * q->size); + *(uint16 *) POSNULL = lengthof(POSNULL) - 1; + WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1); + + for (i = 0; i < size; i++) + { + entry = find_wordentry(t, q, item[i]); + if (!entry) + continue; + + if (entry->haspos) + pos[i] = (uint16 *) _POSDATAPTR(t, entry); + else + pos[i] = (uint16 *) POSNULL; + + + dimt = *(uint16 *) (pos[i]); + post = (WordEntryPos *) (pos[i] + 1); + for (k = 0; k < i; k++) + { + if (!pos[k]) + continue; + lenct = *(uint16 *) (pos[k]); + ct = (WordEntryPos *) (pos[k] + 1); + for (l = 0; l < dimt; l++) + { + for (p = 0; p < lenct; p++) + { + dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); + if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL))) + { + float curw; + + if (!dist) + dist = MAXENTRYPOS; + curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); + res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); + } + } + } + } + } + pfree(pos); + pfree(item); + return res; +} + +static float +calc_rank_or(float *w, TSVector t, TSQuery q) +{ + WordEntry *entry; + WordEntryPos *post; + int4 dimt, + j, + i; + float res = 0.0; + QueryItem **item; + int size = q->size; + + *(uint16 *) POSNULL = lengthof(POSNULL) - 1; + item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size); + + for (i = 0; i < size; i++) + { + float resj, + wjm; + int4 jm; + + entry = find_wordentry(t, q, item[i]); + if (!entry) + continue; + + if (entry->haspos) + { + dimt = POSDATALEN(t, entry); + post = POSDATAPTR(t, entry); + } + else + { + dimt = *(uint16 *) POSNULL; + post = POSNULL + 1; + } + + resj = 0.0; + wjm = -1.0; + jm = 0; + for (j = 0; j < dimt; j++) + { + resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); + if (wpos(post[j]) > wjm) + { + wjm = wpos(post[j]); + jm = j; + } + } +/* + limit (sum(i/i^2),i->inf) = pi^2/6 + resj = sum(wi/i^2),i=1,noccurence, + wi - should be sorted desc, + don't sort for now, just choose maximum weight. This should be corrected + Oleg Bartunov +*/ + res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; + } + if (size > 0) + res = res / size; + pfree(item); + return res; +} + +static float +calc_rank(float *w, TSVector t, TSQuery q, int4 method) +{ + QueryItem *item = GETQUERY(q); + float res = 0.0; + int len; + + if (!t->size || !q->size) + return 0.0; + + res = (item->type != VAL && item->val == (int4) '&') ? + calc_rank_and(w, t, q) : calc_rank_or(w, t, q); + + if (res < 0) + res = 1e-20; + + if ((method & RANK_NORM_LOGLENGTH) && t->size > 0) + res /= log((double) (cnt_length(t) + 1)) / log(2.0); + + if (method & RANK_NORM_LENGTH) + { + len = cnt_length(t); + if (len > 0) + res /= (float) len; + } + + if ((method & RANK_NORM_UNIQ) && t->size > 0) + res /= (float) (t->size); + + if ((method & RANK_NORM_LOGUNIQ) && t->size > 0) + res /= log((double) (t->size + 1)) / log(2.0); + + return res; +} + +static float * +getWeights(ArrayType *win) +{ + static float ws[lengthof(weights)]; + int i; + float4 *arrdata; + + if (win == 0) + return weights; + + if (ARR_NDIM(win) != 1) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array of weight must be one-dimensional"))); + + if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights)) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("array of weight is too short"))); + + if (ARR_HASNULL(win)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array of weight must not contain nulls"))); + + arrdata = (float4 *) ARR_DATA_PTR(win); + for (i = 0; i < lengthof(weights); i++) + { + ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i]; + if (ws[i] > 1.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("weight out of range"))); + } + + return ws; +} + +Datum +ts_rank_wttf(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + int method = PG_GETARG_INT32(3); + float res; + + res = calc_rank(getWeights(win), txt, query, method); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_wtt(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY(2); + float res; + + res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_ttf(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + int method = PG_GETARG_INT32(2); + float res; + + res = calc_rank(getWeights(NULL), txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rank_tt(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + float res; + + res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +typedef struct +{ + QueryItem **item; + int16 nitem; + bool needfree; + uint8 wclass; + int32 pos; +} DocRepresentation; + +static int +compareDocR(const void *a, const void *b) +{ + if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos) + return 0; + return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1; +} + +static bool +checkcondition_QueryItem(void *checkval, QueryItem * val) +{ + return (bool) (val->istrue); +} + +static void +reset_istrue_flag(TSQuery query) +{ + QueryItem *item = GETQUERY(query); + int i; + + /* reset istrue flag */ + for (i = 0; i < query->size; i++) + { + if (item->type == VAL) + item->istrue = 0; + item++; + } +} + +typedef struct +{ + int pos; + int p; + int q; + DocRepresentation *begin; + DocRepresentation *end; +} Extention; + + +static bool +Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext) +{ + DocRepresentation *ptr; + int lastpos = ext->pos; + int i; + bool found = false; + + reset_istrue_flag(query); + + ext->p = 0x7fffffff; + ext->q = 0; + ptr = doc + ext->pos; + + /* find upper bound of cover from current position, move up */ + while (ptr - doc < len) + { + for (i = 0; i < ptr->nitem; i++) + ptr->item[i]->istrue = 1; + if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem)) + { + if (ptr->pos > ext->q) + { + ext->q = ptr->pos; + ext->end = ptr; + lastpos = ptr - doc; + found = true; + } + break; + } + ptr++; + } + + if (!found) + return false; + + reset_istrue_flag(query); + + ptr = doc + lastpos; + + /* find lower bound of cover from founded upper bound, move down */ + while (ptr >= doc + ext->pos) + { + for (i = 0; i < ptr->nitem; i++) + ptr->item[i]->istrue = 1; + if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem)) + { + if (ptr->pos < ext->p) + { + ext->begin = ptr; + ext->p = ptr->pos; + } + break; + } + ptr--; + } + + if (ext->p <= ext->q) + { + /* + * set position for next try to next lexeme after begining of founded + * cover + */ + ext->pos = (ptr - doc) + 1; + return true; + } + + ext->pos++; + return Cover(doc, len, query, ext); +} + +static DocRepresentation * +get_docrep(TSVector txt, TSQuery query, int *doclen) +{ + QueryItem *item = GETQUERY(query); + WordEntry *entry; + WordEntryPos *post; + int4 dimt, + j, + i; + int len = query->size * 4, + cur = 0; + DocRepresentation *doc; + char *operand; + + *(uint16 *) POSNULL = lengthof(POSNULL) - 1; + doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); + operand = GETOPERAND(query); + reset_istrue_flag(query); + + for (i = 0; i < query->size; i++) + { + if (item[i].type != VAL || item[i].istrue) + continue; + + entry = find_wordentry(txt, query, &(item[i])); + if (!entry) + continue; + + if (entry->haspos) + { + dimt = POSDATALEN(txt, entry); + post = POSDATAPTR(txt, entry); + } + else + { + dimt = *(uint16 *) POSNULL; + post = POSNULL + 1; + } + + while (cur + dimt >= len) + { + len *= 2; + doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); + } + + for (j = 0; j < dimt; j++) + { + if (j == 0) + { + QueryItem *kptr, + *iptr = item + i; + int k; + + doc[cur].needfree = false; + doc[cur].nitem = 0; + doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size); + + for (k = 0; k < query->size; k++) + { + kptr = item + k; + if (k == i || + (item[k].type == VAL && + compareQueryItem(&kptr, &iptr, operand) == 0)) + { + doc[cur].item[doc[cur].nitem] = item + k; + doc[cur].nitem++; + kptr->istrue = 1; + } + } + } + else + { + doc[cur].needfree = false; + doc[cur].nitem = doc[cur - 1].nitem; + doc[cur].item = doc[cur - 1].item; + } + doc[cur].pos = WEP_GETPOS(post[j]); + doc[cur].wclass = WEP_GETWEIGHT(post[j]); + cur++; + } + } + + *doclen = cur; + + if (cur > 0) + { + if (cur > 1) + qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); + return doc; + } + + pfree(doc); + return NULL; +} + +static float4 +calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method) +{ + DocRepresentation *doc; + int len, + i, + doclen = 0; + Extention ext; + double Wdoc = 0.0; + double invws[lengthof(weights)]; + double SumDist = 0.0, + PrevExtPos = 0.0, + CurExtPos = 0.0; + int NExtent = 0; + + for (i = 0; i < lengthof(weights); i++) + { + invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i])); + if (invws[i] > 1.0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("weight out of range"))); + invws[i] = 1.0 / invws[i]; + } + + doc = get_docrep(txt, query, &doclen); + if (!doc) + return 0.0; + + MemSet(&ext, 0, sizeof(Extention)); + while (Cover(doc, doclen, query, &ext)) + { + double Cpos = 0.0; + double InvSum = 0.0; + int nNoise; + DocRepresentation *ptr = ext.begin; + + while (ptr <= ext.end) + { + InvSum += invws[ptr->wclass]; + ptr++; + } + + Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum; + + /* + * if doc are big enough then ext.q may be equal to ext.p due to limit + * of posional information. In this case we approximate number of + * noise word as half cover's length + */ + nNoise = (ext.q - ext.p) - (ext.end - ext.begin); + if (nNoise < 0) + nNoise = (ext.end - ext.begin) / 2; + Wdoc += Cpos / ((double) (1 + nNoise)); + + CurExtPos = ((double) (ext.q + ext.p)) / 2.0; + if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent devision by + * zero in a case of + multiple lexize */ ) + SumDist += 1.0 / (CurExtPos - PrevExtPos); + + PrevExtPos = CurExtPos; + NExtent++; + } + + if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0) + Wdoc /= log((double) (cnt_length(txt) + 1)); + + if (method & RANK_NORM_LENGTH) + { + len = cnt_length(txt); + if (len > 0) + Wdoc /= (double) len; + } + + if ((method & RANK_NORM_EXTDIST) && SumDist > 0) + Wdoc /= ((double) NExtent) / SumDist; + + if ((method & RANK_NORM_UNIQ) && txt->size > 0) + Wdoc /= (double) (txt->size); + + if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) + Wdoc /= log((double) (txt->size + 1)) / log(2.0); + + for (i = 0; i < doclen; i++) + if (doc[i].needfree) + pfree(doc[i].item); + pfree(doc); + + return (float4) Wdoc; +} + +Datum +ts_rankcd_wttf(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY_COPY(2); + int method = PG_GETARG_INT32(3); + float res; + + res = calc_rank_cd(getWeights(win), txt, query, method); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_wtt(PG_FUNCTION_ARGS) +{ + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + TSVector txt = PG_GETARG_TSVECTOR(1); + TSQuery query = PG_GETARG_TSQUERY_COPY(2); + float res; + + res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_ttf(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY_COPY(1); + int method = PG_GETARG_INT32(2); + float res; + + res = calc_rank_cd(getWeights(NULL), txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + +Datum +ts_rankcd_tt(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY_COPY(1); + float res; + + res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c new file mode 100644 index 00000000000..04b6345e162 --- /dev/null +++ b/src/backend/utils/adt/tsvector.c @@ -0,0 +1,683 @@ +/*------------------------------------------------------------------------- + * + * tsvector.c + * I/O functions for tsvector + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "libpq/pqformat.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_locale.h" +#include "tsearch/ts_utils.h" +#include "utils/memutils.h" + + +static int +comparePos(const void *a, const void *b) +{ + if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b)) + return 0; + return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1; +} + +static int +uniquePos(WordEntryPos * a, int4 l) +{ + WordEntryPos *ptr, + *res; + + if (l == 1) + return l; + + res = a; + qsort((void *) a, l, sizeof(WordEntryPos), comparePos); + + ptr = a + 1; + while (ptr - a < l) + { + if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res)) + { + res++; + *res = *ptr; + if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1) + break; + } + else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res)) + WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr)); + ptr++; + } + + return res + 1 - a; +} + +static int +compareentry(const void *a, const void *b, void *arg) +{ + char *BufferStr = (char *) arg; + + if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len) + { + return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos], + &BufferStr[((WordEntryIN *) b)->entry.pos], + ((WordEntryIN *) a)->entry.len); + } + + return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1; +} + +static int +uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) +{ + WordEntryIN *ptr, + *res; + + res = a; + if (l == 1) + { + if (a->entry.haspos) + { + *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos)); + *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos); + } + return l; + } + + ptr = a + 1; + qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf); + + while (ptr - a < l) + { + if (!(ptr->entry.len == res->entry.len && + strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0)) + { + if (res->entry.haspos) + { + *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); + *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); + } + *outbuflen += SHORTALIGN(res->entry.len); + res++; + memcpy(res, ptr, sizeof(WordEntryIN)); + } + else if (ptr->entry.haspos) + { + if (res->entry.haspos) + { + int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos); + + res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos)); + memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]), + &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos)); + *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos); + pfree(ptr->pos); + } + else + { + res->entry.haspos = 1; + res->pos = ptr->pos; + } + } + ptr++; + } + if (res->entry.haspos) + { + *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); + *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); + } + *outbuflen += SHORTALIGN(res->entry.len); + + return res + 1 - a; +} + +static int +WordEntryCMP(WordEntry * a, WordEntry * b, char *buf) +{ + return compareentry(a, b, buf); +} + +#define WAITWORD 1 +#define WAITENDWORD 2 +#define WAITNEXTCHAR 3 +#define WAITENDCMPLX 4 +#define WAITPOSINFO 5 +#define INPOSINFO 6 +#define WAITPOSDELIM 7 +#define WAITCHARCMPLX 8 + +#define RESIZEPRSBUF \ +do { \ + if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \ + { \ + int4 clen = state->curpos - state->word; \ + state->len *= 2; \ + state->word = (char*)repalloc( (void*)state->word, state->len ); \ + state->curpos = state->word + clen; \ + } \ +} while (0) + +bool +gettoken_tsvector(TSVectorParseState *state) +{ + int4 oldstate = 0; + + state->curpos = state->word; + state->state = WAITWORD; + state->alen = 0; + + while (1) + { + if (state->state == WAITWORD) + { + if (*(state->prsbuf) == '\0') + return false; + else if (t_iseq(state->prsbuf, '\'')) + state->state = WAITENDCMPLX; + else if (t_iseq(state->prsbuf, '\\')) + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDWORD; + } + else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + else if (!t_isspace(state->prsbuf)) + { + COPYCHAR(state->curpos, state->prsbuf); + state->curpos += pg_mblen(state->prsbuf); + state->state = WAITENDWORD; + } + } + else if (state->state == WAITNEXTCHAR) + { + if (*(state->prsbuf) == '\0') + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("there is no escaped character"))); + else + { + RESIZEPRSBUF; + COPYCHAR(state->curpos, state->prsbuf); + state->curpos += pg_mblen(state->prsbuf); + state->state = oldstate; + } + } + else if (state->state == WAITENDWORD) + { + if (t_iseq(state->prsbuf, '\\')) + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDWORD; + } + else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || + (state->oprisdelim && ISOPERATOR(state->prsbuf))) + { + RESIZEPRSBUF; + if (state->curpos == state->word) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + *(state->curpos) = '\0'; + return true; + } + else if (t_iseq(state->prsbuf, ':')) + { + if (state->curpos == state->word) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + *(state->curpos) = '\0'; + if (state->oprisdelim) + return true; + else + state->state = INPOSINFO; + } + else + { + RESIZEPRSBUF; + COPYCHAR(state->curpos, state->prsbuf); + state->curpos += pg_mblen(state->prsbuf); + } + } + else if (state->state == WAITENDCMPLX) + { + if (t_iseq(state->prsbuf, '\'')) + { + state->state = WAITCHARCMPLX; + } + else if (t_iseq(state->prsbuf, '\\')) + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDCMPLX; + } + else if (*(state->prsbuf) == '\0') + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + else + { + RESIZEPRSBUF; + COPYCHAR(state->curpos, state->prsbuf); + state->curpos += pg_mblen(state->prsbuf); + } + } + else if (state->state == WAITCHARCMPLX) + { + if (t_iseq(state->prsbuf, '\'')) + { + RESIZEPRSBUF; + COPYCHAR(state->curpos, state->prsbuf); + state->curpos += pg_mblen(state->prsbuf); + state->state = WAITENDCMPLX; + } + else + { + RESIZEPRSBUF; + *(state->curpos) = '\0'; + if (state->curpos == state->word) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + if (state->oprisdelim) + { + /* state->prsbuf+=pg_mblen(state->prsbuf); */ + return true; + } + else + state->state = WAITPOSINFO; + continue; /* recheck current character */ + } + } + else if (state->state == WAITPOSINFO) + { + if (t_iseq(state->prsbuf, ':')) + state->state = INPOSINFO; + else + return true; + } + else if (state->state == INPOSINFO) + { + if (t_isdigit(state->prsbuf)) + { + if (state->alen == 0) + { + state->alen = 4; + state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen); + *(uint16 *) (state->pos) = 0; + } + else if (*(uint16 *) (state->pos) + 1 >= state->alen) + { + state->alen *= 2; + state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen); + } + (*(uint16 *) (state->pos))++; + WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf))); + if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("wrong position info in tsvector"))); + WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0); + state->state = WAITPOSDELIM; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + } + else if (state->state == WAITPOSDELIM) + { + if (t_iseq(state->prsbuf, ',')) + state->state = INPOSINFO; + else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) + { + if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3); + } + else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) + { + if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2); + } + else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) + { + if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1); + } + else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) + { + if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0); + } + else if (t_isspace(state->prsbuf) || + *(state->prsbuf) == '\0') + return true; + else if (!t_isdigit(state->prsbuf)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsvector"))); + } + else /* internal error */ + elog(ERROR, "internal error in gettoken_tsvector"); + + /* get next char */ + state->prsbuf += pg_mblen(state->prsbuf); + } + + return false; +} + +Datum +tsvectorin(PG_FUNCTION_ARGS) +{ + char *buf = PG_GETARG_CSTRING(0); + TSVectorParseState state; + WordEntryIN *arr; + WordEntry *inarr; + int4 len = 0, + totallen = 64; + TSVector in; + char *tmpbuf, + *cur; + int4 i, + buflen = 256; + + pg_verifymbstr(buf, strlen(buf), false); + state.prsbuf = buf; + state.len = 32; + state.word = (char *) palloc(state.len); + state.oprisdelim = false; + + arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); + cur = tmpbuf = (char *) palloc(buflen); + + while (gettoken_tsvector(&state)) + { + /* + * Realloc buffers if it's needed + */ + if (len >= totallen) + { + totallen *= 2; + arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); + } + + while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) + { + int4 dist = cur - tmpbuf; + + buflen *= 2; + tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); + cur = tmpbuf + dist; + } + + if (state.curpos - state.word >= MAXSTRLEN) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("word is too long (%d bytes, max %d bytes)", + state.curpos - state.word, MAXSTRLEN))); + + arr[len].entry.len = state.curpos - state.word; + if (cur - tmpbuf > MAXSTRPOS) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("position value too large"))); + arr[len].entry.pos = cur - tmpbuf; + memcpy((void *) cur, (void *) state.word, arr[len].entry.len); + cur += arr[len].entry.len; + + if (state.alen) + { + arr[len].entry.haspos = 1; + arr[len].pos = state.pos; + } + else + arr[len].entry.haspos = 0; + len++; + } + pfree(state.word); + + if (len > 0) + len = uniqueentry(arr, len, tmpbuf, &buflen); + else + buflen = 0; + totallen = CALCDATASIZE(len, buflen); + in = (TSVector) palloc0(totallen); + + SET_VARSIZE(in, totallen); + in->size = len; + cur = STRPTR(in); + inarr = ARRPTR(in); + for (i = 0; i < len; i++) + { + memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); + arr[i].entry.pos = cur - STRPTR(in); + cur += SHORTALIGN(arr[i].entry.len); + if (arr[i].entry.haspos) + { + memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos)); + cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos); + pfree(arr[i].pos); + } + inarr[i] = arr[i].entry; + } + + PG_RETURN_TSVECTOR(in); +} + +Datum +tsvectorout(PG_FUNCTION_ARGS) +{ + TSVector out = PG_GETARG_TSVECTOR(0); + char *outbuf; + int4 i, + lenbuf = 0, + pp; + WordEntry *ptr = ARRPTR(out); + char *curbegin, + *curin, + *curout; + + lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ; + for (i = 0; i < out->size; i++) + { + lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ; + if (ptr[i].haspos) + lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i])); + } + + curout = outbuf = (char *) palloc(lenbuf); + for (i = 0; i < out->size; i++) + { + curbegin = curin = STRPTR(out) + ptr->pos; + if (i != 0) + *curout++ = ' '; + *curout++ = '\''; + while (curin - curbegin < ptr->len) + { + int len = pg_mblen(curin); + + if (t_iseq(curin, '\'')) + *curout++ = '\''; + + while (len--) + *curout++ = *curin++; + } + + *curout++ = '\''; + if ((pp = POSDATALEN(out, ptr)) != 0) + { + WordEntryPos *wptr; + + *curout++ = ':'; + wptr = POSDATAPTR(out, ptr); + while (pp) + { + curout += sprintf(curout, "%d", WEP_GETPOS(*wptr)); + switch (WEP_GETWEIGHT(*wptr)) + { + case 3: + *curout++ = 'A'; + break; + case 2: + *curout++ = 'B'; + break; + case 1: + *curout++ = 'C'; + break; + case 0: + default: + break; + } + + if (pp > 1) + *curout++ = ','; + pp--; + wptr++; + } + } + ptr++; + } + + *curout = '\0'; + PG_FREE_IF_COPY(out, 0); + PG_RETURN_CSTRING(outbuf); +} + +Datum +tsvectorsend(PG_FUNCTION_ARGS) +{ + TSVector vec = PG_GETARG_TSVECTOR(0); + StringInfoData buf; + int i, + j; + WordEntry *weptr = ARRPTR(vec); + + pq_begintypsend(&buf); + + pq_sendint(&buf, vec->size, sizeof(int32)); + for (i = 0; i < vec->size; i++) + { + /* + * We are sure that sizeof(WordEntry) == sizeof(int32) + */ + pq_sendint(&buf, *(int32 *) weptr, sizeof(int32)); + + pq_sendbytes(&buf, STRPTR(vec) + weptr->pos, weptr->len); + if (weptr->haspos) + { + WordEntryPos *wepptr = POSDATAPTR(vec, weptr); + + pq_sendint(&buf, POSDATALEN(vec, weptr), sizeof(WordEntryPos)); + for (j = 0; j < POSDATALEN(vec, weptr); j++) + pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos)); + } + weptr++; + } + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + +Datum +tsvectorrecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + TSVector vec; + int i, + size, + len = DATAHDRSIZE; + WordEntry *weptr; + int datalen = 0; + + size = pq_getmsgint(buf, sizeof(uint32)); + if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry))) + elog(ERROR, "invalid size of tsvector"); + + len += sizeof(WordEntry) * size; + + len *= 2; + vec = (TSVector) palloc0(len); + vec->size = size; + + weptr = ARRPTR(vec); + for (i = 0; i < size; i++) + { + int tmp; + + weptr = ARRPTR(vec) + i; + + /* + * We are sure that sizeof(WordEntry) == sizeof(int32) + */ + tmp = pq_getmsgint(buf, sizeof(int32)); + *weptr = *(WordEntry *) & tmp; + + while (CALCDATASIZE(size, datalen + SHORTALIGN(weptr->len)) >= len) + { + len *= 2; + vec = (TSVector) repalloc(vec, len); + weptr = ARRPTR(vec) + i; + } + + memcpy(STRPTR(vec) + weptr->pos, + pq_getmsgbytes(buf, weptr->len), + weptr->len); + datalen += SHORTALIGN(weptr->len); + + if (i > 0 && WordEntryCMP(weptr, weptr - 1, STRPTR(vec)) <= 0) + elog(ERROR, "lexemes are unordered"); + + if (weptr->haspos) + { + uint16 j, + npos; + WordEntryPos *wepptr; + + npos = (uint16) pq_getmsgint(buf, sizeof(int16)); + if (npos > MAXNUMPOS) + elog(ERROR, "unexpected number of positions"); + + while (CALCDATASIZE(size, datalen + (npos + 1) * sizeof(WordEntryPos)) >= len) + { + len *= 2; + vec = (TSVector) repalloc(vec, len); + weptr = ARRPTR(vec) + i; + } + + memcpy(_POSDATAPTR(vec, weptr), &npos, sizeof(int16)); + wepptr = POSDATAPTR(vec, weptr); + for (j = 0; j < npos; j++) + { + wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(int16)); + if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1])) + elog(ERROR, "position information is unordered"); + } + + datalen += (npos + 1) * sizeof(WordEntry); + } + } + + SET_VARSIZE(vec, CALCDATASIZE(vec->size, datalen)); + + PG_RETURN_TSVECTOR(vec); +} diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c new file mode 100644 index 00000000000..341247bec75 --- /dev/null +++ b/src/backend/utils/adt/tsvector_op.c @@ -0,0 +1,1334 @@ +/*------------------------------------------------------------------------- + * + * tsvector_op.c + * operations over tsvector + * + * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/namespace.h" +#include "commands/trigger.h" +#include "executor/spi.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "tsearch/ts_type.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" + + +typedef struct +{ + WordEntry *arrb; + WordEntry *arre; + char *values; + char *operand; +} CHKVAL; + +typedef struct +{ + uint32 cur; + TSVector stat; +} StatStorage; + +typedef struct +{ + uint32 len; + uint32 pos; + uint32 ndoc; + uint32 nentry; +} StatEntry; + +typedef struct +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + int4 size; + int4 weight; + char data[1]; +} tsstat; + +#define STATHDRSIZE (sizeof(int4) * 4) +#define CALCSTATSIZE(x, lenstr) ( (x) * sizeof(StatEntry) + STATHDRSIZE + (lenstr) ) +#define STATPTR(x) ( (StatEntry*) ( (char*)(x) + STATHDRSIZE ) ) +#define STATSTRPTR(x) ( (char*)(x) + STATHDRSIZE + ( sizeof(StatEntry) * ((TSVector)(x))->size ) ) +#define STATSTRSIZE(x) ( VARSIZE((TSVector)(x)) - STATHDRSIZE - ( sizeof(StatEntry) * ((TSVector)(x))->size ) ) + + +static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column); + + +static int +silly_cmp_tsvector(const TSVector a, const TSVector b) +{ + if (VARSIZE(a) < VARSIZE(b)) + return -1; + else if (VARSIZE(a) > VARSIZE(b)) + return 1; + else if (a->size < b->size) + return -1; + else if (a->size > b->size) + return 1; + else + { + WordEntry *aptr = ARRPTR(a); + WordEntry *bptr = ARRPTR(b); + int i = 0; + int res; + + + for (i = 0; i < a->size; i++) + { + if (aptr->haspos != bptr->haspos) + { + return (aptr->haspos > bptr->haspos) ? -1 : 1; + } + else if (aptr->len != bptr->len) + { + return (aptr->len > bptr->len) ? -1 : 1; + } + else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0) + { + return res; + } + else if (aptr->haspos) + { + WordEntryPos *ap = POSDATAPTR(a, aptr); + WordEntryPos *bp = POSDATAPTR(b, bptr); + int j; + + if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr)) + return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1; + + for (j = 0; j < POSDATALEN(a, aptr); j++) + { + if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp)) + { + return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1; + } + else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp)) + { + return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1; + } + ap++, bp++; + } + } + + aptr++; + bptr++; + } + } + + return 0; +} + +#define TSVECTORCMPFUNC( type, action, ret ) \ +Datum \ +tsvector_##type(PG_FUNCTION_ARGS) \ +{ \ + TSVector a = PG_GETARG_TSVECTOR(0); \ + TSVector b = PG_GETARG_TSVECTOR(1); \ + int res = silly_cmp_tsvector(a, b); \ + PG_FREE_IF_COPY(a,0); \ + PG_FREE_IF_COPY(b,1); \ + PG_RETURN_##ret( res action 0 ); \ +} + +TSVECTORCMPFUNC(lt, <, BOOL); +TSVECTORCMPFUNC(le, <=, BOOL); +TSVECTORCMPFUNC(eq, ==, BOOL); +TSVECTORCMPFUNC(ge, >=, BOOL); +TSVECTORCMPFUNC(gt, >, BOOL); +TSVECTORCMPFUNC(ne, !=, BOOL); +TSVECTORCMPFUNC(cmp, +, INT32); + +Datum +tsvector_strip(PG_FUNCTION_ARGS) +{ + TSVector in = PG_GETARG_TSVECTOR(0); + TSVector out; + int i, + len = 0; + WordEntry *arrin = ARRPTR(in), + *arrout; + char *cur; + + for (i = 0; i < in->size; i++) + len += SHORTALIGN(arrin[i].len); + + len = CALCDATASIZE(in->size, len); + out = (TSVector) palloc0(len); + SET_VARSIZE(out, len); + out->size = in->size; + arrout = ARRPTR(out); + cur = STRPTR(out); + for (i = 0; i < in->size; i++) + { + memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len); + arrout[i].haspos = 0; + arrout[i].len = arrin[i].len; + arrout[i].pos = cur - STRPTR(out); + cur += SHORTALIGN(arrout[i].len); + } + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_POINTER(out); +} + +Datum +tsvector_length(PG_FUNCTION_ARGS) +{ + TSVector in = PG_GETARG_TSVECTOR(0); + int4 ret = in->size; + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_INT32(ret); +} + +Datum +tsvector_setweight(PG_FUNCTION_ARGS) +{ + TSVector in = PG_GETARG_TSVECTOR(0); + char cw = PG_GETARG_CHAR(1); + TSVector out; + int i, + j; + WordEntry *entry; + WordEntryPos *p; + int w = 0; + + switch (cw) + { + case 'A': + case 'a': + w = 3; + break; + case 'B': + case 'b': + w = 2; + break; + case 'C': + case 'c': + w = 1; + break; + case 'D': + case 'd': + w = 0; + break; + /* internal error */ + default: + elog(ERROR, "unrecognized weight"); + } + + out = (TSVector) palloc(VARSIZE(in)); + memcpy(out, in, VARSIZE(in)); + entry = ARRPTR(out); + i = out->size; + while (i--) + { + if ((j = POSDATALEN(out, entry)) != 0) + { + p = POSDATAPTR(out, entry); + while (j--) + { + WEP_SETWEIGHT(*p, w); + p++; + } + } + entry++; + } + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_POINTER(out); +} + +static int +compareEntry(char *ptra, WordEntry * a, char *ptrb, WordEntry * b) +{ + if (a->len == b->len) + { + return strncmp( + ptra + a->pos, + ptrb + b->pos, + a->len); + } + return (a->len > b->len) ? 1 : -1; +} + +static int4 +add_pos(TSVector src, WordEntry * srcptr, TSVector dest, WordEntry * destptr, int4 maxpos) +{ + uint16 *clen = (uint16 *) _POSDATAPTR(dest, destptr); + int i; + uint16 slen = POSDATALEN(src, srcptr), + startlen; + WordEntryPos *spos = POSDATAPTR(src, srcptr), + *dpos = POSDATAPTR(dest, destptr); + + if (!destptr->haspos) + *clen = 0; + + startlen = *clen; + for (i = 0; i < slen && *clen < MAXNUMPOS && (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1); i++) + { + WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i])); + WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos)); + (*clen)++; + } + + if (*clen != startlen) + destptr->haspos = 1; + return *clen - startlen; +} + + +Datum +tsvector_concat(PG_FUNCTION_ARGS) +{ + TSVector in1 = PG_GETARG_TSVECTOR(0); + TSVector in2 = PG_GETARG_TSVECTOR(1); + TSVector out; + WordEntry *ptr; + WordEntry *ptr1, + *ptr2; + WordEntryPos *p; + int maxpos = 0, + i, + j, + i1, + i2; + char *cur; + char *data, + *data1, + *data2; + + ptr = ARRPTR(in1); + i = in1->size; + while (i--) + { + if ((j = POSDATALEN(in1, ptr)) != 0) + { + p = POSDATAPTR(in1, ptr); + while (j--) + { + if (WEP_GETPOS(*p) > maxpos) + maxpos = WEP_GETPOS(*p); + p++; + } + } + ptr++; + } + + ptr1 = ARRPTR(in1); + ptr2 = ARRPTR(in2); + data1 = STRPTR(in1); + data2 = STRPTR(in2); + i1 = in1->size; + i2 = in2->size; + out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2)); + SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2)); + out->size = in1->size + in2->size; + data = cur = STRPTR(out); + ptr = ARRPTR(out); + while (i1 && i2) + { + int cmp = compareEntry(data1, ptr1, data2, ptr2); + + if (cmp < 0) + { /* in1 first */ + ptr->haspos = ptr1->haspos; + ptr->len = ptr1->len; + memcpy(cur, data1 + ptr1->pos, ptr1->len); + ptr->pos = cur - data; + cur += SHORTALIGN(ptr1->len); + if (ptr->haspos) + { + memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; + ptr1++; + i1--; + } + else if (cmp > 0) + { /* in2 first */ + ptr->haspos = ptr2->haspos; + ptr->len = ptr2->len; + memcpy(cur, data2 + ptr2->pos, ptr2->len); + ptr->pos = cur - data; + cur += SHORTALIGN(ptr2->len); + if (ptr->haspos) + { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos); + + if (addlen == 0) + ptr->haspos = 0; + else + cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; + ptr2++; + i2--; + } + else + { + ptr->haspos = ptr1->haspos | ptr2->haspos; + ptr->len = ptr1->len; + memcpy(cur, data1 + ptr1->pos, ptr1->len); + ptr->pos = cur - data; + cur += SHORTALIGN(ptr1->len); + if (ptr->haspos) + { + if (ptr1->haspos) + { + memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); + if (ptr2->haspos) + cur += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos); + } + else if (ptr2->haspos) + { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos); + + if (addlen == 0) + ptr->haspos = 0; + else + cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); + } + } + ptr++; + ptr1++; + ptr2++; + i1--; + i2--; + } + } + + while (i1) + { + ptr->haspos = ptr1->haspos; + ptr->len = ptr1->len; + memcpy(cur, data1 + ptr1->pos, ptr1->len); + ptr->pos = cur - data; + cur += SHORTALIGN(ptr1->len); + if (ptr->haspos) + { + memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16)); + cur += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; + ptr1++; + i1--; + } + + while (i2) + { + ptr->haspos = ptr2->haspos; + ptr->len = ptr2->len; + memcpy(cur, data2 + ptr2->pos, ptr2->len); + ptr->pos = cur - data; + cur += SHORTALIGN(ptr2->len); + if (ptr->haspos) + { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos); + + if (addlen == 0) + ptr->haspos = 0; + else + cur += addlen * sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; + ptr2++; + i2--; + } + + out->size = ptr - ARRPTR(out); + SET_VARSIZE(out, CALCDATASIZE(out->size, cur - data)); + if (data != STRPTR(out)) + memmove(STRPTR(out), data, cur - data); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_POINTER(out); +} + +/* + * compare 2 string values + */ +static int4 +ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item) +{ + if (ptr->len == item->length) + return strncmp( + &(chkval->values[ptr->pos]), + &(chkval->operand[item->distance]), + item->length); + + return (ptr->len > item->length) ? 1 : -1; +} + +/* + * check weight info + */ +static bool +checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item) +{ + WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16)); + uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len))); + + while (len--) + { + if (item->weight & (1 << WEP_GETWEIGHT(*ptr))) + return true; + ptr++; + } + return false; +} + +/* + * is there value 'val' in array or not ? + */ +static bool +checkcondition_str(void *checkval, QueryItem * val) +{ + WordEntry *StopLow = ((CHKVAL *) checkval)->arrb; + WordEntry *StopHigh = ((CHKVAL *) checkval)->arre; + WordEntry *StopMiddle; + int difference; + + /* Loop invariant: StopLow <= val < StopHigh */ + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = ValCompare((CHKVAL *) checkval, StopMiddle, val); + if (difference == 0) + return (val->weight && StopMiddle->haspos) ? + checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true; + else if (difference < 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return (false); +} + +/* + * check for boolean condition + */ +bool +TS_execute(QueryItem * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, QueryItem * val)) +{ + if (curitem->type == VAL) + return chkcond(checkval, curitem); + else if (curitem->val == (int4) '!') + { + return (calcnot) ? + ((TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true) + : true; + } + else if (curitem->val == (int4) '&') + { + if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) + return TS_execute(curitem + 1, checkval, calcnot, chkcond); + else + return false; + } + else + { /* |-operator */ + if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) + return true; + else + return TS_execute(curitem + 1, checkval, calcnot, chkcond); + } + return false; +} + +/* + * boolean operations + */ +Datum +ts_match_qv(PG_FUNCTION_ARGS) +{ + PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0))); +} + +Datum +ts_match_vq(PG_FUNCTION_ARGS) +{ + TSVector val = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + CHKVAL chkval; + bool result; + + if (!val->size || !query->size) + { + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(false); + } + + chkval.arrb = ARRPTR(val); + chkval.arre = chkval.arrb + val->size; + chkval.values = STRPTR(val); + chkval.operand = GETOPERAND(query); + result = TS_execute( + GETQUERY(query), + &chkval, + true, + checkcondition_str + ); + + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(result); +} + +Datum +ts_match_tt(PG_FUNCTION_ARGS) +{ + TSVector vector; + TSQuery query; + bool res; + + vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector, + PG_GETARG_DATUM(0))); + query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery, + PG_GETARG_DATUM(1))); + + res = DatumGetBool(DirectFunctionCall2(ts_match_vq, + TSVectorGetDatum(vector), + TSQueryGetDatum(query))); + + pfree(vector); + pfree(query); + + PG_RETURN_BOOL(res); +} + +Datum +ts_match_tq(PG_FUNCTION_ARGS) +{ + TSVector vector; + TSQuery query = PG_GETARG_TSQUERY(1); + bool res; + + vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector, + PG_GETARG_DATUM(0))); + + res = DatumGetBool(DirectFunctionCall2(ts_match_vq, + TSVectorGetDatum(vector), + TSQueryGetDatum(query))); + + pfree(vector); + PG_FREE_IF_COPY(query, 1); + + PG_RETURN_BOOL(res); +} + +/* + * Statistics of tsvector + */ +static int +check_weight(TSVector txt, WordEntry * wptr, int8 weight) +{ + int len = POSDATALEN(txt, wptr); + int num = 0; + WordEntryPos *ptr = POSDATAPTR(txt, wptr); + + while (len--) + { + if (weight & (1 << WEP_GETWEIGHT(*ptr))) + num++; + ptr++; + } + return num; +} + +static WordEntry ** +SEI_realloc(WordEntry ** in, uint32 *len) +{ + if (*len == 0 || in == NULL) + { + *len = 8; + in = palloc(sizeof(WordEntry *) * (*len)); + } + else + { + *len *= 2; + in = repalloc(in, sizeof(WordEntry *) * (*len)); + } + return in; +} + +static int +compareStatWord(StatEntry * a, WordEntry * b, tsstat * stat, TSVector txt) +{ + if (a->len == b->len) + return strncmp( + STATSTRPTR(stat) + a->pos, + STRPTR(txt) + b->pos, + a->len + ); + return (a->len > b->len) ? 1 : -1; +} + +static tsstat * +formstat(tsstat * stat, TSVector txt, WordEntry ** entry, uint32 len) +{ + tsstat *newstat; + uint32 totallen, + nentry; + uint32 slen = 0; + WordEntry **ptr = entry; + char *curptr; + StatEntry *sptr, + *nptr; + + while (ptr - entry < len) + { + slen += (*ptr)->len; + ptr++; + } + + nentry = stat->size + len; + slen += STATSTRSIZE(stat); + totallen = CALCSTATSIZE(nentry, slen); + newstat = palloc(totallen); + SET_VARSIZE(newstat, totallen); + newstat->weight = stat->weight; + newstat->size = nentry; + + memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat)); + curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat); + + ptr = entry; + sptr = STATPTR(stat); + nptr = STATPTR(newstat); + + if (len == 1) + { + StatEntry *StopLow = STATPTR(stat); + StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat); + + while (StopLow < StopHigh) + { + sptr = StopLow + (StopHigh - StopLow) / 2; + if (compareStatWord(sptr, *ptr, stat, txt) < 0) + StopLow = sptr + 1; + else + StopHigh = sptr; + } + nptr = STATPTR(newstat) + (StopLow - STATPTR(stat)); + memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat))); + if ((*ptr)->haspos) + nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); + else + nptr->nentry = 1; + nptr->ndoc = 1; + nptr->len = (*ptr)->len; + memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); + nptr->pos = curptr - STATSTRPTR(newstat); + memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow)); + } + else + { + while (sptr - STATPTR(stat) < stat->size && ptr - entry < len) + { + if (compareStatWord(sptr, *ptr, stat, txt) < 0) + { + memcpy(nptr, sptr, sizeof(StatEntry)); + sptr++; + } + else + { + if ((*ptr)->haspos) + nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); + else + nptr->nentry = 1; + nptr->ndoc = 1; + nptr->len = (*ptr)->len; + memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); + nptr->pos = curptr - STATSTRPTR(newstat); + curptr += nptr->len; + ptr++; + } + nptr++; + } + + memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat)))); + + while (ptr - entry < len) + { + if ((*ptr)->haspos) + nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); + else + nptr->nentry = 1; + nptr->ndoc = 1; + nptr->len = (*ptr)->len; + memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); + nptr->pos = curptr - STATSTRPTR(newstat); + curptr += nptr->len; + ptr++; + nptr++; + } + } + + return newstat; +} + +static tsstat * +ts_accum(tsstat * stat, Datum data) +{ + tsstat *newstat; + TSVector txt = DatumGetTSVector(data); + WordEntry **newentry = NULL; + uint32 len = 0, + cur = 0; + StatEntry *sptr; + WordEntry *wptr; + int n = 0; + + if (stat == NULL) + { /* Init in first */ + stat = palloc(STATHDRSIZE); + SET_VARSIZE(stat, STATHDRSIZE); + stat->size = 0; + stat->weight = 0; + } + + /* simple check of correctness */ + if (txt == NULL || txt->size == 0) + { + if (txt != (TSVector) DatumGetPointer(data)) + pfree(txt); + return stat; + } + + sptr = STATPTR(stat); + wptr = ARRPTR(txt); + + if (stat->size < 100 * txt->size) + { /* merge */ + while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size) + { + int cmp = compareStatWord(sptr, wptr, stat, txt); + + if (cmp < 0) + sptr++; + else if (cmp == 0) + { + if (stat->weight == 0) + { + sptr->ndoc++; + sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1; + } + else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0) + { + sptr->ndoc++; + sptr->nentry += n; + } + sptr++; + wptr++; + } + else + { + if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) + { + if (cur == len) + newentry = SEI_realloc(newentry, &len); + newentry[cur] = wptr; + cur++; + } + wptr++; + } + } + + while (wptr - ARRPTR(txt) < txt->size) + { + if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) + { + if (cur == len) + newentry = SEI_realloc(newentry, &len); + newentry[cur] = wptr; + cur++; + } + wptr++; + } + } + else + { /* search */ + while (wptr - ARRPTR(txt) < txt->size) + { + StatEntry *StopLow = STATPTR(stat); + StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat); + int cmp; + + while (StopLow < StopHigh) + { + sptr = StopLow + (StopHigh - StopLow) / 2; + cmp = compareStatWord(sptr, wptr, stat, txt); + if (cmp == 0) + { + if (stat->weight == 0) + { + sptr->ndoc++; + sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1; + } + else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0) + { + sptr->ndoc++; + sptr->nentry += n; + } + break; + } + else if (cmp < 0) + StopLow = sptr + 1; + else + StopHigh = sptr; + } + + if (StopLow >= StopHigh) + { /* not found */ + if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) + { + if (cur == len) + newentry = SEI_realloc(newentry, &len); + newentry[cur] = wptr; + cur++; + } + } + wptr++; + } + } + + + if (cur == 0) + { /* no new words */ + if (txt != (TSVector) DatumGetPointer(data)) + pfree(txt); + return stat; + } + + newstat = formstat(stat, txt, newentry, cur); + pfree(newentry); + + if (txt != (TSVector) DatumGetPointer(data)) + pfree(txt); + return newstat; +} + +static void +ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, + tsstat * stat) +{ + TupleDesc tupdesc; + MemoryContext oldcontext; + StatStorage *st; + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + st = palloc(sizeof(StatStorage)); + st->cur = 0; + st->stat = palloc(VARSIZE(stat)); + memcpy(st->stat, stat, VARSIZE(stat)); + funcctx->user_fctx = (void *) st; + + tupdesc = CreateTemplateTupleDesc(3, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry", + INT4OID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + + MemoryContextSwitchTo(oldcontext); +} + + +static Datum +ts_process_call(FuncCallContext *funcctx) +{ + StatStorage *st; + + st = (StatStorage *) funcctx->user_fctx; + + if (st->cur < st->stat->size) + { + Datum result; + char *values[3]; + char ndoc[16]; + char nentry[16]; + StatEntry *entry = STATPTR(st->stat) + st->cur; + HeapTuple tuple; + + values[0] = palloc(entry->len + 1); + memcpy(values[0], STATSTRPTR(st->stat) + entry->pos, entry->len); + (values[0])[entry->len] = '\0'; + sprintf(ndoc, "%d", entry->ndoc); + values[1] = ndoc; + sprintf(nentry, "%d", entry->nentry); + values[2] = nentry; + + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + result = HeapTupleGetDatum(tuple); + + pfree(values[0]); + st->cur++; + return result; + } + else + { + pfree(st->stat); + pfree(st); + } + + return (Datum) 0; +} + +static tsstat * +ts_stat_sql(text *txt, text *ws) +{ + char *query = TextPGetCString(txt); + int i; + tsstat *newstat, + *stat; + bool isnull; + Portal portal; + void *plan; + + if ((plan = SPI_prepare(query, 0, NULL)) == NULL) + /* internal error */ + elog(ERROR, "SPI_prepare(\"%s\") failed", query); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL) + /* internal error */ + elog(ERROR, "SPI_cursor_open(\"%s\") failed", query); + + SPI_cursor_fetch(portal, true, 100); + + if (SPI_tuptable->tupdesc->natts != 1 || + SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSVECTOROID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ts_stat query must return one tsvector column"))); + + stat = palloc(STATHDRSIZE); + SET_VARSIZE(stat, STATHDRSIZE); + stat->size = 0; + stat->weight = 0; + + if (ws) + { + char *buf; + + buf = VARDATA(ws); + while (buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ) + { + if (pg_mblen(buf) == 1) + { + switch (*buf) + { + case 'A': + case 'a': + stat->weight |= 1 << 3; + break; + case 'B': + case 'b': + stat->weight |= 1 << 2; + break; + case 'C': + case 'c': + stat->weight |= 1 << 1; + break; + case 'D': + case 'd': + stat->weight |= 1; + break; + default: + stat->weight |= 0; + } + } + buf += pg_mblen(buf); + } + } + + while (SPI_processed > 0) + { + for (i = 0; i < SPI_processed; i++) + { + Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); + + if (!isnull) + { + newstat = ts_accum(stat, data); + if (stat != newstat && stat) + pfree(stat); + stat = newstat; + } + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_fetch(portal, true, 100); + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_close(portal); + SPI_freeplan(plan); + pfree(query); + + return stat; +} + +Datum +ts_stat1(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + tsstat *stat; + text *txt = PG_GETARG_TEXT_P(0); + + funcctx = SRF_FIRSTCALL_INIT(); + SPI_connect(); + stat = ts_stat_sql(txt, NULL); + PG_FREE_IF_COPY(txt, 0); + ts_setup_firstcall(fcinfo, funcctx, stat); + SPI_finish(); + } + + funcctx = SRF_PERCALL_SETUP(); + if ((result = ts_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +Datum +ts_stat2(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) + { + tsstat *stat; + text *txt = PG_GETARG_TEXT_P(0); + text *ws = PG_GETARG_TEXT_P(1); + + funcctx = SRF_FIRSTCALL_INIT(); + SPI_connect(); + stat = ts_stat_sql(txt, ws); + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(ws, 1); + ts_setup_firstcall(fcinfo, funcctx, stat); + SPI_finish(); + } + + funcctx = SRF_PERCALL_SETUP(); + if ((result = ts_process_call(funcctx)) != (Datum) 0) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + + +/* Check if datatype is TEXT or binary-equivalent to it */ +static bool +istexttype(Oid typid) +{ + /* varchar(n) and char(n) are binary-compatible with text */ + if (typid==TEXTOID || typid==VARCHAROID || typid==BPCHAROID) + return true; + /* Allow domains over these types, too */ + typid = getBaseType(typid); + if (typid==TEXTOID || typid==VARCHAROID || typid==BPCHAROID) + return true; + return false; +} + + +/* + * Triggers for automatic update of a tsvector column from text column(s) + * + * Trigger arguments are either + * name of tsvector col, name of tsconfig to use, name(s) of text col(s) + * name of tsvector col, name of regconfig col, name(s) of text col(s) + * ie, tsconfig can either be specified by name, or indirectly as the + * contents of a regconfig field in the row. If the name is used, it must + * be explicitly schema-qualified. + */ +Datum +tsvector_update_trigger_byid(PG_FUNCTION_ARGS) +{ + return tsvector_update_trigger(fcinfo, false); +} + +Datum +tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS) +{ + return tsvector_update_trigger(fcinfo, true); +} + +static Datum +tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column) +{ + TriggerData *trigdata; + Trigger *trigger; + Relation rel; + HeapTuple rettuple = NULL; + int tsvector_attr_num, + i; + ParsedText prs; + Datum datum; + bool isnull; + text *txt; + Oid cfgId; + + /* Check call context */ + if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */ + elog(ERROR, "tsvector_update_trigger: not fired by trigger manager"); + + trigdata = (TriggerData *) fcinfo->context; + if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event)) + elog(ERROR, "tsvector_update_trigger: can't process STATEMENT events"); + if (TRIGGER_FIRED_AFTER(trigdata->tg_event)) + elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event"); + + if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + rettuple = trigdata->tg_trigtuple; + else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + rettuple = trigdata->tg_newtuple; + else + elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE"); + + trigger = trigdata->tg_trigger; + rel = trigdata->tg_relation; + + if (trigger->tgnargs < 3) + elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)"); + + /* Find the target tsvector column */ + tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); + if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("tsvector column \"%s\" does not exist", + trigger->tgargs[0]))); + if (SPI_gettypeid(rel->rd_att, tsvector_attr_num) != TSVECTOROID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" is not of tsvector type", + trigger->tgargs[0]))); + + /* Find the configuration to use */ + if (config_column) + { + int config_attr_num; + + config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]); + if (config_attr_num == SPI_ERROR_NOATTRIBUTE) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("config column \"%s\" does not exist", + trigger->tgargs[1]))); + if (SPI_gettypeid(rel->rd_att, config_attr_num) != REGCONFIGOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" is not of regconfig type", + trigger->tgargs[1]))); + + datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull); + if (isnull) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("config column \"%s\" must not be NULL", + trigger->tgargs[1]))); + cfgId = DatumGetObjectId(datum); + } + else + { + List *names; + + names = stringToQualifiedNameList(trigger->tgargs[1]); + /* require a schema so that results are not search path dependent */ + if (list_length(names) < 2) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("text search configuration name \"%s\" must be schema-qualified", + trigger->tgargs[1]))); + cfgId = TSConfigGetCfgid(names, false); + } + + /* initialize parse state */ + prs.lenwords = 32; + prs.curwords = 0; + prs.pos = 0; + prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); + + /* find all words in indexable column(s) */ + for (i = 2; i < trigger->tgnargs; i++) + { + int numattr; + + numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); + if (numattr == SPI_ERROR_NOATTRIBUTE) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", + trigger->tgargs[i]))); + if (!istexttype(SPI_gettypeid(rel->rd_att, numattr))) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("column \"%s\" is not of character type", + trigger->tgargs[i]))); + + datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); + if (isnull) + continue; + + txt = DatumGetTextP(datum); + + parsetext(cfgId, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); + + if (txt != (text *) DatumGetPointer(datum)) + pfree(txt); + } + + /* make tsvector value */ + if (prs.curwords) + { + datum = PointerGetDatum(make_tsvector(&prs)); + rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num, + &datum, NULL); + pfree(DatumGetPointer(datum)); + } + else + { + TSVector out = palloc(CALCDATASIZE(0, 0)); + + SET_VARSIZE(out, CALCDATASIZE(0, 0)); + out->size = 0; + datum = PointerGetDatum(out); + rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num, + &datum, NULL); + pfree(prs.words); + } + + if (rettuple == NULL) /* internal error */ + elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple", + SPI_result); + + return PointerGetDatum(rettuple); +} diff --git a/src/backend/utils/cache/Makefile b/src/backend/utils/cache/Makefile index 879f013e335..7ad1cda6c73 100644 --- a/src/backend/utils/cache/Makefile +++ b/src/backend/utils/cache/Makefile @@ -4,7 +4,7 @@ # Makefile for utils/cache # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/utils/cache/Makefile,v 1.21 2007/03/13 00:33:42 tgl Exp $ +# $PostgreSQL: pgsql/src/backend/utils/cache/Makefile,v 1.22 2007/08/21 01:11:19 tgl Exp $ # #------------------------------------------------------------------------- @@ -13,7 +13,7 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = catcache.o inval.o plancache.o relcache.o \ - syscache.o lsyscache.o typcache.o + syscache.o lsyscache.o typcache.o ts_cache.o all: SUBSYS.o diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 13771c77ebc..9413d5ad36e 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/catcache.c,v 1.137 2007/04/21 04:49:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/catcache.c,v 1.138 2007/08/21 01:11:19 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -134,6 +134,8 @@ GetCCHashEqFuncs(Oid keytype, PGFunction *hashfunc, RegProcedure *eqfunc) case REGOPERATOROID: case REGCLASSOID: case REGTYPEOID: + case REGCONFIGOID: + case REGDICTIONARYOID: *hashfunc = hashoid; *eqfunc = F_OIDEQ; break; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 2d52b4299b7..db72857b73d 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/syscache.c,v 1.112 2007/04/02 03:49:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/syscache.c,v 1.113 2007/08/21 01:11:19 tgl Exp $ * * NOTES * These routines allow the parser/planner/executor to perform @@ -40,6 +40,11 @@ #include "catalog/pg_proc.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" #include "catalog/pg_type.h" #include "utils/syscache.h" @@ -540,6 +545,114 @@ static const struct cachedesc cacheinfo[] = { }, 1024 }, + {TSConfigMapRelationId, /* TSCONFIGMAP */ + TSConfigMapIndexId, + 0, + 3, + { + Anum_pg_ts_config_map_mapcfg, + Anum_pg_ts_config_map_maptokentype, + Anum_pg_ts_config_map_mapseqno, + 0 + }, + 4 + }, + {TSConfigRelationId, /* TSCONFIGNAMENSP */ + TSConfigNameNspIndexId, + 0, + 2, + { + Anum_pg_ts_config_cfgname, + Anum_pg_ts_config_cfgnamespace, + 0, + 0 + }, + 16 + }, + {TSConfigRelationId, /* TSCONFIGOID */ + TSConfigOidIndexId, + 0, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 16 + }, + {TSDictionaryRelationId, /* TSDICTNAMENSP */ + TSDictionaryNameNspIndexId, + 0, + 2, + { + Anum_pg_ts_dict_dictname, + Anum_pg_ts_dict_dictnamespace, + 0, + 0 + }, + 16 + }, + {TSDictionaryRelationId, /* TSDICTOID */ + TSDictionaryOidIndexId, + 0, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 16 + }, + {TSParserRelationId, /* TSPARSERNAMENSP */ + TSParserNameNspIndexId, + 0, + 2, + { + Anum_pg_ts_parser_prsname, + Anum_pg_ts_parser_prsnamespace, + 0, + 0 + }, + 4 + }, + {TSParserRelationId, /* TSPARSEROID */ + TSParserOidIndexId, + 0, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 4 + }, + {TSTemplateRelationId, /* TSTEMPLATENAMENSP */ + TSTemplateNameNspIndexId, + 0, + 2, + { + Anum_pg_ts_template_tmplname, + Anum_pg_ts_template_tmplnamespace, + 0, + 0 + }, + 16 + }, + {TSTemplateRelationId, /* TSTEMPLATEOID */ + TSTemplateOidIndexId, + 0, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 16 + }, {TypeRelationId, /* TYPENAMENSP */ TypeNameNspIndexId, Anum_pg_type_typrelid, diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c new file mode 100644 index 00000000000..cd3a9dad571 --- /dev/null +++ b/src/backend/utils/cache/ts_cache.c @@ -0,0 +1,641 @@ +/*------------------------------------------------------------------------- + * + * ts_cache.c + * Tsearch related object caches. + * + * Tsearch performance is very sensitive to performance of parsers, + * dictionaries and mapping, so lookups should be cached as much + * as possible. + * + * Once a backend has created a cache entry for a particular TS object OID, + * the cache entry will exist for the life of the backend; hence it is + * safe to hold onto a pointer to the cache entry while doing things that + * might result in recognizing a cache invalidation. Beware however that + * subsidiary information might be deleted and reallocated somewhere else + * if a cache inval and reval happens! This does not look like it will be + * a big problem as long as parser and dictionary methods do not attempt + * any database access. + * + * + * Copyright (c) 2006-2007, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/cache/ts_cache.c,v 1.1 2007/08/21 01:11:19 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/xact.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_ts_config.h" +#include "catalog/pg_ts_config_map.h" +#include "catalog/pg_ts_dict.h" +#include "catalog/pg_ts_parser.h" +#include "catalog/pg_ts_template.h" +#include "catalog/pg_type.h" +#include "miscadmin.h" +#include "tsearch/ts_cache.h" +#include "tsearch/ts_utils.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/syscache.h" + + +/* + * MAXTOKENTYPE/MAXDICTSPERTT are arbitrary limits on the workspace size + * used in lookup_ts_config_cache(). We could avoid hardwiring a limit + * by making the workspace dynamically enlargeable, but it seems unlikely + * to be worth the trouble. + */ +#define MAXTOKENTYPE 256 +#define MAXDICTSPERTT 100 + + +static HTAB *TSParserCacheHash = NULL; +static TSParserCacheEntry *lastUsedParser = NULL; + +static HTAB *TSDictionaryCacheHash = NULL; +static TSDictionaryCacheEntry *lastUsedDictionary = NULL; + +static HTAB *TSConfigCacheHash = NULL; +static TSConfigCacheEntry *lastUsedConfig = NULL; + +/* + * GUC default_text_search_config, and a cache of the current config's OID + */ +char *TSCurrentConfig = NULL; + +static Oid TSCurrentConfigCache = InvalidOid; + + +/* + * We use this catcache callback to detect when a visible change to a TS + * catalog entry has been made, by either our own backend or another one. + * We don't get enough information to know *which* specific catalog row + * changed, so we have to invalidate all related cache entries. Fortunately, + * it seems unlikely that TS configuration changes will occur often enough + * for this to be a performance problem. + * + * We can use the same function for all TS caches by passing the hash + * table address as the "arg". + */ +static void +InvalidateTSCacheCallBack(Datum arg, Oid relid) +{ + HTAB *hash = (HTAB *) DatumGetPointer(arg); + HASH_SEQ_STATUS status; + TSAnyCacheEntry *entry; + + hash_seq_init(&status, hash); + while ((entry = (TSAnyCacheEntry *) hash_seq_search(&status)) != NULL) + entry->isvalid = false; + + /* Also invalidate the current-config cache if it's pg_ts_config */ + if (hash == TSConfigCacheHash) + TSCurrentConfigCache = InvalidOid; +} + +/* + * Fetch parser cache entry + */ +TSParserCacheEntry * +lookup_ts_parser_cache(Oid prsId) +{ + TSParserCacheEntry *entry; + + if (TSParserCacheHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TSParserCacheEntry); + ctl.hash = oid_hash; + TSParserCacheHash = hash_create("Tsearch parser cache", 4, + &ctl, HASH_ELEM | HASH_FUNCTION); + /* Flush cache on pg_ts_parser changes */ + CacheRegisterSyscacheCallback(TSPARSEROID, InvalidateTSCacheCallBack, + PointerGetDatum(TSParserCacheHash)); + } + + /* Check single-entry cache */ + if (lastUsedParser && lastUsedParser->prsId == prsId && + lastUsedParser->isvalid) + return lastUsedParser; + + /* Try to look up an existing entry */ + entry = (TSParserCacheEntry *) hash_search(TSParserCacheHash, + (void *) &prsId, + HASH_FIND, NULL); + if (entry == NULL || !entry->isvalid) + { + /* + * If we didn't find one, we want to make one. + * But first look up the object to be sure the OID is real. + */ + HeapTuple tp; + Form_pg_ts_parser prs; + + tp = SearchSysCache(TSPARSEROID, + ObjectIdGetDatum(prsId), + 0, 0, 0); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for text search parser %u", + prsId); + prs = (Form_pg_ts_parser) GETSTRUCT(tp); + + /* + * Sanity checks + */ + if (!OidIsValid(prs->prsstart)) + elog(ERROR, "text search parser %u has no prsstart method", prsId); + if (!OidIsValid(prs->prstoken)) + elog(ERROR, "text search parser %u has no prstoken method", prsId); + if (!OidIsValid(prs->prsend)) + elog(ERROR, "text search parser %u has no prsend method", prsId); + + if (entry == NULL) + { + bool found; + + /* Now make the cache entry */ + entry = (TSParserCacheEntry *) + hash_search(TSParserCacheHash, + (void *) &prsId, + HASH_ENTER, &found); + Assert(!found); /* it wasn't there a moment ago */ + } + + MemSet(entry, 0, sizeof(TSParserCacheEntry)); + entry->prsId = prsId; + entry->startOid = prs->prsstart; + entry->tokenOid = prs->prstoken; + entry->endOid = prs->prsend; + entry->headlineOid = prs->prsheadline; + entry->lextypeOid = prs->prslextype; + + ReleaseSysCache(tp); + + fmgr_info_cxt(entry->startOid, &entry->prsstart, CacheMemoryContext); + fmgr_info_cxt(entry->tokenOid, &entry->prstoken, CacheMemoryContext); + fmgr_info_cxt(entry->endOid, &entry->prsend, CacheMemoryContext); + if (OidIsValid(entry->headlineOid)) + fmgr_info_cxt(entry->headlineOid, &entry->prsheadline, + CacheMemoryContext); + + entry->isvalid = true; + } + + lastUsedParser = entry; + + return entry; +} + +/* + * Fetch dictionary cache entry + */ +TSDictionaryCacheEntry * +lookup_ts_dictionary_cache(Oid dictId) +{ + TSDictionaryCacheEntry *entry; + + if (TSDictionaryCacheHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TSDictionaryCacheEntry); + ctl.hash = oid_hash; + TSDictionaryCacheHash = hash_create("Tsearch dictionary cache", 8, + &ctl, HASH_ELEM | HASH_FUNCTION); + /* Flush cache on pg_ts_dict and pg_ts_template changes */ + CacheRegisterSyscacheCallback(TSDICTOID, InvalidateTSCacheCallBack, + PointerGetDatum(TSDictionaryCacheHash)); + CacheRegisterSyscacheCallback(TSTEMPLATEOID, InvalidateTSCacheCallBack, + PointerGetDatum(TSDictionaryCacheHash)); + } + + /* Check single-entry cache */ + if (lastUsedDictionary && lastUsedDictionary->dictId == dictId && + lastUsedDictionary->isvalid) + return lastUsedDictionary; + + /* Try to look up an existing entry */ + entry = (TSDictionaryCacheEntry *) hash_search(TSDictionaryCacheHash, + (void *) &dictId, + HASH_FIND, NULL); + if (entry == NULL || !entry->isvalid) + { + /* + * If we didn't find one, we want to make one. + * But first look up the object to be sure the OID is real. + */ + HeapTuple tpdict, + tptmpl; + Form_pg_ts_dict dict; + Form_pg_ts_template template; + MemoryContext saveCtx = NULL; + + tpdict = SearchSysCache(TSDICTOID, + ObjectIdGetDatum(dictId), + 0, 0, 0); + if (!HeapTupleIsValid(tpdict)) + elog(ERROR, "cache lookup failed for text search dictionary %u", + dictId); + dict = (Form_pg_ts_dict) GETSTRUCT(tpdict); + + /* + * Sanity checks + */ + if (!OidIsValid(dict->dicttemplate)) + elog(ERROR, "text search dictionary %u has no template", dictId); + + /* + * Retrieve dictionary's template + */ + tptmpl = SearchSysCache(TSTEMPLATEOID, + ObjectIdGetDatum(dict->dicttemplate), + 0, 0, 0); + if (!HeapTupleIsValid(tptmpl)) + elog(ERROR, "cache lookup failed for text search template %u", + dict->dicttemplate); + template = (Form_pg_ts_template) GETSTRUCT(tptmpl); + + /* + * Sanity checks + */ + if (!OidIsValid(template->tmpllexize)) + elog(ERROR, "text search template %u has no lexize method", + template->tmpllexize); + + if (entry == NULL) + { + bool found; + + /* Now make the cache entry */ + entry = (TSDictionaryCacheEntry *) + hash_search(TSDictionaryCacheHash, + (void *) &dictId, + HASH_ENTER, &found); + Assert(!found); /* it wasn't there a moment ago */ + + /* Create private memory context the first time through */ + saveCtx = AllocSetContextCreate(CacheMemoryContext, + NameStr(dict->dictname), + ALLOCSET_SMALL_MINSIZE, + ALLOCSET_SMALL_INITSIZE, + ALLOCSET_SMALL_MAXSIZE); + } + else + { + /* Clear the existing entry's private context */ + saveCtx = entry->dictCtx; + MemoryContextResetAndDeleteChildren(saveCtx); + } + + MemSet(entry, 0, sizeof(TSDictionaryCacheEntry)); + entry->dictId = dictId; + entry->dictCtx = saveCtx; + + entry->lexizeOid = template->tmpllexize; + + if (OidIsValid(template->tmplinit)) + { + bool isnull; + Datum opt; + + opt = SysCacheGetAttr(TSDICTOID, tpdict, + Anum_pg_ts_dict_dictinitoption, + &isnull); + if (isnull) + opt = PointerGetDatum(NULL); + + /* + * Init method runs in dictionary's private memory context + */ + saveCtx = MemoryContextSwitchTo(entry->dictCtx); + entry->dictData = DatumGetPointer(OidFunctionCall1(template->tmplinit, opt)); + MemoryContextSwitchTo(saveCtx); + } + + ReleaseSysCache(tptmpl); + ReleaseSysCache(tpdict); + + fmgr_info_cxt(entry->lexizeOid, &entry->lexize, entry->dictCtx); + + entry->isvalid = true; + } + + lastUsedDictionary = entry; + + return entry; +} + +/* + * Initialize config cache and prepare callbacks. This is split out of + * lookup_ts_config_cache because we need to activate the callback before + * caching TSCurrentConfigCache, too. + */ +static void +init_ts_config_cache(void) +{ + HASHCTL ctl; + + if (!CacheMemoryContext) + CreateCacheMemoryContext(); + + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(TSConfigCacheEntry); + ctl.hash = oid_hash; + TSConfigCacheHash = hash_create("Tsearch configuration cache", 16, + &ctl, HASH_ELEM | HASH_FUNCTION); + /* Flush cache on pg_ts_config and pg_ts_config_map changes */ + CacheRegisterSyscacheCallback(TSCONFIGOID, InvalidateTSCacheCallBack, + PointerGetDatum(TSConfigCacheHash)); + CacheRegisterSyscacheCallback(TSCONFIGMAP, InvalidateTSCacheCallBack, + PointerGetDatum(TSConfigCacheHash)); +} + +/* + * Fetch configuration cache entry + */ +TSConfigCacheEntry * +lookup_ts_config_cache(Oid cfgId) +{ + TSConfigCacheEntry *entry; + + if (TSConfigCacheHash == NULL) + { + /* First time through: initialize the hash table */ + init_ts_config_cache(); + } + + /* Check single-entry cache */ + if (lastUsedConfig && lastUsedConfig->cfgId == cfgId && + lastUsedConfig->isvalid) + return lastUsedConfig; + + /* Try to look up an existing entry */ + entry = (TSConfigCacheEntry *) hash_search(TSConfigCacheHash, + (void *) &cfgId, + HASH_FIND, NULL); + if (entry == NULL || !entry->isvalid) + { + /* + * If we didn't find one, we want to make one. + * But first look up the object to be sure the OID is real. + */ + HeapTuple tp; + Form_pg_ts_config cfg; + Relation maprel; + Relation mapidx; + ScanKeyData mapskey; + IndexScanDesc mapscan; + HeapTuple maptup; + ListDictionary maplists[MAXTOKENTYPE + 1]; + Oid mapdicts[MAXDICTSPERTT]; + int maxtokentype; + int ndicts; + int i; + + tp = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(cfgId), + 0, 0, 0); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + cfg = (Form_pg_ts_config) GETSTRUCT(tp); + + /* + * Sanity checks + */ + if (!OidIsValid(cfg->cfgparser)) + elog(ERROR, "text search configuration %u has no parser", cfgId); + + if (entry == NULL) + { + bool found; + + /* Now make the cache entry */ + entry = (TSConfigCacheEntry *) + hash_search(TSConfigCacheHash, + (void *) &cfgId, + HASH_ENTER, &found); + Assert(!found); /* it wasn't there a moment ago */ + } + else + { + /* Cleanup old contents */ + if (entry->map) + { + for (i = 0; i < entry->lenmap; i++) + if (entry->map[i].dictIds) + pfree(entry->map[i].dictIds); + pfree(entry->map); + } + } + + MemSet(entry, 0, sizeof(TSConfigCacheEntry)); + entry->cfgId = cfgId; + entry->prsId = cfg->cfgparser; + + ReleaseSysCache(tp); + + /* + * Scan pg_ts_config_map to gather dictionary list for each token type + * + * Because the index is on (mapcfg, maptokentype, mapseqno), we will + * see the entries in maptokentype order, and in mapseqno order for + * each token type, even though we didn't explicitly ask for that. + */ + MemSet(maplists, 0, sizeof(maplists)); + maxtokentype = 0; + ndicts = 0; + + ScanKeyInit(&mapskey, + Anum_pg_ts_config_map_mapcfg, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(cfgId)); + + maprel = heap_open(TSConfigMapRelationId, AccessShareLock); + mapidx = index_open(TSConfigMapIndexId, AccessShareLock); + mapscan = index_beginscan(maprel, mapidx, SnapshotNow, 1, &mapskey); + + while ((maptup = index_getnext(mapscan, ForwardScanDirection)) != NULL) + { + Form_pg_ts_config_map cfgmap = (Form_pg_ts_config_map) GETSTRUCT(maptup); + int toktype = cfgmap->maptokentype; + + if (toktype <= 0 || toktype > MAXTOKENTYPE) + elog(ERROR, "maptokentype value %d is out of range", toktype); + if (toktype < maxtokentype) + elog(ERROR, "maptokentype entries are out of order"); + if (toktype > maxtokentype) + { + /* starting a new token type, but first save the prior data */ + if (ndicts > 0) + { + maplists[maxtokentype].len = ndicts; + maplists[maxtokentype].dictIds = (Oid *) + MemoryContextAlloc(CacheMemoryContext, + sizeof(Oid) * ndicts); + memcpy(maplists[maxtokentype].dictIds, mapdicts, + sizeof(Oid) * ndicts); + } + maxtokentype = toktype; + mapdicts[0] = cfgmap->mapdict; + ndicts = 1; + } + else + { + /* continuing data for current token type */ + if (ndicts >= MAXDICTSPERTT) + elog(ERROR, "too many pg_ts_config_map entries for one token type"); + mapdicts[ndicts++] = cfgmap->mapdict; + } + } + + index_endscan(mapscan); + index_close(mapidx, AccessShareLock); + heap_close(maprel, AccessShareLock); + + if (ndicts > 0) + { + /* save the last token type's dictionaries */ + maplists[maxtokentype].len = ndicts; + maplists[maxtokentype].dictIds = (Oid *) + MemoryContextAlloc(CacheMemoryContext, + sizeof(Oid) * ndicts); + memcpy(maplists[maxtokentype].dictIds, mapdicts, + sizeof(Oid) * ndicts); + /* and save the overall map */ + entry->lenmap = maxtokentype + 1; + entry->map = (ListDictionary *) + MemoryContextAlloc(CacheMemoryContext, + sizeof(ListDictionary) * entry->lenmap); + memcpy(entry->map, maplists, + sizeof(ListDictionary) * entry->lenmap); + } + + entry->isvalid = true; + } + + lastUsedConfig = entry; + + return entry; +} + + +/*--------------------------------------------------- + * GUC variable "default_text_search_config" + *--------------------------------------------------- + */ + +Oid +getTSCurrentConfig(bool emitError) +{ + /* if we have a cached value, return it */ + if (OidIsValid(TSCurrentConfigCache)) + return TSCurrentConfigCache; + + /* fail if GUC hasn't been set up yet */ + if (TSCurrentConfig == NULL || *TSCurrentConfig == '\0') + { + if (emitError) + elog(ERROR, "text search configuration isn't set"); + else + return InvalidOid; + } + + if (TSConfigCacheHash == NULL) + { + /* First time through: initialize the tsconfig inval callback */ + init_ts_config_cache(); + } + + /* Look up the config */ + TSCurrentConfigCache = + TSConfigGetCfgid(stringToQualifiedNameList(TSCurrentConfig), + !emitError); + + return TSCurrentConfigCache; +} + +const char * +assignTSCurrentConfig(const char *newval, bool doit, GucSource source) +{ + /* do nothing during initial GUC setup */ + if (newval == NULL) + { + if (doit) + TSCurrentConfigCache = InvalidOid; + return newval; + } + + /* + * If we aren't inside a transaction, we cannot do database access so + * cannot verify the config name. Must accept it on faith. + */ + if (IsTransactionState()) + { + Oid cfgId; + HeapTuple tuple; + Form_pg_ts_config cfg; + char *buf; + + cfgId = TSConfigGetCfgid(stringToQualifiedNameList(newval), true); + + if (!OidIsValid(cfgId)) + return NULL; + + /* + * Modify the actually stored value to be fully qualified, to ensure + * later changes of search_path don't affect it. + */ + tuple = SearchSysCache(TSCONFIGOID, + ObjectIdGetDatum(cfgId), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for text search configuration %u", + cfgId); + cfg = (Form_pg_ts_config) GETSTRUCT(tuple); + + buf = quote_qualified_identifier(get_namespace_name(cfg->cfgnamespace), + NameStr(cfg->cfgname)); + + ReleaseSysCache(tuple); + + /* GUC wants it malloc'd not palloc'd */ + newval = strdup(buf); + pfree(buf); + + if (doit) + TSCurrentConfigCache = cfgId; + } + else + { + if (doit) + TSCurrentConfigCache = InvalidOid; + } + + return newval; +} diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index a05b3be3937..e70768aa840 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut <peter_e@gmx.net>. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.413 2007/08/19 01:41:25 adunstan Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.414 2007/08/21 01:11:19 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -25,7 +25,6 @@ #include <syslog.h> #endif - #include "access/gin.h" #include "access/transam.h" #include "access/twophase.h" @@ -58,6 +57,7 @@ #include "storage/fd.h" #include "storage/freespace.h" #include "tcop/tcopprot.h" +#include "tsearch/ts_cache.h" #include "utils/builtins.h" #include "utils/guc_tables.h" #include "utils/memutils.h" @@ -2240,8 +2240,9 @@ static struct config_string ConfigureNamesString[] = { {"log_destination", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Sets the destination for server log output."), - gettext_noop("Valid values are combinations of \"stderr\", \"syslog\", " - " \"csvlog\" and \"eventlog\", depending on the platform."), + gettext_noop("Valid values are combinations of \"stderr\", " + "\"syslog\", \"csvlog\", and \"eventlog\", " + "depending on the platform."), GUC_LIST_INPUT }, &log_destination_string, @@ -2434,6 +2435,15 @@ static struct config_string ConfigureNamesString[] = "content", assign_xmloption, NULL }, + { + {"default_text_search_config", PGC_USERSET, CLIENT_CONN_LOCALE, + gettext_noop("Sets default text search configuration."), + NULL + }, + &TSCurrentConfig, + "pg_catalog.simple", assignTSCurrentConfig, NULL + }, + #ifdef USE_SSL { {"ssl_ciphers", PGC_POSTMASTER, CONN_AUTH_SECURITY, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 8720d18c55d..b22099c2fd7 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -448,6 +448,9 @@ #lc_numeric = 'C' # locale for number formatting #lc_time = 'C' # locale for time formatting +# default configuration for text search +#default_text_search_config = 'pg_catalog.simple' + # - Other Defaults - #explain_pretty_print = on |