/*-------------------------------------------------------------------------
 *
 * system.c
 *	  interface routines for system tablesample method
 *
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/tablesample/system.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <limits.h>

#include "fmgr.h"

#include "access/tablesample.h"
#include "access/relscan.h"
#include "nodes/execnodes.h"
#include "nodes/relation.h"
#include "optimizer/clauses.h"
#include "storage/bufmgr.h"
#include "utils/sampling.h"


/*
 * State
 *
 * One of these is allocated by tsm_system_init() and hung off
 * TableSampleDesc.tsmdata.  seed, nblocks and samplesize are kept so that
 * the block sampler can be re-initialized with the same inputs.
 */
typedef struct
{
	BlockSamplerData bs;
	uint32		seed;			/* random seed */
	BlockNumber nblocks;		/* number of blocks in relation */
	int			samplesize;		/* number of blocks to return */
	OffsetNumber lt;			/* last tuple returned from current block */
} SystemSamplerData;


/*
 * Initializes the state.
 *
 * Arguments:
 *	0 - TableSampleDesc pointer; its heapScan supplies the block count and
 *		its tsmdata receives the newly allocated state
 *	1 - random seed (uint32)
 *	2 - sample percentage (float4); a NULL argument is treated as invalid
 *
 * Reports ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE unless the percentage lies in
 * the closed range [0, 100].
 */
Datum
tsm_system_init(PG_FUNCTION_ARGS)
{
	TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
	uint32		seed = PG_GETARG_UINT32(1);
	float4		percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
	HeapScanDesc scan = tsdesc->heapScan;
	SystemSamplerData *sampler;
	double		nsample;

	/*
	 * Phrase the test as a negated "in range" check: NaN compares false
	 * against everything, so "percent < 0 || percent > 100" would let it
	 * through and the float-to-int conversion below would be undefined.
	 */
	if (!(percent >= 0 && percent <= 100))
		ereport(ERROR,
				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
				 errmsg("invalid sample size"),
				 errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));

	sampler = palloc0(sizeof(SystemSamplerData));

	/* Remember initial values for reinit */
	sampler->seed = seed;
	sampler->nblocks = scan->rs_nblocks;

	/*
	 * samplesize is an int but nblocks is an unsigned 32-bit block count, so
	 * the product can exceed INT_MAX.  Clamp instead of converting an
	 * out-of-range double to int (undefined behavior).  For every value that
	 * fits, the result is the same "1 + truncated product" as before.
	 */
	nsample = sampler->nblocks * (percent / 100.0);
	if (nsample >= (double) INT_MAX)
		sampler->samplesize = INT_MAX;
	else
		sampler->samplesize = 1 + (int) nsample;

	/* No tuple has been returned from any block yet */
	sampler->lt = InvalidOffsetNumber;

	BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
					  sampler->seed);

	tsdesc->tsmdata = (void *) sampler;

	PG_RETURN_VOID();
}

/*
 * Get next block number or InvalidBlockNumber when we're done.
 *
 * Uses the same logic as ANALYZE for picking the random blocks.
*/ Datum tsm_system_nextblock(PG_FUNCTION_ARGS) { TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; BlockNumber blockno; if (!BlockSampler_HasMore(&sampler->bs)) PG_RETURN_UINT32(InvalidBlockNumber); blockno = BlockSampler_Next(&sampler->bs); PG_RETURN_UINT32(blockno); } /* * Get next tuple offset in current block or InvalidOffsetNumber if we are done * with this block. */ Datum tsm_system_nexttuple(PG_FUNCTION_ARGS) { TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); OffsetNumber maxoffset = PG_GETARG_UINT16(2); SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; OffsetNumber tupoffset = sampler->lt; if (tupoffset == InvalidOffsetNumber) tupoffset = FirstOffsetNumber; else tupoffset++; if (tupoffset > maxoffset) tupoffset = InvalidOffsetNumber; sampler->lt = tupoffset; PG_RETURN_UINT16(tupoffset); } /* * Cleanup method. */ Datum tsm_system_end(PG_FUNCTION_ARGS) { TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); pfree(tsdesc->tsmdata); PG_RETURN_VOID(); } /* * Reset state (called by ReScan). */ Datum tsm_system_reset(PG_FUNCTION_ARGS) { TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0); SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata; sampler->lt = InvalidOffsetNumber; BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize, sampler->seed); PG_RETURN_VOID(); } /* * Costing function. 
*/ Datum tsm_system_cost(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Path *path = (Path *) PG_GETARG_POINTER(1); RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2); List *args = (List *) PG_GETARG_POINTER(3); BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4); double *tuples = (double *) PG_GETARG_POINTER(5); Node *pctnode; float4 samplesize; pctnode = linitial(args); pctnode = estimate_expression_value(root, pctnode); if (IsA(pctnode, RelabelType)) pctnode = (Node *) ((RelabelType *) pctnode)->arg; if (IsA(pctnode, Const)) { samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue); samplesize /= 100.0; } else { /* Default samplesize if the estimation didn't return Const. */ samplesize = 0.1f; } *pages = baserel->pages * samplesize; *tuples = path->rows * samplesize; path->rows = *tuples; PG_RETURN_VOID(); }