aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/chunk.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/chunk.c')
-rw-r--r--src/backend/utils/adt/chunk.c587
1 files changed, 587 insertions, 0 deletions
diff --git a/src/backend/utils/adt/chunk.c b/src/backend/utils/adt/chunk.c
new file mode 100644
index 00000000000..ca0cb2647ea
--- /dev/null
+++ b/src/backend/utils/adt/chunk.c
@@ -0,0 +1,587 @@
+/*-------------------------------------------------------------------------
+ *
+ * chunk.c--
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/Attic/chunk.c,v 1.1.1.1 1996/07/09 06:22:03 scrappy Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <ctype.h>
+#include "postgres.h"
+#include "utils/memutils.h"
+#include "libpq/libpq-fs.h"
+
+#include "storage/fd.h" /* for SEEK_ */
+
+#include "catalog/pg_type.h"
+
+#include "utils/palloc.h"
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/array.h"
+
+#include "optimizer/internal.h"
+
+#define INFTY 500000000
+#define MANY 10000
+#define MAXPAT 20
+#define quot_ceil(x,y) (((x)+(y)-1)/(y))
+#define min(x,y) (((x) < (y))? (x) : (y))
+#define max(x,y) (((x) > (y))? (x) : (y))
+
+static CHUNK_INFO cInfo;
+
+/* non-export function prototypes */
+static int _FindBestChunk(int size, int dmax[], int dbest[], int dim,
+ int A[MAXPAT][MAXDIM+1], int N);
+static int get_next(int d[], int k, int C, int dmax[]);
+static void initialize_info(CHUNK_INFO *A, int ndim, int dim[], int chunk[]);
+static void _ConvertToChunkFile(int n, int baseSize, int dim[], int C[],
+ int srcfd, int destfd);
+static void read_chunk(int chunk_no[], int C[], char a_chunk[], int srcfd,
+ int n, int baseSize, int PX[], int dist[]);
+static int write_chunk(struct varlena * a_chunk, int ofile);
+static int seek_and_read(int pos, int size, char buff[], int fp, int from);
+
+/*------------------------------------------------------------------------
+ * _ChunkArray ---
+ * converts an input array to chunked format using the information
+ * provided by the access pattern.
+ * Results:
+ * creates a new file that stores the chunked array and returns
+ * information about the chunked file
+ *-----------------------------------------------------------------------
+ */
+char *
+_ChunkArray(int fd,
+ FILE *afd,
+ int ndim,
+ int dim[],
+ int baseSize,
+ int *nbytes,
+ char *chunkfile)
+{
+ int cfd;
+ int chunk[MAXDIM], csize;
+ bool reorgFlag;
+
+ if (chunkfile == NULL)
+ reorgFlag = true;
+ else
+ reorgFlag = false;
+
+#ifdef LOARRAY
+ if (reorgFlag)
+ /* create new LO for chunked file */
+ chunkfile = _array_newLO( &cfd, fileFlag );
+ else
+ cfd = LOopen(chunkfile, O_RDONLY);
+#endif
+ if (cfd < 0)
+ elog(WARN, "Enable to open chunk file");
+ strcpy (cInfo.lo_name, chunkfile);
+
+ /* find chunk size */
+ csize = GetChunkSize(afd, ndim, dim, baseSize, chunk);
+
+ if (reorgFlag)
+ /* copy data from input file to chunked file */
+ _ConvertToChunkFile(ndim, baseSize, dim, chunk, fd, cfd);
+
+ initialize_info(&cInfo, ndim, dim, chunk);
+ *nbytes = sizeof(CHUNK_INFO);
+ return (char *) &cInfo ;
+}
+
+/*--------------------------------------------------------------------------
+ * GetChunkSize --
+ * given an access pattern and array dimensionality etc, this program
+ * returns the dimensions of the chunk in "d"
+ *-----------------------------------------------------------------------
+ */
+int
+GetChunkSize(FILE *fd,
+ int ndim,
+ int dim[MAXDIM],
+ int baseSize,
+ int d[MAXDIM])
+{
+ int N, i, j, csize;
+ int A[MAXPAT][MAXDIM+1], dmax[MAXDIM];
+
+ /*
+ * ----------- read input ------------
+ */
+ fscanf(fd, "%d", &N);
+ if ( N > MAXPAT )
+ elog(WARN, "array_in: too many access pattern elements");
+ for (i = 0; i < N; i++)
+ for (j = 0; j < ndim+1; j++)
+ if (fscanf(fd, "%d ", &(A[i][j])) == EOF)
+ elog (WARN, "array_in: bad access pattern input");
+
+ /*
+ * estimate chunk size
+ */
+ for (i = 0; i < ndim; i++)
+ for (j = 0, dmax[i] = 1; j < N; j++)
+ if (dmax[i] < A[j][i])
+ dmax[i] = A[j][i];
+ csize = _PAGE_SIZE_/baseSize;
+
+ _FindBestChunk (csize, dmax, d, ndim, A, N);
+
+ return csize;
+}
+
+/*-------------------------------------------------------------------------
+ * _FindBestChunk --
+ * This routine does most of the number crunching to compute the
+ * optimal chunk shape.
+ * Called by GetChunkSize
+ *------------------------------------------------------------------------
+ */
+static int
+_FindBestChunk(int size,
+ int dmax[],
+ int dbest[],
+ int dim,
+ int A[MAXPAT][MAXDIM+1],
+ int N)
+{
+ int d[MAXDIM];
+ int tc, mintc = INFTY;
+
+ d[0] = 0;
+ mintc = INFTY;
+ while (get_next(d,dim,size, dmax)) {
+ /*
+ * compute the number of page fetches for a given
+ * chunk size (d[]) and access pattern (A[][])
+ */
+ register int i,j, nc;
+ for (i = 0, tc = 0; i < N; i++){
+ for (j = 0, nc = 1; j < dim; j++)
+ nc *= quot_ceil(A[i][j], d[j]);
+ nc *= A[i][dim];
+ tc += nc;
+ }
+ /*
+ * tc holds the total number of page fetches
+ */
+ if (mintc >= tc) {
+ mintc = tc;
+ for (j = 0; j < dim; dbest[j] = d[j], j++)
+ ;
+ }
+ }
+ return(mintc);
+}
+
+/*----------------------------------------------------------------------
+ * get_next --
+ * Called by _GetBestChunk to get the next tuple in the lexicographic order
+ *---------------------------------------------------------------------
+ */
+static int
+get_next(int d[], int k, int C, int dmax[])
+{
+ register int i,j, temp;
+
+ if (!d[0]) {
+ temp = C;
+ for (j = k-1; j >= 0; j--){
+ d[j] = min(temp, dmax[j]);
+ temp = max(1, temp/d[j]);
+ }
+ return(1);
+ }
+
+ for (j = 0, temp = 1; j < k; j++)
+ temp *= d[j];
+
+ for (i=k-1; i >= 0; i--){
+ temp = temp/d[i];
+ if (((temp*(d[i]+1)) < C) && (d[i]+1 <= dmax[i]))
+ break;
+ }
+ if (i < 0)
+ return(0);
+
+ d[i]++;
+ j = C/temp;
+ d[i] = min(dmax[i], j/(j/d[i]));
+ temp = temp*d[i];
+ temp = C/temp;
+
+ for (j = k-1; j > i; j--){
+ d[j] = min(temp, dmax[j]);
+ temp = max(1, temp/d[j]);
+ }
+ return(1);
+}
+
+static char a_chunk[_PAGE_SIZE_ + 4]; /* 4 since a_chunk is in
+ varlena format */
+
+static void
+initialize_info(CHUNK_INFO *A, int ndim, int dim[], int chunk[])
+{
+ int i;
+
+ for ( i = 0; i < ndim; i++)
+ A->C[i] = chunk[i];
+}
+
+/*--------------------------------------------------------------------------
+ * Procedure reorganize_data():
+ * This procedure reads the input multidimensional array that is organised
+ * in the order specified by array "X" and breaks it up into chunks of
+ * dimensions specified in "C".
+ *
+ * This is a very slow process, since reading and writing of LARGE files
+ * may be involved.
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+_ConvertToChunkFile(int n,
+ int baseSize,
+ int dim[],
+ int C[],
+ int srcfd,
+ int destfd)
+{
+ int max_chunks[MAXDIM], chunk_no[MAXDIM];
+ int PX[MAXDIM], dist[MAXDIM];
+ int csize = 1, i, temp;
+
+ for (i = 0; i < n; chunk_no[i++] = 0) {
+ max_chunks[i] = dim[i]/C[i];
+ csize *= C[i];
+ }
+ csize *= baseSize;
+ temp = csize + 4;
+ memmove(a_chunk, &temp, 4);
+
+ mda_get_prod(n, dim, PX);
+ mda_get_offset_values(n, dist, PX, C);
+ for (i = 0; i < n; dist[i] *= baseSize, i++)
+ ;
+ do {
+ read_chunk(chunk_no, C, &(a_chunk[4]), srcfd, n, baseSize, PX, dist);
+ write_chunk((struct varlena*)a_chunk, destfd);
+ } while (next_tuple(n, chunk_no, max_chunks) != -1);
+}
+
+/*--------------------------------------------------------------------------
+ * read_chunk
+ * reads a chunk from the input files into a_chunk, the position of the
+ * chunk is specified by chunk_no
+ *--------------------------------------------------------------------------
+ */
+static void
+read_chunk(int chunk_no[],
+ int C[],
+ char a_chunk[],
+ int srcfd,
+ int n,
+ int baseSize,
+ int PX[],
+ int dist[])
+{
+ int i, j, cp, unit_transfer;
+ int start_pos, pos[MAXDIM];
+ int indx[MAXDIM];
+ int fpOff;
+
+ for ( i = start_pos = 0; i < n; i++) {
+ pos[i] = chunk_no[i] * C[i];
+ start_pos += pos[i]*PX[i];
+ }
+ start_pos *= baseSize;
+
+ /* Read a block of dimesion C starting at co-ordinates pos */
+ unit_transfer = C[n-1] * baseSize;
+
+ for (i = 0; i < n; indx[i++] = 0)
+ ;
+ fpOff = start_pos;
+ seek_and_read(fpOff, unit_transfer, a_chunk, srcfd, SEEK_SET);
+ fpOff += unit_transfer;
+ cp = unit_transfer;
+
+ while ((j = next_tuple(n-1, indx, C)) != -1) {
+ fpOff += dist[j];
+ seek_and_read(fpOff, unit_transfer, &(a_chunk[cp]), srcfd, SEEK_SET);
+ cp += unit_transfer;
+ fpOff += unit_transfer;
+ }
+}
+
+/*--------------------------------------------------------------------------
+ * write_chunk()
+ * writes a chunk of size csize into the output file
+ *--------------------------------------------------------------------------
+ */
+static int
+write_chunk(struct varlena * a_chunk, int ofile)
+{
+ int got_n;
+#ifdef LOARRAY
+ got_n = LOwrite (ofile, a_chunk);
+#endif
+ return(got_n);
+}
+
+/*--------------------------------------------------------------------------
+ * seek_and_read()
+ * seeks to the asked location in the input file and reads the
+ * appropriate number of blocks
+ * Called By: read_chunk()
+ *--------------------------------------------------------------------------
+ */
+static int
+seek_and_read(int pos, int size, char buff[], int fp, int from)
+{
+ struct varlena *v;
+
+ /* Assuming only one file */
+ if ( lo_lseek(fp, pos, from ) < 0)
+ elog(WARN, "File seek error");
+#ifdef LOARRAY
+ v = (struct varlena *) LOread(fp, size);
+#endif
+ if (VARSIZE(v) - 4 < size)
+ elog(WARN, "File read error");
+ memmove(buff, VARDATA(v), size);
+ pfree(v);
+ return(1);
+
+}
+
+/*----------------------------------------------------------------------------
+ * _ReadChunkArray --
+ * returns the subarray specified bu the range indices "st" and "endp"
+ * from the chunked array stored in file "fp"
+ *---------------------------------------------------------------------------
+ */
+int
+_ReadChunkArray(int st[],
+ int endp[],
+ int bsize,
+ int fp,
+ char *destfp,
+ ArrayType *array,
+ int isDestLO,
+ bool *isNull)
+{
+ int i,j,jj;
+ int n, temp, words_read;
+ int chunk_span[MAXDIM], chunk_off[MAXDIM];
+ int chunk_st[MAXDIM], chunk_end[MAXDIM];
+ int block_seek;
+
+ int bptr, *C, csize, *dim, *lb;
+ int range_st[MAXDIM], range_end[MAXDIM],
+ range[MAXDIM], array_span[MAXDIM];
+ int PA[MAXDIM], PCHUNK[MAXDIM], PC[MAXDIM];
+ int to_read;
+ int cdist[MAXDIM], adist[MAXDIM];
+ int dist[MAXDIM], temp_seek;
+
+ int srcOff; /* Needed since LO don't understand SEEK_CUR*/
+ char *baseDestFp = (char *)destfp;
+
+ CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
+ n = ARR_NDIM(array);
+ dim = ARR_DIMS(array);
+ lb = ARR_LBOUND(array);
+ C = A->C;
+
+ csize = C[n-1];
+ PC[n-1] = 1;
+ temp = dim[n - 1]/C[n-1];
+ for (i = n-2; i >= 0; i--){
+ PC[i] = PC[i+1] * temp;
+ temp = dim[i] / C[i];
+ csize *= C[i];
+ }
+
+ for (i = 0; i < n; st[i] -= lb[i], endp[i] -= lb[i], i++)
+ ;
+ mda_get_prod(n, C, PCHUNK);
+ mda_get_range(n, array_span, st, endp);
+ mda_get_prod(n, array_span, PA);
+
+ array2chunk_coord(n, C, st, chunk_st);
+ array2chunk_coord(n, C, endp, chunk_end);
+ mda_get_range(n, chunk_span, chunk_st, chunk_end);
+ mda_get_offset_values(n, dist, PC, chunk_span);
+
+ for (i = 0; i < n; i++) {
+ range_st[i] = st[i];
+ range_end[i] = min(chunk_st[i]*C[i]+C[i]-1, endp[i]);
+ }
+
+ for (i = j = 0; i < n; i++)
+ j+= chunk_st[i]*PC[i];
+ temp_seek = srcOff = j * csize * bsize;
+ if (lo_lseek(fp, srcOff, SEEK_SET) < 0) RETURN_NULL;
+
+ jj = n-1;
+ for (i = 0; i < n; chunk_off[i++] = 0)
+ ;
+ words_read = 0; temp_seek = 0;
+ do {
+ /* Write chunk (chunk_st) to output buffer */
+ mda_get_range(n, array_span, range_st, range_end);
+ mda_get_offset_values(n, adist, PA, array_span);
+ mda_get_offset_values(n, cdist, PCHUNK, array_span);
+ for (i=0; i < n; range[i] = range_st[i]-st[i], i++);
+ bptr = tuple2linear(n, range, PA);
+ for (i = 0; i < n; range[i++] = 0);
+ j = n-1; bptr *= bsize;
+ if (isDestLO) {
+ if (lo_lseek(destfp, bptr, SEEK_SET) < 0)
+ RETURN_NULL;
+ }
+ else
+ destfp = baseDestFp + bptr;
+ for(i = 0, block_seek = 0; i < n; i++)
+ block_seek += (range_st[i]-(chunk_st[i] + chunk_off[i])
+ *C[i])*PCHUNK[i];
+ if (dist[jj] + block_seek + temp_seek) {
+ temp = (dist[jj]*csize+block_seek+temp_seek)*bsize;
+ srcOff += temp;
+ if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
+ RETURN_NULL;
+ }
+ for (i = n-1, to_read = bsize; i >= 0;
+ to_read *= min(C[i], array_span[i]), i--)
+ if (cdist[i] || adist[i])
+ break;
+ do {
+ if (cdist[j]) {
+ srcOff += (cdist[j]*bsize);
+ if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
+ RETURN_NULL;
+ }
+ block_seek += cdist[j];
+ bptr += adist[j]*bsize;
+ if (isDestLO) {
+ if (lo_lseek(destfp, bptr, SEEK_SET) < 0)
+ RETURN_NULL;
+ }
+ else
+ destfp = baseDestFp + bptr;
+ temp = _LOtransfer ((char**)&destfp, to_read, 1, (char**)&fp, 1, isDestLO);
+ if (temp < to_read)
+ RETURN_NULL;
+ srcOff += to_read;
+ words_read+=to_read;
+ bptr += to_read;
+ block_seek += (to_read/bsize);
+ /*
+ * compute next tuple in range[]
+ */
+ {
+ int x;
+ if (!(i+1))
+ j = -1;
+ else {
+ range[i] = (range[i]+1)%array_span[i];
+ for (x = i; x*(!range[x]); x--)
+ range[x-1] = (range[x-1]+1)%array_span[x-1];
+ if (x)
+ j = x;
+ else {
+ if (range[0])
+ j = 0;
+ else
+ j = -1;
+ }
+ }
+ }
+ /*
+ * end of compute next tuple --
+ * j is set to -1 if tuple generation is over
+ */
+ } while (j != -1);
+
+ block_seek = csize - block_seek;
+ temp_seek = block_seek;
+ jj = next_tuple(n, chunk_off, chunk_span);
+ if (jj == -1)
+ break;
+ range_st[jj] = (chunk_st[jj]+chunk_off[jj])*C[jj];
+ range_end[jj] = min(range_st[jj] + C[jj]-1, endp[jj]);
+
+ for (i = jj+1; i < n; i++) {
+ range_st[i] = st[i];
+ range_end[i] = min((chunk_st[i]+chunk_off[i])*C[i]+C[i]-1, endp[i]);
+ }
+ } while (jj != -1);
+ return(words_read);
+}
+
+/*------------------------------------------------------------------------
+ * _ReadChunkArray1El --
+ * returns one element of the chunked array as specified by the index "st"
+ * the chunked file descriptor is "fp"
+ *-------------------------------------------------------------------------
+ */
+struct varlena *
+_ReadChunkArray1El(int st[],
+ int bsize,
+ int fp,
+ ArrayType *array,
+ bool *isNull)
+{
+ int i, j, n, temp, srcOff;
+ int chunk_st[MAXDIM];
+
+ int *C, csize, *dim, *lb;
+ int PCHUNK[MAXDIM], PC[MAXDIM];
+
+ CHUNK_INFO *A = (CHUNK_INFO *) ARR_DATA_PTR(array);
+
+ n = ARR_NDIM(array);
+ lb = ARR_LBOUND(array);
+ C = A->C;
+ dim = ARR_DIMS(array);
+
+ csize = C[n-1];
+ PC[n-1] = 1;
+ temp = dim[n - 1]/C[n-1];
+ for (i = n-2; i >= 0; i--){
+ PC[i] = PC[i+1] * temp;
+ temp = dim[i] / C[i];
+ csize *= C[i];
+ }
+
+ for (i = 0; i < n; st[i] -= lb[i], i++);
+ mda_get_prod(n, C, PCHUNK);
+
+ array2chunk_coord(n, C, st, chunk_st);
+
+ for (i = j = 0; i < n; i++)
+ j+= chunk_st[i]*PC[i];
+ srcOff = j * csize;
+
+ for(i = 0; i < n; i++)
+ srcOff += (st[i]-chunk_st[i]*C[i])*PCHUNK[i];
+
+ srcOff *= bsize;
+ if (lo_lseek(fp, srcOff, SEEK_SET) < 0)
+ RETURN_NULL;
+#ifdef LOARRAY
+ return (struct varlena *) LOread(fp, bsize);
+#endif
+ return (struct varlena *) 0;
+}
+