Aggregation.java #1

/*
// $Id: //guest/julian_hyde/mondrian/src/main/mondrian/rolap/agg/Aggregation.java#1 $
// This software is subject to the terms of the Common Public License
// Agreement, available at the following URL:
// http://www.opensource.org/licenses/cpl.html.
// (C) Copyright 2001-2002 Kana Software, Inc. and others.
// All Rights Reserved.
// You must accept the terms of that agreement to use this software.
//
// jhyde, 28 August, 2001
*/

package mondrian.rolap;
import mondrian.olap.*;
import java.util.*;
import java.io.StringWriter;
import java.io.PrintWriter;

/**
 * A <code>RolapAggregation</code> is a pre-computed aggregation over a set of
 * columns.
 *
 * Rollup operations:<ul>
 * <li>drop an unrestricted column (e.g. state=*)</li>
 * <li>tighten any restriction (e.g. year={1997,1998} becomes
 *     year={1997})</li>
 * <li>restrict an unrestricted column (e.g. year=* becomes
 *     year={1997})</li>
 * </ul>
 *
 * <p>Representation of aggregations. Sparse and dense representations are
 * necessary for different data sets. Should adapt automatically. Use an
 * interface to hold the data set, so the segment doesn't care.</p>
 *
 * <p>Suppose we have a segment {year=1997, quarter={1,2,3},
 * state={CA,WA}}. We want to roll up to a segment for {year=1997,
 * state={CA,WA}}.  We need to know that we have all quarters.  We don't.
 * Because year and quarter are independent, we know that we have all of
 * the ...</p>
 *
 * <p>Suppose we have a segment specified by {region=West, state=*,
 * year=*}, which materializes to ({West}, {CA,WA,OR}, {1997,1998}).
 * Because state=*, we can rollup to {region=West, year=*} or {region=West,
 * year=1997}.</p>
 *
 * <p>The space required for a segment depends upon the dimensionality (d),
 * cell count (c) and the value count (v). We don't count the space
 * required for the actual values, which is the same in any scheme.</p>
 *
 * @author jhyde
 * @since 28 August, 2001
 * @version $Id: //guest/julian_hyde/mondrian/src/main/mondrian/rolap/agg/Aggregation.java#1 $
 **/
public class RolapAggregation implements CellReader
{
	/** Star whose <code>load</code> method populates each new segment. **/
	RolapStar star;
	/** The measure this aggregation is for. **/
	RolapStar.Measure measure;
	/** Columns of this aggregation; constraint arrays are indexed in
	 * parallel with this array. **/
	RolapStar.Column[] columns;
	/** The {@link Segment}s which have been loaded into this aggregation. **/
	ArrayList segments;

	/**
	 * Creates an aggregation which initially has no segments.
	 *
	 * @param star Star the segments are loaded from
	 * @param measure Measure of this aggregation
	 * @param columns Columns of this aggregation
	 **/
	RolapAggregation(
		RolapStar star, RolapStar.Measure measure, RolapStar.Column[] columns)
	{
		this.star = star;
		this.measure = measure;
		this.columns = columns;
		this.segments = new ArrayList();
	}

	/**
	 * Loads a new segment, adds it to this aggregation, and registers it
	 * with the {@link CachePool} with an initial pin count of 1.
	 *
	 * @param constraintses For each column, either the array of values to
	 *    fetch, or null if the column is unconstrained
	 * @param pinnedSegments Collection passed through to
	 *    <code>CachePool.register</code>
	 */
	void load(Object[][] constraintses, Collection pinnedSegments)
	{
		Segment segment = new Segment(this, constraintses);
		segments.add(segment);
		int pinCount = 1;
		CachePool.instance().register(segment, pinCount, pinnedSegments);
	}

	/**
	 * Drops constraints, where the list of values is close to the values which
	 * would be returned anyway.
	 *
	 * <p>The argument is not modified; the result is a shallow copy in which
	 * the dropped constraints have been replaced by null.</p>
	 *
	 * @param constraintses For each column, either an array of values or
	 *    null; must have the same length as <code>columns</code>
	 * @return Copy of <code>constraintses</code> with some entries set to
	 *    null
	 **/
	Object[][] optimizeConstraints(Object[][] constraintses)
	{
		Util.assert(constraintses.length == columns.length);
		Object[][] newConstraintses = (Object[][]) constraintses.clone();
		// build a list of column indexes sorted by 'bloat factor', and
		// estimate the cell count with every constraint still in place
		ConstraintComparator comparator =
			new ConstraintComparator(constraintses);
		Integer[] indexes = new Integer[columns.length];
		double cellCount = 1.0;
		for (int i = 0; i < columns.length; i++) {
			indexes[i] = new Integer(i);
			cellCount *= comparator.getCardinality(i);
		}
		Arrays.sort(indexes, comparator);
		// eliminate constraints one by one, least bloat first, for as long
		// as the estimated cell count stays below twice the original
		// estimate plus 10
		double originalCellCount = cellCount,
			maxCellCount = originalCellCount * 2 + 10;
		for (int i = 0; i < indexes.length; i++) {
			int j = indexes[i].intValue();
			cellCount *= comparator.getBloat(j);
			if (cellCount < maxCellCount) {
				// eliminate this constraint
				newConstraintses[j] = null;
			} else {
				break;
			}
		}
		return newConstraintses;
	}

	/**
	 * Orders column indexes (held as <code>Integer</code>s) by the factor
	 * by which the estimated cell count would grow if the column's
	 * constraint were removed. Reads <code>columns</code> from the
	 * enclosing aggregation.
	 **/
	private class ConstraintComparator implements Comparator {
		Object[][] constraintses;

		ConstraintComparator(Object[][] constraintses)
		{
			this.constraintses = constraintses;
		}

		// implement Comparator
		public int compare(Object o0, Object o1)
		{
			double bloat0 = getBloat(o0),
				bloat1 = getBloat(o1);
			if (bloat0 == bloat1) {
				return 0;
			}
			return (bloat0 < bloat1) ? -1 : 1;
		}

		/** Returns the bloat factor of the column whose index is held in
		 * <code>o</code>, which must be an <code>Integer</code>. **/
		double getBloat(Object o)
		{
			return getBloat(((Integer) o).intValue());
		}

		/** Returns the ratio of the column's cardinality to the number of
		 * constrained values, or 1 if the column is already
		 * unconstrained. **/
		double getBloat(int i)
		{
			Object[] constraints = constraintses[i];
			if (constraints == null) {
				return 1.0;
			}
			RolapStar.Column column = columns[i];
			int cardinality = column.getCardinality();
			return ((double) cardinality) / ((double) constraints.length);
		}

		/** Returns the cardinality of this column, assuming that the
		 * constraint is not removed. **/
		double getCardinality(int i)
		{
			Object[] constraints = constraintses[i];
			if (constraints == null) {
				RolapStar.Column column = columns[i];
				return column.getCardinality();
			} else {
				return constraints.length;
			}
		}
	};

	/**
	 * Older, per-column heuristic for dropping constraints; see
	 * {@link #optimizeConstraints} for the current one.
	 *
	 * <p>Drops each constraint which lists more than
	 * <code>(cardinality - 5) / 2</code> values. The argument is not
	 * modified.</p>
	 **/
	Object[][] obsolete_optimizeConstraints(Object[][] constraintses)
	{
		// We currently drop constraints which would retrieve more than about
		// half of the column values (actually (cardinality - 5) / 2). But if
		// each of 10 dimensions caused a 2x bloat, that would bloat the
		// aggregation by 1000x, and that would be a problem. So we ought to
		// limit the total bloat to say 10x, and drop the constraints which
		// would cause the least bloat.
		Util.assert(constraintses.length == columns.length);
		Object[][] newConstraintses = (Object[][]) constraintses.clone();
		for (int i = 0; i < columns.length; i++) {
			Object[] constraints = constraintses[i];
			RolapStar.Column column = columns[i];
			if (constraints == null) {
				continue; // this column is already unconstrained
			}
			int cardinality = column.getCardinality();
			// constraints.length > (cardinality - 5) / 2, rearranged to
			// avoid integer division. (The operands were previously the
			// wrong way round, which dropped practically every constraint.)
			if (constraints.length * 2 + 5 > cardinality) {
				newConstraintses[i] = null;
			}
		}
		return newConstraintses;
	}

	// implement CellReader
	public Object get(Evaluator evaluator)
	{
		RolapEvaluator rolapEvaluator = (RolapEvaluator) evaluator;
		RolapStar.CellRequest request =
			RolapAggregationManager.instance().makeRequest(
				rolapEvaluator.currentMembers);
		Object[] keys = request.getSingleValues();
		return get(keys);
	}

	/**
	 * Retrieves the value identified by <code>keys</code> from the first
	 * segment which contains that cell.
	 *
	 * @param keys One key per column
	 * @return The cell value, never null
	 * @throws RuntimeException the internal error "not found", if no
	 *    segment contains the cell
	 **/
	Object get(Object[] keys)
	{
		for (int i = 0, count = segments.size(); i < count; i++) {
			Segment segment = (Segment) segments.get(i);
			Object o = segment.get(keys);
			if (o != null) {
				// 'Util.nullValue' means right segment, but no fact table rows
				// exist at that coordinate, hence the total is null; 'null'
				// means the value wouldn't be in the segment
				return o;
			}
		}
		throw Util.getRes().newInternal("not found");
	}

	/**
	 * Retrieves the value identified by <code>keys</code>, and pins the
	 * segment which holds it. <code>pinSet</code> ensures that a segment is
	 * only pinned once. Returns <code>null</code> if no segment contains the
	 * cell.
	 **/
	Object getAndPin(Object[] keys, Collection pinSet)
	{
		for (int i = 0, count = segments.size(); i < count; i++) {
			Segment segment = (Segment) segments.get(i);
			Object o = segment.get(keys);
			if (o != null) {
				if (!pinSet.contains(segment)) {
					CachePool.instance().pin(segment, pinSet);
				}
				return o;
			}
		}
		return null;
	}

	// -- classes -------------------------------------------------------------

	/**
	 * One column of a {@link Segment}: the constraint which was applied to
	 * the column, and the keys which were retrieved for it.
	 **/
	static class Axis
	{
		RolapStar.Column column;
		Object[] constraints; // null if no constraint
		Object[] keys; // actual keys retrieved
		Hashtable mapKeyToOffset; // inversion of keys

		/**
		 * Returns whether <code>key</code> satisfies this axis's constraint.
		 * An unconstrained axis accepts every key.
		 **/
		boolean contains(Object key)
		{
			if (constraints == null) {
				return true;
			}
			for (int i = 0; i < constraints.length; i++) {
				if (constraints[i].equals(key)) {
					return true;
				}
			}
			return false;
		}

		/** Returns the estimated size of this axis: 16 bytes plus 8 bytes
		 * per retrieved key. **/
		double getBytes()
		{
			return 16 + 8 * keys.length;
		}
	};
}

class Segment implements CachePool.Cacheable
{
	private static int nextId = 0; // source of values for "id"
	private int id; // identifies this segment in debug output
	private String desc; // cached result of makeDescription()
	RolapAggregation aggregation;
	RolapAggregation.Axis[] axes;
	private SegmentDataset data;
	private int[] pos; // scratch coordinates, reused by every call to get()
	private double recency; // last value passed to markAccessed()
	private int pinCount;

	/**
	 * Creates a <code>Segment</code> and loads its data from the
	 * aggregation's star.
	 *
	 * @param aggregation The aggregation that this <code>Segment</code>
	 *    belongs to
	 * @param constraintses For each column, either an array of values
	 *    to fetch or null, indicating that the column is unconstrained
	 **/
	Segment(RolapAggregation aggregation, Object[][] constraintses)
	{
		this.id = nextId++;
		this.aggregation = aggregation;
		final int axisCount = aggregation.columns.length;
		Util.assert(constraintses.length == axisCount);
		this.axes = new RolapAggregation.Axis[axisCount];
		for (int i = 0; i < axisCount; i++) {
			final RolapAggregation.Axis axis = new RolapAggregation.Axis();
			this.axes[i] = axis;
			axis.column = aggregation.columns[i];
			axis.constraints = constraintses[i];
			axis.mapKeyToOffset = new Hashtable();
		}
		this.pos = new int[axisCount];
		// the axes and workspace must exist before the star sees this
		// segment; the description is built last
		this.data = aggregation.star.load(this);
		this.desc = makeDescription();
	}

	/**
	 * Builds the text returned by {@link #toString}: the segment id, the
	 * measure, and for each column either "=any" or the list of
	 * constrained values.
	 **/
	private String makeDescription()
	{
		final StringBuffer buf = new StringBuffer();
		buf.append("Segment #" + id + " {measure=" + aggregation.measure.aggregator +
			"("	+ aggregation.measure.name + ")");
		for (int i = 0; i < aggregation.columns.length; i++) {
			buf.append(", ");
			buf.append(aggregation.columns[i].name);
			final Object[] constraints = axes[i].constraints;
			if (constraints == null) {
				buf.append("=any");
				continue;
			}
			buf.append("={");
			for (int j = 0; j < constraints.length; j++) {
				if (j > 0) {
					buf.append(", ");
				}
				final Object o = constraints[j];
				buf.append(o);
			}
			buf.append("}");
		}
		buf.append("}");
		return buf.toString();
	}

	public String toString()
	{
		return desc;
	}

	// implement CachePool.Cacheable
	public void removeFromCache()
	{
		// detach from the owning aggregation; the segment must have been
		// there
		final boolean wasPresent = aggregation.segments.remove(this);
		Util.assert(wasPresent);
	}

	// implement CachePool.Cacheable
	public Object getKey()
	{
		return this;
	}

	// implement CachePool.Cacheable
	public void markAccessed(double recency)
	{
		this.recency = recency;
	}

	// implement CachePool.Cacheable
	public void setPinCount(int pinCount)
	{
		this.pinCount = pinCount;
	}

	// implement CachePool.Cacheable
	public int getPinCount()
	{
		return pinCount;
	}

	// implement CachePool.Cacheable
	public double getScore()
	{
		// benefit per byte, weighted by recency
		final double benefit = getBenefit();
		final double cost = getCost();
		return benefit / cost * recency;
	}

	// implement CachePool.Cacheable
	public double getCost()
	{
		// estimated size: every axis, then the dataset
		double total = 0;
		for (int i = 0; i < axes.length; i++) {
			total += axes[i].getBytes();
		}
		total += data.getBytes();
		return total;
	}

	/** Returns the benefit used by {@link #getScore}; currently a
	 * constant. **/
	private double getBenefit()
	{
		return 16;
	}

	/**
	 * Retrieves the value at the location identified by
	 * <code>keys</code>. Returns {@link Util#nullValue} if the cell value
	 * is null (because no fact table rows met those criteria), and
	 * <code>null</code> if the value is not supposed to be in this segment
	 * (because one or more of the keys do not pass the axis criteria).
	 **/
	Object get(Object[] keys)
	{
		Util.assert(keys.length == axes.length);
		boolean keyWithoutValues = false;
		for (int i = 0; i < keys.length; i++) {
			final RolapAggregation.Axis axis = axes[i];
			final Object key = keys[i];
			final Integer offset = (Integer) axis.mapKeyToOffset.get(key);
			if (offset != null) {
				pos[i] = offset.intValue();
				continue;
			}
			if (!axis.contains(key)) {
				// this value should not appear in this segment; we
				// should be looking in a different segment
				return null;
			}
			// the axis admits this key, but no key like it was loaded
			keyWithoutValues = true;
		}
		if (keyWithoutValues) {
			// the value should be in this segment, but isn't, because one
			// or more of its keys does not have any values
			return Util.nullValue;
		}
		final Object value = data.get(pos);
		if (value == null) {
			return Util.nullValue;
		}
		return value;
	}
};

/**
 * A <code>SegmentDataset</code> holds the values in a segment.
 **/
interface SegmentDataset {
	/**
	 * Returns the value stored at the coordinates <code>pos</code>, which
	 * holds one offset per axis of the segment. {@link Segment#get} treats a
	 * <code>null</code> result as "no value at that cell".
	 **/
	Object get(int[] pos);
	/**
	 * Returns the estimated size of this dataset in bytes; added to the axis
	 * sizes to give the cost of the segment.
	 **/
	double getBytes();
};

/**
 * A <code>SparseSegmentDataset</code> is a means of storing segment values
 * which is suitable when few of the combinations of keys have a value present.
 *
 * <p>The storage requirements are as follows. Key is 1 word for each
 * dimension. Hashtable entry is 3 words. Value is 1 word. Total space is (4 +
 * d) * v. (May also need hash table to ensure that values are only stored
 * once.)</p>
 *
 * <p>NOTE: This class is not synchronized.</p>
 **/
class SparseSegmentDataset implements SegmentDataset {
	Segment segment;
	Hashtable values;
	// scratch key, re-pointed at the caller's coordinates on every lookup
	CellKey key = new CellKey(null);

	/**
	 * Looks up the value stored for the coordinates <code>pos</code>.
	 * Returns whatever the hashtable holds for them, or <code>null</code> if
	 * there is no entry.
	 **/
	public Object get(int[] pos)
	{
		final CellKey lookupKey = this.key;
		lookupKey.ordinals = pos;
		return values.get(lookupKey);
	}

	/** Returns the estimated size in bytes of the stored entries. **/
	public double getBytes()
	{
		// assume a slot, key, and value are each 4 bytes
		final int bytesPerEntry = 12;
		return values.size() * bytesPerEntry;
	}
};

/**
 * A <code>DenseSegmentDataset</code> is a means of storing segment values
 * which is suitable when most of the combinations of keys have a value
 * present.
 *
 * <p>The storage requirements are as follows. Table requires 1 word per
 * cell.</p>
 **/
class DenseSegmentDataset implements SegmentDataset
{
	Segment segment;
	Object[] values; // length == m[0] * ... * m[axes.length-1]

	/**
	 * Returns the value at the cell whose per-axis offsets are
	 * <code>keys</code>.
	 **/
	public Object get(int[] keys)
	{
		int offset = getOffset(keys);
		return values[offset];
	}

	/** Returns the estimated size in bytes: 12 bytes per cell. **/
	public double getBytes()
	{
		// assume a slot, key, and value are each 4 bytes
		return values.length * 12;
	}

	/**
	 * Returns whether every key in <code>keys</code> occurs on its axis,
	 * that is, whether the cell lies inside this dataset.
	 **/
	boolean contains(Object[] keys)
	{
		return getOffset(keys) >= 0;
	}

	/**
	 * Returns the value at the cell identified by the key values
	 * <code>keys</code>, or <code>null</code> if one of the keys does not
	 * occur on its axis.
	 **/
	Object get(Object[] keys)
	{
		int offset = getOffset(keys);
		if (offset < 0) {
			return null;
		}
		// read from the value table (this used to index into 'keys')
		return values[offset];
	}

	/**
	 * Stores <code>value</code> at the cell identified by the key values
	 * <code>keys</code>.
	 *
	 * @throws IllegalArgumentException if one of the keys does not occur on
	 *    its axis
	 **/
	void put(Object[] keys, Object value)
	{
		int offset = getOffset(keys);
		if (offset < 0) {
			throw new IllegalArgumentException(
				"key is not on an axis of this segment");
		}
		// write to the value table (this used to overwrite an element of
		// 'keys', so nothing was ever stored)
		values[offset] = value;
	}

	/**
	 * Converts per-axis offsets into an index into <code>values</code>; the
	 * last axis varies fastest.
	 **/
	private int getOffset(int[] keys)
	{
		int offset = 0;
		for (int i = 0; i < keys.length; i++) {
			offset *= segment.axes[i].keys.length;
			offset += keys[i];
		}
		return offset;
	}

	/**
	 * Converts key values into an index into <code>values</code>, by
	 * searching each axis for its key. Returns -1 if any key is not found.
	 **/
	private int getOffset(Object[] keys)
	{
		int offset = 0;
		for (int i = 0; i < keys.length; i++) {
			Object[] axisKeys = segment.axes[i].keys;
			int ordinal = indexOf(axisKeys, keys[i]);
			if (ordinal < 0) {
				return -1; // not found
			}
			offset = offset * axisKeys.length + ordinal;
		}
		return offset;
	}

	/** Returns the position of the first element of <code>axisKeys</code>
	 * which equals <code>value</code>, or -1 if there is none. **/
	private static int indexOf(Object[] axisKeys, Object value)
	{
		for (int j = 0; j < axisKeys.length; j++) {
			if (axisKeys[j].equals(value)) {
				return j;
			}
		}
		return -1;
	}
};

// End RolapAggregation.java
#	Change	User	Description
#3	1603	Julian Hyde	mondrian: Add Andreas' taglib; Rename 'mondrian.rolap.Util.assert' to 'assertTrue', because 'assert' is a keyword in jdk 1.4; Fix 'NON EMPTY'; JUnit framework for tests; Add Oracle dataset.
#2	1576	Julian Hyde	mondrian: fix dataset (add column customer.ordinal); create dataset for oracle; get queries working on oracle; get format strings working; refactor out new packages mondrian.rolap.agg and mondrian.rolap.sql.
#1	1575	Julian Hyde	mondrian: move some stuff around
//guest/julian_hyde/mondrian/src/main/mondrian/rolap/RolapAggregation.java
#1	1453	Julian Hyde	mondrian: first source check-in