/* // $Id: //guest/julian_hyde/mondrian/src/main/mondrian/rolap/agg/Aggregation.java#1 $ // This software is subject to the terms of the Common Public License // Agreement, available at the following URL: // http://www.opensource.org/licenses/cpl.html. // (C) Copyright 2001-2002 Kana Software, Inc. and others. // All Rights Reserved. // You must accept the terms of that agreement to use this software. // // jhyde, 28 August, 2001 */ package mondrian.rolap; import mondrian.olap.*; import java.util.*; import java.io.StringWriter; import java.io.PrintWriter; /** * A <code>RolapAggregation</code> is a pre-computed aggregation over a set of * columns. * * Rollup operations:<ul> * <li>drop an unrestricted column (e.g. state=*)</li> * <li>tighten any restriction (e.g. year={1997,1998} becomes * year={1997})</li> * <li>restrict an unrestricted column (e.g. year=* becomes * year={1997})</li> * </ul> * * <p>Representation of aggregations. Sparse and dense representations are * necessary for different data sets. Should adapt automatically. Use an * interface to hold the data set, so the segment doesn't care.</p> * * Suppose we have a segment {year=1997, quarter={1,2,3}, * state={CA,WA}}. We want to roll up to a segment for {year=1997, * state={CA,WA}}. We need to know that we have all quarters. We don't. * Because year and quarter are independent, we know that we have all of * the ...</p> * * <p>Suppose we have a segment specified by {region=West, state=*, * year=*}, which materializes to ({West}, {CA,WA,OR}, {1997,1998}). * Because state=*, we can rollup to {region=West, year=*} or {region=West, * year=1997}.</p> * * <p>The space required for a segment depends upon the dimensionality (d), * cell count (c) and the value count (v). We don't count the space * required for the actual values, which is the same in any scheme.</p> * * @author jhyde * @since 28 August, 2001 * @version $Id: //guest/julian_hyde/mondrian/src/main/mondrian/rolap/agg/Aggregation.java#1 $ **/ public class RolapAggregation implements CellReader { RolapStar star; RolapStar.Measure measure; RolapStar.Column[] columns; ArrayList segments; RolapAggregation( RolapStar star, RolapStar.Measure measure, RolapStar.Column[] columns) { this.star = star; this.measure = measure; this.columns = columns; this.segments = new ArrayList(); } /** * Loads a new aggregation. */ void load(Object[][] constraintses, Collection pinnedSegments) { Segment segment = new Segment(this, constraintses); segments.add(segment); int pinCount = 1; CachePool.instance().register(segment, pinCount, pinnedSegments); } /** * Drops constraints, where the list of values is close to the values which * would be returned anyway. **/ Object[][] optimizeConstraints(Object[][] constraintses) { Util.assert(constraintses.length == columns.length); Object[][] newConstraintses = (Object[][]) constraintses.clone(); // build a list of constraints sorted by 'bloat factor' ConstraintComparator comparator = new ConstraintComparator( constraintses); Integer[] indexes = new Integer[columns.length]; double cellCount = 1.0; for (int i = 0; i < columns.length; i++) { indexes[i] = new Integer(i); cellCount *= comparator.getCardinality(i); } Arrays.sort(indexes, comparator); // eliminate constraints one by one, until the estimated cell count // doubles and is greater than 100 double originalCellCount = cellCount, maxCellCount = originalCellCount * 2 + 10; for (int i = 0; i < indexes.length; i++) { int j = ((Integer) indexes[i]).intValue(); double bloat = comparator.getBloat(j); cellCount *= bloat; if (cellCount < maxCellCount) { // eliminate this constraint newConstraintses[j] = null; } else { break; } } return newConstraintses; } private class ConstraintComparator implements Comparator { Object[][] constraintses; ConstraintComparator(Object[][] constraintses) { this.constraintses = constraintses; } // implement Comparator public int compare(Object o0, Object o1) { double bloat0 = getBloat(o0), bloat1 = getBloat(o1); if (bloat0 == bloat1) { return 0; } else if (bloat0 < bloat1) { return -1; } else { return 1; } } double getBloat(Object o) { return getBloat(((Integer) o).intValue()); } double getBloat(int i) { Object[] constraints = constraintses[i]; if (constraints == null) { return 1.0; } RolapStar.Column column = columns[i]; int cardinality = column.getCardinality(); return ((double) cardinality) / ((double) constraints.length); } /** Returns the cardinality of this column, assuming that the * constraint is not removed. **/ double getCardinality(int i) { Object[] constraints = constraintses[i]; if (constraints == null) { RolapStar.Column column = columns[i]; return column.getCardinality(); } else { return constraints.length; } } }; Object[][] obsolete_optimizeConstraints(Object[][] constraintses) { // We currently drop constraints which would retrieve more than about // half of the column values (actually (cardinality - 5) / 2). But if // each of 10 dimensions caused a 2x bloat, that would bloat the // aggregation by 1000x, and that would be a problem. So we ought to // limit the total bloat to say 10x, and drop the constraints which // would cause the least bloat. Util.assert(constraintses.length == columns.length); Object[][] newConstraintses = (Object[][]) constraintses.clone(); for (int i = 0; i < columns.length; i++) { Object[] constraints = constraintses[i]; RolapStar.Column column = columns[i]; if (constraints == null) { continue; // this column is already unconstrained } int cardinality = column.getCardinality(); if (cardinality * 2 + 5 > constraints.length) { newConstraintses[i] = null; } } return newConstraintses; } // implement CellReader public Object get(Evaluator evaluator) { RolapEvaluator rolapEvaluator = (RolapEvaluator) evaluator; RolapStar.CellRequest request = RolapAggregationManager.instance().makeRequest( rolapEvaluator.currentMembers); Object[] keys = request.getSingleValues(); return get(keys); } Object get(Object[] keys) { for (int i = 0, count = segments.size(); i < count; i++) { Segment segment = (Segment) segments.get(i); Object o = segment.get(keys); if (o != null) { // 'Util.nullValue' means right segment, but no fact table rows // exist at that coordinate, hence the total is null; 'null' // means the value wouldn't be in the segment return o; } } throw Util.getRes().newInternal("not found"); } /** * Retrieves the value identified by <code>keys</code>, and pins the * segment which holds it. <code>pinSet</code> ensures that a segment is * only pinned once. Returns <code>null</code> if no segment contains the * cell. **/ Object getAndPin(Object[] keys, Collection pinSet) { for (int i = 0, count = segments.size(); i < count; i++) { Segment segment = (Segment) segments.get(i); Object o = segment.get(keys); if (o != null) { if (!pinSet.contains(segment)) { CachePool.instance().pin(segment, pinSet); } return o; } } return null; } // -- classes ------------------------------------------------------------- static class Axis { RolapStar.Column column; Object[] constraints; // null if no constraint Object[] keys; // actual keys retrieved Hashtable mapKeyToOffset; // inversion of keys boolean contains(Object key) { if (constraints == null) { return true; } for (int i = 0; i < constraints.length; i++) { if (constraints[i].equals(key)) { return true; } } return false; } double getBytes() { return 16 + 8 * keys.length; } }; } class Segment implements CachePool.Cacheable { private int id; // for debug private static int nextId = 0; // generator for "id" private String desc; RolapAggregation aggregation; RolapAggregation.Axis[] axes; private SegmentDataset data; private int[] pos; // workspace private double recency; // when was this segment last used? private int pinCount; /** * Creates a <code>Segment</code>. * @param aggregation The aggregation that this <code>Segment</code> * belongs to * @param constraintses For each column, either an array of values * to fetch or null, indicating that the column is unconstrained **/ Segment(RolapAggregation aggregation, Object[][] constraintses) { this.id = nextId++; this.aggregation = aggregation; int axisCount = aggregation.columns.length; Util.assert(constraintses.length == axisCount); this.axes = new RolapAggregation.Axis[axisCount]; for (int i = 0; i < axisCount; i++) { RolapAggregation.Axis axis = this.axes[i] = new RolapAggregation.Axis(); axis.column = aggregation.columns[i]; axis.constraints = constraintses[i]; axis.mapKeyToOffset = new Hashtable(); } this.pos = new int[axisCount]; this.data = aggregation.star.load(this); this.desc = makeDescription(); } private String makeDescription() { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); pw.print("Segment #" + id + " {measure=" + aggregation.measure.aggregator + "(" + aggregation.measure.name + ")"); for (int i = 0; i < aggregation.columns.length; i++) { pw.print(", "); pw.print(aggregation.columns[i].name); Object[] constraints = axes[i].constraints; if (constraints == null) { pw.print("=any"); } else { pw.print("={"); for (int j = 0; j < constraints.length; j++) { if (j > 0) { pw.print(", "); } Object o = constraints[j]; pw.print(o); } pw.print("}"); } } pw.print("}"); pw.flush(); return sw.toString(); } public String toString() { return desc; } // implement CachePool.Cacheable public void removeFromCache() { boolean existed = aggregation.segments.remove(this); Util.assert(existed); } // implement CachePool.Cacheable public Object getKey() { return this; } // implement CachePool.Cacheable public void markAccessed(double recency) { this.recency = recency; } // implement CachePool.Cacheable public void setPinCount(int pinCount) { this.pinCount = pinCount; } // implement CachePool.Cacheable public int getPinCount() { return pinCount; } // implement CachePool.Cacheable public double getScore() { double benefit = getBenefit(), cost = getCost(); return benefit / cost * recency; } // implement CachePool.Cacheable public double getCost() { double bytes = 0; for (int i = 0; i < axes.length; i++) { bytes += axes[i].getBytes(); } bytes += data.getBytes(); return bytes; } private double getBenefit() { // throw new UnsupportedOperationException(); return 16; } /** * Retrieves the value at the location identified by * <code>keys</code>. Returns {@link Util#nullValue} if the cell value * is null (because no fact table rows met those criteria), and * <code>null</code> if the value is not supposed to be in this segment * (because one or more of the keys do not pass the axis criteria). **/ Object get(Object[] keys) { Util.assert(keys.length == axes.length); int missed = 0; for (int i = 0; i < keys.length; i++) { Object key = keys[i]; Integer integer = (Integer) axes[i].mapKeyToOffset.get(key); if (integer == null) { if (axes[i].contains(key)) { // see whether this segment should contain this value missed++; continue; } else { // this value should not appear in this segment; we // should be looking in a different segment return null; } } pos[i] = integer.intValue(); } if (missed > 0) { // the value should be in this segment, but isn't, because one // or more of its keys does have any values return Util.nullValue; } else { Object o = data.get(pos); if (o == null) { o = Util.nullValue; } return o; } } }; /** * A <code>SegmentDataset</code> holds the values in a segment. **/ interface SegmentDataset { Object get(int[] pos); double getBytes(); }; /** * A <code>SparseSegmentDataset</code> is a means of storing segment values * which is suitable when few of the combinations of keys have a value present. * * <p>The storage requirements are as follows. Key is 1 word for each * dimension. Hashtable entry is 3 words. Value is 1 word. Total space is (4 + * d) * v. (May also need hash table to ensure that values are only stored * once.)</p> * * <p>NOTE: This class is not synchronized.</p> **/ class SparseSegmentDataset implements SegmentDataset { Segment segment; Hashtable values; CellKey key = new CellKey(null); public Object get(int[] pos) { key.ordinals = pos; return values.get(key); } public double getBytes() { // assume a slot, key, and value are each 4 bytes return values.size() * 12; } }; /** * A <code>DenseSegmentDataset</code> is a means of storing segment values * which is suitable when most of the combinations of keys have a value * present. * * <p>The storage requirements are as follows. Table requires 1 word per * cell.</p> **/ class DenseSegmentDataset implements SegmentDataset { Segment segment; Object[] values; // length == m[0] * ... * m[axes.length-1] public Object get(int[] keys) { int offset = getOffset(keys); return values[offset]; } public double getBytes() { // assume a slot, key, and value are each 4 bytes return values.length * 12; } boolean contains(Object[] keys) { return getOffset(keys) >= 0; } Object get(Object[] keys) { int offset = getOffset(keys); return keys[offset]; } void put(Object[] keys, Object value) { int offset = getOffset(keys); keys[offset] = value; } private int getOffset(int[] keys) { int offset = 0; for (int i = 0; i < keys.length; i++) { RolapAggregation.Axis axis = segment.axes[i]; offset *= axis.keys.length; offset += keys[i]; } return offset; } private int getOffset(Object[] keys) { int offset = 0; outer: for (int i = 0; i < keys.length; i++) { RolapAggregation.Axis axis = segment.axes[i]; offset *= axis.keys.length; Object value = keys[i]; for (int j = 0, axisLength = axis.keys.length; j < axisLength; j++) { if (axis.keys[j].equals(value)) { offset += j; continue outer; } } return -1; // not found } return offset; } }; // End RolapAggregation.java
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#3 | 1603 | Julian Hyde |
mondrian: Add Andreas' taglib; Rename 'mondrian.rolap.Util.assert' to 'assertTrue', because 'assert' is a keyword in jdk 1.4; Fix 'NON EMPTY'; JUnit framework for tests; Add Oracle dataset. |
||
#2 | 1576 | Julian Hyde |
mondrian: fix dataset (add column customer.ordinal); create dataset for oracle; get queries working on oracle; get format strings working; refactor out new packages mondrian.rolap.agg and mondrian.rolap.sql. |
||
#1 | 1575 | Julian Hyde | mondrian: move some stuff around | ||
//guest/julian_hyde/mondrian/src/main/mondrian/rolap/RolapAggregation.java | |||||
#1 | 1453 | Julian Hyde | mondrian: first source check-in |