javaolapmondrianolap4jrolap

Mondrian tries to optimize Segment.load weirdly resulting in partial or fully empty results


I feel like I am missing something: I have simple MDX query and rather simple Mondrian code, yet, it still behaves weirdly - that is the results of the query differ depending on whether XML schema contains unrelated to query dimension(s).

At this point I am using this code:

import org.olap4j.CellSet;
import org.olap4j.OlapConnection;
import org.olap4j.OlapWrapper;
import org.olap4j.layout.RectangularCellSetFormatter;

import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class MainTest {


    public static void main(String[] args) throws ClassNotFoundException, SQLException {
        Class.forName("mondrian.olap4j.MondrianOlap4jDriver");
        String driver = "org.postgresql.Driver";
        String jdbcString = "jdbc:postgresql://localhost:5432/postgres";
        String username = "postgres";
        String password = "password";
        String xmlLoc = "...";
        Connection connection = DriverManager.getConnection(
                "jdbc:mondrian:"
                        + "JdbcDrivers=" + driver + ";"
                        + "Jdbc=" + jdbcString + ";"
                        + "Catalog=" + xmlLoc + ";JdbcUser=" + username + ";JdbcPassword=" + password + ";PoolNeeded=true;"
        );

        OlapWrapper wrapper = (OlapWrapper) connection;
        OlapConnection connection1 = wrapper.unwrap(OlapConnection.class);

        CellSet cellSet = connection1.createStatement().executeOlapQuery(
                "select {[name1dim].Members} on 0, {[Measures].Members} on 1 from [testcube]"
        );

        PrintWriter pw = new PrintWriter(System.out);
        new RectangularCellSetFormatter(false).format(cellSet, pw);
        pw.flush();
    }
}

And this XML schema:

<Schema name="sth">
    <Cube name="testcube">
        <Table name="test_table_2"/>
        <Dimension name="name1dim" >
            <Hierarchy hasAll="false">
                <Level name="name1dimlevel" column="name1"/>
            </Hierarchy>
        </Dimension>

        <Dimension name="name2dim" >
            <Hierarchy hasAll="false">
                <Level name="name2dimlevel" column="name2"/>
            </Hierarchy>
        </Dimension>

        <Dimension name="name3dim" >
            <Hierarchy hasAll="false">
                <Level name="name3dimlevel" column="name3"/>
            </Hierarchy>
        </Dimension>
        
        <Measure name="type1measure_sum" column="type1" aggregator="sum"/>
        <Measure name="type1measure_avg" column="type1" aggregator="avg"/>
        <Measure name="type1measure_count" column="type1" aggregator="count"/>

        <Measure name="type2measure_sum" column="type2" aggregator="sum"/>
        <Measure name="type3measure_avg" column="type2" aggregator="avg"/>
        <Measure name="type3measure_count" column="type2" aggregator="count"/>
    </Cube>
</Schema>

Test table contains id, type1, type2 (integers) and name1, name2, name3 (varchars). I get following results:

|                    | name1_0 | name1_1 | name1_10 | name1_11 | name1_12 | name1_2 | name1_3 | name1_4 | name1_5 | name1_6 | name1_7 | name1_8 | name1_9 |
+--------------------+---------+---------+----------+----------+----------+---------+---------+---------+---------+---------+---------+---------+---------+
| type1measure_sum   |   1 590 |   1 940 |    1 832 |    1 750 |    1 350 |   1 619 |   1 742 |   1 521 |   2 015 |   2 152 |   1 725 |   1 945 |   1 812 |
| type1measure_avg   |   4,804 |   5,119 |    5,357 |    5,014 |    4,193 |    4,51 |   5,109 |   4,798 |    5,14 |   5,249 |   4,901 |   4,642 |   4,611 |
| type1measure_count |     331 |     379 |      342 |      349 |      322 |     359 |     341 |     317 |     392 |     410 |     352 |     419 |     393 |
| type2measure_sum   |   2 719 |   2 740 |    2 865 |    2 894 |    2 616 |   3 000 |   2 869 |   2 634 |   3 204 |   3 178 |   2 708 |   3 335 |   3 166 |
| type3measure_avg   |   8,366 |   7,268 |    8,304 |    8,152 |    7,856 |   8,152 |   8,513 |   8,257 |   8,215 |   7,905 |   7,715 |   7,884 |   7,935 |
| type3measure_count |     325 |     377 |      345 |      355 |      333 |     368 |     337 |     319 |     390 |     402 |     351 |     423 |     399 |

HOWEVER, if I discard "name3dim" from XML Schema, the results differ:

|                    | name1_0 | name1_1 | name1_10 | name1_11 | name1_12 | name1_2 | name1_3 | name1_4 | name1_5 | name1_6 | name1_7 | name1_8 | name1_9 |
+--------------------+---------+---------+----------+----------+----------+---------+---------+---------+---------+---------+---------+---------+---------+
| type1measure_sum   |   1 655 |   1 970 |    1 845 |    1 802 |    1 376 |   1 687 |   1 809 |   1 546 |   2 064 |   2 170 |   1 772 |   2 007 |   1 842 |
| type1measure_avg   |   4,825 |   5,117 |    5,287 |    5,019 |    4,157 |   4,523 |   5,139 |   4,728 |   5,199 |   5,242 |   4,922 |   4,667 |   4,628 |
| type1measure_count |     343 |     385 |      349 |      359 |      331 |     373 |     352 |     327 |     397 |     414 |     360 |     430 |     398 |
| type2measure_sum   |   2 793 |   2 781 |    2 964 |    2 982 |    2 712 |   3 130 |   2 986 |   2 751 |   3 250 |   3 206 |   2 771 |   3 383 |   3 202 |
| type3measure_avg   |   8,312 |   7,261 |     8,42 |     8,17 |     7,93 |   8,194 |    8,58 |   8,362 |   8,228 |   7,897 |   7,719 |   7,795 |   7,926 |
| type3measure_count |     336 |     383 |      352 |      365 |      342 |     382 |     348 |     329 |     395 |     406 |     359 |     434 |     404 |

because Mondrian tries to optimize the query by adding unnecessary where clause:

select
    "test_table_2"."name1" as "c0",
    "test_table_2"."name2" as "c1",
    "test_table_2"."name3" as "c2",
    sum("test_table_2"."type1") as "m0",
    avg("test_table_2"."type1") as "m1",
    count("test_table_2"."type1") as "m2",
    sum("test_table_2"."type2") as "m3",
    avg("test_table_2"."type2") as "m4",
    count("test_table_2"."type2") as "m5"
from
    "test_table_2" as "test_table_2"
where
    "test_table_2"."name2" = 'name2_0'
and
    "test_table_2"."name3" is null
group by
    "test_table_2"."name1",
    "test_table_2"."name2",
    "test_table_2"."name3"

Which limits number of results that are included. I am not sure whether it is bug or some planned behavior. I also have another very similar issue, but instead of using "is null" as filter, Mondrian uses only first dimension member.

The issue is also present when instead of <Table/>, <View><SQL>SomeSql</SQL></View> is used in schema.


Solution

  • The test MDX query does not specify the slicer (WHERE clause). In this case the slicer is created automatically and includes all dimensions which are not mentioned on other axis. Moreover slicer is built with default dimension elements. As you have not specified any default members when defining dimension, Mondrian just takes the "first" element. What is the first element can be determined by the dimension ordinal (or sorting of the dimension elements). So I guess Mondrian just gets the "null" element as the first element in the dimension. I would suggest to adjust your Mondrian schema so that each dimension has a default order and perhaps add some filters to exclude NULL element values.