The following script may be used to perform automatic TPCH generation, database creation, load and indexing. It will also teke measures of each operation and display it at the end.

1 Setting Informix variables

Before running the index creation review database configuration and ensure you have correctly setup Informix. Review you have set MAX_PDQPRIORITY to 100, that have BUFFERS for the specific page size and DS_TOTAL_MEMORY and DS_NONPDQ_QUERY_MEM.

For example, if you have 12 GB or RAM available for Informix, setup 4GB for SHMVIRTSIZE, 8GB for buffers.

As you have 4GB for shared memory (SHMVIRTSIZE) we can give DS_TOTAL_MEMORY 2.5 GB and DS_NONPDQ_QUERY_MEM .06 GB (25 %). And if we are using 4K pages for dbspaces, we will allocate 2M of pages for a total of 8GB of buffers.

  • SHMVIRTSIZE 4000000
  • MAX_PDQPRIORITY 100
  • DS_TOTAL_MEMORY 2560000
  • DS_NONPDQ_QUERY_MEM 640000
  • BUFFERPOOL size=4K,buffers=2000000,lrus=16,lru_min_dirty=50,lru_max_dirty=60.5

2 Automatic load script

The script will generate tpch files for given scale, create a database tpch_SCALE, load data and create all indexes.

Setup:

  • DBGEN_PATH
  • SCALE
  • PDQPRIORITY
  • DBSPACE_D
  • DBSPACE_I

Copy
# =====================================================
# PATH to dbgen and dists.dss
# =====================================================

DBGEN_PATH=./

DBGEN_EXEC=$DBGEN_PATH/dbgen
if [ ! -f $DBGEN_EXEC ]; then
    echo "dbgen executable '$DBGEN_EXEC' not found"
    exit 1
fi


# =====================================================
# Location where dbgen data files will be stored
# =====================================================
DBGEN_DATA=`pwd`

# =====================================================
# copy dists.dss into DBGEN_DATA
# =====================================================
cp $DBGEN_PATH/dists.dss $DBGEN_DATA

# =====================================================
# TPCH scale: 1, 10, 100, 1000 (1TB)
# =====================================================
SCALE=1

# =====================================================
# SET PDQ PRIORITY
# =====================================================

PDQPRIORITY=100

# =====================================================
# dbspace for data
# =====================================================
DBSPACE_D=d_data

# =====================================================
# dbspace for indexes
# =====================================================
DBSPACE_I=i_data

# =====================================================
# table extens = (rowsize * number of rows / K)
# =====================================================

lineitem=$((146*6000000*$SCALE/1000))
orders=$((136*1500000*$SCALE/1000))
partsupp=$((221*800000*$SCALE/1000))
part=$((168*200000*$SCALE/1000))
customer=$((227*150000*$SCALE/1000))
supplier=$((200*10000*$SCALE/1000))

# Check max extent size

min() {
    echo $(( $1 < $2 ? $1 : $2 ))
}

echo "lineitem=" $lineitem
lineitem=$(min $lineitem 67108860)


tcph_dbgen()
{
	cp $DBGEN_PATH/dists.dss $DBGEN_DATA

	echo "-------------------------------------------------------------------"
	echo "GENERATE DBGEN AT SCALE $1"
	echo "-------------------------------------------------------------------"

	date
	time (cd $DBGEN_DATA; $DBGEN_EXEC -s $1 -f; ls -l lineitem.tbl)
	date
}

tpch_create()
{
	echo "-------------------------------------------------------------------"
	echo "CREATE DB"
	echo "-------------------------------------------------------------------"

	time dbaccess - - <<!
	    drop database if exists tpch_$1;
	    create database tpch_$1 in $DBSPACE_D;


-- Timer procedure to keep track of operation times
-- USE: 
-- timer_p('event')
-- event ...
-- timer_p('event')

CREATE PROCEDURE timer_p(p_key varchar(60))
    RETURNING VARCHAR(80)

    DEFINE m_dt    INTERVAL HOUR TO SECOND;
    DEFINE m_seqno INTEGER;
    CREATE TABLE IF NOT EXISTS timer_t(
                seqno serial,
                key varchar(60) not null PRIMARY KEY,
                start datetime year to fraction default current,
                dt interval hour to second
    );

    LET m_seqno = NULL;
    SELECT seqno INTO m_seqno FROM timer_t WHERE key = p_key;
    if m_seqno IS NULL THEN
        INSERT INTO timer_t(key) VALUES (p_key);

        RETURN "EVENT START : " || p_key;
    ELSE
       UPDATE timer_t SET dt = (
          SELECT (current-start)::interval second(9) to fraction
            FROM timer_t WHERE seqno = m_seqno
          )

         WHERE seqno = m_seqno;
       SELECT dt INTO m_dt FROM timer_t WHERE seqno = m_seqno;
       RETURN "EVENT FINISH: " || p_key || " time=" || m_dt;
    END IF

END PROCEDURE

!
}

tpch_load()
{
	echo "-------------------------------------------------------------------"
	echo "LOAD DATA"
	echo "-------------------------------------------------------------------"

	DBDATE=Y4MD-
	DBMONEY=.

	time dbaccess - - 2>/dev/null <<!

database tpch_$1;

create raw table nation (
    n_nationkey integer not null ,
    n_name char(25) not null ,
    n_regionkey integer not null ,
    n_comment varchar(152) 
) lock mode row;

create raw table region (
    r_regionkey integer not null ,
    r_name char(25) not null ,
    r_comment varchar(152) 
) lock mode row;

create raw table part (
    p_partkey integer not null ,
    p_name varchar(55) not null ,
    p_mfgr char(25) not null ,
    p_brand char(10) not null ,
    p_type varchar(25) not null ,
    p_size integer not null ,
    p_container char(10) not null ,
    p_retailprice decimal(15,2) not null ,
    p_comment varchar(23) not null 
) in $DBSPACE_D extent size $part next size 1024 lock mode row ;

create raw table supplier (
    s_suppkey integer not null ,
    s_name char(25) not null ,
    s_address varchar(40) not null ,
    s_nationkey integer not null ,
    s_phone char(15) not null ,
    s_acctbal decimal(15,2) not null ,
    s_comment varchar(101) not null 
) in $DBSPACE_D extent size $supplier next size 1024 lock mode row ;

create raw table partsupp (
    ps_partkey integer not null ,
    ps_suppkey integer not null ,
    ps_availqty integer not null ,
    ps_supplycost decimal(15,2) not null ,
    ps_comment varchar(199) not null ) lock mode row;
    create raw table customer (
    c_custkey integer not null ,
    c_name varchar(25) not null ,
    c_address varchar(40) not null ,
    c_nationkey integer not null ,
    c_phone char(15) not null ,
    c_acctbal decimal(15,2) not null ,
    c_mktsegment char(10) not null ,
    c_comment varchar(117) not null 
) in $DBSPACE_D extent size $partsupp next size 1024 lock mode row ;

create raw table orders (
    o_orderkey integer not null ,
    o_custkey integer not null ,
    o_orderstatus char(1) not null ,
    o_totalprice decimal(15,2) not null ,
    o_orderdate date not null ,
    o_orderpriority char(15) not null ,
    o_clerk char(15) not null ,
    o_shippriority integer not null ,
    o_comment varchar(79) not null 
) in $DBSPACE_D extent size $orders next size 1024 lock mode row ;

create raw table lineitem (
    l_orderkey integer not null ,
    l_partkey integer not null ,
    l_suppkey integer not null ,
    l_linenumber integer not null ,
    l_quantity decimal(15,2) not null ,
    l_extendedprice decimal(15,2) not null ,
    l_discount decimal(15,2) not null ,
    l_tax decimal(15,2) not null ,
    l_returnflag char(1) not null ,
    l_linestatus char(1) not null ,
    l_shipdate date not null ,
    l_commitdate date not null ,
    l_receiptdate date not null ,
    l_shipinstruct char(25) not null ,
    l_shipmode char(10) not null ,
    l_comment varchar(44) not null
) in $DBSPACE_D extent size $lineitem next size 1024 lock mode row ;

create external table nation_ext   sameas nation   using ( datafiles("disk:/${DBGEN_DATA}/nation.tbl"));
create external table region_ext   sameas region   using ( datafiles("disk:/${DBGEN_DATA}/region.tbl"));
create external table part_ext     sameas part     using ( datafiles("disk:/${DBGEN_DATA}/part.tbl"));
create external table supplier_ext sameas supplier using ( datafiles("disk:/${DBGEN_DATA}/supplier.tbl"));
create external table partsupp_ext sameas partsupp using ( datafiles("disk:/${DBGEN_DATA}/partsupp.tbl"));
create external table customer_ext sameas customer using ( datafiles("disk:/${DBGEN_DATA}/customer.tbl"));
create external table orders_ext   sameas orders   using ( datafiles("disk:/${DBGEN_DATA}/orders.tbl"));
create external table lineitem_ext sameas lineitem using ( datafiles("disk:/${DBGEN_DATA}/lineitem.tbl"));

-- ==================================================
-- ENSURE TO RESET timer_t TABLE
-- ==================================================

delete from timer_t;

-- ==================================================
-- Start loading
-- ==================================================

OUTPUT TO PIPE cat WITHOUT HEADINGS select current, 'loading nation' from systables where tabid=1;
execute procedure timer_p("load nation");
insert into nation select * from nation_ext;
execute procedure timer_p("load nation");

execute procedure timer_p("load nation");
insert into region select * from region_ext;
execute procedure timer_p("load nation");

execute procedure timer_p("load supplier");
insert into supplier select * from supplier_ext;
execute procedure timer_p("load supplier");

execute procedure timer_p("load customer");
insert into customer select * from customer_ext;
execute procedure timer_p("load customer");

execute procedure timer_p("load part");
insert into part select * from part_ext;
execute procedure timer_p("load part");

execute procedure timer_p("load partsupp");
insert into partsupp select * from partsupp_ext;
execute procedure timer_p("load partsupp");

execute procedure timer_p("load lineitem");
insert into lineitem select * from lineitem_ext;
execute procedure timer_p("load lineitem");

execute procedure timer_p("load orders");
insert into orders select * from orders_ext;
execute procedure timer_p("load orders");

-- ==================================================
-- SHOW TIMES
-- ==================================================

select key, dt from timer_t order by seqno;


alter table nation type (standard);
alter table region type (standard);
alter table supplier type (standard);
alter table customer type (standard);
alter table part type (standard);
alter table partsupp type (standard);
alter table lineitem type (standard);
alter table orders type (standard);

OUTPUT TO PIPE cat WITHOUT HEADINGS select current, 'begin update statistics' from systables where tabid=1;
update statistics;
OUTPUT TO PIPE cat WITHOUT HEADINGS select current, 'end update statistics' from systables where tabid=1;

!

    date
    ontape -s -F -t /dev/null
}

tpch_index()
{
	echo "-------------------------------------------------------------------"
	echo "INDEX DB"
	echo "-------------------------------------------------------------------"

	time dbaccess - - <<!

database tpch_$1;

create temp table tp01 ( dt datetime year to fraction );

-- ==================================================
--  SET PDQ
-- ==================================================

SET PDQPRIORITY $PDQPRIORITY;

-- ==================================================
-- CREATE UNIQUE INDEXES
-- ==================================================


execute procedure timer_p("index region_pk");
create unique index region_pk on region (r_regionkey) IN $DBSPACE_I;
execute procedure timer_p("index region_pk");

execute procedure timer_p("index nation_pk");
create unique index nation_pk on nation (n_nationkey) IN $DBSPACE_I;
execute procedure timer_p("index nation_pk");

execute procedure timer_p("index supplier_pk");
create unique index supplier_pk on supplier (s_suppkey) IN $DBSPACE_I;
execute procedure timer_p("index supplier_pk");

execute procedure timer_p("index customer_pk");
create unique index customer_pk on customer (c_custkey) IN $DBSPACE_I;
execute procedure timer_p("index customer_pk");

execute procedure timer_p("index part_pk");
create unique index part_pk on part (p_partkey) IN $DBSPACE_I;
execute procedure timer_p("index part_pk");

execute procedure timer_p("index partsupp_pk");
create unique index partsupp_pk on partsupp (ps_partkey, ps_suppkey) IN $DBSPACE_I;
execute procedure timer_p("index partsupp_pk");

execute procedure timer_p("index lineitem_pk");
create unique index lineitem_pk on lineitem (l_orderkey, l_linenumber) IN $DBSPACE_I;
execute procedure timer_p("index lineitem_pk");

execute procedure timer_p("index orders_pk");
create unique index orders_pk on orders (o_orderkey) IN $DBSPACE_I;
execute procedure timer_p("index orders_pk");

-- ==================================================
-- CREATE INDEXES
-- ==================================================

execute procedure timer_p("index nation_fk_region");
create index nation_fk_region on nation (n_regionkey) IN $DBSPACE_I;
execute procedure timer_p("index nation_fk_region");

execute procedure timer_p("index supplier_fk_region");
create index supplier_fk_nation on supplier (s_nationkey) IN $DBSPACE_I;
execute procedure timer_p("index supplier_fk_region");

execute procedure timer_p("index partsupp_fk_part");
create index partsupp_fk_part on partsupp (ps_partkey) IN $DBSPACE_I;
execute procedure timer_p("index partsupp_fk_part");

execute procedure timer_p("index partsupp_fk_supplier");
create index partsupp_fk_supplier on partsupp (ps_suppkey) IN $DBSPACE_I;
execute procedure timer_p("index partsupp_fk_supplier");

execute procedure timer_p("index customer_fk_nation");
create index customer_fk_nation on customer (c_nationkey) IN $DBSPACE_I;
execute procedure timer_p("index customer_fk_nation");

execute procedure timer_p("index orders_fk_customer");
create index orders_fk_customer on orders (o_custkey) IN $DBSPACE_I;
execute procedure timer_p("index orders_fk_customer");

execute procedure timer_p("index lineitem_fk_orders");
create index lineitem_fk_orders on lineitem (l_orderkey) IN $DBSPACE_I;
execute procedure timer_p("index lineitem_fk_orders");

execute procedure timer_p("index lineitem_fk_part");
create index lineitem_fk_part on lineitem (l_partkey) IN $DBSPACE_I;
execute procedure timer_p("index lineitem_fk_part");

execute procedure timer_p("index lineitem_fk_supplier");
create index lineitem_fk_supplier on lineitem (l_suppkey) IN $DBSPACE_I;
execute procedure timer_p("index lineitem_fk_supplier");

-- ==================================================
-- ADD PRIMARY KEY CONSTRAINTS
-- ==================================================


alter table region add constraint (primary key (r_regionkey));
alter table nation add constraint (primary key (n_nationkey));
alter table supplier add constraint (primary key (s_suppkey));
alter table customer add constraint (primary key (c_custkey));
alter table part add constraint (primary key (p_partkey));
alter table partsupp add constraint (primary key (ps_partkey, ps_suppkey));
alter table lineitem add constraint (primary key (l_orderkey, l_linenumber));
alter table orders add constraint (primary key (o_orderkey));

-- ==================================================
-- ADD FOREIGN KEY CONSTRAINTS
-- ==================================================


alter table nation add constraint (foreign key (n_regionkey) references region novalidate);
alter table supplier add constraint (foreign key (s_nationkey) references nation novalidate);
alter table partsupp add constraint (foreign key (ps_partkey) references part novalidate);
alter table partsupp add constraint (foreign key (ps_suppkey) references supplier novalidate);
alter table customer add constraint (foreign key (c_nationkey) references nation novalidate);
alter table orders add constraint (foreign key (o_custkey) references customer novalidate);
alter table lineitem add constraint (foreign key (l_orderkey) references orders novalidate);
alter table lineitem add constraint (foreign key (l_partkey) references part novalidate);
alter table lineitem add constraint (foreign key (l_suppkey) references supplier novalidate);

-- ==================================================
-- SHOW TIMES
-- ==================================================

select key, dt from timer_t order by seqno;


!

}




echo
echo "==================================================================="
echo "TPCH SCALE $SCALE "
echo "Estimated table extents"
echo
printf '%20s %8dK\n' lineitem $lineitem
printf '%20s %8dK\n' partsupp $partsupp
printf '%20s %8dK\n' part     $part
printf '%20s %8dK\n' customer $customer
printf '%20s %8dK\n' supplier $supplier
printf '%20s %8dG\n' TOTAL    $((($lineitem+$partsupp+$part+$customer+$supplier)/(1024*1024)))
echo "==================================================================="



onstat -g cfg | grep -E "MAX_PDQPRIORITY|DS_TOTAL_MEMORY|DS_NONPDQ_QUERY_MEM"
onstat -z
date


echo "1 dbgen"
tcph_dbgen $SCALE
if [ $? -ne 0 ]; then
   exit 1
fi

echo "2 create"
tpch_create $SCALE
if [ $? -ne 0 ]; then
   echo "Error creating database"
   exit 1 
fi

echo "3 loading"
tpch_load $SCALE
if [ $? -ne 0 ]; then
   echo "Error loading data"
   exit 1
fi

echo "4 indexing"
tpch_index $SCALE
if [ $? -ne 0 ]; then
   echo "Error indexing data"
   exit 1
fi