6.893 Architecture of Database Systems Term Project

Creating a B-Tree Index

In order to create a B-tree index, we use the following commands to modify the POSTGRES system catalogs:

INSERT INTO pg_opclass (opcname, opcdeftype)
    SELECT 'wstd_ops', oid FROM pg_type WHERE typname = 'wstd';

SELECT o.oid AS opoid, o.oprname
INTO TABLE wstd_ops_tmp
FROM pg_operator o, pg_type t
WHERE o.oprleft = t.oid and o.oprright = t.oid
   and t.typname = 'wstd';

INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
   SELECT am.oid, opcl.oid, c.opoid, 1
   FROM pg_am am, pg_opclass opcl, wstd_ops_tmp c
   WHERE amname = 'btree' and opcname = 'wstd_ops'
      and c.oprname = '<';

INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
   SELECT am.oid, opcl.oid, c.opoid, 2
   FROM pg_am am, pg_opclass opcl, wstd_ops_tmp c
   WHERE amname = 'btree' and opcname = 'wstd_ops'
      and c.oprname = '<=';

INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
   SELECT am.oid, opcl.oid, c.opoid, 3
   FROM pg_am am, pg_opclass opcl, wstd_ops_tmp c
   WHERE amname = 'btree' and opcname = 'wstd_ops'
      and c.oprname = '=';

INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
   SELECT am.oid, opcl.oid, c.opoid, 4
   FROM pg_am am, pg_opclass opcl, wstd_ops_tmp c
   WHERE amname = 'btree' and opcname = 'wstd_ops'
      and c.oprname = '>=';

INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
   SELECT am.oid, opcl.oid, c.opoid, 5
   FROM pg_am am, pg_opclass opcl, wstd_ops_tmp c
   WHERE amname = 'btree' and opcname = 'wstd_ops'
      and c.oprname = '>';

INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
   SELECT am.oid, opcl.oid, pro.oid, 1
   FROM pg_am am, pg_opclass opcl, pg_proc pro
   WHERE  amname = 'btree' and opcname = 'wstd_ops'
      and proname = 'wstd_cmp';

DROP TABLE wstd_ops_tmp;

Testing the B-Tree Index

In order to test whether the optimizer is using the B-tree index, we first create 2 identical large tables. In one table, we create a B-tree index, while for the other we do not. We then use the EXPLAIN command to figure out which access methods the optimizer chooses for two different types of queries.

CREATE TABLE WSTDtest (
    value	int,		-- Random value
    key		wstd		-- WSTD for testing
);

COPY WSTDtest FROM '/home/benleong/6.893/Project/randdata.txt' USING DELIMITERS '|';

CREATE TABLE WSTDopt (
    value	int,		-- Random value
    key		wstd		-- WSTD for testing
);

COPY WSTDopt FROM '/home/benleong/6.893/Project/randdata.txt' USING DELIMITERS '|';
CREATE INDEX WSTD_index ON WSTDopt (key);

Case 1

In this case, the optimizer decides that a sequential scan is the best access method for both tables.

EXPLAIN SELECT * FROM WSTDtest WHERE key < '(1920,1)';

psql:test.sql:64: NOTICE:  QUERY PLAN:

Seq Scan on wstdtest  (cost=0.00..22.50 rows=333 width=12)

EXPLAIN SELECT * FROM WSTDopt WHERE key < '(1920,1)';

psql:test.sql:65: NOTICE:  QUERY PLAN:

Seq Scan on wstdopt  (cost=0.00..529.38 rows=9583 width=12)

SELECT count(*) FROM WSTDtest WHERE key < '(1920,1)';

 count
-------
  7189
(1 row)

SELECT count(*) FROM WSTDopt WHERE key < '(1920,1)';

count
-------
  7189
(1 row)

Case 2

In this case, the optimizer decides that it should use the B-tree index for WSTDopt.

EXPLAIN SELECT * FROM WSTDtest WHERE key > '(1970,1)' AND key < '(1990,1)';

psql:test.sql:66: NOTICE:  QUERY PLAN:

Seq Scan on wstdtest  (cost=0.00..25.00 rows=10 width=12)

EXPLAIN SELECT * FROM WSTDopt WHERE key > '(1970,1)' AND key < '(1990,1)';

psql:test.sql:67: NOTICE:  QUERY PLAN:

Index Scan using wstd_index on wstdopt  (cost=0.00..179.48 rows=288 width=12)

SELECT count(*) FROM WSTDtest WHERE key > '(1970,1)' AND key < '(1990,1)';

 count
-------
  5054
(1 row)

SELECT count(*) FROM WSTDopt WHERE key > '(1970,1)' AND key < '(1990,1)';

count
-------
  5054
(1 row)

Previous Up Next