Massachusetts Institute of Technology - Department of Urban Studies and Planning


11.521
Spatial Database Management and Advanced Geographic Information Systems
11.523
Fundamentals of Spatial Database Management

Script & Sample Queries to Create & Manipulate the URISA Proceedings Database in ORACLE


/*--- Create the URISA database schema ------------- */
 drop table authors;
 drop table titles;  
 drop table keywords;
 drop table match;

create table authors    (     lastname varchar2(25),     fnamemi varchar2(15),     paper number(10, 0)   ); create table titles    (     title varchar2(140),     paper number(10, 0)   ); create table keywords    (     code number(10, 0),     keyword varchar2(45),     major number(10, 0),     category varchar2(20)   ); create table match    (     code number(10, 0),     major number(10, 0),     paper number(10, 0)   );
create index apaper on authors (paper); create index tpaper on titles (paper); create index mpaper on match (paper); create index cmatch on match (code); create index kcode on keywords (code);
/*--- Paper count by Author ------------------------------------------------------*/ drop view v_pcount;
/* Count papers written by each author - note, a paper can have multiple authors*/ create view v_pcount as select lastname, fnamemi, count(*) as papers  from authors group by lastname, fnamemi;
/* List alphabetically */ select * from v_pcount order by lastname;
/* Sort by number of papers for those writing more than one paper*/ select fnamemi, lastname, papers from v_pcount where papers > 1 order by papers DESC, lastname ASC, fnamemi ASC;
/* Find all the papers written by particular authors */ select distinct lastname, fnamemi, paper from authors  where lastname like 'FER%' order by lastname, fnamemi, paper asc;
/*--- Keywords ------------------------------------------------------*/ drop view v_kcount;
/* Count papers using each keyword */ create view v_kcount as select k.code, keyword, count(*) as papers from keywords k, match m where k.code = m.code group by k.code, keyword;
/* Sort the list by keyword code */ select distinct code, keyword, papers from v_kcount order by code, keyword;
/* Sort keywords by count of papers*/ select distinct code, keyword, papers from v_kcount where papers > 10 order by papers desc, code, keyword;
/* count of papers using *EITHER* of two particular keywords */ select code, count(*) as papers from match  where code = 106    or code = 229 group by code;
/* Count of papers using *BOTH* of two particular keywords.    Doing 'AND' combinations is trickier since any one    row in the 'match' table associates a paper with a single    keyword.  You need to use a SELF-JOIN that looks at the keyword table twice to first find all papers associated with keyword code 106 AND a second time to find papers associated with keyword code = 229 */
select m.code, n.code, count(*) as papers   from match m, match n  where m.paper = n.paper     and (m.code = 106 AND n.code = 229)  group by m.code, n.code;
/* Here's a variation that joins in the 'keywords' table to    lookup the keyword description and then counts papers    that use either code=106 OR a keyword with 'TRANS' in it. */
/* Count papers using the desired keyword combination */ select m.code, keyword, count(*) as papers from match m, keywords k where m.code = k.code and (m.code = 106     or k.keyword like 'TRANS%') group by m.code, k.keyword;
/* Now do a similar query with an *AND* condition instead of *OR*. Do you see why we need four tables in the 'from' clause?*/ select m.code, substr(k1.keyword,1,20) as keyword1,        n.code, substr(k2.keyword,1,20) as keyword2,         count(*) as papers   from match m, match n, keywords k1, keywords k2  where m.paper = n.paper and         m.code  = k1.code and        n.code  = k2.code    and (m.code = 106 AND k2.keyword like 'TRANS%')  group by m.code, k1.keyword, n.code, k2.keyword;
/* Another AND case with a join to the titles table */ select t.paper, substr(title,1,50)   from titles t, match m, match n  where m.paper = n.paper and m.paper = t.paper    and (m.code = 229 AND n.code = 106)  order by paper;
/*-------- More Complex Queries ---------------------   What other keywords were most frequently used by    GIS-related papers?   ----------------------------------------------------*/
/* Keywords related to GIS and mapping: */ drop view v_gispapers;
CREATE view v_gispapers AS SELECT m.code, keyword, m.paper FROM keywords k, match m WHERE m.code = k.code AND       (keyword LIKE '%GIS%' OR        keyword LIKE '%GEOGRAPHIC INFORMATION%' OR        keyword LIKE '%MAPPING%');
/* Counts of papers using these keywords */ SELECT m.code, k.keyword, count(distinct t.paper) as papers from match m, titles t, keywords k where m.paper = t.paper AND k.code = m.code AND        m.code IN (select distinct code from v_gispapers) group by m.code, k.keyword order by m.code;
/********************************************************   Now, determine the frequency of use of OTHER keywords among    those papers (in the 'v_gispapers' view) that we've   categorized as 'GIS/Mapping' papers  ******************************************************** / drop view v_combo1; drop view v_combo2;
/* Find papers with a GIS/Mapping keyword plus a non-GIS/Mapping keyword CREATE VIEW v_combo1 as SELECT m1.paper, m1.code AS giscode, m2.code AS othercode FROM match m1, match m2 WHERE m1.code     IN (select distinct code from v_gispapers) AND        m2.code NOT IN (select distinct code from v_gispapers) AND        m1.paper = m2.paper;
/* The above 'v_combo1' view lists every paper that used a    'gis/mapping' keyword (i.e., codes that show up in the    'v_gispapers' view) as well as some non-gis keywords     (i.e., codes that do NOT show up in the 'v_gispapers' view.    There is one row for every combination of gis and non-gis    keyword paring.  If a paper used three of the gis keywords    and two other keywords, it would appear in this list    3x2 = 6 times.  That's why there are so many rows in v_combo1.    To get the list of unique 'other' keywords    used by each paper, let's create view 'v_combo2' from    the 'v_combo1' view:  */
CREATE VIEW v_combo2 AS SELECT DISTINCT paper, othercode, keyword   FROM v_combo1 c, keywords k  WHERE c.othercode = k.code;
SELECT * from v_combo2     ORDER BY paper, othercode;
/*  Okay, there were 731 rows in 'v_combo1', 601 rows in 'v_combo2',     but only 183 distinct papers appearing in each table.     Does this may sense to you?     Now, finally, let's determine the frequency of use     of each of the non-GIS keywords by the 'v_gispapers'.     119 'other' keywords are used by these papers,     and 76 of these are used more than once.    */
SELECT othercode, keyword, count(paper) as papers      FROM v_combo2     group by othercode, keyword having count(paper) > 10     ORDER BY papers DESC, othercode;
/*  We see that 17 other keywords were used at least 10 times by the gis-related papers. (119 were used at least once.) From the listing, we see a few other keywords (such     as 909 = Land Information Systems) that are more or     less synonyms for GIS.  We could go back and redefine     'v_gispapers' to include those using these keywords     as well.  Then the identical subsequent queries from above     could be rerun as is to do the revised 'othercode' analysis. */

Back to the 11.521 Home Page |Back to the CRON Home Page | Back to the MIT Home Page

Written by Joseph Ferreira, Jr.
Converted to HTML by Thomas H. Grayson on 1 February 2000.
Last modified: 5 March 2009 [jf]