Massachusetts Institute of Technology - Department of Urban Studies and Planning

11.521	Spatial Database Management and Advanced Geographic Information Systems
11.523	Fundamentals of Spatial Database Management
Script & Sample Queries to Create & Manipulate the URISA Proceedings Database in ORACLE

/*--- Create the URISA database schema ------------- */
 drop table authors;
 drop table titles;  
 drop table keywords;
 drop table match;


create table authors 
  (
    lastname varchar2(25),
    fnamemi varchar2(15),
    paper number(10, 0)
  );

create table titles 
  (
    title varchar2(140),
    paper number(10, 0)
  );

create table keywords 
  (
    code number(10, 0),
    keyword varchar2(45),
    major number(10, 0),
    category varchar2(20)
  );

create table match 
  (
    code number(10, 0),
    major number(10, 0),
    paper number(10, 0)
  );


create index apaper on authors (paper);
create index tpaper on titles (paper);
create index mpaper on match (paper);
create index cmatch on match (code);
create index kcode on keywords (code);


/*--- Paper count by Author ------------------------------------------------------*/
drop view v_pcount;


/* Count papers written by each author - note, a paper can have multiple authors*/
create view v_pcount as
select lastname, fnamemi, count(*) papers 
  from authors
 group by lastname, fnamemi;



/* List alphabetically */
select * 
  from v_pcount
 order by lastname;


/* Sort by number of papers for those writing more than one paper*/
select fnamemi, lastname, papers 
  from v_pcount
 where papers > 1
 order by papers DESC, lastname ASC, fnamemi ASC;


/* Find all the papers written by particular authors */
select distinct lastname, fnamemi, paper
  from authors 
 where lastname like 'FER%'
 order by lastname, fnamemi, paper asc;


/*--- Keywords ------------------------------------------------------*/
drop view v_kcount;


/* Count papers using each keyword */
create view v_kcount as
select k.code, keyword, count(*) papers
  from keywords k, match m
 where k.code = m.code
 group by k.code, keyword;


/* Sort the list by keyword code */
select distinct code, keyword, papers 
  from v_kcount
 order by code, keyword;


/* Sort keywords by count of papers*/
select distinct code, keyword, papers 
  from v_kcount
 where papers > 10
 order by papers desc, code, keyword;


/* count of papers using *EITHER* of two particular keywords */
select code, count(*) papers
  from match 
 where code = 106
    or code = 229
 group by code;


/* Count of papers using *BOTH* of two particular keywords.
   Doing 'AND' combinations is trickier since any one
   row in the 'match' table associates a paper with a single
   keyword.  You need to use a SELF-JOIN that looks at the 
   keyword table twice to first find all papers associated with 
   keyword code 106 AND a second time to find papers associated
   with keyword code = 229 */


select m.code, n.code, count(*) papers
  from match m, match n
 where m.paper = n.paper 
   and (m.code = 106 AND n.code = 229)
 group by m.code, n.code;


/* Here's a variation that joins in the 'keywords' table to
   lookup the keyword description and then counts papers
   that use either code=106 OR a keyword with 'TRANS' in it. */


/* Count papers using the desired keyword combination */
select m.code, keyword, count(*) papers
  from match m, keywords k
 where m.code = k.code and (m.code = 106
    or k.keyword like 'TRANS%')
 group by m.code, k.keyword;


/* Now do a similar query with an *AND* condition instead of *OR*. 
   Do you see why we need four tables in the 'from' clause?*/
select m.code, substr(k1.keyword,1,20) keyword1,
       n.code, substr(k2.keyword,1,20) keyword2, 
       count(*) papers
  from match m, match n, keywords k1, keywords k2
 where m.paper = n.paper and 
       m.code  = k1.code and
       n.code  = k2.code
   and (m.code = 106 AND k2.keyword like 'TRANS%')
 group by m.code, k1.keyword, n.code, k2.keyword;


/* Another AND case with a join to the titles table */
select t.paper, substr(title,1,50)
  from titles t, match m, match n
 where m.paper = n.paper and m.paper = t.paper
   and (m.code = 229 AND n.code = 106)
 order by paper;


/*-------- More Complex Queries ---------------------
  What other keywords were most frequently used by 
  GIS-related papers?  
----------------------------------------------------*/


/* Keywords related to GIS and mapping: */

drop view v_gispapers;


CREATE view v_gispapers AS
SELECT m.code, keyword, m.paper
  FROM keywords k, match m
 WHERE m.code = k.code AND
      (keyword LIKE '%GIS%' OR
       keyword LIKE '%GEOGRAPHIC INFORMATION%' OR
       keyword LIKE '%MAPPING%');


/* Counts of papers using these keywords */

SELECT m.code, k.keyword, count(distinct t.paper) papers
  from match m, titles t, keywords k
 where m.paper = t.paper AND k.code = m.code AND 
      m.code IN (select distinct code from v_gispapers)
 group by m.code, k.keyword
 order by m.code;


/********************************************************
  Now, determine the frequency of use of OTHER keywords among 
  those papers (in the 'v_gispapers' view) that we've
  categorized as 'GIS/Mapping' papers
 ******************************************************** /
drop view v_combo1;
drop view v_combo2;


/* Find papers with a GIS/Mapping keyword plus a non-GIS/Mapping keyword
CREATE VIEW v_combo1 as
SELECT m1.paper, m1.code AS giscode, m2.code AS othercode
  FROM match m1, match m2
 WHERE m1.code     IN (select distinct code from v_gispapers) AND
       m2.code NOT IN (select distinct code from v_gispapers) AND
       m1.paper = m2.paper;


/* The above 'v_combo1' view lists every paper that used a
   'gis/mapping' keyword (i.e., codes that show up in the
   'v_gispapers' view) as well as some non-gis keywords 
   (i.e., codes that do NOT show up in the 'v1gispaper' view.
   There is one row for every combination of gis and non-gis
   keyword paring.  If a paper used three of the gis keywords
   and two other keywords, it would appear in this list
   3x2 = 6 times.  That's why there are so many rows in v_combo1.
   To get the list of unique 'other' keywords
   used by each paper, let's create view 'v_combo2' from
   the 'v_combo1' view:  */


CREATE VIEW v_combo2 AS
SELECT DISTINCT paper, othercode, keyword
  FROM v_combo1 c, keywords k
 WHERE c.othercode = k.code;


SELECT * from v_combo2
    ORDER BY paper, othercode;


/*  Okay, there were 731 rows in 'v_combo1', 601 rows in 'v_combo2',
    but only 183 distinct papers appearing in each table.
    Does this may sense to you?

    Now, finally, let's determine the frequency of use
    of each of the non-GIS keywords by the 'v_gispapers'.
    119 'other' keywords are used by these papers,
    and 76 of these are used more than once.    */


SELECT othercode, keyword, count(paper) papers 
    FROM v_combo2
    group by othercode, keyword
   having count(paper) > 10
    ORDER BY papers DESC, othercode;


/*  We see that 17 other keywords were used at least 10 times
    by the gis-related papers.  (119 were used at least once.)

    From the listing, we see a few other keywords (such
    as 909 = Land Information Systems) that are more or
    less synonyms for GIS.  We could go back and redefine
    'v_gispapers' to include those using these keywords
    as well.  Then the identical subsequent queries from above
    could be rerun as is to do the revised 'othercode' analysis. */
Back to the 11.521 Home Page |Back to the CRON Home Page | Back to the MIT Home Page
Written by Joseph Ferreira, Jr.
Converted to HTML by Thomas H. Grayson on 1 February 2000.
Last modified: 5 March 2009 [jf]