11.521 | Spatial
Database Management and Advanced Geographic Information Systems |
11.523 | Fundamentals
of Spatial Database Management |
/*--- Create the URISA database schema ------------- */ drop table authors; drop table titles; drop table keywords; drop table match;
create table authors ( lastname varchar2(25), fnamemi varchar2(15), paper number(10, 0) ); create table titles ( title varchar2(140), paper number(10, 0) ); create table keywords ( code number(10, 0), keyword varchar2(45), major number(10, 0), category varchar2(20) ); create table match ( code number(10, 0), major number(10, 0), paper number(10, 0) );
create index apaper on authors (paper); create index tpaper on titles (paper); create index mpaper on match (paper); create index cmatch on match (code); create index kcode on keywords (code);
/*--- Paper count by Author ------------------------------------------------------*/ drop view v_pcount;
/* Count papers written by each author - note, a paper can have multiple authors*/ create view v_pcount as select lastname, fnamemi, count(*) papers from authors group by lastname, fnamemi;
/* List alphabetically */ select * from v_pcount order by lastname;
/* Sort by number of papers for those writing more than one paper*/ select fnamemi, lastname, papers from v_pcount where papers > 1 order by papers DESC, lastname ASC, fnamemi ASC;
/* Find all the papers written by particular authors */ select distinct lastname, fnamemi, paper from authors where lastname like 'FER%' order by lastname, fnamemi, paper asc;
/*--- Keywords ------------------------------------------------------*/ drop view v_kcount;
/* Count papers using each keyword */ create view v_kcount as select k.code, keyword, count(*) papers from keywords k, match m where k.code = m.code group by k.code, keyword;
/* Sort the list by keyword code */ select distinct code, keyword, papers from v_kcount order by code, keyword;
/* Sort keywords by count of papers*/ select distinct code, keyword, papers from v_kcount where papers > 10 order by papers desc, code, keyword;
/* count of papers using *EITHER* of two particular keywords */ select code, count(*) papers from match where code = 106 or code = 229 group by code;
/* Count of papers using *BOTH* of two particular keywords. Doing 'AND' combinations is trickier since any one row in the 'match' table associates a paper with a single keyword. You need to use a SELF-JOIN that looks at the keyword table twice to first find all papers associated with keyword code 106 AND a second time to find papers associated with keyword code = 229 */
select m.code, n.code, count(*) papers from match m, match n where m.paper = n.paper and (m.code = 106 AND n.code = 229) group by m.code, n.code;
/* Here's a variation that joins in the 'keywords' table to lookup the keyword description and then counts papers that use either code=106 OR a keyword with 'TRANS' in it. */
/* Count papers using the desired keyword combination */ select m.code, keyword, count(*) papers from match m, keywords k where m.code = k.code and (m.code = 106 or k.keyword like 'TRANS%') group by m.code, k.keyword;
/* Now do a similar query with an *AND* condition instead of *OR*. Do you see why we need four tables in the 'from' clause?*/ select m.code, substr(k1.keyword,1,20) keyword1, n.code, substr(k2.keyword,1,20) keyword2, count(*) papers from match m, match n, keywords k1, keywords k2 where m.paper = n.paper and m.code = k1.code and n.code = k2.code and (m.code = 106 AND k2.keyword like 'TRANS%') group by m.code, k1.keyword, n.code, k2.keyword;
/* Another AND case with a join to the titles table */ select t.paper, substr(title,1,50) from titles t, match m, match n where m.paper = n.paper and m.paper = t.paper and (m.code = 229 AND n.code = 106) order by paper;
/*-------- More Complex Queries --------------------- What other keywords were most frequently used by GIS-related papers? ----------------------------------------------------*/
/* Keywords related to GIS and mapping: */ drop view v_gispapers;
CREATE view v_gispapers AS SELECT m.code, keyword, m.paper FROM keywords k, match m WHERE m.code = k.code AND (keyword LIKE '%GIS%' OR keyword LIKE '%GEOGRAPHIC INFORMATION%' OR keyword LIKE '%MAPPING%');
/* Counts of papers using these keywords */ SELECT m.code, k.keyword, count(distinct t.paper) papers from match m, titles t, keywords k where m.paper = t.paper AND k.code = m.code AND m.code IN (select distinct code from v_gispapers) group by m.code, k.keyword order by m.code;
/******************************************************** Now, determine the frequency of use of OTHER keywords among those papers (in the 'v_gispapers' view) that we've categorized as 'GIS/Mapping' papers ******************************************************** / drop view v_combo1; drop view v_combo2;
/* Find papers with a GIS/Mapping keyword plus a non-GIS/Mapping keyword CREATE VIEW v_combo1 as SELECT m1.paper, m1.code AS giscode, m2.code AS othercode FROM match m1, match m2 WHERE m1.code IN (select distinct code from v_gispapers) AND m2.code NOT IN (select distinct code from v_gispapers) AND m1.paper = m2.paper;
/* The above 'v_combo1' view lists every paper that used a 'gis/mapping' keyword (i.e., codes that show up in the 'v_gispapers' view) as well as some non-gis keywords (i.e., codes that do NOT show up in the 'v1gispaper' view. There is one row for every combination of gis and non-gis keyword paring. If a paper used three of the gis keywords and two other keywords, it would appear in this list 3x2 = 6 times. That's why there are so many rows in v_combo1. To get the list of unique 'other' keywords used by each paper, let's create view 'v_combo2' from the 'v_combo1' view: */
CREATE VIEW v_combo2 AS SELECT DISTINCT paper, othercode, keyword FROM v_combo1 c, keywords k WHERE c.othercode = k.code;
SELECT * from v_combo2 ORDER BY paper, othercode;
/* Okay, there were 731 rows in 'v_combo1', 601 rows in 'v_combo2', but only 183 distinct papers appearing in each table. Does this may sense to you? Now, finally, let's determine the frequency of use of each of the non-GIS keywords by the 'v_gispapers'. 119 'other' keywords are used by these papers, and 76 of these are used more than once. */
SELECT othercode, keyword, count(paper) papers FROM v_combo2 group by othercode, keyword having count(paper) > 10 ORDER BY papers DESC, othercode;
/* We see that 17 other keywords were used at least 10 times by the gis-related papers. (119 were used at least once.) From the listing, we see a few other keywords (such as 909 = Land Information Systems) that are more or less synonyms for GIS. We could go back and redefine 'v_gispapers' to include those using these keywords as well. Then the identical subsequent queries from above could be rerun as is to do the revised 'othercode' analysis. */
Back to the 11.521 Home Page |Back to the CRON Home Page | Back to the MIT Home Page
Written by Joseph Ferreira, Jr.
Converted to HTML by Thomas H. Grayson on 1 February
2000.
Last modified: 5 March 2009 [jf]