11.521 |
Spatial
Database Management and Advanced Geographic Information
Systems
|
11.523 |
Fundamentals
of Spatial Database Management
|
/*--- Create the URISA database schema ------------- */
drop table authors;
drop table titles;
drop table keywords;
drop table match;
create table authors
(
lastname varchar2(25),
fnamemi varchar2(15),
paper number(10, 0)
);
create table titles
(
title varchar2(140),
paper number(10, 0)
);
create table keywords
(
code number(10, 0),
keyword varchar2(45),
major number(10, 0),
category varchar2(20)
);
create table match
(
code number(10, 0),
major number(10, 0),
paper number(10, 0)
);
create index apaper on authors (paper);
create index tpaper on titles (paper);
create index mpaper on match (paper);
create index cmatch on match (code);
create index kcode on keywords (code);
/*--- Paper count by Author ------------------------------------------------------*/
drop view v_pcount;
/* Count papers written by each author - note, a paper can have multiple authors*/
create view v_pcount as
select lastname, fnamemi, count(*) as papers
from authors
group by lastname, fnamemi;
/* List alphabetically */
select *
from v_pcount
order by lastname;
/* Sort by number of papers for those writing more than one paper*/
select fnamemi, lastname, papers
from v_pcount
where papers > 1
order by papers DESC, lastname ASC, fnamemi ASC;
/* Find all the papers written by particular authors */
select distinct lastname, fnamemi, paper
from authors
where lastname like 'FER%'
order by lastname, fnamemi, paper asc;
/*--- Keywords ------------------------------------------------------*/
drop view v_kcount;
/* Count papers using each keyword */
create view v_kcount as
select k.code, keyword, count(*) as papers
from keywords k, match m
where k.code = m.code
group by k.code, keyword;
/* Sort the list by keyword code */
select distinct code, keyword, papers
from v_kcount
order by code, keyword;
/* Sort keywords by count of papers*/
select distinct code, keyword, papers
from v_kcount
where papers > 10
order by papers desc, code, keyword;
/* count of papers using *EITHER* of two particular keywords */
select code, count(*) as papers
from match
where code = 106
or code = 229
group by code;
/* Count of papers using *BOTH* of two particular keywords.
Doing 'AND' combinations is trickier since any one
row in the 'match' table associates a paper with a single
keyword. You need to use a SELF-JOIN that looks at the
keyword table twice to first find all papers associated with
keyword code 106 AND a second time to find papers associated
with keyword code = 229 */
select m.code, n.code, count(*) as papers
from match m, match n
where m.paper = n.paper
and (m.code = 106 AND n.code = 229)
group by m.code, n.code;
/* Here's a variation that joins in the 'keywords' table to
lookup the keyword description and then counts papers
that use either code=106 OR a keyword with 'TRANS' in it. */
/* Count papers using the desired keyword combination */
select m.code, keyword, count(*) as papers
from match m, keywords k
where m.code = k.code and (m.code = 106
or k.keyword like 'TRANS%')
group by m.code, k.keyword;
/* Now do a similar query with an *AND* condition instead of *OR*.
Do you see why we need four tables in the 'from' clause?*/
select m.code, substr(k1.keyword,1,20) as keyword1,
n.code, substr(k2.keyword,1,20) as keyword2,
count(*) as papers
from match m, match n, keywords k1, keywords k2
where m.paper = n.paper and
m.code = k1.code and
n.code = k2.code
and (m.code = 106 AND k2.keyword like 'TRANS%')
group by m.code, k1.keyword, n.code, k2.keyword;
/* Another AND case with a join to the titles table */
select t.paper, substr(title,1,50)
from titles t, match m, match n
where m.paper = n.paper and m.paper = t.paper
and (m.code = 229 AND n.code = 106)
order by paper;
/*-------- More Complex Queries ---------------------
What other keywords were most frequently used by
GIS-related papers?
----------------------------------------------------*/
/* Keywords related to GIS and mapping: */
drop view v_gispapers;
CREATE view v_gispapers AS
SELECT m.code, keyword, m.paper
FROM keywords k, match m
WHERE m.code = k.code AND
(keyword LIKE '%GIS%' OR
keyword LIKE '%GEOGRAPHIC INFORMATION%' OR
keyword LIKE '%MAPPING%');
/* Counts of papers using these keywords */
SELECT m.code, k.keyword, count(distinct t.paper) as papers
from match m, titles t, keywords k
where m.paper = t.paper AND k.code = m.code AND
m.code IN (select distinct code from v_gispapers)
group by m.code, k.keyword
order by m.code;
/********************************************************
Now, determine the frequency of use of OTHER keywords among
those papers (in the 'v_gispapers' view) that we've
categorized as 'GIS/Mapping' papers
******************************************************** /
drop view v_combo1;
drop view v_combo2;
/* Find papers with a GIS/Mapping keyword plus a non-GIS/Mapping keyword
CREATE VIEW v_combo1 as
SELECT m1.paper, m1.code AS giscode, m2.code AS othercode
FROM match m1, match m2
WHERE m1.code IN (select distinct code from v_gispapers) AND
m2.code NOT IN (select distinct code from v_gispapers) AND
m1.paper = m2.paper;
/* The above 'v_combo1' view lists every paper that used a
'gis/mapping' keyword (i.e., codes that show up in the
'v_gispapers' view) as well as some non-gis keywords
(i.e., codes that do NOT show up in the 'v_gispapers' view.
There is one row for every combination of gis and non-gis
keyword paring. If a paper used three of the gis keywords
and two other keywords, it would appear in this list
3x2 = 6 times. That's why there are so many rows in v_combo1.
To get the list of unique 'other' keywords
used by each paper, let's create view 'v_combo2' from
the 'v_combo1' view: */
CREATE VIEW v_combo2 AS
SELECT DISTINCT paper, othercode, keyword
FROM v_combo1 c, keywords k
WHERE c.othercode = k.code;
SELECT * from v_combo2
ORDER BY paper, othercode;
/* Okay, there were 731 rows in 'v_combo1', 601 rows in 'v_combo2',
but only 183 distinct papers appearing in each table.
Does this may sense to you?
Now, finally, let's determine the frequency of use
of each of the non-GIS keywords by the 'v_gispapers'.
119 'other' keywords are used by these papers,
and 76 of these are used more than once. */
SELECT othercode, keyword, count(paper) as papers
FROM v_combo2
group by othercode, keyword
having count(paper) > 10
ORDER BY papers DESC, othercode;
/* We see that 17 other keywords were used at least 10 times
by the gis-related papers. (119 were used at least once.)
From the listing, we see a few other keywords (such
as 909 = Land Information Systems) that are more or
less synonyms for GIS. We could go back and redefine
'v_gispapers' to include those using these keywords
as well. Then the identical subsequent queries from above
could be rerun as is to do the revised 'othercode' analysis. */
Back to the 11.521 Home Page |Back to the CRON Home Page | Back to the MIT Home Page
Written by Joseph Ferreira, Jr.
Converted to HTML by Thomas H. Grayson on 1 February
2000.
Last modified: 5 March 2009 [jf]