Skip to content

Commit

Permalink
Update duck.sql
Browse files Browse the repository at this point in the history
cf #9
  • Loading branch information
adriens authored Apr 21, 2024
1 parent 1465e7b commit 7de20d7
Showing 1 changed file with 15 additions and 4 deletions.
19 changes: 15 additions & 4 deletions duck.sql
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
-- Load acronyms data
-- Create the acronyms table with its integrity constraints:
--   * id_acronym        : required; must equal its own upper-cased form
--   * id_acronym_unique : primary key; must begin with the row's id_acronym
--   * description       : no two rows may share the same description
create or replace table acronyms (
    id_acronym VARCHAR NOT NULL CHECK (id_acronym = UPPER(id_acronym)),
    -- starts_with() compares literally; a LIKE pattern built from id_acronym
    -- would treat any '_' or '%' inside the acronym as a wildcard and accept
    -- keys that do not actually start with it.
    id_acronym_unique VARCHAR PRIMARY KEY CHECK (starts_with(id_acronym_unique, id_acronym)),
    description VARCHAR UNIQUE
);

-- NOTE(review): this span looks like a rendered diff that interleaves the
-- pre-change and post-change lines of a single insert statement (two
-- `insert into` headers, two closing lines) — TODO confirm against the
-- actual duck.sql before running it as-is.
insert into acronyms (id_acronym, description)
-- Load acronyms csv data into table
insert into acronyms (id_acronym, id_acronym_unique, description)
from
(
-- Read the CSV with an explicit schema: header = true, and all three
-- columns are declared as VARCHAR.
FROM read_csv('data/acronyms_optnc.csv',
header = true,
columns = {
'id_acronym': 'VARCHAR',
'id_acronym_unique': 'VARCHAR',
'description': 'VARCHAR'
})
) t;
);

-- Preview the first five rows of the acronyms table
select *
from acronyms
limit 5;
Expand All @@ -39,7 +42,7 @@ create or replace temp table sorted_table as
description
from acronyms
-- order by acronym and description
order by id_acronym, description);
order by id_acronym, id_acronym_unique, description);

-- Check the resulting tables
from orig_table limit 5;
Expand Down Expand Up @@ -73,3 +76,11 @@ order by orig_table.index;
-- Check the resulting table
-- from test_sorted
-- where orig_index != sorted_index;

-- Duplicate report: list every id_acronym that appears on more than one row,
-- most frequent first.
from acronyms
select
    id_acronym,
    count(*) as nb_occurrences
group by id_acronym
having nb_occurrences > 1
-- id_acronym breaks ties between equal counts so the report order is stable
order by nb_occurrences desc, id_acronym;

0 comments on commit 7de20d7

Please sign in to comment.