-- Based on the Northwind sample database: https://github.com/pthom/northwind_psql
-- The idea is to run the aggregate over a relation with many columns in order to
-- test its performance.

-- Install the pg_csv extension unless it is already present.
CREATE EXTENSION IF NOT EXISTS pg_csv;
-- Customer master data. Only the key and company_name are mandatory; every
-- contact/address column is nullable free text.
CREATE TABLE customers (
    customer_id   CHAR(5) PRIMARY KEY,   -- fixed-width 5-character key
    company_name  TEXT NOT NULL,
    contact_name  TEXT,
    contact_title TEXT,
    address       TEXT,
    city          TEXT,
    region        TEXT,
    postal_code   TEXT,
    country       TEXT,
    phone         TEXT,
    fax           TEXT
);
-- Orders placed by customers. Deleting a customer also deletes its orders
-- (ON DELETE CASCADE). The ship_* columns hold their own copy of the
-- delivery address rather than referencing the customer row.
CREATE TABLE orders (
    order_id         BIGSERIAL PRIMARY KEY,
    customer_id      CHAR(5) NOT NULL REFERENCES customers(customer_id) ON DELETE CASCADE,
    employee_id      SMALLINT,           -- no foreign key is declared for this column
    order_date       DATE,
    required_date    DATE,
    shipped_date     DATE,               -- nullable
    freight          NUMERIC(10,2) DEFAULT 0 CHECK (freight >= 0),   -- never negative
    ship_name        TEXT,
    ship_address     TEXT,
    ship_city        TEXT,
    ship_region      TEXT,
    ship_postal_code TEXT,
    ship_country     TEXT
);
-- Generate seed data: 300 customers in three groups of 100, one group per
-- city/country. The per-group attributes live in a single VALUES list so the
-- group boundaries are defined in exactly one place.
INSERT INTO customers (
    customer_id, company_name, contact_name, contact_title,
    address, city, region, postal_code, country, phone, fax
)
SELECT
    ('C' || lpad(i::text, 4, '0'))::char(5) AS customer_id,   -- C0001 .. C0300
    'Company ' || i AS company_name,
    'Contact ' || i AS contact_name,
    g.contact_title,
    i::text || ' Main Street' AS address,
    g.city,
    g.region,
    (10000 + i)::text AS postal_code,
    g.country,
    '+1-555-' || lpad(i::text, 4, '0') AS phone,
    CASE
        WHEN i % 5 = 0 THEN NULL   -- ids ending in 0 or 5 have no fax number
        ELSE '+1-555-' || lpad((i + 1000)::text, 4, '0')
    END AS fax
FROM generate_series(1, 300) AS s(i)
INNER JOIN (
    VALUES
        (0, 'Owner',         'Seattle',   'WA', 'USA'),      -- i =   1 .. 100
        (1, 'Sales Manager', 'London',    NULL, 'UK'),       -- i = 101 .. 200
        (2, 'Purchasing',    'Sao Paulo', 'SP', 'Brazil')    -- i = 201 .. 300
) AS g (group_no, contact_title, city, region, country)
    ON g.group_no = (i - 1) / 100;   -- integer division maps i to its group
-- 2700 orders: 9 per customer (n = 1 .. 9). Each order copies the customer's
-- name and address into the ship_* columns. Dates are offsets in days from
-- 2024-01-01, using date + integer arithmetic.
INSERT INTO orders (
    customer_id, employee_id, order_date, required_date, shipped_date,
    freight, ship_name, ship_address, ship_city, ship_region, ship_postal_code, ship_country
)
SELECT
    c.customer_id,
    n::smallint AS employee_id,
    DATE '2024-01-01' + n AS order_date,
    DATE '2024-01-01' + (n + 7) AS required_date,
    CASE
        WHEN n = 9 THEN NULL   -- the ninth order of every customer is left unshipped
        ELSE DATE '2024-01-01' + (n + 3)
    END AS shipped_date,
    (10 + n)::numeric(10,2) AS freight,
    c.company_name AS ship_name,
    c.address AS ship_address,
    c.city AS ship_city,
    c.region AS ship_region,
    c.postal_code AS ship_postal_code,
    c.country AS ship_country
FROM customers AS c
CROSS JOIN generate_series(1, 9) AS s(n);
-- Wide view (23 columns) pairing every order with its customer's details.
-- Customer address columns are prefixed with customer_ to keep them distinct
-- from the order's ship_* columns.
CREATE OR REPLACE VIEW orders_customers AS
SELECT
    o.order_id,
    o.customer_id,
    c.company_name,
    c.contact_name,
    c.contact_title,
    c.address AS customer_address,
    c.city AS customer_city,
    c.region AS customer_region,
    c.postal_code AS customer_postal_code,
    c.country AS customer_country,
    c.phone,
    c.fax,
    o.employee_id,
    o.order_date,
    o.required_date,
    o.shipped_date,
    o.freight,
    o.ship_name,
    o.ship_address,
    o.ship_city,
    o.ship_region,
    o.ship_postal_code,
    o.ship_country
FROM orders AS o
INNER JOIN customers AS c USING (customer_id);