Skip to content

Commit eda91f9

Browse files
committed
feat: add BOM option to csv_agg
Support for BOM, requested on PostgREST/postgrest#1371 (comment).

```sql
select csv_agg(x, csv_options(bom := true)) from projects x;
      csv_agg
-------------------
 id,name,client_id+
 1,Windows 7,1    +
 2,Windows 10,1   +
 3,IOS,2          +
 4,OSX,2          +
 5,Orphan,
(1 row)
```

Also possible to do with [media type handlers](https://docs.postgrest.org/en/v12/references/api/media_type_handlers.html#overriding-a-builtin-handler), but it's more convenient to have it built in.
1 parent 010a1aa commit eda91f9

File tree

9 files changed

+101
-11
lines changed

9 files changed

+101
-11
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
loadtest:
3838
strategy:
3939
matrix:
40-
kind: ['csv_agg', 'csv_agg_delim', 'postgrest']
40+
kind: ['csv_agg', 'csv_agg_delim', 'csv_agg_delim_bom', 'postgrest']
4141
name: Loadtest
4242
runs-on: ubuntu-24.04
4343
steps:

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ else
2626
endif
2727

2828
EXTENSION = pg_csv
29-
EXTVERSION = 0.2
29+
EXTVERSION = 0.3
3030

3131
DATA = $(wildcard sql/*--*.sql)
3232

README.md

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@ select csv_agg(x) from projects x;
3535
(1 row)
3636
```
3737

38-
It also supports adding a custom delimiter.
38+
### Custom Delimiter
39+
40+
You can use a custom delimiter.
3941

4042
```psql
41-
select csv_agg(x, '|') from projects x;
43+
select csv_agg(x, csv_options(delimiter := '|')) from projects x;
4244
csv_agg
4345
-------------------
4446
id|name|client_id+
@@ -50,5 +52,22 @@ select csv_agg(x, '|') from projects x;
5052
(1 row)
5153
```
5254

53-
> [!IMPORTANT]
55+
> [!NOTE]
5456
> Newline, carriage return and double quotes are not supported as delimiters to maintain the integrity of the separated values format.
57+
58+
### BOM
59+
60+
You can include a byte-order mark (BOM) to make the CSV compatible with Excel.
61+
62+
```psql
63+
select csv_agg(x, csv_options(bom := true)) from projects x;
64+
csv_agg
65+
-------------------
66+
id,name,client_id+
67+
1,Windows 7,1 +
68+
2,Windows 10,1 +
69+
3,IOS,2 +
70+
4,OSX,2 +
71+
5,Orphan,
72+
(1 row)
73+
```

bench/csv_agg_delim_bom.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
\set lim random(1000, 2000)
2+
3+
select csv_agg(t, csv_options(delimiter:=',', bom:=true)) from (
4+
select * from student_emotion_assessments limit :lim
5+
) as t;

sql/pg_csv--0.2--0.3.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
-- Upgrade pg_csv 0.2 -> 0.3: add the `bom` option to csv_options.

-- The 0.2 constructor has the signature csv_options("char"). CREATE OR REPLACE
-- with a different argument list does not replace it; it would add a second
-- overload, and calls that rely on defaults (e.g. csv_options() or
-- csv_options(delimiter := '|')) would then fail as ambiguous. The old body
-- also builds a one-field row, which no longer matches the type once `bom`
-- is added. Drop it before creating the new constructor.
drop function csv_options("char");

alter type csv_options add attribute bom bool;

-- Both options default to NULL; a NULL option means "use the built-in default".
create function csv_options(delimiter "char" default NULL, bom bool default NULL) returns csv_options as $$
  select row(delimiter, bom)::csv_options;
$$ language sql;

sql/pg_csv.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
create type csv_options as (
22
delimiter "char"
3+
, bom bool
34
);
45

5-
create function csv_options(delimiter "char" default ',') returns csv_options as $$
6-
select row(delimiter)::csv_options;
6+
-- Constructor for csv_options. Both options default to NULL, so callers can
-- set only the ones they need, e.g. csv_options(bom := true).
-- Plain CREATE (not CREATE OR REPLACE): a fresh install should fail rather
-- than silently take over a pre-existing function with the same signature.
create function csv_options(delimiter "char" default NULL, bom bool default NULL) returns csv_options as $$
  select row(delimiter, bom)::csv_options;
$$ language sql;
89

910
create function csv_agg_transfn(internal, anyelement)
@@ -34,4 +35,3 @@ create aggregate csv_agg(anyelement, csv_options) (
3435
finalfunc = csv_agg_finalfn,
3536
parallel = safe
3637
);
37-

src/pg_csv.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@ PG_MODULE_MAGIC;
55
static const char NEWLINE = '\n';
66
static const char DQUOTE = '"';
77
static const char CR = '\r';
8+
static const char BOM[3] = "\xEF\xBB\xBF";
89

910
typedef struct {
1011
char delim;
12+
bool with_bom;
1113
} CsvOptions;
1214

1315
typedef struct {
@@ -55,15 +57,16 @@ static char *datum_to_cstring(Datum datum, Oid typeoid) {
5557

5658
static void parse_csv_options(HeapTupleHeader opts_hdr, CsvOptions *csv_opts) {
5759
// defaults
58-
csv_opts->delim = ',';
60+
csv_opts->delim = ',';
61+
csv_opts->with_bom = false;
5962

6063
if (opts_hdr == NULL) return;
6164

6265
TupleDesc desc = lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(opts_hdr),
6366
HeapTupleHeaderGetTypMod(opts_hdr));
6467

65-
Datum values[1];
66-
bool nulls[1];
68+
Datum values[2];
69+
bool nulls[2];
6770

6871
heap_deform_tuple(
6972
&(HeapTupleData){.t_len = HeapTupleHeaderGetDatumLength(opts_hdr), .t_data = opts_hdr}, desc,
@@ -77,6 +80,10 @@ static void parse_csv_options(HeapTupleHeader opts_hdr, CsvOptions *csv_opts) {
7780
"double quote")));
7881
}
7982

83+
if (!nulls[1]) {
84+
csv_opts->with_bom = DatumGetBool(values[1]);
85+
}
86+
8087
ReleaseTupleDesc(desc);
8188
}
8289

@@ -118,6 +125,8 @@ Datum csv_agg_transfn(PG_FUNCTION_ARGS) {
118125
TupleDesc tdesc =
119126
lookup_rowtype_tupdesc(HeapTupleHeaderGetTypeId(next), HeapTupleHeaderGetTypMod(next));
120127

128+
if (state->options->with_bom) appendBinaryStringInfo(&state->accum_buf, BOM, sizeof(BOM));
129+
121130
// build header row
122131
for (int i = 0; i < tdesc->natts; i++) {
123132
Form_pg_attribute att = TupleDescAttr(tdesc, i);

test/expected/bom.out

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
-- this is done to avoid failing on a pure psql change that happened on postgres 16
2+
-- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't
3+
\pset format unaligned
4+
\pset tuples_only on
5+
\echo
6+
7+
-- include BOM (byte-order mark)
8+
SELECT csv_agg(x, csv_options(bom := true)) AS body
9+
FROM projects x;
10+
id,name,client_id
11+
1,Windows 7,1
12+
2,"has,comma",1
13+
,,
14+
4,OSX,2
15+
,"has""quote",
16+
5,"has,comma and ""quote""",7
17+
6,"has
18+
LF",7
19+
7,"has CR",8
20+
8,"has
21+
CRLF""",8
22+
\echo
23+
24+
-- include BOM with custom delimiter
25+
SELECT csv_agg(x, csv_options(delimiter := ';', bom := true)) AS body
26+
FROM projects x;
27+
id;name;client_id
28+
1;Windows 7;1
29+
2;has,comma;1
30+
;;
31+
4;OSX;2
32+
;"has""quote";
33+
5;"has,comma and ""quote""";7
34+
6;"has
35+
LF";7
36+
7;"has CR";8
37+
8;"has
38+
CRLF""";8

test/sql/bom.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
-- this is done to avoid failing on a pure psql change that happened on postgres 16
2+
-- on pg <= 15 the BOM output adds one extra space, on pg 16 it doesn't
3+
\pset format unaligned
4+
\pset tuples_only on
5+
\echo
6+
7+
-- include BOM (byte-order mark)
8+
SELECT csv_agg(x, csv_options(bom := true)) AS body
9+
FROM projects x;
10+
\echo
11+
12+
-- include BOM with custom delimiter
13+
SELECT csv_agg(x, csv_options(delimiter := ';', bom := true)) AS body
14+
FROM projects x;

0 commit comments

Comments
 (0)