Skip to content

Commit ef3dbab

Browse files
committed
Improve the benchmark data generator
1 parent b217c31 commit ef3dbab

File tree

3 files changed

+40
-24
lines changed

3 files changed

+40
-24
lines changed

benchmarks/test_bench_base.py

Lines changed: 27 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ def setup_class(self):
3333
{
3434
"geom_type": "LineString",
3535
"target_rows": num_geoms,
36-
"vertices_per_linestring_range": [2, 2],
36+
"vertices_per_linestring_range": [2, 10],
3737
},
3838
),
3939
(
@@ -70,33 +70,41 @@ def setup_class(self):
7070
),
7171
]:
7272
# Generate synthetic data with two different geometry sets that have overlapping spatial distribution
73-
# This creates more realistic workloads for spatial predicates
74-
73+
# The intersection rate between geom1 and geom2 will be around 2%.
74+
# This creates more realistic workloads for spatial predicates.
75+
7576
# Options for first geometry set (geom1) - left-leaning distribution
7677
options1 = base_options.copy()
77-
options1.update({
78-
"seed": 42,
79-
"bounds": [0.0, 0.0, 80.0, 100.0], # Slightly left-leaning
80-
"size_range": [1.0, 15.0] # Medium-sized geometries for good intersection chance
81-
})
82-
83-
# Options for second geometry set (geom2) - right-leaning distribution
78+
options1.update(
79+
{
80+
"seed": 42,
81+
"bounds": [0.0, 0.0, 80.0, 100.0], # Slightly left-leaning
82+
"size_range": [
83+
1.0,
84+
15.0,
85+
], # Medium-sized geometries for good intersection chance
86+
}
87+
)
88+
89+
# Options for second geometry set (geom2) - right-leaning distribution
8490
options2 = base_options.copy()
85-
options2.update({
86-
"seed": 1337,
87-
"bounds": [20.0, 0.0, 100.0, 100.0], # Slightly right-leaning
88-
"size_range": [1.0, 15.0] # Same size range for fair comparison
89-
})
90-
91+
options2.update(
92+
{
93+
"seed": 43,
94+
"bounds": [20.0, 0.0, 100.0, 100.0], # Slightly right-leaning
95+
"size_range": [1.0, 15.0], # Same size range for fair comparison
96+
}
97+
)
98+
9199
query = f"""
92100
WITH geom1_data AS (
93-
SELECT
101+
SELECT
94102
geometry as geom1,
95103
row_number() OVER () as id
96104
FROM sd_random_geometry('{json.dumps(options1)}')
97105
),
98106
geom2_data AS (
99-
SELECT
107+
SELECT
100108
geometry as geom2,
101109
row_number() OVER () as id
102110
FROM sd_random_geometry('{json.dumps(options2)}')
@@ -111,7 +119,7 @@ def setup_class(self):
111119
tab = self.sedonadb.execute_and_collect(query)
112120

113121
self.sedonadb.create_table_arrow(name, tab)
114-
# self.postgis.create_table_arrow(name, tab)
122+
self.postgis.create_table_arrow(name, tab)
115123
self.duckdb.create_table_arrow(name, tab)
116124

117125
def _get_eng(self, eng):

python/sedonadb/tests/functions/test_distance.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,4 +43,5 @@ def test_st_distance(eng, geom1, geom2, expected):
4343
eng.assert_query_result(
4444
f"SELECT ST_Distance({geom_or_null(geom1)}, {geom_or_null(geom2)})",
4545
expected,
46+
numeric_epsilon=1e-8,
4647
)

rust/sedona-testing/src/datagen.rs

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -566,7 +566,8 @@ fn generate_random_linestring<R: rand::Rng>(
566566
);
567567
// Always sample in such a way that we end up with a valid linestring
568568
let num_vertices = rng.sample(vertices_dist).max(2);
569-
let coords = generate_circular_vertices(center_x, center_y, half_size, num_vertices, false);
569+
let coords =
570+
generate_circular_vertices(rng, center_x, center_y, half_size, num_vertices, false);
570571
LineString::from(coords)
571572
}
572573
}
@@ -582,7 +583,8 @@ fn generate_random_polygon<R: rand::Rng>(rng: &mut R, options: &RandomGeometryOp
582583
);
583584
// Always sample in such a way that we end up with a valid Polygon
584585
let num_vertices = rng.sample(vertices_dist).max(3);
585-
let coords = generate_circular_vertices(center_x, center_y, half_size, num_vertices, true);
586+
let coords =
587+
generate_circular_vertices(rng, center_x, center_y, half_size, num_vertices, true);
586588
let shell = LineString::from(coords);
587589
let mut holes = Vec::new();
588590

@@ -593,7 +595,7 @@ fn generate_random_polygon<R: rand::Rng>(rng: &mut R, options: &RandomGeometryOp
593595
if add_hole {
594596
let new_size = half_size * hole_scale_factor;
595597
let mut coords =
596-
generate_circular_vertices(center_x, center_y, new_size, num_vertices, true);
598+
generate_circular_vertices(rng, center_x, center_y, new_size, num_vertices, true);
597599
coords.reverse();
598600
holes.push(LineString::from(coords));
599601
}
@@ -756,15 +758,20 @@ fn generate_non_overlapping_sub_rectangles(num_parts: usize, bounds: &Rect) -> V
756758
tiles
757759
}
758760

759-
fn generate_circular_vertices(
761+
fn generate_circular_vertices<R: rand::Rng>(
762+
rng: &mut R,
760763
center_x: f64,
761764
center_y: f64,
762765
radius: f64,
763766
num_vertices: usize,
764767
closed: bool,
765768
) -> Vec<Coord> {
766769
let mut out = Vec::new();
767-
let mut angle: f64 = 0.0;
770+
771+
// Randomize starting angle (0 to 2 * PI)
772+
let start_angle_dist = Uniform::new(0.0, 2.0 * PI);
773+
let mut angle: f64 = rng.sample(start_angle_dist);
774+
768775
let dangle = 2.0 * PI / (num_vertices as f64).max(3.0);
769776
for _ in 0..num_vertices {
770777
out.push(Coord {

0 commit comments

Comments
 (0)