Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 45 additions & 6 deletions benchmarks/test_bench_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ def setup_class(self):
num_geoms = 100_000

# Setup tables
for name, options in [
for name, base_options in [
(
"segments_large",
{
"geom_type": "LineString",
"target_rows": num_geoms,
"vertices_per_linestring_range": [2, 2],
"vertices_per_linestring_range": [2, 10],
},
),
(
Expand Down Expand Up @@ -69,13 +69,52 @@ def setup_class(self):
},
),
]:
# Generate synthetic data
# Generate synthetic data with two different geometry sets that have overlapping spatial distribution
# The intersection rate between geom1 and geom2 will be around 2%.
# This creates more realistic workloads for spatial predicates.

# Options for first geometry set (geom1) - left-leaning distribution
options1 = base_options.copy()
options1.update(
{
"seed": 42,
"bounds": [0.0, 0.0, 80.0, 100.0], # Slightly left-leaning
"size_range": [
1.0,
15.0,
], # Medium-sized geometries for good intersection chance
}
)

# Options for second geometry set (geom2) - right-leaning distribution
options2 = base_options.copy()
options2.update(
{
"seed": 43,
"bounds": [20.0, 0.0, 100.0, 100.0], # Slightly right-leaning
"size_range": [1.0, 15.0], # Same size range for fair comparison
}
)

query = f"""
WITH geom1_data AS (
SELECT
geometry as geom1,
row_number() OVER () as id
FROM sd_random_geometry('{json.dumps(options1)}')
),
geom2_data AS (
SELECT
geometry as geom2,
row_number() OVER () as id
FROM sd_random_geometry('{json.dumps(options2)}')
)
SELECT
geometry as geom1,
geometry as geom2,
g1.geom1,
g2.geom2,
round(random() * 100) as integer
FROM sd_random_geometry('{json.dumps(options)}')
FROM geom1_data g1
JOIN geom2_data g2 ON g1.id = g2.id
"""
tab = self.sedonadb.execute_and_collect(query)

Expand Down
1 change: 1 addition & 0 deletions python/sedonadb/tests/functions/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,5 @@ def test_st_distance(eng, geom1, geom2, expected):
eng.assert_query_result(
f"SELECT ST_Distance({geom_or_null(geom1)}, {geom_or_null(geom2)})",
expected,
numeric_epsilon=1e-8,
)
4 changes: 2 additions & 2 deletions python/sedonadb/tests/test_sjoin.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_spatial_join_geography(join_type, on):
"vertices_per_linestring_range": [2, 10],
"bounds": west_most_bound,
"size_range": [0.1, 5],
"seed": 42,
"seed": 43,
}
)
df_point = eng_sedonadb.execute_and_collect(
Expand All @@ -118,7 +118,7 @@ def test_spatial_join_geography(join_type, on):
"vertices_per_linestring_range": [2, 10],
"bounds": east_most_bound,
"size_range": [0.1, 5],
"seed": 43,
"seed": 44,
}
)
df_polygon = eng_sedonadb.execute_and_collect(
Expand Down
17 changes: 12 additions & 5 deletions rust/sedona-testing/src/datagen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,8 @@ fn generate_random_linestring<R: rand::Rng>(
);
// Always sample in such a way that we end up with a valid linestring
let num_vertices = rng.sample(vertices_dist).max(2);
let coords = generate_circular_vertices(center_x, center_y, half_size, num_vertices, false);
let coords =
generate_circular_vertices(rng, center_x, center_y, half_size, num_vertices, false);
LineString::from(coords)
}
}
Expand All @@ -582,7 +583,8 @@ fn generate_random_polygon<R: rand::Rng>(rng: &mut R, options: &RandomGeometryOp
);
// Always sample in such a way that we end up with a valid Polygon
let num_vertices = rng.sample(vertices_dist).max(3);
let coords = generate_circular_vertices(center_x, center_y, half_size, num_vertices, true);
let coords =
generate_circular_vertices(rng, center_x, center_y, half_size, num_vertices, true);
let shell = LineString::from(coords);
let mut holes = Vec::new();

Expand All @@ -593,7 +595,7 @@ fn generate_random_polygon<R: rand::Rng>(rng: &mut R, options: &RandomGeometryOp
if add_hole {
let new_size = half_size * hole_scale_factor;
let mut coords =
generate_circular_vertices(center_x, center_y, new_size, num_vertices, true);
generate_circular_vertices(rng, center_x, center_y, new_size, num_vertices, true);
coords.reverse();
holes.push(LineString::from(coords));
}
Expand Down Expand Up @@ -756,15 +758,20 @@ fn generate_non_overlapping_sub_rectangles(num_parts: usize, bounds: &Rect) -> V
tiles
}

fn generate_circular_vertices(
fn generate_circular_vertices<R: rand::Rng>(
rng: &mut R,
center_x: f64,
center_y: f64,
radius: f64,
num_vertices: usize,
closed: bool,
) -> Vec<Coord> {
let mut out = Vec::new();
let mut angle: f64 = 0.0;

// Randomize starting angle (0 to 2 * PI)
let start_angle_dist = Uniform::new(0.0, 2.0 * PI);
let mut angle: f64 = rng.sample(start_angle_dist);

let dangle = 2.0 * PI / (num_vertices as f64).max(3.0);
for _ in 0..num_vertices {
out.push(Coord {
Expand Down
Loading