diff --git a/delphi/polismath/benchmarks/__init__.py b/delphi/polismath/benchmarks/__init__.py new file mode 100644 index 0000000000..d6408e0378 --- /dev/null +++ b/delphi/polismath/benchmarks/__init__.py @@ -0,0 +1 @@ +"""Benchmark scripts for polismath performance testing.""" diff --git a/delphi/polismath/benchmarks/bench_repness.py b/delphi/polismath/benchmarks/bench_repness.py new file mode 100755 index 0000000000..ba2be3fd89 --- /dev/null +++ b/delphi/polismath/benchmarks/bench_repness.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +""" +Benchmark script for repness (representativeness) computation performance. + +Usage: + cd delphi + ../.venv/bin/python -m polismath.benchmarks.bench_repness [--runs N] + ../.venv/bin/python -m polismath.benchmarks.bench_repness --profile + +Example: + ../.venv/bin/python -m polismath.benchmarks.bench_repness real_data/.local/r7wehfsmutrwndviddnii-bg2050/2025-11-25-1909-r7wehfsmutrwndviddnii-votes.csv --runs 3 + ../.venv/bin/python -m polismath.benchmarks.bench_repness real_data/.local/r7wehfsmutrwndviddnii-bg2050/2025-11-25-1909-r7wehfsmutrwndviddnii-votes.csv --profile +""" +# TODO(datasets): Once PR https://github.com/compdemocracy/polis/pull/2312 is merged, +# use the datasets package with include_local=True instead of requiring a path argument. + +import time +from pathlib import Path + +import click + +from polismath.benchmarks.benchmark_utils import ( + load_votes_from_csv, + extract_dataset_name, + votes_csv_argument, + runs_option, +) +from polismath.conversation import Conversation +from polismath.pca_kmeans_rep.repness import ( + conv_repness, + comment_stats, + add_comparative_stats, + finalize_cmt_stats, + select_rep_comments, + compute_group_comment_stats_df, + select_rep_comments_df, + select_consensus_comments_df, + prop_test_vectorized, + two_prop_test_vectorized +) + + +profile_option = click.option( + '--profile', '-p', + is_flag=True, + help='Run with line profiler on conv_repness', +) + + +def setup_conversation(votes_csv: Path) -> tuple[Conversation, str, int, float]: + """ + Load votes and setup conversation with PCA and clusters. + + Args: + votes_csv: Path to votes CSV file + + Returns: + Tuple of (conversation, dataset_name, n_votes, setup_time) + """ + dataset_name = extract_dataset_name(votes_csv) + + print(f"Loading votes from '{votes_csv}'...") + votes_dict = load_votes_from_csv(votes_csv) + n_votes = len(votes_dict['votes']) + print(f"Loaded {n_votes:,} votes") + print() + + print("Setting up conversation with votes and clusters...") + setup_start = time.perf_counter() + conv = Conversation(dataset_name) + conv = conv.update_votes(votes_dict, recompute=False) + conv._compute_pca() + conv._compute_clusters() + setup_time = time.perf_counter() - setup_start + + print(f"Setup completed in {setup_time:.2f}s") + print(f" Matrix shape: {conv.raw_rating_mat.shape}") + print(f" Number of groups: {len(conv.group_clusters)}") + print() + + return conv, dataset_name, n_votes, setup_time + + +def benchmark_repness(votes_csv: Path, runs: int = 3) -> dict: + """ + Benchmark repness computation on a dataset. 
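+    Sets up the conversation (votes, PCA, clustering) once via setup_conversation(),
+    then times repeated calls to the repness computation and reports summary statistics.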
+ + Args: + votes_csv: Path to votes CSV file + runs: Number of runs to average + + Returns: + Dictionary with benchmark results + """ + conv, dataset_name, n_votes, setup_time = setup_conversation(votes_csv) + + # Benchmark repness computation + print(f"Benchmarking repness computation ({runs} runs)...") + times = [] + for i in range(runs): + start = time.perf_counter() + conv._compute_repness() + elapsed = time.perf_counter() - start + times.append(elapsed) + n_rep_comments = sum(len(v) for v in conv.repness.get('group_repness', {}).values()) + print(f" Run {i+1}: {elapsed:.3f}s ({n_rep_comments} representative comments)") + + avg = sum(times) / len(times) + min_time = min(times) + max_time = max(times) + + print() + print("=" * 50) + print(f"Dataset: {dataset_name}") + print(f"Votes: {n_votes:,}") + print(f"Matrix shape: {conv.raw_rating_mat.shape}") + print(f"Groups: {len(conv.group_clusters)}") + print(f"Average repness time: {avg:.3f}s") + print(f"Min/Max: {min_time:.3f}s / {max_time:.3f}s") + + # Calculate comments per second + n_comments = conv.raw_rating_mat.shape[1] + n_participants = conv.raw_rating_mat.shape[0] + n_groups = len(conv.group_clusters) + + # Repness complexity is roughly O(groups * comments * participants) + operations = n_groups * n_comments * n_participants + print(f"Throughput: {operations/avg:,.0f} ops/sec (groups × comments × participants)") + + return { + 'dataset': dataset_name, + 'n_votes': n_votes, + 'shape': conv.raw_rating_mat.shape, + 'n_groups': n_groups, + 'times': times, + 'avg': avg, + 'min': min_time, + 'max': max_time, + 'setup_time': setup_time, + } + + +def profile_repness(votes_csv: Path) -> None: + """ + Run line profiler on conv_repness. + + Args: + votes_csv: Path to votes CSV file + """ + from line_profiler import LineProfiler + conv, _, _, _ = setup_conversation(votes_csv) + + # Setup line profiler + profiler = LineProfiler() + profiler.add_function(conv_repness) + profiler.add_function(compute_group_comment_stats_df) + profiler.add_function(select_rep_comments_df) + profiler.add_function(select_consensus_comments_df) + profiler.add_function(prop_test_vectorized) + profiler.add_function(two_prop_test_vectorized) + + # Run profiled + print("Running conv_repness with line profiler...") + profiler.runcall(conv_repness, conv.rating_mat, conv.group_clusters) + + # Print results + print() + print("=" * 70) + print("LINE PROFILE RESULTS") + print("=" * 70) + profiler.print_stats() + + +@click.command() +@votes_csv_argument +@runs_option +@profile_option +def main(votes_csv: Path, runs: int, profile: bool): + """Benchmark repness computation performance.""" + if profile: + profile_repness(votes_csv) + else: + benchmark_repness(votes_csv, runs) + + +if __name__ == '__main__': + main() diff --git a/delphi/polismath/benchmarks/bench_update_votes.py b/delphi/polismath/benchmarks/bench_update_votes.py new file mode 100755 index 0000000000..2595623433 --- /dev/null +++ b/delphi/polismath/benchmarks/bench_update_votes.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +""" +Benchmark script for update_votes performance. 
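+Each run builds a fresh Conversation and times Conversation.update_votes()
+(with recompute=False), i.e. the cost of building the wide-form vote matrix.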
+ +Usage: + cd delphi + ../.venv/bin/python -m polismath.benchmarks.bench_update_votes [--runs N] + +Example: + ../.venv/bin/python -m polismath.benchmarks.bench_update_votes real_data/.local/r7wehfsmutrwndviddnii-bg2050/2025-11-25-1909-r7wehfsmutrwndviddnii-votes.csv --runs 3 +""" +# TODO(datasets): Once PR https://github.com/compdemocracy/polis/pull/2312 is merged, +# use the datasets package with include_local=True instead of requiring a path argument. + +import time +from pathlib import Path + +import click + +from polismath.benchmarks.benchmark_utils import ( + load_votes_from_csv, + extract_dataset_name, + votes_csv_argument, + runs_option, +) + + +def benchmark_update_votes(votes_csv: Path, runs: int = 3) -> dict: + """ + Benchmark update_votes on a dataset. + + Args: + votes_csv: Path to votes CSV file + runs: Number of runs to average + + Returns: + Dictionary with benchmark results + """ + from polismath.conversation import Conversation + + dataset_name = extract_dataset_name(votes_csv) + + print(f"Loading votes from '{votes_csv}'...") + votes_dict = load_votes_from_csv(votes_csv) + n_votes = len(votes_dict['votes']) + print(f"Loaded {n_votes:,} votes") + print() + + times = [] + for i in range(runs): + conv = Conversation(dataset_name) + start = time.perf_counter() + conv = conv.update_votes(votes_dict, recompute=False) + elapsed = time.perf_counter() - start + times.append(elapsed) + print(f" Run {i+1}: {elapsed:.2f}s") + + avg = sum(times) / len(times) + min_time = min(times) + max_time = max(times) + + print() + print(f"Dataset: {dataset_name}") + print(f"Votes: {n_votes:,}") + print(f"Matrix shape: {conv.raw_rating_mat.shape}") + print(f"Average time: {avg:.2f}s") + print(f"Min/Max: {min_time:.2f}s / {max_time:.2f}s") + print(f"Throughput: {n_votes/avg:,.0f} votes/sec") + + return { + 'dataset': dataset_name, + 'n_votes': n_votes, + 'shape': conv.raw_rating_mat.shape, + 'times': times, + 'avg': avg, + 'min': min_time, + 'max': max_time, + 'throughput': n_votes / avg, + } + + +@click.command() +@votes_csv_argument +@runs_option +def main(votes_csv: Path, runs: int): + """Benchmark update_votes performance.""" + benchmark_update_votes(votes_csv, runs) + + +if __name__ == '__main__': + main() diff --git a/delphi/polismath/benchmarks/benchmark_utils.py b/delphi/polismath/benchmarks/benchmark_utils.py new file mode 100644 index 0000000000..23cb57fc5f --- /dev/null +++ b/delphi/polismath/benchmarks/benchmark_utils.py @@ -0,0 +1,110 @@ +""" +Shared utilities for benchmark scripts. +""" + +import time +from pathlib import Path +from typing import Callable, Dict, Any + +import click +import pandas as pd + + +def load_votes_from_csv(votes_csv: Path) -> dict: + """ + Load votes from a CSV file into the format expected by Conversation.update_votes(). 
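+    Renames the voter-id and comment-id columns to pid and tid, and attaches a
+    'created' timestamp to each vote (taken from the timestamp column when present,
+    otherwise a fixed value for reproducibility).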
+ + Args: + votes_csv: Path to votes CSV file with columns: voter-id, comment-id, vote, timestamp + + Returns: + Dictionary with 'votes' list and 'lastVoteTimestamp' + """ + df = pd.read_csv(votes_csv) + + # Fixed timestamp for reproducibility + fixed_timestamp = 1700000000000 + + # Use vectorized pandas operations instead of iterrows() for efficiency + df = df.rename(columns={ + 'voter-id': 'pid', + 'comment-id': 'tid', + }) + if 'timestamp' in df.columns: + df['created'] = df['timestamp'].astype(int) + else: + df['created'] = fixed_timestamp + + votes_list = df[['pid', 'tid', 'vote', 'created']].to_dict('records') + + return { + 'votes': votes_list, + 'lastVoteTimestamp': fixed_timestamp + } + + +def extract_dataset_name(votes_path: Path) -> str: + """ + Extract dataset name from path. + + Args: + votes_path: Path to votes CSV file + + Returns: + Dataset name (e.g., "r7wehfsmutrwndviddnii-bg2050" -> "bg2050") + """ + parent_name = votes_path.parent.name + if '-' in parent_name: + return parent_name.split('-', 1)[1] + return parent_name + + +def run_benchmark( + func: Callable[[], Any], + runs: int, + description: str = "operation" +) -> Dict[str, Any]: + """ + Run a benchmark function multiple times and collect timing statistics. + + Args: + func: Function to benchmark (called with no arguments) + runs: Number of runs + description: Description for printing + + Returns: + Dictionary with timing statistics and results from last run + """ + times = [] + result = None + for i in range(runs): + start = time.perf_counter() + result = func() + elapsed = time.perf_counter() - start + times.append(elapsed) + print(f" Run {i+1}: {elapsed:.3f}s") + + avg = sum(times) / len(times) + min_time = min(times) + max_time = max(times) + + return { + 'times': times, + 'avg': avg, + 'min': min_time, + 'max': max_time, + 'result': result, + } + + +# Common click options for benchmark scripts +votes_csv_argument = click.argument( + 'votes_csv', + type=click.Path(exists=True, path_type=Path), +) + +runs_option = click.option( + '--runs', '-n', + default=3, + help='Number of benchmark runs (default: 3)', +) diff --git a/delphi/polismath/conversation/conversation.py b/delphi/polismath/conversation/conversation.py index 42c37ca74d..c2ea6c9a8e 100644 --- a/delphi/polismath/conversation/conversation.py +++ b/delphi/polismath/conversation/conversation.py @@ -19,7 +19,6 @@ from polismath.pca_kmeans_rep.clusters import cluster_dataframe from polismath.pca_kmeans_rep.repness import conv_repness, participant_stats from polismath.pca_kmeans_rep.corr import compute_correlation -from polismath.utils.general import agree, disagree, pass_vote # Configure logging @@ -231,25 +230,39 @@ def update_votes(self, logger.info(f"[{time.time() - start_time:.2f}s] Found {len(new_rows)} new rows and {len(new_cols)} new columns") - # Apply all updates in a single batch operation for better performance - # Honestly, we should probably keep the matrix of votes in long-form, - # and only convert to wide-form when requested. - - logger.info(f"[{time.time() - start_time:.2f}s] Applying {len(vote_updates)} votes as batch update...") + # Apply all updates using vectorized pivot_table approach. + # This is much faster than row-by-row iteration because pandas/numpy + # can use optimized C code for the reshape operation. + + logger.info(f"[{time.time() - start_time:.2f}s] Applying {len(updates_df)} votes as batch update...") batch_start = time.time() - # For backward compatibility, sort the rows and columns by label. 
- result.raw_rating_mat = result.raw_rating_mat.reindex(index=all_rows, columns=all_cols, fill_value=np.nan) - # NOTE: we cannot use .loc(rows, cols) = values with rows,cols,and values being Series - # for example `result.raw_rating_mat.loc[updates_df['row'], updates_df['col']] = updates_df['value'].values` - # because pandas then tries to assign to the Cartesian product of rows and cols, and it gets very messy - # and is definitely *not* what we intended. - # We could convert to integer indices with get_loc, then use .value to use numpy assignment (which does not - # do any cartesian product), but a/ it's less legible, b/ there is *no* guarantee at all that .value is always - # a view and not a copy, so we might end up modifying a copy of the data frame. - # Therefore, for simplicity and readability, sticking to an ugly for loop. - # If you have a better idea, let me know at julien@cornebise.com, I would love to know :) - for idx, row_data in updates_df.iterrows(): - result.raw_rating_mat.at[row_data['row'], row_data['col']] = row_data['value'] + + # Build a wide-form matrix from the long-form updates using pivot_table. + # aggfunc='last' keeps the last vote if any duplicates remain after dedup. + update_matrix = updates_df.pivot_table( + index='row', + columns='col', + values='value', + aggfunc='last' + ) + + # Expand the existing matrix to include any new rows/columns. + # fill_value=np.nan ensures new cells start as "no vote". + result.raw_rating_mat = result.raw_rating_mat.reindex( + index=all_rows, columns=all_cols, fill_value=np.nan + ) + + # Align the update matrix to the same shape (new cells become NaN). + update_matrix = update_matrix.reindex(index=all_rows, columns=all_cols) + + # Merge: where update_matrix has a value, use it; otherwise keep original. + # DataFrame.where(cond, other) keeps self where cond is True, uses other where False. + # So: keep raw_rating_mat where update_matrix is NaN, else use update_matrix. + result.raw_rating_mat = result.raw_rating_mat.where( + update_matrix.isna(), # condition: True where update has no value + update_matrix # other: use update value where condition is False + ) + logger.info(f"[{time.time() - start_time:.2f}s] Batch update completed in {time.time() - batch_start:.2f}s") # Update last updated timestamp @@ -292,12 +305,10 @@ def _apply_moderation(self) -> None: def _compute_vote_stats(self) -> None: """ - Compute statistics on votes. + Compute statistics on votes using vectorized operations. """ - # Make sure pandas is imported import numpy as np - import pandas as pd - + # Initialize stats self.vote_stats = { 'n_votes': 0, @@ -307,84 +318,65 @@ def _compute_vote_stats(self) -> None: 'comment_stats': {}, 'participant_stats': {} } - - # Get matrix values and ensure they are numeric + try: - # Make a clean copy that's definitely numeric + # Get clean numeric matrix clean_mat = self._get_clean_matrix() - # TODO: we can probably count without needing to convert to numpy array values = clean_mat.to_numpy() - # Count votes safely + # Create boolean masks once for the entire matrix. + # These are 2D arrays of the same shape as values. 
+ non_null_mask = ~np.isnan(values) + agree_mask = np.abs(values - 1.0) < 0.001 # Close to 1 + disagree_mask = np.abs(values + 1.0) < 0.001 # Close to -1 + + # Global stats: sum over entire matrix try: - # Create masks, handling non-numeric data - non_null_mask = ~np.isnan(values) - agree_mask = np.abs(values - 1.0) < 0.001 # Close to 1 - disagree_mask = np.abs(values + 1.0) < 0.001 # Close to -1 - self.vote_stats['n_votes'] = int(np.sum(non_null_mask)) self.vote_stats['n_agree'] = int(np.sum(agree_mask)) self.vote_stats['n_disagree'] = int(np.sum(disagree_mask)) - self.vote_stats['n_pass'] = int(np.sum(np.isnan(values))) + self.vote_stats['n_pass'] = int(np.sum(~non_null_mask)) except Exception as e: - logger.error(f"Error counting votes: {e}") - # Set defaults if counting fails - self.vote_stats['n_votes'] = 0 - self.vote_stats['n_agree'] = 0 - self.vote_stats['n_disagree'] = 0 - self.vote_stats['n_pass'] = 0 - - # Compute comment stats - for i, cid in enumerate(clean_mat.columns): - if i >= values.shape[1]: - continue - - try: - col = values[:, i] - n_votes = np.sum(~np.isnan(col)) - n_agree = np.sum(np.abs(col - 1.0) < 0.001) - n_disagree = np.sum(np.abs(col + 1.0) < 0.001) - - self.vote_stats['comment_stats'][cid] = { - 'n_votes': int(n_votes), - 'n_agree': int(n_agree), - 'n_disagree': int(n_disagree), - 'agree_ratio': float(n_agree / max(n_votes, 1)) - } - except Exception as e: - logger.error(f"Error computing stats for comment {cid}: {e}") + logger.error(f"Error counting global votes: {e}") + + # Per-comment stats: sum along axis=0 (columns). + # axis=0 sums over rows, giving one value per column (comment). + try: + comment_n_votes = np.sum(non_null_mask, axis=0) + comment_n_agree = np.sum(agree_mask, axis=0) + comment_n_disagree = np.sum(disagree_mask, axis=0) + # Avoid division by zero: use np.maximum to ensure denominator >= 1 + comment_agree_ratio = comment_n_agree / np.maximum(comment_n_votes, 1) + + # Build comment_stats dict from the arrays. + for i, cid in enumerate(clean_mat.columns): self.vote_stats['comment_stats'][cid] = { - 'n_votes': 0, - 'n_agree': 0, - 'n_disagree': 0, - 'agree_ratio': 0.0 - } - - # Compute participant stats - for i, pid in enumerate(clean_mat.index): - if i >= values.shape[0]: - continue - - try: - row = values[i, :] - n_votes = np.sum(~np.isnan(row)) - n_agree = np.sum(np.abs(row - 1.0) < 0.001) - n_disagree = np.sum(np.abs(row + 1.0) < 0.001) - - self.vote_stats['participant_stats'][pid] = { - 'n_votes': int(n_votes), - 'n_agree': int(n_agree), - 'n_disagree': int(n_disagree), - 'agree_ratio': float(n_agree / max(n_votes, 1)) + 'n_votes': int(comment_n_votes[i]), + 'n_agree': int(comment_n_agree[i]), + 'n_disagree': int(comment_n_disagree[i]), + 'agree_ratio': float(comment_agree_ratio[i]) } - except Exception as e: - logger.error(f"Error computing stats for participant {pid}: {e}") + except Exception as e: + logger.error(f"Error computing comment stats: {e}") + + # Per-participant stats: sum along axis=1 (rows). + # axis=1 sums over columns, giving one value per row (participant). + try: + ptpt_n_votes = np.sum(non_null_mask, axis=1) + ptpt_n_agree = np.sum(agree_mask, axis=1) + ptpt_n_disagree = np.sum(disagree_mask, axis=1) + ptpt_agree_ratio = ptpt_n_agree / np.maximum(ptpt_n_votes, 1) + + # Build participant_stats dict from the arrays. 
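+                # enumerate() gives the positional index into the numpy arrays,
+                # while pid keeps the dict keyed by participant ID.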
+ for i, pid in enumerate(clean_mat.index): self.vote_stats['participant_stats'][pid] = { - 'n_votes': 0, - 'n_agree': 0, - 'n_disagree': 0, - 'agree_ratio': 0.0 + 'n_votes': int(ptpt_n_votes[i]), + 'n_agree': int(ptpt_n_agree[i]), + 'n_disagree': int(ptpt_n_disagree[i]), + 'agree_ratio': float(ptpt_agree_ratio[i]) } + except Exception as e: + logger.error(f"Error computing participant stats: {e}") except Exception as e: logger.error(f"Error in vote stats computation: {e}") # Initialize with empty stats if computation fails @@ -447,14 +439,18 @@ def update_moderation(self, def _compute_pca(self, n_components: int = 2) -> None: """ Compute PCA on the vote matrix. - + Args: n_components: Number of principal components """ + import time + start_time = time.time() + logger.info(f"Starting PCA computation (matrix shape: {self.rating_mat.shape})...") + # Make sure pandas and numpy are imported import numpy as np import pandas as pd - + # Check if we have enough data if self.rating_mat.shape[0] < 2 or self.rating_mat.shape[1] < 2: # Not enough data for PCA, create minimal results @@ -464,32 +460,35 @@ def _compute_pca(self, n_components: int = 2) -> None: 'comps': np.zeros((min(n_components, 2), cols)) } self.proj = {pid: np.zeros(2) for pid in self.rating_mat.index} + logger.info(f"PCA computation completed in {time.time() - start_time:.2f}s (insufficient data)") return try: # Make a clean copy of the rating matrix clean_matrix = self._get_clean_matrix() - + pca_results, proj_dict = pca_project_dataframe(clean_matrix, n_components) - + # Store results self.pca = pca_results self.proj = proj_dict - + logger.info(f"PCA computation completed in {time.time() - start_time:.2f}s") + except Exception as e: # If PCA fails, create minimal results logger.error(f"Error in PCA computation: {e}") # Make sure we have numpy and pandas import numpy as np import pandas as pd - + cols = self.rating_mat.shape[1] self.pca = { 'center': np.zeros(cols), 'comps': np.zeros((min(n_components, 2), cols)) } self.proj = {pid: np.zeros(2) for pid in self.rating_mat.index} - + logger.info(f"PCA computation completed in {time.time() - start_time:.2f}s (with errors)") + def _get_clean_matrix(self) -> pd.DataFrame: """ Get a clean copy of the rating matrix with proper numeric values. @@ -527,55 +526,64 @@ def _compute_clusters(self) -> None: """ Compute participant clusters using auto-determination of optimal k. 
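+        Clusters the 2-D PCA projections with cluster_dataframe(k=None), letting
+        the built-in determine_k heuristic choose the number of groups.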
""" + import time + start_time = time.time() + logger.info(f"Starting clustering computation ({len(self.proj)} participants)...") + # Make sure numpy and pandas are imported import numpy as np import pandas as pd - + # Check if we have projections if not self.proj: self.base_clusters = [] self.group_clusters = [] self.subgroup_clusters = {} + logger.info(f"Clustering completed in {time.time() - start_time:.2f}s (no projections)") return - + # Prepare data for clustering ptpt_ids = list(self.proj.keys()) proj_values = np.array([self.proj[pid] for pid in ptpt_ids]) - + # Create projection matrix proj_matrix = pd.DataFrame( data=proj_values, index=ptpt_ids, columns=['x', 'y'] ) - + # Use auto-determination of k based on data size # The determine_k function will handle this appropriately # Let the clustering function auto-determine the appropriate number of clusters # Pass k=None to use the built-in determine_k function base_clusters = cluster_dataframe(proj_matrix, k=None) - + # Convert base clusters to group clusters # Group clusters are high-level groups based on base clusters group_clusters = base_clusters - + # Store results self.base_clusters = base_clusters self.group_clusters = group_clusters - + # Compute subgroup clusters if needed self.subgroup_clusters = {} - - # TODO: Implement subgroup clustering if needed - + + logger.info(f"Clustering completed in {time.time() - start_time:.2f}s ({len(group_clusters)} groups)") + def _compute_repness(self) -> None: """ Compute comment representativeness. """ + import time + start_time = time.time() + logger.info(f"Starting representativeness computation ({len(self.group_clusters)} groups, {self.rating_mat.shape[1]} comments)...") + # Make sure numpy and pandas are imported import numpy as np import pandas as pd - + # Check if we have groups if not self.group_clusters: self.repness = { @@ -583,11 +591,13 @@ def _compute_repness(self) -> None: 'group_repness': {}, 'consensus_comments': [] } + logger.info(f"Representativeness completed in {time.time() - start_time:.2f}s (no groups)") return - + # Compute representativeness self.repness = conv_repness(self.rating_mat, self.group_clusters) - + logger.info(f"Representativeness completed in {time.time() - start_time:.2f}s") + def _compute_participant_info_optimized(self, vote_matrix: pd.DataFrame, group_clusters: List[Dict[str, Any]]) -> Dict[str, Any]: """ Optimized version of the participant info computation. diff --git a/delphi/polismath/pca_kmeans_rep/repness.py b/delphi/polismath/pca_kmeans_rep/repness.py index bef5c00e4d..abbd83515c 100644 --- a/delphi/polismath/pca_kmeans_rep/repness.py +++ b/delphi/polismath/pca_kmeans_rep/repness.py @@ -12,13 +12,27 @@ import math from scipy import stats -from polismath.utils.general import agree, disagree, pass_vote +from polismath.utils.general import AGREE, DISAGREE # Statistical constants Z_90 = 1.645 # Z-score for 90% confidence Z_95 = 1.96 # Z-score for 95% confidence -PSEUDO_COUNT = 1.5 # Pseudocount for Bayesian smoothing + +# Pseudocount for Bayesian smoothing (Laplace smoothing / additive smoothing) +# +# Why use pseudocounts? 
+# - Prevents extreme probabilities (0 or 1) when sample sizes are small +# - With PSEUDO_COUNT = 1.5, we effectively add 0.75 "virtual" agrees and +# 0.75 "virtual" disagrees to each comment's vote count +# - This pulls probabilities toward 0.5 (the prior), with the effect diminishing +# as sample size grows +# - Formula: p_agree = (n_agree + PSEUDO_COUNT/2) / (n_votes + PSEUDO_COUNT) +# +# Example: With 3 agrees out of 4 votes: +# - Raw probability: 3/4 = 0.75 +# - Smoothed (PSEUDO_COUNT=1.5): (3 + 0.75) / (4 + 1.5) = 3.75/5.5 ≈ 0.68 +PSEUDO_COUNT = 1.5 def z_score_sig_90(z: float) -> bool: @@ -113,11 +127,11 @@ def comment_stats(votes: np.ndarray, group_members: List[int]) -> Dict[str, Any] Dictionary of statistics """ # Filter votes to only include group members - group_votes = [votes[i] for i in group_members if i < len(votes)] - + group_votes = votes[group_members] + # Count agrees, disagrees, and total votes - n_agree = sum(1 for v in group_votes if agree(v)) - n_disagree = sum(1 for v in group_votes if disagree(v)) + n_agree = np.sum(group_votes == AGREE) + n_disagree = np.sum(group_votes == DISAGREE) n_votes = n_agree + n_disagree # Calculate probabilities with pseudocounts (Bayesian smoothing) @@ -399,10 +413,10 @@ def calculate_kl_divergence(p: np.ndarray, q: np.ndarray) -> float: def select_consensus_comments(all_stats: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Select comments with broad consensus. - + Args: all_stats: List of comment statistics for all groups - + Returns: List of consensus comments """ @@ -413,18 +427,18 @@ def select_consensus_comments(all_stats: List[Dict[str, Any]]) -> List[Dict[str, if cid not in by_comment: by_comment[cid] = [] by_comment[cid].append(stat) - + # Comments that have stats for all groups consensus_candidates = [] - + for cid, stats in by_comment.items(): # Check if all groups mostly agree all_agree = all(s['pa'] > 0.6 for s in stats) - + if all_agree: # Calculate average agreement avg_agree = sum(s['pa'] for s in stats) / len(stats) - + # Add as consensus candidate consensus_candidates.append({ 'comment_id': cid, @@ -432,159 +446,494 @@ def select_consensus_comments(all_stats: List[Dict[str, Any]]) -> List[Dict[str, 'repful': 'consensus', 'stats': stats }) - + # Sort by average agreement consensus_candidates.sort(key=lambda x: x['avg_agree'], reverse=True) - + # Take top 2 return consensus_candidates[:2] -def conv_repness(vote_matrix: pd.DataFrame, group_clusters: List[Dict[str, Any]]) -> Dict[str, Any]: +# ============================================================================= +# Vectorized DataFrame-native functions for multi-group operations +# ============================================================================= + +def prop_test_vectorized(p: pd.Series, n: pd.Series, p0: float = 0.5) -> pd.Series: """ - Calculate representativeness for all comments and groups. - + Vectorized one-proportion z-test. + Args: - vote_matrix: pd.DataFrame of votes + p: Series of observed proportions + n: Series of number of observations + p0: Expected proportion under null hypothesis (default: 0.5) + + Returns: + Series of z-scores + """ + se = np.sqrt(p0 * (1 - p0) / n) + z = (p - p0) / se + # Handle edge cases: n=0, p0=0, p0=1 all result in 0 + z = z.fillna(0.0) + z = z.replace([np.inf, -np.inf], 0.0) + return z + + +def two_prop_test_vectorized(p1: pd.Series, n1: pd.Series, + p2: pd.Series, n2: pd.Series) -> pd.Series: + """ + Vectorized two-proportion z-test. 
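+    Uses the pooled-probability standard error, i.e.
+    z = (p1 - p2) / sqrt(p_pooled * (1 - p_pooled) * (1/n1 + 1/n2)).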
+ + Args: + p1: Series of first proportions + n1: Series of number of observations for first proportion + p2: Series of second proportions + n2: Series of number of observations for second proportion + + Returns: + Series of z-scores + """ + # Pooled probability + p_pooled = (p1 * n1 + p2 * n2) / (n1 + n2) + + # Standard error + se = np.sqrt(p_pooled * (1 - p_pooled) * (1/n1 + 1/n2)) + + # Z-score calculation + z = (p1 - p2) / se + + # Handle edge cases + z = z.fillna(0.0) + z = z.replace([np.inf, -np.inf], 0.0) + return z + + +def compute_group_comment_stats_df(votes_long: pd.DataFrame, + group_clusters: List[Dict[str, Any]]) -> pd.DataFrame: + """ + Compute vote counts and probabilities for all (group, comment) pairs. + + This is the vectorized version of comment_stats() that operates on all + groups and comments simultaneously. + + Args: + votes_long: Long-format DataFrame with columns: + - 'participant': participant ID + - 'comment': comment ID + - 'vote': vote value (AGREE, DISAGREE, PASS, or NaN) group_clusters: List of group clusters - + Returns: - Dictionary with representativeness data for each group + DataFrame indexed by (group_id, comment) with columns: + - na: number of agrees + - nd: number of disagrees + - ns: number of votes (agrees + disagrees) + - pa: probability of agree (with pseudocount smoothing) + - pd: probability of disagree (with pseudocount smoothing) + - pat: proportion test z-score for agree + - pdt: proportion test z-score for disagree + - ra: representativeness ratio for agree (group vs other) + - rd: representativeness ratio for disagree (group vs other) + - rat: representativeness test z-score for agree + - rdt: representativeness test z-score for disagree + - agree_metric: metric for agree representativeness + - disagree_metric: metric for disagree representativeness + - repful: 'agree' or 'disagree' based on which is more representative + """ + # Build participant -> group mapping + ptpt_to_group = {} + for group in group_clusters: + for member in group['members']: + ptpt_to_group[member] = group['id'] + + # Drop NaN votes (unvoted) first - this applies to all participants + votes_only = votes_long.dropna(subset=['vote']) + + if votes_only.empty: + # Return empty DataFrame with correct schema + return pd.DataFrame(columns=['na', 'nd', 'ns', 'pa', 'pd', 'pat', 'pdt']) + + # Compute total counts per comment BEFORE filtering to group members + # This matches the old behavior where "other" included ALL participants + # not in the current group (even those not in any cluster) + total_counts = votes_only.groupby('comment').agg( + total_agree=('vote', lambda x: (x == AGREE).sum()), + total_disagree=('vote', lambda x: (x == DISAGREE).sum()), + ) + total_counts['total_votes'] = total_counts['total_agree'] + total_counts['total_disagree'] + + # Now add group column and filter to only group members + votes_with_group = votes_only.copy() + votes_with_group['group_id'] = votes_with_group['participant'].map(ptpt_to_group) + + # Keep only votes from participants in some group (for group-specific counts) + votes_in_groups = votes_with_group.dropna(subset=['group_id']) + + if votes_in_groups.empty: + # Return empty DataFrame with correct schema + return pd.DataFrame(columns=['na', 'nd', 'ns', 'pa', 'pd', 'pat', 'pdt']) + + # Get all unique comments that have at least one vote (from anyone) + all_comments = total_counts.index.tolist() + + # Get all group IDs + all_group_ids = [group['id'] for group in group_clusters] + + # Compute vote counts per (group, comment) for votes 
from group members + group_counts = votes_in_groups.groupby(['group_id', 'comment']).agg( + na=('vote', lambda x: (x == AGREE).sum()), + nd=('vote', lambda x: (x == DISAGREE).sum()), + ) + group_counts['ns'] = group_counts['na'] + group_counts['nd'] + + # Create full index with all (group, comment) combinations to match old behavior + # Old implementation: for each group, iterate over ALL comments (that have any votes) + full_index = pd.MultiIndex.from_product( + [all_group_ids, all_comments], + names=['group_id', 'comment'] + ) + + # Reindex to include all combinations, filling missing with 0 + group_counts = group_counts.reindex(full_index, fill_value=0) + + # Join total counts to group counts + stats_df = group_counts.join(total_counts, on='comment') + + # Compute "other" counts (everyone not in this group) + stats_df['other_agree'] = stats_df['total_agree'] - stats_df['na'] + stats_df['other_disagree'] = stats_df['total_disagree'] - stats_df['nd'] + stats_df['other_votes'] = stats_df['total_votes'] - stats_df['ns'] + + # Compute probabilities with pseudocounts (Bayesian smoothing) + # For group + stats_df['pa'] = (stats_df['na'] + PSEUDO_COUNT/2) / (stats_df['ns'] + PSEUDO_COUNT) + stats_df['pd'] = (stats_df['nd'] + PSEUDO_COUNT/2) / (stats_df['ns'] + PSEUDO_COUNT) + + # Handle ns == 0 case: default to uninformative prior (0.5) + zero_mask = stats_df['ns'] == 0 + stats_df.loc[zero_mask, 'pa'] = 0.5 + stats_df.loc[zero_mask, 'pd'] = 0.5 + + # For "other" group + stats_df['other_pa'] = (stats_df['other_agree'] + PSEUDO_COUNT/2) / (stats_df['other_votes'] + PSEUDO_COUNT) + stats_df['other_pd'] = (stats_df['other_disagree'] + PSEUDO_COUNT/2) / (stats_df['other_votes'] + PSEUDO_COUNT) + + other_zero_mask = stats_df['other_votes'] == 0 + stats_df.loc[other_zero_mask, 'other_pa'] = 0.5 + stats_df.loc[other_zero_mask, 'other_pd'] = 0.5 + + # Compute proportion tests (group vs 0.5) + stats_df['pat'] = prop_test_vectorized(stats_df['pa'], stats_df['ns'], 0.5) + stats_df['pdt'] = prop_test_vectorized(stats_df['pd'], stats_df['ns'], 0.5) + + # Compute representativeness ratios (group vs other) + stats_df['ra'] = stats_df['pa'] / stats_df['other_pa'] + stats_df['rd'] = stats_df['pd'] / stats_df['other_pd'] + + # Handle division by zero (other_pa or other_pd == 0) + stats_df['ra'] = stats_df['ra'].replace([np.inf, -np.inf], 1.0).fillna(1.0) + stats_df['rd'] = stats_df['rd'].replace([np.inf, -np.inf], 1.0).fillna(1.0) + + # Compute representativeness tests (two-proportion z-test: group vs other) + stats_df['rat'] = two_prop_test_vectorized( + stats_df['pa'], stats_df['ns'], + stats_df['other_pa'], stats_df['other_votes'] + ) + stats_df['rdt'] = two_prop_test_vectorized( + stats_df['pd'], stats_df['ns'], + stats_df['other_pd'], stats_df['other_votes'] + ) + + # Compute metrics + # agree_metric = pa * (|pat| + |rat|) + # disagree_metric = (1 - pd) * (|pdt| + |rdt|) + stats_df['agree_metric'] = stats_df['pa'] * (stats_df['pat'].abs() + stats_df['rat'].abs()) + stats_df['disagree_metric'] = (1 - stats_df['pd']) * (stats_df['pdt'].abs() + stats_df['rdt'].abs()) + + # Determine repful ('agree' or 'disagree') + # Logic: if pa > 0.5 and ra > 1.0 -> 'agree' + # elif pd > 0.5 and rd > 1.0 -> 'disagree' + # else: use higher metric + conditions = [ + (stats_df['pa'] > 0.5) & (stats_df['ra'] > 1.0), + (stats_df['pd'] > 0.5) & (stats_df['rd'] > 1.0), + ] + choices = ['agree', 'disagree'] + stats_df['repful'] = np.select(conditions, choices, + default=np.where(stats_df['agree_metric'] >= 
stats_df['disagree_metric'], + 'agree', 'disagree')) + + return stats_df + + +def select_rep_comments_df(stats_df: pd.DataFrame, + agree_count: int = 3, + disagree_count: int = 2) -> pd.DataFrame: + """ + Select representative comments for a single group from a DataFrame. + + DataFrame-native version of select_rep_comments(). + + Args: + stats_df: DataFrame with comment statistics for ONE group + agree_count: Number of agreement comments to select + disagree_count: Number of disagreement comments to select + + Returns: + DataFrame of selected representative comments + """ + if stats_df.empty: + return stats_df + + total_wanted = agree_count + disagree_count + + # Best agree: pa > pd and passes significance tests + agree_candidates = stats_df[stats_df['pa'] > stats_df['pd']].copy() + if not agree_candidates.empty: + # Check significance: |pat| >= Z_90 and |rat| >= Z_90 + passing_agree = agree_candidates[ + (agree_candidates['pat'].abs() >= Z_90) & + (agree_candidates['rat'].abs() >= Z_90) & + (agree_candidates['pa'] >= 0.5) + ] + if not passing_agree.empty: + agree_candidates = passing_agree + + # Best disagree: pd > pa and passes significance tests + disagree_candidates = stats_df[stats_df['pd'] > stats_df['pa']].copy() + if not disagree_candidates.empty: + passing_disagree = disagree_candidates[ + (disagree_candidates['pdt'].abs() >= Z_90) & + (disagree_candidates['rdt'].abs() >= Z_90) & + (disagree_candidates['pd'] >= 0.5) + ] + if not passing_disagree.empty: + disagree_candidates = passing_disagree + + # Sort candidates by metric + if not agree_candidates.empty: + agree_candidates = agree_candidates.sort_values('agree_metric', ascending=False) + if not disagree_candidates.empty: + disagree_candidates = disagree_candidates.sort_values('disagree_metric', ascending=False) + + # Select top N from each category + selected_parts = [] + + if not agree_candidates.empty: + top_agree = agree_candidates.head(agree_count).copy() + top_agree['repful'] = 'agree' + selected_parts.append(top_agree) + + if not disagree_candidates.empty: + top_disagree = disagree_candidates.head(disagree_count).copy() + top_disagree['repful'] = 'disagree' + selected_parts.append(top_disagree) + + if selected_parts: + selected = pd.concat(selected_parts, ignore_index=False) + else: + selected = pd.DataFrame() + + # If we couldn't find enough, try to fill from available candidates + # This matches the exact behavior of the old select_rep_comments() function: + # - First fallback adds agree_comments[agree_count:min(len, total_wanted)] regardless of + # whether we exceed total_wanted (up to disagree_count more agrees) + # - Second fallback only runs if STILL < total_wanted + if len(selected) < total_wanted: + # Try to add more agree comments + # Old code: range(agree_count, min(len(agree_comments), agree_count + disagree_count)) + if not agree_candidates.empty and len(agree_candidates) > agree_count: + extra_limit = min(len(agree_candidates), total_wanted) + extra_agrees = agree_candidates.iloc[agree_count:extra_limit].copy() + extra_agrees['repful'] = 'agree' + selected = pd.concat([selected, extra_agrees], ignore_index=False) + + # Try to add more disagree comments (only if still not enough) + # Old code: range(disagree_count, min(len(disagree_comments), agree_count + disagree_count)) + if len(selected) < total_wanted and not disagree_candidates.empty and len(disagree_candidates) > disagree_count: + extra_limit = min(len(disagree_candidates), total_wanted) + extra_disagrees = 
disagree_candidates.iloc[disagree_count:extra_limit].copy() + extra_disagrees['repful'] = 'disagree' + selected = pd.concat([selected, extra_disagrees], ignore_index=False) + + # Fallback: if still empty, take first row + if selected.empty and not stats_df.empty: + selected = stats_df.head(1).copy() + selected['repful'] = selected['repful'].iloc[0] if 'repful' in selected.columns else 'agree' + + return selected + + +def select_consensus_comments_df(stats_df: pd.DataFrame, + n_groups: int) -> List[Dict[str, Any]]: + """ + Select consensus comments from DataFrame. + + Args: + stats_df: DataFrame with all (group, comment) statistics + n_groups: Number of groups + + Returns: + List of consensus comment dicts + """ + if stats_df.empty: + return [] + + # Group by comment and check if all groups have high agreement + stats_reset = stats_df.reset_index() + comment_stats = stats_reset.groupby('comment').agg( + min_pa=('pa', 'min'), + avg_pa=('pa', 'mean'), + group_count=('group_id', 'count') + ) + + # Filter to comments where all groups agree (pa > 0.6 for all) + # and present in all groups + consensus = comment_stats[ + (comment_stats['min_pa'] > 0.6) & + (comment_stats['group_count'] == n_groups) + ].copy() + + if consensus.empty: + return [] + + # Sort by average agreement and take top 2 + consensus = consensus.nlargest(2, 'avg_pa') + + # Convert to list of dicts using _stats_row_to_dict for legacy format + result = [] + for comment_id in consensus.index: + comment_rows = stats_reset[stats_reset['comment'] == comment_id] + # Convert each row to legacy dict format + stats_list = [_stats_row_to_dict(row) for _, row in comment_rows.iterrows()] + result.append({ + 'comment_id': comment_id, + 'avg_agree': consensus.loc[comment_id, 'avg_pa'], + 'repful': 'consensus', + 'stats': stats_list + }) + + return result + + +def _stats_row_to_dict(row: pd.Series) -> Dict[str, Any]: + """Convert a stats DataFrame row to the legacy dict format.""" + return { + 'comment_id': row['comment'], + 'group_id': row['group_id'], + 'na': int(row['na']), + 'nd': int(row['nd']), + 'ns': int(row['ns']), + 'pa': row['pa'], + 'pd': row['pd'], + 'pat': row['pat'], + 'pdt': row['pdt'], + 'ra': row['ra'], + 'rd': row['rd'], + 'rat': row['rat'], + 'rdt': row['rdt'], + 'agree_metric': row['agree_metric'], + 'disagree_metric': row['disagree_metric'], + 'repful': row['repful'], + } + + +def conv_repness(vote_matrix_df: pd.DataFrame, group_clusters: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Calculate representativeness for all comments and groups. + + Uses a vectorized long-format DataFrame approach for efficiency. 
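+    The wide vote matrix is melted to long form once, per-(group, comment)
+    statistics are computed in a single vectorized pass, and representative and
+    consensus comments are then selected from slices of that DataFrame.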
+ + Args: + vote_matrix_df: pd.DataFrame of matrix of votes (participants × comments) + Values should be AGREE (1), DISAGREE (-1), PASS (0), or NaN (unvoted) + group_clusters: List of group clusters, each with 'id' and 'members' + + Returns: + Dictionary with representativeness data for each group: + - comment_ids: list of comment IDs + - group_repness: dict mapping group_id -> list of representative comments + - consensus_comments: list of consensus comments + - comment_repness: list of all comment repness data """ - # Extract and clean the matrix values - matrix_values = vote_matrix.to_numpy(copy = True) - - # Ensure the matrix contains numeric values - if not np.issubdtype(matrix_values.dtype, np.number): - # Convert to numeric matrix with proper NaN handling - numeric_matrix = np.zeros(matrix_values.shape, dtype=float) - for i in range(matrix_values.shape[0]): - for j in range(matrix_values.shape[1]): - val = matrix_values[i, j] - if pd.isna(val) or val is None: - numeric_matrix[i, j] = np.nan - else: - try: - numeric_matrix[i, j] = float(val) - except (ValueError, TypeError): - numeric_matrix[i, j] = np.nan - matrix_values = numeric_matrix - - # Replace NaNs with None for the algorithm - matrix_values = np.where(np.isnan(matrix_values), None, matrix_values) - # Create empty-result structure in case we need to return early empty_result = { - 'comment_ids': vote_matrix.columns.tolist(), + 'comment_ids': vote_matrix_df.columns.tolist(), 'group_repness': {group['id']: [] for group in group_clusters}, 'consensus_comments': [], - 'comment_repness': [] # Add a list for all comment repness data + 'comment_repness': [] } - + # Check if we have enough data - if matrix_values.shape[0] < 2 or matrix_values.shape[1] < 2: + if vote_matrix_df.shape[0] < 2 or vote_matrix_df.shape[1] < 2: return empty_result - - # Result will hold repness data for each group + + # Convert wide-format to long-format DataFrame + # Wide: participants × comments (values = votes) + # Long: participant | comment | vote + votes_long = vote_matrix_df.melt( + ignore_index=False, + var_name='comment', + value_name='vote' + ).reset_index(names='participant') + + # Ensure vote column is numeric (handle object dtype with None values) + votes_long['vote'] = pd.to_numeric(votes_long['vote'], errors='coerce') + + # Compute all stats using vectorized function + stats_df = compute_group_comment_stats_df(votes_long, group_clusters) + + if stats_df.empty: + return empty_result + + # Reset index for easier manipulation + stats_df_reset = stats_df.reset_index() + + # Build comment_repness list (vectorized) + stats_df_reset['repness'] = np.where( + stats_df_reset['repful'] == 'agree', + stats_df_reset['agree_metric'], + stats_df_reset['disagree_metric'] + ) + comment_repness = stats_df_reset[['comment', 'group_id', 'repness', 'pa', 'pd']].copy() + comment_repness.columns = ['tid', 'gid', 'repness', 'pa', 'pd'] + + # Build result structure result = { - 'comment_ids': vote_matrix.columns.tolist(), + 'comment_ids': vote_matrix_df.columns.tolist(), 'group_repness': {}, - 'comment_repness': [] # Add a list for all comment repness data + 'comment_repness': comment_repness.to_dict('records') } - - # For each group, calculate representativeness - all_stats = [] - + + # Select representative comments per group (DataFrame operations) for group in group_clusters: group_id = group['id'] - - # Convert member IDs to indices with error handling - group_members = [] - for m in group['members']: - try: - if m in vote_matrix.index: - idx = 
vote_matrix.index.get_loc(m) - # Question: why would idx ever *not* be within matrix_values.shape ? - if 0 <= idx < matrix_values.shape[0]: - group_members.append(idx) - except (ValueError, TypeError) as e: - print(f"Error finding member {m} in matrix: {e}") - - if not group_members: - # Skip empty groups + group_stats = stats_df_reset[stats_df_reset['group_id'] == group_id] + + if group_stats.empty: result['group_repness'][group_id] = [] continue - - # Calculate other members (all participants not in this group) - all_indices = list(range(matrix_values.shape[0])) - other_members = [i for i in all_indices if i not in group_members] - - # Stats for each comment - group_stats = [] - - for c_idx, comment_id in enumerate(vote_matrix.columns): - if c_idx >= matrix_values.shape[1]: - continue - - comment_votes = matrix_values[:, c_idx] - - # Skip comments with no votes - if not any(v is not None for v in comment_votes): - continue - - try: - # Calculate stats for this group - stats = comment_stats(comment_votes, group_members) - - # Calculate stats for other groups - other_stats = comment_stats(comment_votes, other_members) - - # Add comparative stats - stats = add_comparative_stats(stats, other_stats) - - # Finalize stats - stats = finalize_cmt_stats(stats) - - # Add metadata - stats['comment_id'] = comment_id - stats['group_id'] = group_id - - group_stats.append(stats) - all_stats.append(stats) - - # Also add to the comment_repness list - repness = { - 'tid': comment_id, - 'gid': group_id, - 'repness': stats.get('agree_metric', 0) if stats.get('repful') == 'agree' else stats.get('disagree_metric', 0), - 'pa': stats.get('pa', 0), - 'pd': stats.get('pd', 0) - } - result['comment_repness'].append(repness) - except Exception as e: - print(f"Error calculating stats for comment {comment_id} in group {group_id}: {e}") - continue - + try: - # Select representative comments for this group - rep_comments = select_rep_comments(group_stats) - - # Store in result + rep_df = select_rep_comments_df(group_stats) + # Convert to list of dicts only at the end + rep_comments = [_stats_row_to_dict(row) for _, row in rep_df.iterrows()] result['group_repness'][group_id] = rep_comments except Exception as e: print(f"Error selecting representative comments for group {group_id}: {e}") result['group_repness'][group_id] = [] - + # Add consensus comments if there are multiple groups try: - if len(group_clusters) > 1 and all_stats: - result['consensus_comments'] = select_consensus_comments(all_stats) + if len(group_clusters) > 1: + result['consensus_comments'] = select_consensus_comments_df( + stats_df, len(group_clusters) + ) else: result['consensus_comments'] = [] except Exception as e: print(f"Error selecting consensus comments: {e}") result['consensus_comments'] = [] - + return result diff --git a/delphi/polismath/utils/general.py b/delphi/polismath/utils/general.py index 6467833ffc..df3f2a9d30 100644 --- a/delphi/polismath/utils/general.py +++ b/delphi/polismath/utils/general.py @@ -12,6 +12,9 @@ T = TypeVar('T') U = TypeVar('U') +AGREE = 1 +DISAGREE = -1 +PASS = 0 def xor(a: bool, b: bool) -> bool: """ @@ -41,45 +44,6 @@ def round_to(n: float, digits: int = 0) -> float: return round(n, digits) -def agree(vote: Optional[float]) -> bool: - """ - Check if a vote is an agreement. - - Args: - vote: Vote value (1 for agree, -1 for disagree, None for pass) - - Returns: - True if the vote is an agreement - """ - return vote == 1 - - -def disagree(vote: Optional[float]) -> bool: - """ - Check if a vote is a disagreement. 
- - Args: - vote: Vote value (1 for agree, -1 for disagree, None for pass) - - Returns: - True if the vote is a disagreement - """ - return vote == -1 - - -def pass_vote(vote: Optional[float]) -> bool: - """ - Check if a vote is a pass. - - Args: - vote: Vote value (1 for agree, -1 for disagree, None for pass) - - Returns: - True if the vote is a pass (None) - """ - return vote is None - - def zip_collections(*colls: Iterable[T]) -> List[Tuple[T, ...]]: """ Zip multiple collections together. diff --git a/delphi/pyproject.toml b/delphi/pyproject.toml index d96def3b4a..1d949c729b 100644 --- a/delphi/pyproject.toml +++ b/delphi/pyproject.toml @@ -76,6 +76,8 @@ dev = [ # Type stubs "types-requests", "types-psycopg2", + # Profiling + "line_profiler>=4.0.0", ] notebook = [ diff --git a/delphi/tests/test_old_format_repness.py b/delphi/tests/test_old_format_repness.py new file mode 100644 index 0000000000..6679172a76 --- /dev/null +++ b/delphi/tests/test_old_format_repness.py @@ -0,0 +1,541 @@ +""" +Tests for the representativeness module's backwards-compatible interface. + +These tests verify the single-group, single-comment "old format" API +that wraps the new DataFrame-native implementation. +""" + +import numpy as np +import pandas as pd +import sys +import os + +# Add the parent directory to the path to import the module +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from polismath.pca_kmeans_rep.repness import ( + z_score_sig_90, z_score_sig_95, prop_test, two_prop_test, + comment_stats, add_comparative_stats, repness_metric, finalize_cmt_stats, + passes_by_test, best_agree, best_disagree, select_rep_comments, + select_consensus_comments, conv_repness, + participant_stats +) + + +class TestStatisticalFunctions: + """Tests for the statistical utility functions.""" + + def test_z_score_significance(self): + """Test z-score significance checks.""" + # 90% confidence + assert z_score_sig_90(1.645) + assert z_score_sig_90(2.0) + assert z_score_sig_90(-1.645) + assert not z_score_sig_90(1.0) + + # 95% confidence + assert z_score_sig_95(1.96) + assert z_score_sig_95(2.5) + assert z_score_sig_95(-1.96) + assert not z_score_sig_95(1.5) + + def test_prop_test(self): + """Test one-proportion z-test.""" + # Test cases + assert np.isclose(prop_test(0.7, 100, 0.5), 4.0, atol=0.1) + assert np.isclose(prop_test(0.2, 50, 0.3), -1.6, atol=0.1) + + # Edge cases + assert prop_test(0.5, 0, 0.5) == 0.0 + assert prop_test(0.7, 100, 0.0) == 0.0 + assert prop_test(0.7, 100, 1.0) == 0.0 + + def test_two_prop_test(self): + """Test two-proportion z-test.""" + # Test cases + assert np.isclose(two_prop_test(0.7, 100, 0.5, 100), 2.9, atol=0.1) + assert np.isclose(two_prop_test(0.2, 50, 0.3, 50), -1.2, atol=0.1) + + # Edge cases + assert two_prop_test(0.5, 0, 0.5, 100) == 0.0 + assert two_prop_test(0.5, 100, 0.5, 0) == 0.0 + + +class TestCommentStats: + """Tests for comment statistics functions (old single-array interface).""" + + def test_comment_stats(self): + """Test basic comment statistics calculation.""" + # Create test votes: 3 agrees, 1 disagree, 1 pass + votes = np.array([1, 1, 1, -1, None]) + group_members = [0, 1, 2, 3, 4] + + stats = comment_stats(votes, group_members) + + assert stats['na'] == 3 + assert stats['nd'] == 1 + assert stats['ns'] == 4 + + # Check probabilities (with pseudocounts) + n_agree = 3 + n_disagree = 1 + n_votes = 4 + p_agree = (n_agree + 1.5/2) / (n_votes + 1.5) + p_disagree = (n_disagree + 1.5/2) / (n_votes + 1.5) + + assert np.isclose(stats['pa'], 
p_agree) + assert np.isclose(stats['pd'], p_disagree) + + # Test with no votes + empty_votes = np.array([None, None]) + empty_stats = comment_stats(empty_votes, [0, 1]) + + assert empty_stats['na'] == 0 + assert empty_stats['nd'] == 0 + assert empty_stats['ns'] == 0 + assert np.isclose(empty_stats['pa'], 0.5) + assert np.isclose(empty_stats['pd'], 0.5) + + def test_add_comparative_stats(self): + """Test adding comparative statistics.""" + # Group stats: 80% agree + group_stats = { + 'na': 8, + 'nd': 2, + 'ns': 10, + 'pa': 0.8, + 'pd': 0.2, + 'pat': 3.0, + 'pdt': -3.0 + } + + # Other group stats: 40% agree + other_stats = { + 'na': 4, + 'nd': 6, + 'ns': 10, + 'pa': 0.4, + 'pd': 0.6, + 'pat': -1.0, + 'pdt': 1.0 + } + + result = add_comparative_stats(group_stats, other_stats) + + # Check representativeness ratios + assert np.isclose(result['ra'], 0.8 / 0.4) + assert np.isclose(result['rd'], 0.2 / 0.6) + + # Test edge case with zero probability + other_stats_zero = { + 'na': 0, + 'nd': 10, + 'ns': 10, + 'pa': 0.0, + 'pd': 1.0, + 'pat': -5.0, + 'pdt': 5.0 + } + + result_zero = add_comparative_stats(group_stats, other_stats_zero) + assert np.isclose(result_zero['ra'], 1.0) # Should default to 1.0 + + def test_repness_metric(self): + """Test representativeness metric calculation.""" + stats = { + 'pa': 0.8, + 'pd': 0.2, + 'pat': 3.0, + 'pdt': -3.0, + 'ra': 2.0, + 'rd': 0.33, + 'rat': 2.5, + 'rdt': -2.5 + } + + # Calculate agree metric + agree_metric = repness_metric(stats, 'a') + expected_agree = 0.8 * (abs(3.0) + abs(2.5)) + assert np.isclose(agree_metric, expected_agree) + + # Calculate disagree metric + disagree_metric = repness_metric(stats, 'd') + expected_disagree = (1 - 0.2) * (abs(-3.0) + abs(-2.5)) + assert np.isclose(disagree_metric, expected_disagree) + + def test_finalize_cmt_stats(self): + """Test finalizing comment statistics.""" + # Stats where agree is more representative + agree_stats = { + 'pa': 0.8, + 'pd': 0.2, + 'pat': 3.0, + 'pdt': -3.0, + 'ra': 2.0, + 'rd': 0.33, + 'rat': 2.5, + 'rdt': -2.5 + } + + finalized_agree = finalize_cmt_stats(agree_stats) + + assert 'agree_metric' in finalized_agree + assert 'disagree_metric' in finalized_agree + assert finalized_agree['repful'] == 'agree' + + # Stats where disagree is more representative + disagree_stats = { + 'pa': 0.2, + 'pd': 0.8, + 'pat': -3.0, + 'pdt': 3.0, + 'ra': 0.33, + 'rd': 2.0, + 'rat': -2.5, + 'rdt': 2.5 + } + + finalized_disagree = finalize_cmt_stats(disagree_stats) + assert finalized_disagree['repful'] == 'disagree' + + +class TestSelectionFunctions: + """Tests for representative comment selection functions.""" + + def test_passes_by_test(self): + """Test checking if comments pass significance tests.""" + # Create stats that pass significance tests + passing_stats = { + 'pa': 0.8, + 'pd': 0.2, + 'pat': 3.0, + 'pdt': -3.0, + 'ra': 2.0, + 'rd': 0.33, + 'rat': 3.0, + 'rdt': -3.0 + } + + assert passes_by_test(passing_stats, 'agree') + assert not passes_by_test(passing_stats, 'disagree') + + # Create stats that don't pass (not significant) + failing_stats = { + 'pa': 0.8, + 'pd': 0.2, + 'pat': 1.0, # Below 90% threshold + 'pdt': -1.0, + 'ra': 2.0, + 'rd': 0.33, + 'rat': 1.0, # Below 90% threshold + 'rdt': -1.0 + } + + assert not passes_by_test(failing_stats, 'agree') + + def test_best_agree(self): + """Test filtering for best agreement comments.""" + # Create a mix of stats + stats = [ + { # Passes tests, high agreement + 'comment_id': 'c1', + 'pa': 0.8, 'pd': 0.2, + 'pat': 3.0, 'pdt': -3.0, + 'rat': 3.0, 'rdt': -3.0 + }, 
+ { # Doesn't pass tests + 'comment_id': 'c2', + 'pa': 0.6, 'pd': 0.4, + 'pat': 1.0, 'pdt': -1.0, + 'rat': 1.0, 'rdt': -1.0 + }, + { # Not agreement (more disagree) + 'comment_id': 'c3', + 'pa': 0.3, 'pd': 0.7, + 'pat': -2.0, 'pdt': 2.0, + 'rat': -2.0, 'rdt': 2.0 + }, + { # Passes tests, moderate agreement + 'comment_id': 'c4', + 'pa': 0.7, 'pd': 0.3, + 'pat': 2.5, 'pdt': -2.5, + 'rat': 2.5, 'rdt': -2.5 + } + ] + + best = best_agree(stats) + + # Should return 2 comments that pass tests + assert len(best) == 2 + comment_ids = [s['comment_id'] for s in best] + assert 'c1' in comment_ids + assert 'c4' in comment_ids + assert 'c3' not in comment_ids + + def test_best_disagree(self): + """Test filtering for best disagreement comments.""" + # Create a mix of stats + stats = [ + { # Not disagreement (more agree) + 'comment_id': 'c1', + 'pa': 0.8, 'pd': 0.2, + 'pat': 3.0, 'pdt': -3.0, + 'rat': 3.0, 'rdt': -3.0 + }, + { # Disagreement but doesn't pass tests + 'comment_id': 'c2', + 'pa': 0.4, 'pd': 0.6, + 'pat': -1.0, 'pdt': 1.0, + 'rat': -1.0, 'rdt': 1.0 + }, + { # Passes tests, high disagreement + 'comment_id': 'c3', + 'pa': 0.2, 'pd': 0.8, + 'pat': -3.0, 'pdt': 3.0, + 'rat': -3.0, 'rdt': 3.0 + } + ] + + best = best_disagree(stats) + + # Should return 1 comment that passes tests + assert len(best) == 1 + assert best[0]['comment_id'] == 'c3' + + def test_select_rep_comments(self): + """Test selecting representative comments.""" + # Create a mix of stats + stats = [ + { # Strong agree + 'comment_id': 'c1', + 'pa': 0.9, 'pd': 0.1, + 'pat': 4.0, 'pdt': -4.0, + 'rat': 4.0, 'rdt': -4.0, + 'agree_metric': 7.2, + 'disagree_metric': 0.9 + }, + { # Moderate agree + 'comment_id': 'c2', + 'pa': 0.7, 'pd': 0.3, + 'pat': 2.0, 'pdt': -2.0, + 'rat': 2.0, 'rdt': -2.0, + 'agree_metric': 2.8, + 'disagree_metric': 1.2 + }, + { # Weak agree + 'comment_id': 'c3', + 'pa': 0.6, 'pd': 0.4, + 'pat': 1.0, 'pdt': -1.0, + 'rat': 1.0, 'rdt': -1.0, + 'agree_metric': 1.2, + 'disagree_metric': 0.8 + }, + { # Strong disagree + 'comment_id': 'c4', + 'pa': 0.1, 'pd': 0.9, + 'pat': -4.0, 'pdt': 4.0, + 'rat': -4.0, 'rdt': 4.0, + 'agree_metric': 0.8, + 'disagree_metric': 7.2 + }, + { # Moderate disagree + 'comment_id': 'c5', + 'pa': 0.3, 'pd': 0.7, + 'pat': -2.0, 'pdt': 2.0, + 'rat': -2.0, 'rdt': 2.0, + 'agree_metric': 1.2, + 'disagree_metric': 2.8 + } + ] + + # Set 'repful' for all stats to match the implementation + for stat in stats: + if stat.get('agree_metric', 0) >= stat.get('disagree_metric', 0): + stat['repful'] = 'agree' + else: + stat['repful'] = 'disagree' + + # Select with default counts + selected = select_rep_comments(stats) + + # Check that we get some representative comments + assert len(selected) > 0 + + # Verify that comments are properly marked + agree_comments = [s for s in selected if s['repful'] == 'agree'] + disagree_comments = [s for s in selected if s['repful'] == 'disagree'] + + # Make sure we have both types of comments if available + assert len(agree_comments) > 0 + assert len(disagree_comments) > 0 + + # Check that the order is by metrics + if len(agree_comments) >= 2: + assert agree_comments[0]['agree_metric'] >= agree_comments[1]['agree_metric'] + + if len(disagree_comments) >= 2: + assert disagree_comments[0]['disagree_metric'] >= disagree_comments[1]['disagree_metric'] + + # Test with different counts + selected_custom = select_rep_comments(stats, agree_count=2, disagree_count=1) + + assert len(selected_custom) == 3 + agree_count = sum(1 for s in selected_custom if s['repful'] == 'agree') + 
+        disagree_count = sum(1 for s in selected_custom if s['repful'] == 'disagree')
+
+        assert agree_count == 2
+        assert disagree_count == 1
+
+        # Test with empty stats
+        assert select_rep_comments([]) == []
+
+
+class TestConsensusAndGroupRepness:
+    """Tests for consensus and group representativeness functions."""
+
+    def test_select_consensus_comments(self):
+        """Test selecting consensus comments."""
+        # Create stats for groups
+        group1_stats = [
+            {
+                'comment_id': 'c1',
+                'group_id': 1,
+                'pa': 0.8, 'pd': 0.2
+            },
+            {
+                'comment_id': 'c2',
+                'group_id': 1,
+                'pa': 0.7, 'pd': 0.3
+            }
+        ]
+
+        group2_stats = [
+            {
+                'comment_id': 'c1',
+                'group_id': 2,
+                'pa': 0.85, 'pd': 0.15
+            },
+            {
+                'comment_id': 'c2',
+                'group_id': 2,
+                'pa': 0.6, 'pd': 0.4
+            },
+            {
+                'comment_id': 'c3',
+                'group_id': 2,
+                'pa': 0.9, 'pd': 0.1
+            }
+        ]
+
+        # Combine stats
+        all_stats = group1_stats + group2_stats
+
+        consensus = select_consensus_comments(all_stats)
+
+        # Comments with high agreement across all groups should be consensus
+        assert len(consensus) > 0
+
+        # Verify comment IDs in consensus list - both c1 and c2 have high agreement
+        consensus_ids = [c['comment_id'] for c in consensus]
+
+        # At least one of these should be in the consensus
+        assert 'c1' in consensus_ids or 'c2' in consensus_ids
+
+        # NOTE: The implementation sorts by average agreement across groups;
+        # c3 has the highest average agreement (0.9) but appears in only one group,
+        # so it may legitimately be selected as well. We only assert on the labels below.
+
+        # Check all consensus comments have the correct label
+        for comment in consensus:
+            assert comment['repful'] == 'consensus'
+
+
+class TestIntegration:
+    """Integration tests for the representativeness module."""
+
+    def test_conv_repness(self):
+        """Test the main representativeness calculation function."""
+        # Create a test vote matrix
+        vote_data = np.array([
+            [1, 1, -1, None],   # Participant 1
+            [1, 1, -1, 1],      # Participant 2
+            [-1, -1, 1, -1],    # Participant 3
+            [-1, -1, 1, 1]      # Participant 4
+        ])
+
+        row_names = ['p1', 'p2', 'p3', 'p4']
+        col_names = ['c1', 'c2', 'c3', 'c4']
+
+        vote_matrix = pd.DataFrame(vote_data, index=row_names, columns=col_names)
+
+        # Create group clusters
+        group_clusters = [
+            {'id': 1, 'members': ['p1', 'p2']},  # Group 1: mostly agrees with c1, c2
+            {'id': 2, 'members': ['p3', 'p4']}   # Group 2: mostly agrees with c3
+        ]
+
+        # Calculate representativeness
+        repness_result = conv_repness(vote_matrix, group_clusters)
+
+        # Check result structure
+        assert 'comment_ids' in repness_result
+        assert 'group_repness' in repness_result
+        assert 'consensus_comments' in repness_result
+
+        # Check group repness
+        assert 1 in repness_result['group_repness']
+        assert 2 in repness_result['group_repness']
+
+        # Group 1 should identify c1/c2 as representative
+        group1_rep_ids = [s['comment_id'] for s in repness_result['group_repness'][1]]
+        assert 'c1' in group1_rep_ids or 'c2' in group1_rep_ids
+
+        # Group 2 should identify c3 as representative
+        group2_rep_ids = [s['comment_id'] for s in repness_result['group_repness'][2]]
+        assert 'c3' in group2_rep_ids
+
+    def test_participant_stats(self):
+        """Test participant statistics calculation."""
+        # Create a test vote matrix
+        vote_data = np.array([
+            [1, 1, -1, None],   # Participant 1
+            [1, 1, -1, 1],      # Participant 2
+            [-1, -1, 1, -1],    # Participant 3
+            [-1, -1, 1, 1]      # Participant 4
+        ])
+
+        row_names = ['p1', 'p2', 'p3', 'p4']
+        col_names = ['c1', 'c2', 'c3', 'c4']
+
+        vote_matrix = pd.DataFrame(vote_data, index=row_names, columns=col_names)
+
+        # Create group clusters
+        group_clusters = [
+            {'id': 1, 'members': ['p1', 'p2']},
+            {'id': 2, 'members': ['p3', 'p4']}
+        ]
+
+        # Calculate participant stats
+        ptpt_stats = participant_stats(vote_matrix, group_clusters)
+
+        # Check result structure
+        assert 'participant_ids' in ptpt_stats
+        assert 'stats' in ptpt_stats
+
+        # Check participant stats
+        for ptpt_id in row_names:
+            assert ptpt_id in ptpt_stats['stats']
+            stats = ptpt_stats['stats'][ptpt_id]
+
+            assert 'n_agree' in stats
+            assert 'n_disagree' in stats
+            assert 'n_votes' in stats
+            assert 'group' in stats
+            assert 'group_correlations' in stats
+
+        # Check specific stats
+        p1_stats = ptpt_stats['stats']['p1']
+        assert p1_stats['n_agree'] == 2
+        assert p1_stats['n_disagree'] == 1
+        assert p1_stats['group'] == 1
diff --git a/delphi/tests/test_repness_unit.py b/delphi/tests/test_repness_unit.py
index 4dd111b525..69da767264 100644
--- a/delphi/tests/test_repness_unit.py
+++ b/delphi/tests/test_repness_unit.py
@@ -17,7 +17,9 @@
     comment_stats, add_comparative_stats, repness_metric, finalize_cmt_stats,
     passes_by_test, best_agree, best_disagree, select_rep_comments,
     calculate_kl_divergence, select_consensus_comments, conv_repness,
-    participant_stats
+    participant_stats,
+    # DataFrame-native vectorized functions
+    prop_test_vectorized, two_prop_test_vectorized, compute_group_comment_stats_df
 )
@@ -537,4 +539,174 @@ def test_participant_stats(self):
         p1_stats = ptpt_stats['stats']['p1']
         assert p1_stats['n_agree'] == 2
         assert p1_stats['n_disagree'] == 1
-        assert p1_stats['group'] == 1
\ No newline at end of file
+        assert p1_stats['group'] == 1
+
+
+class TestVectorizedFunctions:
+    """Tests for DataFrame-native vectorized functions."""
+
+    def test_prop_test_vectorized(self):
+        """Test vectorized one-proportion z-test."""
+        p = pd.Series([0.7, 0.2, 0.5])
+        n = pd.Series([100, 50, 100])
+
+        result = prop_test_vectorized(p, n, 0.5)
+
+        # Compare with scalar version
+        assert np.isclose(result.iloc[0], prop_test(0.7, 100, 0.5), atol=0.01)
+        assert np.isclose(result.iloc[1], prop_test(0.2, 50, 0.5), atol=0.01)
+        assert np.isclose(result.iloc[2], prop_test(0.5, 100, 0.5), atol=0.01)
+
+    def test_prop_test_vectorized_edge_cases(self):
+        """Test vectorized prop test handles edge cases."""
+        p = pd.Series([0.5, 0.7])
+        n = pd.Series([0, 100])  # n=0 should return 0
+
+        result = prop_test_vectorized(p, n, 0.5)
+
+        assert result.iloc[0] == 0.0  # n=0 case
+        assert not np.isnan(result.iloc[1])  # normal case
+
+    def test_two_prop_test_vectorized(self):
+        """Test vectorized two-proportion z-test."""
+        p1 = pd.Series([0.7, 0.2])
+        n1 = pd.Series([100, 50])
+        p2 = pd.Series([0.5, 0.3])
+        n2 = pd.Series([100, 50])
+
+        result = two_prop_test_vectorized(p1, n1, p2, n2)
+
+        # Compare with scalar version
+        assert np.isclose(result.iloc[0], two_prop_test(0.7, 100, 0.5, 100), atol=0.01)
+        assert np.isclose(result.iloc[1], two_prop_test(0.2, 50, 0.3, 50), atol=0.01)
+
+    def test_two_prop_test_vectorized_edge_cases(self):
+        """Test vectorized two-prop test handles edge cases."""
+        p1 = pd.Series([0.5, 0.7])
+        n1 = pd.Series([0, 100])  # n1=0 should return 0
+        p2 = pd.Series([0.5, 0.5])
+        n2 = pd.Series([100, 0])  # n2=0 should return 0
+
+        result = two_prop_test_vectorized(p1, n1, p2, n2)
+
+        assert result.iloc[0] == 0.0  # n1=0 case
+        assert result.iloc[1] == 0.0  # n2=0 case
+
+    def test_compute_group_comment_stats_df(self):
+        """Test vectorized computation of group/comment statistics."""
+        # Create test data in long format
+        votes_long = pd.DataFrame({
+            'participant': ['p1', 'p1', 'p2', 'p2', 'p3', 'p3', 'p4', 'p4'],
+            'comment': ['c1', 'c2', 'c1', 'c2', 'c1', 'c2', 'c1', 'c2'],
+            'vote': [1, 1, 1, 1, -1, -1, -1, -1]  # AGREE=1, DISAGREE=-1
+        })
+
+        group_clusters = [
+            {'id': 1, 'members': ['p1', 'p2']},  # Group 1: agrees with c1, c2
+            {'id': 2, 'members': ['p3', 'p4']}   # Group 2: disagrees with c1, c2
+        ]
+
+        stats_df = compute_group_comment_stats_df(votes_long, group_clusters)
+
+        # Check that we have stats for all group/comment combinations
+        assert len(stats_df) == 4  # 2 groups x 2 comments
+
+        # Check group 1, comment c1 stats
+        g1_c1 = stats_df.loc[(1, 'c1')]
+        assert g1_c1['na'] == 2  # 2 agrees
+        assert g1_c1['nd'] == 0  # 0 disagrees
+        assert g1_c1['ns'] == 2  # 2 total votes
+
+        # Check group 2, comment c1 stats
+        g2_c1 = stats_df.loc[(2, 'c1')]
+        assert g2_c1['na'] == 0  # 0 agrees
+        assert g2_c1['nd'] == 2  # 2 disagrees
+        assert g2_c1['ns'] == 2  # 2 total votes
+
+        # Check that probabilities are computed
+        assert 'pa' in stats_df.columns
+        assert 'pd' in stats_df.columns
+        assert 'pat' in stats_df.columns
+        assert 'pdt' in stats_df.columns
+
+        # Check that comparative stats are computed
+        assert 'ra' in stats_df.columns
+        assert 'rd' in stats_df.columns
+        assert 'rat' in stats_df.columns
+        assert 'rdt' in stats_df.columns
+
+        # Check that metrics are computed
+        assert 'agree_metric' in stats_df.columns
+        assert 'disagree_metric' in stats_df.columns
+        assert 'repful' in stats_df.columns
+
+    def test_compute_group_comment_stats_df_with_nan_votes(self):
+        """Test that NaN votes are properly excluded."""
+        # Create test data with some NaN votes
+        votes_long = pd.DataFrame({
+            'participant': ['p1', 'p1', 'p2', 'p2', 'p3', 'p3'],
+            'comment': ['c1', 'c2', 'c1', 'c2', 'c1', 'c2'],
+            'vote': [1, np.nan, 1, 1, -1, -1]  # p1 didn't vote on c2
+        })
+
+        group_clusters = [
+            {'id': 1, 'members': ['p1', 'p2']},
+            {'id': 2, 'members': ['p3']}
+        ]
+
+        stats_df = compute_group_comment_stats_df(votes_long, group_clusters)
+
+        # Group 1, c2 should have only 1 vote (from p2, since p1's NaN is dropped)
+        g1_c2 = stats_df.loc[(1, 'c2')]
+        assert g1_c2['ns'] == 1  # Only p2's vote counted
+
+    def test_compute_group_comment_stats_df_empty(self):
+        """Test handling of empty input."""
+        votes_long = pd.DataFrame(columns=['participant', 'comment', 'vote'])
+        group_clusters = [{'id': 1, 'members': ['p1']}]
+
+        stats_df = compute_group_comment_stats_df(votes_long, group_clusters)
+
+        assert stats_df.empty
+
+    def test_compute_group_comment_stats_matches_scalar(self):
+        """Test that vectorized results match scalar function results."""
+        # Create test data
+        vote_data = np.array([
+            [1, 1, -1],   # p1
+            [1, -1, 1],   # p2
+            [-1, 1, -1],  # p3
+            [-1, -1, 1]   # p4
+        ], dtype=float)
+
+        row_names = ['p1', 'p2', 'p3', 'p4']
+        col_names = ['c1', 'c2', 'c3']
+        vote_matrix = pd.DataFrame(vote_data, index=row_names, columns=col_names)
+
+        group_clusters = [
+            {'id': 1, 'members': ['p1', 'p2']},
+            {'id': 2, 'members': ['p3', 'p4']}
+        ]
+
+        # Get vectorized results
+        votes_long = vote_matrix.melt(
+            ignore_index=False,
+            var_name='comment',
+            value_name='vote'
+        ).reset_index(names='participant')
+        stats_df = compute_group_comment_stats_df(votes_long, group_clusters)
+
+        # Compare with scalar results from conv_repness
+        repness_result = conv_repness(vote_matrix, group_clusters)
+
+        # Check that we have the same number of comment_repness entries
+        assert len(repness_result['comment_repness']) == len(stats_df)
+
+        # Check a few specific values
+        for entry in repness_result['comment_repness']:
+            gid = entry['gid']
+            tid = entry['tid']
+            df_row = stats_df.loc[(gid, tid)]
+
+            assert np.isclose(entry['pa'], df_row['pa'], atol=1e-10)
+            assert np.isclose(entry['pd'], df_row['pd'], atol=1e-10)
\ No newline at end of file
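
Reviewer note: the vectorized tests above pin down a contract (element-wise z statistics that fall back to 0.0 when a group has no votes) rather than a specific formula. A minimal sketch of a one-proportion variant consistent with those assertions, assuming a standard z statistic against p0; the actual prop_test_vectorized in polismath.pca_kmeans_rep.repness may differ (for example, by using pseudocount smoothing):

import numpy as np
import pandas as pd

def prop_test_vectorized_sketch(p: pd.Series, n: pd.Series, p0: float = 0.5) -> pd.Series:
    # Hypothetical sketch, not the project's implementation: element-wise
    # z = (p - p0) / sqrt(p0 * (1 - p0) / n), with 0.0 wherever n == 0.
    n = n.astype(float)
    safe_n = n.replace(0.0, np.nan)  # avoid division by zero; filled below
    z = (p - p0) / np.sqrt(p0 * (1.0 - p0) / safe_n)
    return z.fillna(0.0)  # n == 0 rows become 0.0, matching the edge-case tests

# Example: prop_test_vectorized_sketch(pd.Series([0.7, 0.5]), pd.Series([100, 0])) -> [4.0, 0.0]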