diff --git a/app/models/label.rb b/app/models/label.rb
index 9171aecd..ecca3cef 100644
--- a/app/models/label.rb
+++ b/app/models/label.rb
@@ -4,6 +4,8 @@ class Label < ActiveRecord::Base
   has_many :tracks, through: :track_labels
   has_many :scheduled_show_labels, dependent: :destroy
   has_many :scheduled_shows, through: :scheduled_show_labels
+  has_many :show_series_labels, dependent: :destroy
+  has_many :show_series, through: :show_series_labels
   belongs_to :radio
 
   before_save :downcase_name
diff --git a/script/README.md b/script/README.md
new file mode 100644
index 00000000..0f8281ba
--- /dev/null
+++ b/script/README.md
@@ -0,0 +1,73 @@
+# Label Consolidation Scripts
+
+This directory contains scripts to identify and consolidate duplicate labels in the system.
+
+## Scripts
+
+### `check_duplicate_labels.rb`
+
+A read-only script that identifies duplicate labels without making any changes.
+
+**Usage:**
+```bash
+# Using rails runner
+rails runner script/check_duplicate_labels.rb
+
+# Using Docker (if using the docker setup)
+docker-compose -p streampusher -f docker-compose-dev.yml run --rm rails ./docker_wrapper.sh bundle exec rails runner script/check_duplicate_labels.rb
+```
+
+### `consolidate_duplicate_labels.rb`
+
+Consolidates duplicate labels by:
+1. Finding labels with the same name within the same radio
+2. Keeping the oldest label (by created_at timestamp)
+3. Moving all track, show, and series associations to the kept label
+4. Deleting the duplicate labels
+
+**Usage:**
+```bash
+# Run in dry-run mode first to see what would be changed
+DRY_RUN=true rails runner script/consolidate_duplicate_labels.rb
+
+# Actually perform the consolidation
+rails runner script/consolidate_duplicate_labels.rb
+
+# Using Docker (if using the docker setup)
+docker-compose -p streampusher -f docker-compose-dev.yml run --rm rails ./docker_wrapper.sh bundle exec rails runner script/consolidate_duplicate_labels.rb
+```
+
+## What Gets Consolidated
+
+The script handles these associations:
+- **Track Labels** (`track_labels` table) - Links between tracks and labels
+- **Scheduled Show Labels** (`scheduled_show_labels` table) - Links between scheduled shows and labels
+- **Show Series Labels** (`show_series_labels` table) - Links between show series and labels
+
+## Safety Features
+
+- **Transaction Safety**: Each duplicate group is consolidated inside its own database transaction
+- **Duplicate Prevention**: Checks for existing associations before moving them, so no duplicate links are created
+- **Error Handling**: Rolls back the current group and stops the run if any error occurs
+- **Dry Run Mode**: Test the script with `DRY_RUN=true` before making changes
+- **Detailed Logging**: Shows exactly what labels are being consolidated and how many associations are moved
+
+## Example Output
+
+```
+Starting label consolidation...
+Found 1 duplicate label groups to process...
+
+[1/1] Consolidating 2 duplicate labels for radio 1, name: 'electronic'
+ Keeping label ID 123 (created: 2024-01-15 10:30:00 UTC)
+ Removing label IDs: 456
+ Moved from label ID 456: 5 track associations, 2 show associations, 1 series associations
+ ✓ Consolidated successfully
+
+==================================================
+CONSOLIDATION COMPLETE
+Found 1 duplicate labels
+Consolidated 1 labels into their oldest counterparts
+All track, show, and series associations have been preserved
+==================================================
+```
\ No newline at end of file
diff --git a/script/check_duplicate_labels.rb b/script/check_duplicate_labels.rb
new file mode 100644
index 00000000..98e1b32b
--- /dev/null
+++ b/script/check_duplicate_labels.rb
@@ -0,0 +1,45 @@
+# Script to check for duplicate labels
+# This is a read-only script to identify potential duplicates
+
+puts "Checking for duplicate labels..."
+
+# Find duplicate labels grouped by radio_id and name.
+# `count` on a grouped relation returns a Hash of { [radio_id, name] => row_count }.
+# The groups are walked with plain Hash iteration rather than `find_each`, because
+# `find_each` batches by primary key and a grouped select does not include it.
+duplicate_groups = Label.group(:radio_id, :name)
+                        .having("count(*) > 1")
+                        .order(:radio_id, :name)
+                        .count
+
+total_duplicates = 0
+
+if duplicate_groups.any?
+  puts "\nFound duplicate label groups:"
+
+  duplicate_groups.each_key do |(radio_id, name)|
+    # Get all labels with this radio_id and name, oldest first (id breaks ties)
+    labels = Label.where(radio_id: radio_id, name: name).order(:created_at, :id).to_a
+
+    puts "\nRadio #{radio_id}, name: '#{name}'"
+    puts " #{labels.size} duplicate labels:"
+
+    labels.each do |label|
+      track_count = label.track_labels.count
+      show_count = label.scheduled_show_labels.count
+      series_count = label.show_series_labels.count
+
+      puts " ID #{label.id} (created: #{label.created_at}) - #{track_count} tracks, #{show_count} shows, #{series_count} series"
+      total_duplicates += 1
+    end
+  end
+
+  puts "\n" + "="*50
+  puts "SUMMARY"
+  puts "Found #{duplicate_groups.size} duplicate label groups"
+  puts "Total duplicate labels: #{total_duplicates}"
+  puts "Run consolidate_duplicate_labels.rb to fix these duplicates"
+  puts "="*50
+else
+  puts "No duplicate labels found. Database is clean!"
+end
\ No newline at end of file
diff --git a/script/consolidate_duplicate_labels.rb b/script/consolidate_duplicate_labels.rb
new file mode 100644
index 00000000..365c632d
--- /dev/null
+++ b/script/consolidate_duplicate_labels.rb
@@ -0,0 +1,111 @@
+# Script to consolidate duplicate labels
+# If a track belongs to a duplicate label, it should be added to the label we will keep
+# to preserve all track label data
+#
+# Usage: [DRY_RUN=true] rails runner script/consolidate_duplicate_labels.rb
+
+dry_run = ENV['DRY_RUN'] == 'true'
+
+# Re-points every join row in +join_rows+ at +label_to_keep+ and returns the number of
+# rows moved. A row whose owner (the record referenced by +owner_key+) is already linked
+# to the kept label is destroyed instead, so no duplicate association is created.
+move_join_rows = lambda do |join_rows, owner_key, label_to_keep|
+  moved = 0
+  join_rows.find_each do |join_row|
+    if join_row.class.exists?(owner_key => join_row[owner_key], label_id: label_to_keep.id)
+      join_row.destroy!
+    else
+      join_row.update!(label_id: label_to_keep.id)
+      moved += 1
+    end
+  end
+  moved
+end
+
+if dry_run
+  puts "=" * 50
+  puts "DRY RUN MODE - No changes will be made"
+  puts "=" * 50
+end
+
+puts "Starting label consolidation..."
+
+# Find duplicate labels grouped by radio_id and name.
+# `count` on a grouped relation returns a Hash keyed by [radio_id, name], so the number
+# of groups is the Hash's size. The groups are walked with plain Hash iteration rather
+# than `find_each`, which batches by primary key and so cannot page a grouped select.
+duplicate_groups = Label.group(:radio_id, :name)
+                        .having("count(*) > 1")
+                        .order(:radio_id, :name)
+                        .count
+
+total_groups = duplicate_groups.size
+
+if total_groups.zero?
+  # Branch instead of calling `exit`, so the script can also be `load`ed (e.g. from a
+  # spec) without terminating the host process.
+  puts "No duplicate labels found. Database is clean!"
+else
+  puts "Found #{total_groups} duplicate label groups to process..."
+
+  duplicate_count = 0
+  consolidated_count = 0
+
+  duplicate_groups.each_key.with_index(1) do |(radio_id, name), group_counter|
+    # Oldest label first; id breaks created_at ties so the kept label is deterministic
+    labels_to_consolidate = Label.where(radio_id: radio_id, name: name)
+                                 .order(:created_at, :id)
+                                 .to_a
+
+    next if labels_to_consolidate.size <= 1
+
+    # Keep the oldest label (first created)
+    label_to_keep, *labels_to_remove = labels_to_consolidate
+    # Counted before the dry-run check so the summary is accurate in both modes
+    duplicate_count += labels_to_remove.size
+
+    puts "\n[#{group_counter}/#{total_groups}] Consolidating #{labels_to_consolidate.size} duplicate labels for radio #{radio_id}, name: '#{name}'"
+    puts " Keeping label ID #{label_to_keep.id} (created: #{label_to_keep.created_at})"
+    puts " Removing label IDs: #{labels_to_remove.map(&:id).join(', ')}"
+
+    if dry_run
+      puts " [DRY RUN] Would consolidate these labels"
+      next
+    end
+
+    # One transaction per group: an error rolls this group back and aborts the run
+    Label.transaction do
+      labels_to_remove.each do |label_to_remove|
+        begin
+          track_labels_moved = move_join_rows.call(label_to_remove.track_labels, :track_id, label_to_keep)
+          show_labels_moved = move_join_rows.call(label_to_remove.scheduled_show_labels, :scheduled_show_id, label_to_keep)
+          series_labels_moved = move_join_rows.call(label_to_remove.show_series_labels, :show_series_id, label_to_keep)
+
+          puts " Moved from label ID #{label_to_remove.id}: #{track_labels_moved} track associations, #{show_labels_moved} show associations, #{series_labels_moved} series associations"
+
+          # Delete the duplicate label
+          label_to_remove.destroy!
+          consolidated_count += 1
+        rescue => e
+          puts " ERROR processing label ID #{label_to_remove.id}: #{e.message}"
+          raise # Re-raise to roll back the transaction
+        end
+      end
+    end
+
+    puts " ✓ Consolidated successfully"
+  end
+
+  puts "\n" + "="*50
+  if dry_run
+    puts "DRY RUN COMPLETE - No changes were made"
+  else
+    puts "CONSOLIDATION COMPLETE"
+  end
+  puts "Found #{duplicate_count} duplicate labels"
+  unless dry_run
+    puts "Consolidated #{consolidated_count} labels into their oldest counterparts"
+    puts "All track, show, and series associations have been preserved"
+  end
+  puts "="*50
+end
\ No newline at end of file
diff --git a/spec/integration/label_consolidation_spec.rb b/spec/integration/label_consolidation_spec.rb
new file mode 100644
index 00000000..1f701611
--- /dev/null
+++ b/spec/integration/label_consolidation_spec.rb
@@ -0,0 +1,137 @@
+require 'rails_helper'
+
+RSpec.describe 'Label consolidation script', type: :integration do
+  let!(:radio) { FactoryBot.create(:radio) }
+
+  describe 'consolidate_duplicate_labels.rb' do
+    context 'when there are duplicate labels' do
+      let!(:track1) { FactoryBot.create(:track, radio: radio) }
+      let!(:track2) { FactoryBot.create(:track, radio: radio) }
+      let!(:scheduled_show) { FactoryBot.create(:scheduled_show, radio: radio) }
+
+      let!(:label1) do
+        # Create the first label normally
+        Label.create!(name: 'test label', radio: radio, created_at: 1.day.ago)
+      end
+
+      let!(:label2) do
+        # Create duplicate by bypassing validation
+        label = Label.new(name: 'test label', radio: radio, created_at: Time.current)
+        label.save!(validate: false)
+        label
+      end
+
+      before do
+        # Create associations for both labels
+        TrackLabel.create!(track: track1, label: label1)
+        TrackLabel.create!(track: track2, label: label2)
+        ScheduledShowLabel.create!(scheduled_show: scheduled_show, label: label1)
+      end
+
+      it 'consolidates duplicate labels and preserves associations' do
+        expect(Label.where(name: 'test label', radio: radio).count).to eq(2)
+
+        # Capture output to verify script runs
+        output = capture_stdout do
+          load Rails.root.join('script', 'consolidate_duplicate_labels.rb')
+        end
+
+        expect(output).to include('Found 1 duplicate label groups to process')
+        expect(output).to include('Consolidating 2 duplicate labels')
+
+        # Should only have one label remaining
+        remaining_labels = Label.where(name: 'test label', radio: radio)
+        expect(remaining_labels.count).to eq(1)
+
+        # The remaining label should be the older one
+        remaining_label = remaining_labels.first
+        expect(remaining_label.id).to eq(label1.id)
+
+        # All track associations should be preserved
+        expect(remaining_label.tracks).to include(track1, track2)
+        expect(remaining_label.track_labels.count).to eq(2)
+
+        # Show associations should be preserved
+        expect(remaining_label.scheduled_shows).to include(scheduled_show)
+        expect(remaining_label.scheduled_show_labels.count).to eq(1)
+      end
+
+      it 'supports dry run mode' do
+        expect(Label.where(name: 'test label', radio: radio).count).to eq(2)
+
+        # Run in dry run mode; the ensure guarantees DRY_RUN cannot leak into later
+        # examples even if loading the script raises
+        output = begin
+          ENV['DRY_RUN'] = 'true'
+          capture_stdout do
+            load Rails.root.join('script', 'consolidate_duplicate_labels.rb')
+          end
+        ensure
+          ENV.delete('DRY_RUN')
+        end
+
+        expect(output).to include('DRY RUN MODE')
+        expect(output).to include('[DRY RUN] Would consolidate')
+        expect(output).to include('Found 1 duplicate labels')
+
+        # Labels should not be modified in dry run mode
+        expect(Label.where(name: 'test label', radio: radio).count).to eq(2)
+        expect(Label.find(label1.id)).to be_present
+        expect(Label.find(label2.id)).to be_present
+      end
+    end
+
+    context 'when there are no duplicate labels' do
+      let!(:label1) { Label.create!(name: 'unique label 1', radio: radio) }
+      let!(:label2) { Label.create!(name: 'unique label 2', radio: radio) }
+
+      it 'does not modify any labels' do
+        expect(Label.count).to eq(2)
+
+        # Capture output to verify script runs
+        output = capture_stdout do
+          load Rails.root.join('script', 'consolidate_duplicate_labels.rb')
+        end
+
+        expect(output).to include('Starting label consolidation')
+        expect(output).to include('No duplicate labels found')
+
+        # Should still have both labels
+        expect(Label.count).to eq(2)
+        expect(Label.find(label1.id)).to be_present
+        expect(Label.find(label2.id)).to be_present
+      end
+    end
+
+    context 'when labels belong to different radios' do
+      let!(:radio2) { FactoryBot.create(:radio) }
+      let!(:label1) { Label.create!(name: 'same name', radio: radio) }
+      let!(:label2) { Label.create!(name: 'same name', radio: radio2) }
+
+      it 'does not consolidate labels from different radios' do
+        expect(Label.where(name: 'same name').count).to eq(2)
+
+        # Run the consolidation script
+        load Rails.root.join('script', 'consolidate_duplicate_labels.rb')
+
+        # Should still have both labels since they belong to different radios
+        expect(Label.where(name: 'same name').count).to eq(2)
+        expect(Label.find(label1.id)).to be_present
+        expect(Label.find(label2.id)).to be_present
+      end
+    end
+  end
+
+  private
+
+  def capture_stdout
+    original_stdout = $stdout
+    $stdout = fake = StringIO.new
+    begin
+      yield
+    ensure
+      $stdout = original_stdout
+    end
+    fake.string
+  end
+end
\ No newline at end of file