11package com .conveyal .datatools .manager ;
22
3- import com .conveyal .datatools .common . utils . aws . CheckedAWSException ;
3+ import com .conveyal .datatools .manager . models . FeedSource ;
44import com .conveyal .datatools .manager .models .FeedVersion ;
5+ import com .conveyal .datatools .manager .models .FeedVersionSummary ;
6+ import com .conveyal .datatools .manager .models .Project ;
57import com .conveyal .datatools .manager .persistence .FeedStore ;
68import com .conveyal .datatools .manager .persistence .Persistence ;
7- import com .conveyal .gtfs .GTFS ;
89import com .conveyal .gtfs .util .InvalidNamespaceException ;
10+ import com .mongodb .client .model .Sorts ;
911import com .conveyal .gtfs .util .Util ;
1012import com .google .common .collect .Lists ;
1113import com .mongodb .client .model .Projections ;
2628import java .sql .PreparedStatement ;
2729import java .sql .ResultSet ;
2830import java .sql .SQLException ;
31+ import java .util .Collection ;
2932import java .util .ArrayList ;
33+ import java .util .Collections ;
3034import java .util .HashSet ;
3135import java .util .List ;
3236import java .util .Set ;
3337import java .util .stream .Collectors ;
3438
3539import static com .conveyal .datatools .manager .DataManager .GTFS_DATA_SOURCE ;
3640import static com .conveyal .datatools .manager .DataManager .initializeApplication ;
41+ import static com .mongodb .client .model .Filters .eq ;
3742import static com .mongodb .client .model .Aggregates .project ;
3843import static com .mongodb .client .model .Filters .nin ;
3944
4045/**
41- * The Data sanitizer requires the env.yml and server.yml files for configuration. Data sanitizer specific command-line parameters
42- * should be provided after these e.g.:
46+ * The Data sanitizer requires the env.yml and server.yml files for configuration. Data sanitizer specific command-line
47+ * parameters should be provided after these e.g.:
4348 * configurations/test/env.yml.tmp configurations/test/server.yml.tmp --orphaned delete (or -O d)
4449 */
4550public class DataSanitizer {
@@ -56,23 +61,44 @@ public static void main(String[] args) throws IOException {
5661 */
5762 public static void parseArguments (String [] arguments ) {
5863 Options options = new Options ();
59- Option orphanedOption = Option .builder ("O" )
64+ Option orphaned = Option .builder ("O" )
6065 .longOpt ("orphaned" )
6166 .desc ("Optional delete command for orphaned items" )
6267 .optionalArg (true )
6368 .argName ("deleteCommand" )
6469 .build ();
65- options .addOption (orphanedOption );
70+ Option feedVersionAudit = Option .builder ("A" )
71+ .longOpt ("audit" )
72+ .desc ("Command for feed version audit" )
73+ .hasArg (false )
74+ .build ();
75+ Option purge = Option .builder ("P" )
76+ .longOpt ("purge-feed-versions" )
77+ .desc ("Command for purging all but the latest feed version for a feed source" )
78+ .hasArg (true )
79+ .argName ("feedSourceId" )
80+ .build ();
81+ options .addOption (orphaned );
82+ options .addOption (feedVersionAudit );
83+ options .addOption (purge );
6684
6785 try {
6886 CommandLineParser parser = new DefaultParser ();
6987 CommandLine cmd = parser .parse (options , arguments );
7088 if (cmd .hasOption ("O" )) {
7189 String deleteCommand = cmd .getOptionValue ("O" );
7290 boolean delete = "delete" .equalsIgnoreCase (deleteCommand ) || "d" .equalsIgnoreCase (deleteCommand );
73- sanitizeFeedVersions (delete );
91+ sanitizeOrphanedFeedVersions (delete );
7492 sanitizeDBSchemas (delete );
7593 }
94+ if (cmd .hasOption ("A" )) {
95+ feedVersionAudit ();
96+ }
97+ if (cmd .hasOption ("P" )) {
98+ String feedSourceId = cmd .getOptionValue ("P" );
99+ System .out .println ("Purge command received for feed source id: " + feedSourceId );
100+ deleteObsoleteFeedVersions (feedSourceId , true );
101+ }
76102 } catch (ParseException e ) {
77103 System .out .println (e .getMessage ());
78104 HelpFormatter formatter = new HelpFormatter ();
@@ -84,7 +110,7 @@ public static void parseArguments(String[] arguments) {
84110 /**
85111 * Group orphaned feed versions and optionally delete.
86112 */
87- public static int sanitizeFeedVersions (boolean delete ) {
113+ public static int sanitizeOrphanedFeedVersions (boolean delete ) {
88114 List <FeedVersion > feedVersions = getOrphanedFeedVersions ();
89115 int orphaned = feedVersions .size ();
90116 if (orphaned == 0 ) {
@@ -116,6 +142,91 @@ private static boolean hasGTFSPlus(FeedVersion feedVersion, FeedStore gtfsPlusSt
116142 return DataManager .isModuleEnabled ("gtfsplus" ) && gtfsPlusStore .getFeed (feedVersion .id + ".db" ) != null ;
117143 }
118144
145+ /**
146+ * For a given feed source, delete all feed versions keeping just the latest.
147+ */
148+ public static void deleteObsoleteFeedVersions (String feedSourceId , boolean sourceIsCli ) {
149+ deleteObsoleteFeedVersions (feedSourceId , 1 , sourceIsCli );
150+ }
151+
152+ /**
153+ * For a given feed source, delete feed version prior to the keep number.
154+ */
155+ public static int deleteObsoleteFeedVersions (
156+ String feedSourceId ,
157+ int numberOfVersionsToKeep ,
158+ boolean sourceIsCli
159+ ) {
160+ Collection <FeedVersion > feedVersions = Persistence .feedVersions .getFiltered (
161+ eq ("feedSourceId" , feedSourceId ),
162+ Sorts .descending ("version" )
163+ );
164+ if (feedVersions .isEmpty () || numberOfVersionsToKeep >= feedVersions .size ()) {
165+ String message = "No feed versions or none that qualify for deletion. Feed source id: " + feedSourceId ;
166+ LOG .info (message );
167+ if (sourceIsCli ) System .out .println (message );
168+ return -1 ;
169+ }
170+
171+ int keepCount = 0 ;
172+ int deleteCount = 0 ;
173+
174+ for (FeedVersion feedVersion : feedVersions ) {
175+ if (keepCount < numberOfVersionsToKeep ) {
176+ keepCount ++;
177+ } else {
178+ feedVersion .delete ();
179+ deleteCount ++;
180+ }
181+ }
182+ String message = String .format ("Deleted %s feed versions from feed source id: %s" , deleteCount , feedSourceId );
183+ LOG .info (message );
184+ if (sourceIsCli ) System .out .println (message );
185+ return deleteCount ;
186+ }
187+
188+ /**
189+ * Group feed source and number of feed versions.
190+ */
191+ public static List <FeedVersionAudit > feedVersionAudit () {
192+ System .out .println ("Producing feed version audit..." );
193+ List <FeedVersionAudit > audit = new ArrayList <>();
194+
195+ List <Project > projects = Persistence .projects .getAll ();
196+
197+ for (Project project : projects ) {
198+ Collection <FeedSource > feedSources = project .retrieveProjectFeedSources ();
199+
200+ for (FeedSource feedSource : feedSources ) {
201+ Collection <FeedVersionSummary > feedVersions = feedSource .retrieveFeedVersionSummaries ();
202+ audit .add (new FeedVersionAudit (project .name , feedSource .name , feedSource .id , feedVersions .size ()));
203+ }
204+ }
205+
206+ Collections .sort (audit );
207+
208+ if (!audit .isEmpty ()) {
209+ boolean hasHeader = false ;
210+ for (FeedVersionAudit feedVersionAudit : audit ) {
211+ if (!hasHeader ) {
212+ System .out .printf ("%-40s | %-40s | %-40s | %s%n" , "Project" , "Feed Source" , "Feed Source Id" , "No. Feed Versions" );
213+ hasHeader = true ;
214+ }
215+ System .out .printf (
216+ "%-40s | %-40s | %-40s | %s%n" ,
217+ feedVersionAudit .projectName ,
218+ feedVersionAudit .feedSourceName ,
219+ feedVersionAudit .feedSourceId ,
220+ feedVersionAudit .numberOfFeedVersions
221+ );
222+ }
223+ } else {
224+ System .out .println ("No feed versions to audit!" );
225+ }
226+ System .out .println ("Feed version audit complete." );
227+ return audit ;
228+ }
229+
119230 /**
120231 * Group orphaned schemas and optionally delete.
121232 */
@@ -142,13 +253,9 @@ public static void sanitizeDBSchemas(boolean delete) {
142253 private static int deleteOrphanedFeedVersions (List <FeedVersion > feedVersions ) {
143254 int deletedFeedVersions = 0 ;
144255 for (FeedVersion feedVersion : feedVersions ) {
145- try {
146- System .out .println ("Deleting orphaned feed version: " + feedVersion .id );
147- feedVersion .deleteOrphan ();
148- deletedFeedVersions ++;
149- } catch (SQLException | CheckedAWSException | InvalidNamespaceException e ) {
150- System .err .printf ("Failed to delete feed version: %s. %s%n" , feedVersion .id , e .getMessage ());
151- }
256+ System .out .println ("Deleting orphaned feed version: " + feedVersion .id );
257+ feedVersion .deleteOrphan ();
258+ deletedFeedVersions ++;
152259 }
153260 return deletedFeedVersions ;
154261 }
@@ -159,12 +266,7 @@ private static int deleteOrphanedFeedVersions(List<FeedVersion> feedVersions) {
159266 public static int deleteOrphanedDBSchemas (Set <String > orphanedSchemas ) {
160267 int deletedSchemas = 0 ;
161268 for (String orphanedSchema : orphanedSchemas ) {
162- try {
163- GTFS .delete (orphanedSchema , DataManager .GTFS_DATA_SOURCE );
164- LOG .info ("Dropped orphaned DB schema from Postgres." );
165- } catch (SQLException | InvalidNamespaceException e ) {
166- System .err .printf ("Failed to delete DB schema: %s. %s%n" , orphanedSchema , e .getMessage ());
167- }
269+ FeedVersion .deleteDBSchema (orphanedSchema );
168270 deletedSchemas ++;
169271 }
170272 return deletedSchemas ;
@@ -192,7 +294,6 @@ public static Set<String> getOrphanedDBSchemas(Set<String> associatedSchemas) {
192294 Set <String > orphanedSchemas = new HashSet <>();
193295 try (Connection connection = GTFS_DATA_SOURCE .getConnection ()) {
194296 String sql = String .format ("SELECT nspname FROM pg_namespace %s" , whereClause );
195- LOG .info (sql );
196297 PreparedStatement preparedStatement = connection .prepareStatement (sql );
197298 ResultSet resultSet = preparedStatement .executeQuery ();
198299 while (resultSet .next ()) {
@@ -240,4 +341,24 @@ public static Set<String> getFieldFromDocument(String field, String document) {
240341 }
241342 return fields ;
242343 }
344+
345+ public static class FeedVersionAudit implements Comparable <FeedVersionAudit > {
346+ public final String projectName ;
347+ public final String feedSourceName ;
348+ public final String feedSourceId ;
349+ public final int numberOfFeedVersions ;
350+
351+ FeedVersionAudit (String projectName , String feedSourceName , String feedSourceId , int numberOfFeedVersions ) {
352+ this .projectName = projectName ;
353+ this .feedSourceName = feedSourceName ;
354+ this .feedSourceId = feedSourceId ;
355+ this .numberOfFeedVersions = numberOfFeedVersions ;
356+ }
357+
358+ @ Override
359+ public int compareTo (FeedVersionAudit other ) {
360+ // Sort by numberOfFeedVersions in ascending order
361+ return Integer .compare (other .numberOfFeedVersions , this .numberOfFeedVersions );
362+ }
363+ }
243364}
0 commit comments