From fd82437b882311491be252382b8dce0786a0c824 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 27 Jun 2024 18:15:58 -0400 Subject: [PATCH 1/4] refactor/fix avoidJoin logic --- .../dataverse/search/SearchServiceBean.java | 245 ++++-------------- .../iq/dataverse/search/SearchUtil.java | 1 + 2 files changed, 53 insertions(+), 193 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 1d25dbcdaba..ad02f8c49ac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -42,6 +42,8 @@ import jakarta.ejb.TransactionRolledbackLocalException; import jakarta.inject.Named; import jakarta.persistence.NoResultException; + +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.SortClause; import org.apache.solr.client.solrj.SolrServerException; @@ -187,6 +189,7 @@ public SolrQueryResponse search( SolrQuery solrQuery = new SolrQuery(); query = SearchUtil.sanitizeQuery(query); + solrQuery.setQuery(query); if (sortField != null) { // is it ok not to specify any sort? - there are cases where we @@ -327,24 +330,13 @@ public SolrQueryResponse search( } } - //I'm not sure if just adding null here is good for hte permissions system... i think it needs something - if(dataverses != null) { - for(Dataverse dataverse : dataverses) { - // ----------------------------------- - // PERMISSION FILTER QUERY - // ----------------------------------- - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets); - if (permissionFilterQuery != null) { - solrQuery.addFilterQuery(permissionFilterQuery); - } - } - } else { - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets); - if (permissionFilterQuery != null) { - solrQuery.addFilterQuery(permissionFilterQuery); - } + // ----------------------------------- + // PERMISSION FILTER QUERY + // ----------------------------------- + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, onlyDatatRelatedToMe, addFacets); + if (permissionFilterQuery != null) { + solrQuery.addFilterQuery(permissionFilterQuery); } - /** * @todo: do sanity checking... throw error if negative @@ -976,7 +968,7 @@ public String getCapitalizedName(String name) { * * @return */ - private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) { + private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, boolean onlyDatatRelatedToMe, boolean addFacets) { User user = dataverseRequest.getUser(); if (user == null) { @@ -985,38 +977,23 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ if (solrQuery == null) { throw new NullPointerException("solrQuery cannot be null"); } - /** - * @todo For people who are not logged in, should we show stuff indexed - * with "AllUsers" group or not? If so, uncomment the allUsersString - * stuff below. - */ -// String allUsersString = IndexServiceBean.getGroupPrefix() + AllUsers.get().getAlias(); -// String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + allUsersString + ")"; - String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + ")"; -// String publicOnly = "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getPublicGroupString(); - // initialize to public only to be safe + String dangerZoneNoSolrJoin = null; if (user instanceof PrivateUrlUser) { user = GuestUser.get(); } - AuthenticatedUser au = null; + ArrayList groupList = new ArrayList(); + AuthenticatedUser au = null; Set groups; - - if (user instanceof GuestUser) { - // Yes, GuestUser may be part of one or more groups; such as IP Groups. - groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); - } else { - if (!(user instanceof AuthenticatedUser)) { - logger.severe("Should never reach here. A User must be an AuthenticatedUser or a Guest"); - throw new IllegalStateException("A User must be an AuthenticatedUser or a Guest"); - } + + if (user instanceof AuthenticatedUser) { au = (AuthenticatedUser) user; - + // ---------------------------------------------------- - // (3) Is this a Super User? + // Is this a Super User? // If so, they can see everything // ---------------------------------------------------- if (au.isSuperuser()) { @@ -1026,11 +1003,11 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ return dangerZoneNoSolrJoin; } - + // ---------------------------------------------------- - // (4) User is logged in AND onlyDatatRelatedToMe == true + // User is logged in AND onlyDatatRelatedToMe == true // Yes, give back everything -> the settings will be in - // the filterqueries given to search + // the filterqueries given to search // ---------------------------------------------------- if (onlyDatatRelatedToMe == true) { if (systemConfig.myDataDoesNotUsePermissionDocs()) { @@ -1041,169 +1018,51 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ logger.fine("new post-4.2 behavior: MyData is using Solr permission docs"); } } - // ---------------------------------------------------- - // (5) Work with Authenticated User who is not a Superuser + // Work with Authenticated User who is not a Superuser // ---------------------------------------------------- - - groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); + groupList.add(IndexServiceBean.getGroupPerUserPrefix() + au.getId()); } - if (FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled()) { - /** - * Instead of doing a super expensive join, we will rely on the - * new boolean field PublicObject:true for public objects. This field - * is indexed on the content document itself, rather than a permission - * document. An additional join will be added only for any extra, - * more restricted groups that the user may be part of. - * **Note the experimental nature of this optimization**. - */ - StringBuilder sb = new StringBuilder(); - StringBuilder sbgroups = new StringBuilder(); - - // All users, guests and authenticated, should see all the - // documents marked as publicObject_b:true, at least: - sb.append(SearchFields.PUBLIC_OBJECT + ":" + true); + // In addition to the user referenced directly, we will also + // add joins on all the non-public groups that may exist for the + // user: - // One or more groups *may* also be available for this user. Once again, - // do note that Guest users may be part of some groups, such as - // IP groups. - - int groupCounter = 0; - - // An AuthenticatedUser should also be able to see all the content - // on which they have direct permissions: - if (au != null) { - groupCounter++; - sbgroups.append(IndexServiceBean.getGroupPerUserPrefix() + au.getId()); - } - - // In addition to the user referenced directly, we will also - // add joins on all the non-public groups that may exist for the - // user: - for (Group group : groups) { - String groupAlias = group.getAlias(); - if (groupAlias != null && !groupAlias.isEmpty() && !groupAlias.startsWith("builtIn")) { - groupCounter++; - if (groupCounter > 1) { - sbgroups.append(" OR "); - } - sbgroups.append(IndexServiceBean.getGroupPrefix() + groupAlias); - } - } - - if (groupCounter > 1) { - // If there is more than one group, the parentheses must be added: - sbgroups.insert(0, "("); - sbgroups.append(")"); - } - - if (groupCounter > 0) { - // If there are any groups for this user, an extra join must be - // added to the query, and the extra sub-query must be added to - // the combined Solr query: - sb.append(" OR {!join from=" + SearchFields.DEFINITION_POINT + " to=id v=$q1}"); - // Add the subquery to the combined Solr query: - solrQuery.setParam("q1", SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString()); - logger.info("The sub-query q1 set to " + SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString()); - } - - String ret = sb.toString(); - logger.fine("Returning experimental query: " + ret); - return ret; - } - - // END OF EXPERIMENTAL OPTIMIZATION + boolean avoidJoin = FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled(); - // Old, un-optimized way of handling permissions. - // Largely left intact, minus the lookups that have already been performed - // above. - - // ---------------------------------------------------- - // (1) Is this a GuestUser? - // ---------------------------------------------------- - if (user instanceof GuestUser) { - - StringBuilder sb = new StringBuilder(); - - String groupsFromProviders = ""; - for (Group group : groups) { - logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); - String groupAlias = group.getAlias(); - if (groupAlias != null && !groupAlias.isEmpty()) { - sb.append(" OR "); - // i.e. group_builtIn/all-users, ip/ipGroup3 - sb.append(IndexServiceBean.getGroupPrefix()).append(groupAlias); - } - } - groupsFromProviders = sb.toString(); - logger.fine("groupsFromProviders:" + groupsFromProviders); - String guestWithGroups = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + groupsFromProviders + ")"; - logger.fine(guestWithGroups); - return guestWithGroups; - } + // Authenticated users and GuestUser may be part of one or more groups; such + // as IP Groups. + groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); - // ---------------------------------------------------- - // (5) Work with Authenticated User who is not a Superuser - // ---------------------------------------------------- - // It was already confirmed, that if the user is not GuestUser, we - // have an AuthenticatedUser au which is not null. - /** - * @todo all this code needs cleanup and clarification. - */ - /** - * Every AuthenticatedUser is part of a "User Private Group" (UGP), a - * concept we borrow from RHEL: - * https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups - */ - /** - * @todo rename this from publicPlusUserPrivateGroup. Confusing - */ - // safe default: public only - String publicPlusUserPrivateGroup = publicOnly; -// + (onlyDatatRelatedToMe ? "" : (publicOnly + " OR ")) -// + "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + ")"; - -// /** -// * @todo add onlyDatatRelatedToMe option into the experimental JOIN -// * before enabling it. -// */ - /** - * From a search perspective, we don't care about if the group was - * created within one dataverse or another. We just want a list of *all* - * the groups the user is part of. We are greedy. We want all BuiltIn - * Groups, Shibboleth Groups, IP Groups, "system" groups, everything. - * - * A JOIN on "permission documents" will determine if the user can find - * a given "content document" (dataset version, etc) in Solr. - */ - String groupsFromProviders = ""; - StringBuilder sb = new StringBuilder(); for (Group group : groups) { - logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); String groupAlias = group.getAlias(); - if (groupAlias != null && !groupAlias.isEmpty()) { - sb.append(" OR "); - // i.e. group_builtIn/all-users, group_builtIn/authenticated-users, group_1-explictGroup1, group_shib/2 - sb.append(IndexServiceBean.getGroupPrefix() + groupAlias); + if (groupAlias != null && !groupAlias.isEmpty() && (!avoidJoin || !groupAlias.startsWith("builtIn"))) { + groupList.add(IndexServiceBean.getGroupPrefix() + groupAlias); } } - groupsFromProviders = sb.toString(); - logger.fine(groupsFromProviders); - if (true) { - /** - * @todo get rid of "experimental" in name - */ - String experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")"; - publicPlusUserPrivateGroup = experimentalJoin; + if (!avoidJoin) { + // Add the public group + groupList.add(0, IndexServiceBean.getPublicGroupString()); + } + + String groupString = null; + //If we have additional groups, format them correctly into a search string, with parens if there is more than one + if (groupList.size() > 1) { + groupString = "(" + StringUtils.join(groupList, " OR ") + ")"; + } else if (groupList.size() == 1) { + groupString = groupList.get(0); } - - //permissionFilterQuery = publicPlusUserPrivateGroup; - logger.fine(publicPlusUserPrivateGroup); - - return publicPlusUserPrivateGroup; - + logger.fine("Groups: " + groupString); + String query = avoidJoin ? SearchFields.PUBLIC_OBJECT + ":" + true : ""; + if (groupString != null) { + if (!query.isEmpty()) { + query = "(" + query + " OR " + "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + groupString + ")"; + } else { + query = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + groupString; + } + } + return query; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index adcc5825766..935a003e0f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -235,4 +235,5 @@ public static String getGeoRadius(String userSuppliedGeoRadius) throws NumberFor return userSuppliedGeoRadius; } + } From 0331585755107f93c0bfc7abb0ff6117d2a1c2b1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 3 Jul 2024 12:51:49 -0400 Subject: [PATCH 2/4] bug fix - default to */All groups for superuser --- .../dataverse/search/SearchServiceBean.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index ad02f8c49ac..6f07da8a96f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -61,6 +61,8 @@ public class SearchServiceBean { private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName()); + private static final String ALL_GROUPS = "*"; + /** * We're trying to make the SearchServiceBean lean, mean, and fast, with as * few injections of EJBs as possible. @@ -978,8 +980,6 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ throw new NullPointerException("solrQuery cannot be null"); } - String dangerZoneNoSolrJoin = null; - if (user instanceof PrivateUrlUser) { user = GuestUser.get(); } @@ -988,8 +988,9 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ AuthenticatedUser au = null; Set groups; - if (user instanceof AuthenticatedUser) { + boolean avoidJoin = FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled(); + if (user instanceof AuthenticatedUser) { au = (AuthenticatedUser) user; // ---------------------------------------------------- @@ -1001,7 +1002,7 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ // to see everything in Solr with no regard to permissions. But it's // been this way since Dataverse 4.0. So relax. :) - return dangerZoneNoSolrJoin; + return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS); } // ---------------------------------------------------- @@ -1012,7 +1013,7 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ if (onlyDatatRelatedToMe == true) { if (systemConfig.myDataDoesNotUsePermissionDocs()) { logger.fine("old 4.2 behavior: MyData is not using Solr permission docs"); - return dangerZoneNoSolrJoin; + return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS); } else { // fall-through logger.fine("new post-4.2 behavior: MyData is using Solr permission docs"); @@ -1028,8 +1029,6 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ // add joins on all the non-public groups that may exist for the // user: - boolean avoidJoin = FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled(); - // Authenticated users and GuestUser may be part of one or more groups; such // as IP Groups. groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); @@ -1054,16 +1053,23 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ groupString = groupList.get(0); } logger.fine("Groups: " + groupString); - String query = avoidJoin ? SearchFields.PUBLIC_OBJECT + ":" + true : ""; - if (groupString != null) { + return buildPermissionFilterQuery(avoidJoin, groupString); + } + + private String buildPermissionFilterQuery(boolean avoidJoin, String permissionFilterGroups) { + String query = (avoidJoin&& !isAllGroups(permissionFilterGroups)) ? SearchFields.PUBLIC_OBJECT + ":" + true : ""; + if (permissionFilterGroups != null && !isAllGroups(permissionFilterGroups)) { if (!query.isEmpty()) { - query = "(" + query + " OR " + "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + groupString + ")"; + query = "(" + query + " OR " + "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups + ")"; } else { - query = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + groupString; + query = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups; } } return query; } - + + private boolean isAllGroups(String groups) { + return (groups!=null &&groups.equals(ALL_GROUPS)); + } } From 1aa2e6417ebaeb32b218b9345fa9950297880174 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 19 Jul 2024 17:22:56 -0400 Subject: [PATCH 3/4] drop extra line from merge --- src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index 935a003e0f7..adcc5825766 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -235,5 +235,4 @@ public static String getGeoRadius(String userSuppliedGeoRadius) throws NumberFor return userSuppliedGeoRadius; } - } From d9488a767be75d612d26b998b27f3ad74501256e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 19 Jul 2024 17:27:23 -0400 Subject: [PATCH 4/4] add logging to allow comparing old and new result --- .../edu/harvard/iq/dataverse/search/SearchServiceBean.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 6f07da8a96f..7ec77ccde09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -1053,7 +1053,9 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ groupString = groupList.get(0); } logger.fine("Groups: " + groupString); - return buildPermissionFilterQuery(avoidJoin, groupString); + String permissionQuery = buildPermissionFilterQuery(avoidJoin, groupString); + logger.fine("Permission Query: " + permissionQuery); + return permissionQuery; } private String buildPermissionFilterQuery(boolean avoidJoin, String permissionFilterGroups) {