diff --git a/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java b/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java index da5b880..200e250 100644 --- a/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java +++ b/API/src/main/java/crawlercommons/urlfrontier/URLFrontierGrpc.java @@ -1189,7 +1189,13 @@ default void listURLs( getListURLsMethod(), responseObserver); } - /** */ + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ default void countURLs( crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request, io.grpc.stub.StreamObserver @@ -1500,7 +1506,13 @@ public void listURLs( responseObserver); } - /** */ + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ public void countURLs( crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request, io.grpc.stub.StreamObserver @@ -1730,7 +1742,13 @@ public java.util.Iterator listUR getChannel(), getListURLsMethod(), getCallOptions(), request); } - /** */ + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ public crawlercommons.urlfrontier.Urlfrontier.Long countURLs( crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request) { return io.grpc.stub.ClientCalls.blockingUnaryCall( @@ -1941,7 +1959,13 @@ protected URLFrontierFutureStub build( getChannel().newCall(getGetURLStatusMethod(), getCallOptions()), request); } - /** */ + /** + * + * + *
+         * * Count URLs currently in the frontier *
+         * 
+ */ public com.google.common.util.concurrent.ListenableFuture< crawlercommons.urlfrontier.Urlfrontier.Long> countURLs(crawlercommons.urlfrontier.Urlfrontier.CountUrlParams request) { diff --git a/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java b/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java index 5a9522a..38109fe 100644 --- a/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java +++ b/API/src/main/java/crawlercommons/urlfrontier/Urlfrontier.java @@ -21082,6 +21082,71 @@ public interface ListUrlParamsOrBuilder * @return The local. */ boolean getLocal(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return Whether the filter field is set. + */ + boolean hasFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The filter. + */ + java.lang.String getFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The bytes for filter. + */ + com.google.protobuf.ByteString getFilterBytes(); + + /** + * + * + *
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+         * 
+ * + * optional bool ignoreCase = 7; + * + * @return Whether the ignoreCase field is set. + */ + boolean hasIgnoreCase(); + + /** + * + * + *
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+         * 
+ * + * optional bool ignoreCase = 7; + * + * @return The ignoreCase. + */ + boolean getIgnoreCase(); } /** Protobuf type {@code urlfrontier.ListUrlParams} */ @@ -21099,6 +21164,7 @@ private ListUrlParams(com.google.protobuf.GeneratedMessageV3.Builder builder) private ListUrlParams() { key_ = ""; crawlID_ = ""; + filter_ = ""; } @java.lang.Override @@ -21122,6 +21188,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { crawlercommons.urlfrontier.Urlfrontier.ListUrlParams.Builder.class); } + private int bitField0_; public static final int START_FIELD_NUMBER = 1; private int start_ = 0; @@ -21285,6 +21352,110 @@ public boolean getLocal() { return local_; } + public static final int FILTER_FIELD_NUMBER = 6; + + @SuppressWarnings("serial") + private volatile java.lang.Object filter_ = ""; + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return Whether the filter field is set. + */ + @java.lang.Override + public boolean hasFilter() { + return ((bitField0_ & 0x00000001) != 0); + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The filter. + */ + @java.lang.Override + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 6; + * + * @return The bytes for filter. + */ + @java.lang.Override + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int IGNORECASE_FIELD_NUMBER = 7; + private boolean ignoreCase_ = false; + + /** + * + * + *
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+         * 
+ * + * optional bool ignoreCase = 7; + * + * @return Whether the ignoreCase field is set. + */ + @java.lang.Override + public boolean hasIgnoreCase() { + return ((bitField0_ & 0x00000002) != 0); + } + + /** + * + * + *
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+         * 
+ * + * optional bool ignoreCase = 7; + * + * @return The ignoreCase. + */ + @java.lang.Override + public boolean getIgnoreCase() { + return ignoreCase_; + } + private byte memoizedIsInitialized = -1; @java.lang.Override @@ -21315,6 +21486,12 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (local_ != false) { output.writeBool(5, local_); } + if (((bitField0_ & 0x00000001) != 0)) { + com.google.protobuf.GeneratedMessageV3.writeString(output, 6, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + output.writeBool(7, ignoreCase_); + } getUnknownFields().writeTo(output); } @@ -21339,6 +21516,12 @@ public int getSerializedSize() { if (local_ != false) { size += com.google.protobuf.CodedOutputStream.computeBoolSize(5, local_); } + if (((bitField0_ & 0x00000001) != 0)) { + size += com.google.protobuf.GeneratedMessageV3.computeStringSize(6, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(7, ignoreCase_); + } size += getUnknownFields().getSerializedSize(); memoizedSize = size; return size; @@ -21360,6 +21543,14 @@ public boolean equals(final java.lang.Object obj) { if (!getKey().equals(other.getKey())) return false; if (!getCrawlID().equals(other.getCrawlID())) return false; if (getLocal() != other.getLocal()) return false; + if (hasFilter() != other.hasFilter()) return false; + if (hasFilter()) { + if (!getFilter().equals(other.getFilter())) return false; + } + if (hasIgnoreCase() != other.hasIgnoreCase()) return false; + if (hasIgnoreCase()) { + if (getIgnoreCase() != other.getIgnoreCase()) return false; + } if (!getUnknownFields().equals(other.getUnknownFields())) return false; return true; } @@ -21381,6 +21572,14 @@ public int hashCode() { hash = (53 * hash) + getCrawlID().hashCode(); hash = (37 * hash) + LOCAL_FIELD_NUMBER; hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getLocal()); + if (hasFilter()) { + hash = (37 * hash) + FILTER_FIELD_NUMBER; + hash = (53 * hash) + getFilter().hashCode(); + } + if (hasIgnoreCase()) { + hash = (37 * hash) + IGNORECASE_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getIgnoreCase()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -21526,6 +21725,8 @@ public Builder clear() { key_ = ""; crawlID_ = ""; local_ = false; + filter_ = ""; + ignoreCase_ = false; return this; } @@ -21579,6 +21780,16 @@ private void buildPartial0( if (((from_bitField0_ & 0x00000010) != 0)) { result.local_ = local_; } + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000020) != 0)) { + result.filter_ = filter_; + to_bitField0_ |= 0x00000001; + } + if (((from_bitField0_ & 0x00000040) != 0)) { + result.ignoreCase_ = ignoreCase_; + to_bitField0_ |= 0x00000002; + } + result.bitField0_ |= to_bitField0_; } @java.lang.Override @@ -21649,6 +21860,14 @@ public Builder mergeFrom(crawlercommons.urlfrontier.Urlfrontier.ListUrlParams ot if (other.getLocal() != false) { setLocal(other.getLocal()); } + if (other.hasFilter()) { + filter_ = other.filter_; + bitField0_ |= 0x00000020; + onChanged(); + } + if (other.hasIgnoreCase()) { + setIgnoreCase(other.getIgnoreCase()); + } this.mergeUnknownFields(other.getUnknownFields()); onChanged(); return this; @@ -21705,6 +21924,18 @@ public Builder mergeFrom( bitField0_ |= 0x00000010; break; } // case 40 + case 50: + { + filter_ = input.readStringRequireUtf8(); + bitField0_ |= 0x00000020; + break; + } // case 50 + case 56: + { + ignoreCase_ = input.readBool(); + bitField0_ |= 0x00000040; + break; + } // case 56 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { @@ -22114,6 +22345,204 @@ public Builder clearLocal() { return this; } + private java.lang.Object filter_ = ""; + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return Whether the filter field is set. + */ + public boolean hasFilter() { + return ((bitField0_ & 0x00000020) != 0); + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return The filter. + */ + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (!(ref instanceof java.lang.String)) { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return The bytes for filter. + */ + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @param value The filter to set. + * @return This builder for chaining. + */ + public Builder setFilter(java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + filter_ = value; + bitField0_ |= 0x00000020; + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @return This builder for chaining. + */ + public Builder clearFilter() { + filter_ = getDefaultInstance().getFilter(); + bitField0_ = (bitField0_ & ~0x00000020); + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 6; + * + * @param value The bytes for filter to set. + * @return This builder for chaining. + */ + public Builder setFilterBytes(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + checkByteStringIsUtf8(value); + filter_ = value; + bitField0_ |= 0x00000020; + onChanged(); + return this; + } + + private boolean ignoreCase_; + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 7; + * + * @return Whether the ignoreCase field is set. + */ + @java.lang.Override + public boolean hasIgnoreCase() { + return ((bitField0_ & 0x00000040) != 0); + } + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 7; + * + * @return The ignoreCase. + */ + @java.lang.Override + public boolean getIgnoreCase() { + return ignoreCase_; + } + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 7; + * + * @param value The ignoreCase to set. + * @return This builder for chaining. + */ + public Builder setIgnoreCase(boolean value) { + + ignoreCase_ = value; + bitField0_ |= 0x00000040; + onChanged(); + return this; + } + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 7; + * + * @return This builder for chaining. + */ + public Builder clearIgnoreCase() { + bitField0_ = (bitField0_ & ~0x00000040); + ignoreCase_ = false; + onChanged(); + return this; + } + @java.lang.Override public final Builder setUnknownFields( final com.google.protobuf.UnknownFieldSet unknownFields) { @@ -22239,10 +22668,88 @@ public interface CountUrlParamsOrBuilder * * *
-         * only for the current local instance
+         * Search filter on url (can be empty, default is empty)
          * 
* - * bool local = 3; + * optional string filter = 3; + * + * @return Whether the filter field is set. + */ + boolean hasFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The filter. + */ + java.lang.String getFilter(); + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The bytes for filter. + */ + com.google.protobuf.ByteString getFilterBytes(); + + /** + * + * + *
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+         * 
+ * + * optional bool ignoreCase = 4; + * + * @return Whether the ignoreCase field is set. + */ + boolean hasIgnoreCase(); + + /** + * + * + *
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+         * 
+ * + * optional bool ignoreCase = 4; + * + * @return The ignoreCase. + */ + boolean getIgnoreCase(); + + /** + * + * + *
+         * only for the current local instance (default is false)
+         * 
+ * + * optional bool local = 5; + * + * @return Whether the local field is set. + */ + boolean hasLocal(); + + /** + * + * + *
+         * only for the current local instance (default is false)
+         * 
+ * + * optional bool local = 5; * * @return The local. */ @@ -22264,6 +22771,7 @@ private CountUrlParams(com.google.protobuf.GeneratedMessageV3.Builder builder private CountUrlParams() { key_ = ""; crawlID_ = ""; + filter_ = ""; } @java.lang.Override @@ -22287,6 +22795,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { crawlercommons.urlfrontier.Urlfrontier.CountUrlParams.Builder.class); } + private int bitField0_; public static final int KEY_FIELD_NUMBER = 1; @SuppressWarnings("serial") @@ -22340,70 +22849,190 @@ public com.google.protobuf.ByteString getKeyBytes() { } } - public static final int CRAWLID_FIELD_NUMBER = 2; - - @SuppressWarnings("serial") - private volatile java.lang.Object crawlID_ = ""; - + public static final int CRAWLID_FIELD_NUMBER = 2; + + @SuppressWarnings("serial") + private volatile java.lang.Object crawlID_ = ""; + + /** + * + * + *
+         * crawl ID
+         * 
+ * + * string crawlID = 2; + * + * @return The crawlID. + */ + @java.lang.Override + public java.lang.String getCrawlID() { + java.lang.Object ref = crawlID_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + crawlID_ = s; + return s; + } + } + + /** + * + * + *
+         * crawl ID
+         * 
+ * + * string crawlID = 2; + * + * @return The bytes for crawlID. + */ + @java.lang.Override + public com.google.protobuf.ByteString getCrawlIDBytes() { + java.lang.Object ref = crawlID_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + crawlID_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int FILTER_FIELD_NUMBER = 3; + + @SuppressWarnings("serial") + private volatile java.lang.Object filter_ = ""; + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return Whether the filter field is set. + */ + @java.lang.Override + public boolean hasFilter() { + return ((bitField0_ & 0x00000001) != 0); + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The filter. + */ + @java.lang.Override + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } + } + + /** + * + * + *
+         * Search filter on url (can be empty, default is empty)
+         * 
+ * + * optional string filter = 3; + * + * @return The bytes for filter. + */ + @java.lang.Override + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int IGNORECASE_FIELD_NUMBER = 4; + private boolean ignoreCase_ = false; + + /** + * + * + *
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+         * 
+ * + * optional bool ignoreCase = 4; + * + * @return Whether the ignoreCase field is set. + */ + @java.lang.Override + public boolean hasIgnoreCase() { + return ((bitField0_ & 0x00000002) != 0); + } + /** * * *
-         * crawl ID
+         * Ignore Case sensitivity for search filter (default is false -> case sensitive)
          * 
* - * string crawlID = 2; + * optional bool ignoreCase = 4; * - * @return The crawlID. + * @return The ignoreCase. */ @java.lang.Override - public java.lang.String getCrawlID() { - java.lang.Object ref = crawlID_; - if (ref instanceof java.lang.String) { - return (java.lang.String) ref; - } else { - com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; - java.lang.String s = bs.toStringUtf8(); - crawlID_ = s; - return s; - } + public boolean getIgnoreCase() { + return ignoreCase_; } + public static final int LOCAL_FIELD_NUMBER = 5; + private boolean local_ = false; + /** * * *
-         * crawl ID
+         * only for the current local instance (default is false)
          * 
* - * string crawlID = 2; + * optional bool local = 5; * - * @return The bytes for crawlID. + * @return Whether the local field is set. */ @java.lang.Override - public com.google.protobuf.ByteString getCrawlIDBytes() { - java.lang.Object ref = crawlID_; - if (ref instanceof java.lang.String) { - com.google.protobuf.ByteString b = - com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); - crawlID_ = b; - return b; - } else { - return (com.google.protobuf.ByteString) ref; - } + public boolean hasLocal() { + return ((bitField0_ & 0x00000004) != 0); } - public static final int LOCAL_FIELD_NUMBER = 3; - private boolean local_ = false; - /** * * *
-         * only for the current local instance
+         * only for the current local instance (default is false)
          * 
* - * bool local = 3; + * optional bool local = 5; * * @return The local. */ @@ -22433,8 +23062,14 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(crawlID_)) { com.google.protobuf.GeneratedMessageV3.writeString(output, 2, crawlID_); } - if (local_ != false) { - output.writeBool(3, local_); + if (((bitField0_ & 0x00000001) != 0)) { + com.google.protobuf.GeneratedMessageV3.writeString(output, 3, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + output.writeBool(4, ignoreCase_); + } + if (((bitField0_ & 0x00000004) != 0)) { + output.writeBool(5, local_); } getUnknownFields().writeTo(output); } @@ -22451,8 +23086,14 @@ public int getSerializedSize() { if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(crawlID_)) { size += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, crawlID_); } - if (local_ != false) { - size += com.google.protobuf.CodedOutputStream.computeBoolSize(3, local_); + if (((bitField0_ & 0x00000001) != 0)) { + size += com.google.protobuf.GeneratedMessageV3.computeStringSize(3, filter_); + } + if (((bitField0_ & 0x00000002) != 0)) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(4, ignoreCase_); + } + if (((bitField0_ & 0x00000004) != 0)) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(5, local_); } size += getUnknownFields().getSerializedSize(); memoizedSize = size; @@ -22472,7 +23113,18 @@ public boolean equals(final java.lang.Object obj) { if (!getKey().equals(other.getKey())) return false; if (!getCrawlID().equals(other.getCrawlID())) return false; - if (getLocal() != other.getLocal()) return false; + if (hasFilter() != other.hasFilter()) return false; + if (hasFilter()) { + if (!getFilter().equals(other.getFilter())) return false; + } + if (hasIgnoreCase() != other.hasIgnoreCase()) return false; + if (hasIgnoreCase()) { + if (getIgnoreCase() != other.getIgnoreCase()) return false; + } + if (hasLocal() != other.hasLocal()) return false; + if (hasLocal()) { + if (getLocal() != other.getLocal()) return false; + } if (!getUnknownFields().equals(other.getUnknownFields())) return false; return true; } @@ -22488,8 +23140,18 @@ public int hashCode() { hash = (53 * hash) + getKey().hashCode(); hash = (37 * hash) + CRAWLID_FIELD_NUMBER; hash = (53 * hash) + getCrawlID().hashCode(); - hash = (37 * hash) + LOCAL_FIELD_NUMBER; - hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getLocal()); + if (hasFilter()) { + hash = (37 * hash) + FILTER_FIELD_NUMBER; + hash = (53 * hash) + getFilter().hashCode(); + } + if (hasIgnoreCase()) { + hash = (37 * hash) + IGNORECASE_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getIgnoreCase()); + } + if (hasLocal()) { + hash = (37 * hash) + LOCAL_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getLocal()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -22633,6 +23295,8 @@ public Builder clear() { bitField0_ = 0; key_ = ""; crawlID_ = ""; + filter_ = ""; + ignoreCase_ = false; local_ = false; return this; } @@ -22678,9 +23342,20 @@ private void buildPartial0( if (((from_bitField0_ & 0x00000002) != 0)) { result.crawlID_ = crawlID_; } + int to_bitField0_ = 0; if (((from_bitField0_ & 0x00000004) != 0)) { + result.filter_ = filter_; + to_bitField0_ |= 0x00000001; + } + if (((from_bitField0_ & 0x00000008) != 0)) { + result.ignoreCase_ = ignoreCase_; + to_bitField0_ |= 0x00000002; + } + if (((from_bitField0_ & 0x00000010) != 0)) { result.local_ = local_; + to_bitField0_ |= 0x00000004; } + result.bitField0_ |= to_bitField0_; } @java.lang.Override @@ -22742,7 +23417,15 @@ public Builder mergeFrom(crawlercommons.urlfrontier.Urlfrontier.CountUrlParams o bitField0_ |= 0x00000002; onChanged(); } - if (other.getLocal() != false) { + if (other.hasFilter()) { + filter_ = other.filter_; + bitField0_ |= 0x00000004; + onChanged(); + } + if (other.hasIgnoreCase()) { + setIgnoreCase(other.getIgnoreCase()); + } + if (other.hasLocal()) { setLocal(other.getLocal()); } this.mergeUnknownFields(other.getUnknownFields()); @@ -22783,12 +23466,24 @@ public Builder mergeFrom( bitField0_ |= 0x00000002; break; } // case 18 - case 24: + case 26: { - local_ = input.readBool(); + filter_ = input.readStringRequireUtf8(); bitField0_ |= 0x00000004; break; - } // case 24 + } // case 26 + case 32: + { + ignoreCase_ = input.readBool(); + bitField0_ |= 0x00000008; + break; + } // case 32 + case 40: + { + local_ = input.readBool(); + bitField0_ |= 0x00000010; + break; + } // case 40 default: { if (!super.parseUnknownField(input, extensionRegistry, tag)) { @@ -23030,16 +23725,230 @@ public Builder setCrawlIDBytes(com.google.protobuf.ByteString value) { return this; } + private java.lang.Object filter_ = ""; + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return Whether the filter field is set. + */ + public boolean hasFilter() { + return ((bitField0_ & 0x00000004) != 0); + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return The filter. + */ + public java.lang.String getFilter() { + java.lang.Object ref = filter_; + if (!(ref instanceof java.lang.String)) { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + filter_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return The bytes for filter. + */ + public com.google.protobuf.ByteString getFilterBytes() { + java.lang.Object ref = filter_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref); + filter_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @param value The filter to set. + * @return This builder for chaining. + */ + public Builder setFilter(java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + filter_ = value; + bitField0_ |= 0x00000004; + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @return This builder for chaining. + */ + public Builder clearFilter() { + filter_ = getDefaultInstance().getFilter(); + bitField0_ = (bitField0_ & ~0x00000004); + onChanged(); + return this; + } + + /** + * + * + *
+             * Search filter on url (can be empty, default is empty)
+             * 
+ * + * optional string filter = 3; + * + * @param value The bytes for filter to set. + * @return This builder for chaining. + */ + public Builder setFilterBytes(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + checkByteStringIsUtf8(value); + filter_ = value; + bitField0_ |= 0x00000004; + onChanged(); + return this; + } + + private boolean ignoreCase_; + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 4; + * + * @return Whether the ignoreCase field is set. + */ + @java.lang.Override + public boolean hasIgnoreCase() { + return ((bitField0_ & 0x00000008) != 0); + } + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 4; + * + * @return The ignoreCase. + */ + @java.lang.Override + public boolean getIgnoreCase() { + return ignoreCase_; + } + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 4; + * + * @param value The ignoreCase to set. + * @return This builder for chaining. + */ + public Builder setIgnoreCase(boolean value) { + + ignoreCase_ = value; + bitField0_ |= 0x00000008; + onChanged(); + return this; + } + + /** + * + * + *
+             * Ignore Case sensitivity for search filter (default is false -> case sensitive)
+             * 
+ * + * optional bool ignoreCase = 4; + * + * @return This builder for chaining. + */ + public Builder clearIgnoreCase() { + bitField0_ = (bitField0_ & ~0x00000008); + ignoreCase_ = false; + onChanged(); + return this; + } + private boolean local_; /** * * *
-             * only for the current local instance
+             * only for the current local instance (default is false)
              * 
* - * bool local = 3; + * optional bool local = 5; + * + * @return Whether the local field is set. + */ + @java.lang.Override + public boolean hasLocal() { + return ((bitField0_ & 0x00000010) != 0); + } + + /** + * + * + *
+             * only for the current local instance (default is false)
+             * 
+ * + * optional bool local = 5; * * @return The local. */ @@ -23052,10 +23961,10 @@ public boolean getLocal() { * * *
-             * only for the current local instance
+             * only for the current local instance (default is false)
              * 
* - * bool local = 3; + * optional bool local = 5; * * @param value The local to set. * @return This builder for chaining. @@ -23063,7 +23972,7 @@ public boolean getLocal() { public Builder setLocal(boolean value) { local_ = value; - bitField0_ |= 0x00000004; + bitField0_ |= 0x00000010; onChanged(); return this; } @@ -23072,15 +23981,15 @@ public Builder setLocal(boolean value) { * * *
-             * only for the current local instance
+             * only for the current local instance (default is false)
              * 
* - * bool local = 3; + * optional bool local = 5; * * @return This builder for chaining. */ public Builder clearLocal() { - bitField0_ = (bitField0_ & ~0x00000004); + bitField0_ = (bitField0_ & ~0x00000010); local_ = false; onChanged(); return this; @@ -23314,40 +24223,44 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { + "\005ERROR\020\004\"?\n\020CrawlLimitParams\022\013\n\003key\030\001 \001(" + "\t\022\r\n\005limit\030\002 \001(\r\022\017\n\007crawlID\030\003 \001(\t\"=\n\020URL" + "StatusRequest\022\013\n\003url\030\001 \001(\t\022\013\n\003key\030\002 \001(\t\022" - + "\017\n\007crawlID\030\003 \001(\t\"Y\n\rListUrlParams\022\r\n\005sta" - + "rt\030\001 \001(\r\022\014\n\004size\030\002 \001(\r\022\013\n\003key\030\003 \001(\t\022\017\n\007c" - + "rawlID\030\004 \001(\t\022\r\n\005local\030\005 \001(\010\"=\n\016CountUrlP" - + "arams\022\013\n\003key\030\001 \001(\t\022\017\n\007crawlID\030\002 \001(\t\022\r\n\005l" - + "ocal\030\003 \001(\0102\343\010\n\013URLFrontier\022:\n\tListNodes\022" - + "\022.urlfrontier.Empty\032\027.urlfrontier.String" - + "List\"\000\022;\n\nListCrawls\022\022.urlfrontier.Local" - + "\032\027.urlfrontier.StringList\"\000\022C\n\013DeleteCra" - + "wl\022\037.urlfrontier.DeleteCrawlMessage\032\021.ur" - + "lfrontier.Long\"\000\022?\n\nListQueues\022\027.urlfron" - + "tier.Pagination\032\026.urlfrontier.QueueList\"" - + "\000\022;\n\007GetURLs\022\026.urlfrontier.GetParams\032\024.u" - + "rlfrontier.URLInfo\"\0000\001\022>\n\007PutURLs\022\024.urlf" - + "rontier.URLItem\032\027.urlfrontier.AckMessage" - + "\"\000(\0010\001\022E\n\010GetStats\022#.urlfrontier.QueueWi" - + "thinCrawlParams\032\022.urlfrontier.Stats\"\000\022G\n" - + "\013DeleteQueue\022#.urlfrontier.QueueWithinCr" - + "awlParams\032\021.urlfrontier.Long\"\000\022F\n\017BlockQ" - + "ueueUntil\022\035.urlfrontier.BlockQueueParams" - + "\032\022.urlfrontier.Empty\"\000\0226\n\tSetActive\022\023.ur" - + "lfrontier.Active\032\022.urlfrontier.Empty\"\000\0227" - + "\n\tGetActive\022\022.urlfrontier.Local\032\024.urlfro" - + "ntier.Boolean\"\000\022?\n\010SetDelay\022\035.urlfrontie" - + "r.QueueDelayParams\032\022.urlfrontier.Empty\"\000" - + "\022@\n\013SetLogLevel\022\033.urlfrontier.LogLevelPa" - + "rams\032\022.urlfrontier.Empty\"\000\022D\n\rSetCrawlLi" - + "mit\022\035.urlfrontier.CrawlLimitParams\032\022.url" - + "frontier.Empty\"\000\022E\n\014GetURLStatus\022\035.urlfr" - + "ontier.URLStatusRequest\032\024.urlfrontier.UR" - + "LItem\"\000\022@\n\010ListURLs\022\032.urlfrontier.ListUr" - + "lParams\032\024.urlfrontier.URLItem\"\0000\001\022=\n\tCou" - + "ntURLs\022\033.urlfrontier.CountUrlParams\032\021.ur" - + "lfrontier.Long\"\000B\034\n\032crawlercommons.urlfr" - + "ontierb\006proto3" + + "\017\n\007crawlID\030\003 \001(\t\"\241\001\n\rListUrlParams\022\r\n\005st" + + "art\030\001 \001(\r\022\014\n\004size\030\002 \001(\r\022\013\n\003key\030\003 \001(\t\022\017\n\007" + + "crawlID\030\004 \001(\t\022\r\n\005local\030\005 \001(\010\022\023\n\006filter\030\006" + + " \001(\tH\000\210\001\001\022\027\n\nignoreCase\030\007 \001(\010H\001\210\001\001B\t\n\007_f" + + "ilterB\r\n\013_ignoreCase\"\224\001\n\016CountUrlParams\022" + + "\013\n\003key\030\001 \001(\t\022\017\n\007crawlID\030\002 \001(\t\022\023\n\006filter\030" + + "\003 \001(\tH\000\210\001\001\022\027\n\nignoreCase\030\004 \001(\010H\001\210\001\001\022\022\n\005l" + + "ocal\030\005 \001(\010H\002\210\001\001B\t\n\007_filterB\r\n\013_ignoreCas" + + "eB\010\n\006_local2\343\010\n\013URLFrontier\022:\n\tListNodes" + + "\022\022.urlfrontier.Empty\032\027.urlfrontier.Strin" + + "gList\"\000\022;\n\nListCrawls\022\022.urlfrontier.Loca" + + "l\032\027.urlfrontier.StringList\"\000\022C\n\013DeleteCr" + + "awl\022\037.urlfrontier.DeleteCrawlMessage\032\021.u" + + "rlfrontier.Long\"\000\022?\n\nListQueues\022\027.urlfro" + + "ntier.Pagination\032\026.urlfrontier.QueueList" + + "\"\000\022;\n\007GetURLs\022\026.urlfrontier.GetParams\032\024." + + "urlfrontier.URLInfo\"\0000\001\022>\n\007PutURLs\022\024.url" + + "frontier.URLItem\032\027.urlfrontier.AckMessag" + + "e\"\000(\0010\001\022E\n\010GetStats\022#.urlfrontier.QueueW" + + "ithinCrawlParams\032\022.urlfrontier.Stats\"\000\022G" + + "\n\013DeleteQueue\022#.urlfrontier.QueueWithinC" + + "rawlParams\032\021.urlfrontier.Long\"\000\022F\n\017Block" + + "QueueUntil\022\035.urlfrontier.BlockQueueParam" + + "s\032\022.urlfrontier.Empty\"\000\0226\n\tSetActive\022\023.u" + + "rlfrontier.Active\032\022.urlfrontier.Empty\"\000\022" + + "7\n\tGetActive\022\022.urlfrontier.Local\032\024.urlfr" + + "ontier.Boolean\"\000\022?\n\010SetDelay\022\035.urlfronti" + + "er.QueueDelayParams\032\022.urlfrontier.Empty\"" + + "\000\022@\n\013SetLogLevel\022\033.urlfrontier.LogLevelP" + + "arams\032\022.urlfrontier.Empty\"\000\022D\n\rSetCrawlL" + + "imit\022\035.urlfrontier.CrawlLimitParams\032\022.ur" + + "lfrontier.Empty\"\000\022E\n\014GetURLStatus\022\035.urlf" + + "rontier.URLStatusRequest\032\024.urlfrontier.U" + + "RLItem\"\000\022@\n\010ListURLs\022\032.urlfrontier.ListU" + + "rlParams\032\024.urlfrontier.URLItem\"\0000\001\022=\n\tCo" + + "untURLs\022\033.urlfrontier.CountUrlParams\032\021.u" + + "rlfrontier.Long\"\000B\034\n\032crawlercommons.urlf" + + "rontierb\006proto3" }; descriptor = com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom( @@ -23550,7 +24463,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( internal_static_urlfrontier_ListUrlParams_descriptor, new java.lang.String[] { - "Start", "Size", "Key", "CrawlID", "Local", + "Start", "Size", "Key", "CrawlID", "Local", "Filter", "IgnoreCase", }); internal_static_urlfrontier_CountUrlParams_descriptor = getDescriptor().getMessageTypes().get(24); @@ -23558,7 +24471,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( internal_static_urlfrontier_CountUrlParams_descriptor, new java.lang.String[] { - "Key", "CrawlID", "Local", + "Key", "CrawlID", "Filter", "IgnoreCase", "Local", }); } diff --git a/API/urlfrontier.proto b/API/urlfrontier.proto index eac9fc5..c9de112 100644 --- a/API/urlfrontier.proto +++ b/API/urlfrontier.proto @@ -326,6 +326,10 @@ message ListUrlParams { string crawlID = 4; // only for the current local instance bool local = 5; + // Search filter on url (can be empty, default is empty) + optional string filter = 6; + // Ignore Case sensitivity for search filter (default is false -> case sensitive) + optional bool ignoreCase = 7; } message CountUrlParams { @@ -333,6 +337,10 @@ message CountUrlParams { string key = 1; // crawl ID string crawlID = 2; - // only for the current local instance - bool local = 3; + // Search filter on url (can be empty, default is empty) + optional string filter = 3; + // Ignore Case sensitivity for search filter (default is false -> case sensitive) + optional bool ignoreCase = 4; + // only for the current local instance (default is false) + optional bool local = 5; } \ No newline at end of file diff --git a/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java b/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java index 4240831..8164a03 100644 --- a/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java +++ b/client/src/main/java/crawlercommons/urlfrontier/client/CountURLs.java @@ -37,6 +37,20 @@ public class CountURLs implements Runnable { "restricts the scope to this frontier instance instead of aggregating over the cluster") private Boolean local; + @Option( + names = {"-f", "--filter"}, + defaultValue = "", + paramLabel = "STRING", + description = "String filter applied to URLs") + private String filter; + + @Option( + names = {"-i", "--ignore-case"}, + defaultValue = "false", + paramLabel = "BOOLEAN", + description = "Ignore case sensitivity for search filter") + private Boolean ignoreCase; + @Override public void run() { ManagedChannel channel = @@ -55,6 +69,11 @@ public void run() { builder.setCrawlID(crawl); builder.setLocal(local); + builder.setFilter(filter); + builder.setIgnoreCase(ignoreCase); + + builder.setFilter(filter); + Long s = blockingFrontier.countURLs(builder.build()); System.out.println(s.getValue() + " URLs in frontier"); diff --git a/client/src/main/java/crawlercommons/urlfrontier/client/ListURLs.java b/client/src/main/java/crawlercommons/urlfrontier/client/ListURLs.java index eb4f8c4..aa5f4cb 100644 --- a/client/src/main/java/crawlercommons/urlfrontier/client/ListURLs.java +++ b/client/src/main/java/crawlercommons/urlfrontier/client/ListURLs.java @@ -93,6 +93,20 @@ public class ListURLs implements Runnable { }) private boolean parse; + @Option( + names = {"-f", "--filter"}, + defaultValue = "", + paramLabel = "STRING", + description = "String filter applied to URLs") + private String filter; + + @Option( + names = {"-i", "--ignore-case"}, + defaultValue = "false", + paramLabel = "BOOLEAN", + description = "Ignore case sensitivity for search filter") + private Boolean ignoreCase; + // Use the system default time zone private ZoneId zoneId = ZoneId.systemDefault(); @@ -108,6 +122,9 @@ public void run() { builder.setStart(start); builder.setCrawlID(crawl); + builder.setFilter(filter); + builder.setIgnoreCase(ignoreCase); + PrintStream outstream = null; if (output.length() > 0) { File f = new File(output); diff --git a/service/pom.xml b/service/pom.xml index cabe5d7..1485c73 100644 --- a/service/pom.xml +++ b/service/pom.xml @@ -20,6 +20,7 @@ 1.5.8 5.13.0 2.16.1 + 3.17.0 @@ -118,7 +119,12 @@ ${mockito.version} test - + + + org.apache.commons + commons-lang3 + ${commons.lang.version} + diff --git a/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java b/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java index 3b3f44c..cc6c9b8 100644 --- a/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java +++ b/service/src/main/java/crawlercommons/urlfrontier/service/AbstractFrontierService.java @@ -49,6 +49,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; import org.slf4j.LoggerFactory; public abstract class AbstractFrontierService @@ -905,6 +906,9 @@ public void listURLs( long start = request.getStart(); String key = request.getKey(); + String filter = request.getFilter(); + boolean ignoreCase = request.getIgnoreCase(); + final String normalisedCrawlID = CrawlID.normaliseCrawlID(request.getCrawlID()); // 100 by default @@ -919,7 +923,7 @@ public void listURLs( normalisedCrawlID, key); - long totalCount = -1; + long totalCount = 0; long sentCount = 0; synchronized (getQueues()) { @@ -942,14 +946,23 @@ public void listURLs( CloseableIterator urliter = urlIterator(e); while (urliter.hasNext()) { - totalCount++; - if (totalCount < start) { - urliter.next(); - } else if (sentCount < maxURLs) { - responseObserver.onNext(urliter.next()); - sentCount++; - } else { - break; + URLItem cur = urliter.next(); + + if (StringUtils.isEmpty(filter) + || (!ignoreCase && cur.getKnown().getInfo().getUrl().contains(filter)) + || (ignoreCase + && StringUtils.containsIgnoreCase( + cur.getKnown().getInfo().getUrl(), filter))) { + + if (totalCount < start) { + totalCount++; + } else if (sentCount < maxURLs) { + totalCount++; + sentCount++; + responseObserver.onNext(cur); + } else { + break; + } } } @@ -1001,10 +1014,17 @@ public void countURLs( StreamObserver responseObserver) { String key = request.getKey(); + String filter = request.getFilter(); + boolean ignoreCase = request.getIgnoreCase(); final String normalisedCrawlID = CrawlID.normaliseCrawlID(request.getCrawlID()); - LOG.info("Received request to count URLs [crawlId {}, key {}]", normalisedCrawlID, key); + LOG.info( + "Received request to count URLs [crawlId={}, key={}, filter={}, ignoreCase={}]", + normalisedCrawlID, + key, + filter, + ignoreCase); long totalCount = 0; @@ -1028,8 +1048,15 @@ public void countURLs( CloseableIterator urliter = urlIterator(e); while (urliter.hasNext()) { - urliter.next(); - totalCount++; + URLItem cur = urliter.next(); + + if (StringUtils.isBlank(filter) + || (!ignoreCase && cur.getKnown().getInfo().getUrl().contains(filter)) + || (ignoreCase + && StringUtils.containsIgnoreCase( + cur.getKnown().getInfo().getUrl(), filter))) { + totalCount++; + } } try { diff --git a/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java b/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java index d03000a..fad7926 100644 --- a/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java +++ b/service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java @@ -358,8 +358,138 @@ void testMemoryIteratorSingleQueue() { assertEquals(3, nbUrls); } + @Test + @Order(9) + void testListAllURLsCaseInsensitive() { + + ListUrlParams params = + ListUrlParams.newBuilder() + .setCrawlID("crawl_id") + .setStart(0) + .setSize(100) + .setFilter("COMPLETED") + .setIgnoreCase(true) + .build(); + + final AtomicInteger fetched = new AtomicInteger(0); + final AtomicInteger count = new AtomicInteger(0); + + StreamObserver statusObserver = + new StreamObserver<>() { + + @Override + public void onNext(URLItem value) { + // receives confirmation that the value has been received + logURLItem(value); + + if (value.hasKnown()) { + fetched.incrementAndGet(); + } + count.incrementAndGet(); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + } + + @Override + public void onCompleted() { + LOG.info("completed testListAllURLsCaseInsensitive"); + } + }; + + memoryFrontierService.listURLs(params, statusObserver); + assertEquals(1, count.get()); + } + + @Test + @Order(10) + void testListAllURLsCaseSensitive() { + + ListUrlParams params = + ListUrlParams.newBuilder() + .setCrawlID("crawl_id") + .setStart(0) + .setSize(100) + .setFilter("COMPLETED") + .setIgnoreCase(false) + .build(); + + final AtomicInteger fetched = new AtomicInteger(0); + final AtomicInteger count = new AtomicInteger(0); + + StreamObserver statusObserver = + new StreamObserver<>() { + + @Override + public void onNext(URLItem value) { + // receives confirmation that the value has been received + logURLItem(value); + + if (value.hasKnown()) { + fetched.incrementAndGet(); + } + count.incrementAndGet(); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + } + + @Override + public void onCompleted() { + LOG.info("completed testListAllURLsCaseSensitive"); + } + }; + + memoryFrontierService.listURLs(params, statusObserver); + assertEquals(0, count.get()); + } + + @Test + @Order(11) + void testListAllURLstart() { + + ListUrlParams params = + ListUrlParams.newBuilder().setCrawlID("crawl_id").setStart(3).setSize(10).build(); + + final AtomicInteger fetched = new AtomicInteger(0); + final AtomicInteger count = new AtomicInteger(0); + + StreamObserver statusObserver = + new StreamObserver<>() { + + @Override + public void onNext(URLItem value) { + // receives confirmation that the value has been received + logURLItem(value); + + if (value.hasKnown()) { + fetched.incrementAndGet(); + } + count.incrementAndGet(); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + } + + @Override + public void onCompleted() { + LOG.info("completed testListAllURLs"); + } + }; + + memoryFrontierService.listURLs(params, statusObserver); + assertEquals(1, count.get()); + } + @Test @Order(99) + // Must be last test void testNoRescheduleCompleted() { String crawlId = "crawl_id"; diff --git a/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java b/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java index 0c13f37..5294b8a 100644 --- a/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java +++ b/service/src/test/java/crawlercommons/urlfrontier/service/RocksDBServiceTest.java @@ -423,6 +423,76 @@ public void onCompleted() { rocksDBService.countURLs(builder.build(), responseObserver); } + @Test + @Order(10) + void testCountURLsCaseSensitive() { + + Urlfrontier.CountUrlParams.Builder builder = Urlfrontier.CountUrlParams.newBuilder(); + + builder.setKey("queue_mysite"); + builder.setCrawlID("crawl_id"); + builder.setFilter("COMPLETED"); + builder.setIgnoreCase(false); + + StreamObserver responseObserver = + new StreamObserver<>() { + + @Override + public void onNext(Urlfrontier.Long value) { + // receives confirmation that the value has been received + assertEquals(0, value.getValue()); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + fail(); + } + + @Override + public void onCompleted() { + LOG.info("completed testNoRescheduleCompleted 1/2"); + } + }; + + rocksDBService.countURLs(builder.build(), responseObserver); + } + + @Test + @Order(9) + void testCountURsLCaseInsensitive() { + + Urlfrontier.CountUrlParams.Builder builder = Urlfrontier.CountUrlParams.newBuilder(); + + builder.setKey("queue_mysite"); + builder.setCrawlID("crawl_id"); + builder.setFilter("COMPLETED"); + builder.setIgnoreCase(true); + + StreamObserver responseObserver = + new StreamObserver<>() { + + @Override + public void onNext(Urlfrontier.Long value) { + // receives confirmation that the value has been received + assertEquals(1, value.getValue()); + } + + @Override + public void onError(Throwable t) { + t.printStackTrace(); + fail(); + } + + @Override + public void onCompleted() { + LOG.info("completed testNoRescheduleCompleted 1/2"); + } + }; + + rocksDBService.countURLs(builder.build(), responseObserver); + } + @Test @Order(99) void testNoRescheduleCompleted() {