Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exposes language parameter in TransformPiiText #2996

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,163 changes: 589 additions & 574 deletions backend/gen/go/protos/mgmt/v1alpha1/transformer.pb.go

Large diffs are not rendered by default.

15 changes: 12 additions & 3 deletions backend/internal/cmds/mgmt/serve/connect/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -560,9 +560,10 @@ func serve(ctx context.Context) error {
)

anonymizationService := v1alpha1_anonymizationservice.New(&v1alpha1_anonymizationservice.Config{
IsPresidioEnabled: ncloudlicense.IsValid(),
IsAuthEnabled: isAuthEnabled,
IsNeosyncCloud: ncloudlicense.IsValid(),
IsPresidioEnabled: ncloudlicense.IsValid(),
PresidioDefaultLanguage: getPresidioDefaultLanguage(),
IsAuthEnabled: isAuthEnabled,
IsNeosyncCloud: ncloudlicense.IsValid(),
}, anonymizerMeter, useraccountService, presAnalyzeClient, presAnonClient, db)
api.Handle(
mgmtv1alpha1connect.NewAnonymizationServiceHandler(
Expand Down Expand Up @@ -632,6 +633,14 @@ func serve(ctx context.Context) error {
return nil
}

// getPresidioDefaultLanguage reads the PRESIDIO_DEFAULT_LANGUAGE setting via
// viper. It returns a pointer to the configured value, or nil when the
// setting resolves to an empty string.
func getPresidioDefaultLanguage() *string {
	if configured := viper.GetString("PRESIDIO_DEFAULT_LANGUAGE"); configured != "" {
		return &configured
	}
	return nil
}

func getPromClientFromEnvironment() (promapi.Client, error) {
roundTripper := promapi.DefaultRoundTripper
promApiKey := getPromApiKey()
Expand Down
5 changes: 5 additions & 0 deletions backend/protos/mgmt/v1alpha1/transformer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,11 @@ message TransformPiiText {

// Exact match of PII phrases identified by the analysis engine.
repeated string allowed_phrases = 5;

// Two characters for the desired language in ISO_639-1 format.
// If not provided, will use a default language if specified on the server.
// Depending on the server configuration, may attempt to autodetect from input.
optional string language = 6;
}

message PiiDenyRecognizer {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (s *Service) AnonymizeMany(
jsonanonymizer.WithTransformerMappings(req.Msg.TransformerMappings),
jsonanonymizer.WithDefaultTransformers(req.Msg.DefaultTransformers),
jsonanonymizer.WithHaltOnFailure(req.Msg.HaltOnFailure),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize, s.cfg.PresidioDefaultLanguage),
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -165,7 +165,7 @@ func (s *Service) AnonymizeSingle(
anonymizer, err := jsonanonymizer.NewAnonymizer(
jsonanonymizer.WithTransformerMappings(req.Msg.TransformerMappings),
jsonanonymizer.WithDefaultTransformers(req.Msg.DefaultTransformers),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize, s.cfg.PresidioDefaultLanguage),
)
if err != nil {
return nil, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ type Service struct {
}

type Config struct {
IsAuthEnabled bool
IsPresidioEnabled bool
IsNeosyncCloud bool
IsAuthEnabled bool
IsPresidioEnabled bool
PresidioDefaultLanguage *string
IsNeosyncCloud bool
}

func New(
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/mgmt/v1alpha1/anonymization.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -1215,6 +1230,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/mgmt/v1alpha1/job.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5056,6 +5056,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -5088,6 +5103,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/mgmt/v1alpha1/transformer.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -1735,6 +1750,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/neosync.mgmt.v1alpha1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5155,6 +5155,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -5190,6 +5205,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
2 changes: 1 addition & 1 deletion docs/protos/mgmt/v1alpha1/transformer.proto.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ _**package** mgmt.v1alpha1_


### `TransformPiiText`
<ProtoMessage key={66} message={{"name":"TransformPiiText","longName":"TransformPiiText","fullName":"mgmt.v1alpha1.TransformPiiText","description":"NeosyncCloud/Enterprise only transformer for anonymizing PII Text","hasExtensions":false,"hasFields":true,"hasOneofs":false,"extensions":[],"fields":[{"name":"score_threshold","description":"Minimal detection score for determining PII. 0.0-1.0","label":"","type":"float","longType":"float","fullType":"float","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"default_anonymizer","description":"The default anonmyization config used for all instances of detected PII.","label":"","type":"PiiAnonymizer","longType":"PiiAnonymizer","fullType":"mgmt.v1alpha1.PiiAnonymizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piianonymizer"},{"name":"deny_recognizers","description":"Configure deny lists where each word is treated as PII.","label":"repeated","type":"PiiDenyRecognizer","longType":"PiiDenyRecognizer","fullType":"mgmt.v1alpha1.PiiDenyRecognizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piidenyrecognizer"},{"name":"allowed_entities","description":"Configure a list of entities to be used for PII analysis. If not provided or empty, all entities are considiered\nIf this is specified, any ad-hoc, or deny_recognizers entity names must also be provided.\nTo see available builtin entities, call the GetPiiTextEntities() RPC method to see what is available for your account.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"allowed_phrases","description":"Exact match of PII phrases identified by the analysis engine.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""}]}} />
<ProtoMessage key={66} message={{"name":"TransformPiiText","longName":"TransformPiiText","fullName":"mgmt.v1alpha1.TransformPiiText","description":"NeosyncCloud/Enterprise only transformer for anonymizing PII Text","hasExtensions":false,"hasFields":true,"hasOneofs":true,"extensions":[],"fields":[{"name":"score_threshold","description":"Minimal detection score for determining PII. 0.0-1.0","label":"","type":"float","longType":"float","fullType":"float","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"default_anonymizer","description":"The default anonmyization config used for all instances of detected PII.","label":"","type":"PiiAnonymizer","longType":"PiiAnonymizer","fullType":"mgmt.v1alpha1.PiiAnonymizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piianonymizer"},{"name":"deny_recognizers","description":"Configure deny lists where each word is treated as PII.","label":"repeated","type":"PiiDenyRecognizer","longType":"PiiDenyRecognizer","fullType":"mgmt.v1alpha1.PiiDenyRecognizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piidenyrecognizer"},{"name":"allowed_entities","description":"Configure a list of entities to be used for PII analysis. 
If not provided or empty, all entities are considiered\nIf this is specified, any ad-hoc, or deny_recognizers entity names must also be provided.\nTo see available builtin entities, call the GetPiiTextEntities() RPC method to see what is available for your account.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"allowed_phrases","description":"Exact match of PII phrases identified by the analysis engine.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"language","description":"Two characters for the desired language in ISO_639-1 format.\nIf not provided, will use a default language if specified on the server.\nDepending on the server configuration, may attempt to autodetect from input.","label":"optional","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":true,"oneofdecl":"_language","defaultValue":""}]}} />


### `TransformString`
Expand Down
14 changes: 13 additions & 1 deletion docs/protos/proto_docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -2180,7 +2180,7 @@
"description": "NeosyncCloud/Enterprise only transformer for anonymizing PII Text",
"hasExtensions": false,
"hasFields": true,
"hasOneofs": false,
"hasOneofs": true,
"extensions": [],
"fields": [
{
Expand Down Expand Up @@ -2242,6 +2242,18 @@
"isoneof": false,
"oneofdecl": "",
"defaultValue": ""
},
{
"name": "language",
"description": "Two characters for the desired language in ISO_639-1 format.\nIf not provided, will use a default language if specified on the server.\nDepending on the server configuration, may attempt to autodetect from input.",
"label": "optional",
"type": "string",
"longType": "string",
"fullType": "string",
"ismap": false,
"isoneof": true,
"oneofdecl": "_language",
"defaultValue": ""
}
]
},
Expand Down
10 changes: 10 additions & 0 deletions frontend/packages/sdk/src/client/mgmt/v1alpha1/transformer_pb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1765,6 +1765,15 @@ export class TransformPiiText extends Message<TransformPiiText> {
*/
allowedPhrases: string[] = [];

/**
* Two characters for the desired language in ISO_639-1 format.
* If not provided, will use a default language if specified on the server.
* Depending on the server configuration, may attempt to autodetect from input.
*
* @generated from field: optional string language = 6;
*/
language?: string;

constructor(data?: PartialMessage<TransformPiiText>) {
super();
proto3.util.initPartial(data, this);
Expand All @@ -1778,6 +1787,7 @@ export class TransformPiiText extends Message<TransformPiiText> {
{ no: 3, name: "deny_recognizers", kind: "message", T: PiiDenyRecognizer, repeated: true },
{ no: 4, name: "allowed_entities", kind: "scalar", T: 9 /* ScalarType.STRING */, repeated: true },
{ no: 5, name: "allowed_phrases", kind: "scalar", T: 9 /* ScalarType.STRING */, repeated: true },
{ no: 6, name: "language", kind: "scalar", T: 9 /* ScalarType.STRING */, opt: true },
]);

static fromBinary(bytes: Uint8Array, options?: Partial<BinaryReadOptions>): TransformPiiText {
Expand Down
2 changes: 1 addition & 1 deletion internal/ee/transformers/functions/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TransformPiiText(
allowedEntities := config.GetAllowedEntities()
analyzeResp, err := analyzeClient.PostAnalyzeWithResponse(ctx, presidioapi.AnalyzeRequest{
Text: value,
Language: supportedLanguage,
Language: config.GetLanguage(),
ScoreThreshold: &threshold,
AdHocRecognizers: &adhocRecognizers,
Entities: &allowedEntities,
Expand Down
16 changes: 9 additions & 7 deletions internal/json-anonymizer/json-anonymizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ type JsonAnonymizer struct {
}

type anonymizeConfig struct {
analyze presidioapi.AnalyzeInterface
anonymize presidioapi.AnonymizeInterface
analyze presidioapi.AnalyzeInterface
anonymize presidioapi.AnonymizeInterface
defaultLanguage *string
}

// Option is a functional option for configuring the Anonymizer
Expand Down Expand Up @@ -73,12 +74,13 @@ func NewAnonymizer(opts ...Option) (*JsonAnonymizer, error) {
}

// WithConditionalAnonymizeConfig sets the analyze and anonymize clients for use by the presidio transformers only if isEnabled is true.
func WithConditionalAnonymizeConfig(isEnabled bool, analyze presidioapi.AnalyzeInterface, anonymize presidioapi.AnonymizeInterface) Option {
func WithConditionalAnonymizeConfig(isEnabled bool, analyze presidioapi.AnalyzeInterface, anonymize presidioapi.AnonymizeInterface, defaultLanguage *string) Option {
return func(ja *JsonAnonymizer) {
if isEnabled && analyze != nil && anonymize != nil {
ja.anonymizeConfig = &anonymizeConfig{
analyze: analyze,
anonymize: anonymize,
analyze: analyze,
anonymize: anonymize,
defaultLanguage: defaultLanguage,
}
}
}
Expand Down Expand Up @@ -330,7 +332,7 @@ func initTransformerExecutors(
executors := []*transformer.TransformerExecutor{}
execOpts := []transformer.TransformerExecutorOption{}
if anonymizeConfig != nil && anonymizeConfig.analyze != nil && anonymizeConfig.anonymize != nil {
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize))
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize, anonymizeConfig.defaultLanguage))
}

for _, mapping := range transformerMappings {
Expand All @@ -356,7 +358,7 @@ func initDefaultTransformerExecutors(
) (*DefaultExecutors, error) {
execOpts := []transformer.TransformerExecutorOption{}
if anonymizeConfig != nil && anonymizeConfig.analyze != nil && anonymizeConfig.anonymize != nil {
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize))
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize, anonymizeConfig.defaultLanguage))
}

var stringExecutor, numberExecutor, booleanExecutor *transformer.TransformerExecutor
Expand Down
264 changes: 132 additions & 132 deletions python/src/neosync/mgmt/v1alpha1/transformer_pb2.py

Large diffs are not rendered by default.

Loading