From 88f34f5180013cf3d21d7fa47ce68bbc0c2c7638 Mon Sep 17 00:00:00 2001 From: Nikola Grcevski <6207777+grcevski@users.noreply.github.com> Date: Wed, 4 Oct 2023 03:27:13 -0400 Subject: [PATCH] Add auto route classifier (#323) * Add auto route classifier * Add some german and spanish APIs * french apis * Integration test * Benchmarks * Improve performance * Update name * Happy linter * Don't make heuristic default * Add warning message * Add docs * Update integration test --------- Co-authored-by: Mario Macias --- docs/sources/configure/_index.md | 5 + docs/sources/configure/options.md | 26 +++ docs/sources/setup/_index.md | 4 + go.mod | 1 + go.sum | 2 + pkg/internal/pipe/instrumenter_test.go | 2 +- pkg/internal/transform/route/classifier.json | 1 + pkg/internal/transform/route/cluster.go | 121 ++++++++++ pkg/internal/transform/route/cluster_test.go | 29 +++ pkg/internal/transform/routes.go | 27 ++- pkg/internal/transform/routes_test.go | 67 ++++++ .../configs/instrumenter-config-no-route.yml | 6 + test/integration/red_test.go | 212 ++++++++++++++++++ test/integration/suites_test.go | 11 + .../go-gibberish/AUTHORS.md | 5 + .../AlessandroPomponio/go-gibberish/LICENSE | 21 ++ .../go-gibberish/analysis/analysis.go | 118 ++++++++++ .../consts/accepted_characters.go | 9 + .../go-gibberish/gibberish/gibberish.go | 15 ++ .../go-gibberish/persistence/persistence.go | 52 +++++ .../go-gibberish/structs/structs.go | 18 ++ vendor/modules.txt | 7 + 22 files changed, 757 insertions(+), 2 deletions(-) create mode 100644 pkg/internal/transform/route/classifier.json create mode 100644 pkg/internal/transform/route/cluster.go create mode 100644 pkg/internal/transform/route/cluster_test.go create mode 100644 test/integration/configs/instrumenter-config-no-route.yml create mode 100644 vendor/github.com/AlessandroPomponio/go-gibberish/AUTHORS.md create mode 100644 vendor/github.com/AlessandroPomponio/go-gibberish/LICENSE create mode 100644 
vendor/github.com/AlessandroPomponio/go-gibberish/analysis/analysis.go create mode 100644 vendor/github.com/AlessandroPomponio/go-gibberish/consts/accepted_characters.go create mode 100644 vendor/github.com/AlessandroPomponio/go-gibberish/gibberish/gibberish.go create mode 100644 vendor/github.com/AlessandroPomponio/go-gibberish/persistence/persistence.go create mode 100644 vendor/github.com/AlessandroPomponio/go-gibberish/structs/structs.go diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index 3d3af0cd8..a1d1995a2 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -20,3 +20,8 @@ Beyla can be configured in the following ways: For information on the metrics Beyla exports, see the [exported metrics]({{< relref "../metrics.md" >}}) documentation. For information on profiling Beyla, see the [profiling]({{< relref "../profiling.md" >}}) documentation. + +**Note**: If you will be using Beyla to generate traces, please make sure you've read our documentation section on configuring +the [Routes Decorator]({{< relref "../configure/options#routes-decorator" >}}). Since Beyla is auto-instrumenting your application without any +special language level support, configuring the low cardinality routes decorator is very important for optimal results. + diff --git a/docs/sources/configure/options.md b/docs/sources/configure/options.md index ebbc09d85..38a3e1758 100644 --- a/docs/sources/configure/options.md +++ b/docs/sources/configure/options.md @@ -216,6 +216,32 @@ Possible values for the `unmatch` property are: - `path` will copy the `http.route` field property to the path value. - 🚨 Caution: this option could lead to cardinality explosion at the ingester side. - `wildcard` will set the `http.route` field property to a generic asterisk based `/**` value. 
+- `heuristic` will automatically derive the `http.route` field property from the path value, based on the following rules: + - Any path component that contains a character other than an ASCII letter, `-` or `_` (for example, a digit) will be replaced by an asterisk `*`. + - Any alphabetical component that doesn't look like a word will be replaced by an asterisk `*`. + +### Special considerations when using the `heuristic` route decorator mode + +The `heuristic` decorator is a best-effort route decorator, which may still lead to cardinality explosion in certain scenarios. +GitHub URL paths, for instance, are a case where the `heuristic` route decorator will not work, since the URL paths +are constructed like a directory tree. In this scenario all paths will remain unique and lead to cardinality explosion. + +On the other hand, if your URL path patterns follow a certain structure, and the unique IDs are made up of numbers or random characters, +then the `heuristic` decorator may be a low-effort configuration option which is suitable for your use case. For example, the following +mock Google Docs URLs will be correctly reduced to a low-cardinality version: + +Both URL paths below: + +``` +document/d/CfMkAGbE_aivhFydEpaRafPuGWbmHfG/edit (no numbers in the ID) +document/d/C2fMkAGb3E_aivhFyd5EpaRafP123uGWbmHfG/edit +``` + +will be converted to a low-cardinality route: + +``` +document/d/*/edit +``` ## OTEL metrics exporter diff --git a/docs/sources/setup/_index.md b/docs/sources/setup/_index.md index b8d493939..31375253b 100644 --- a/docs/sources/setup/_index.md +++ b/docs/sources/setup/_index.md @@ -19,3 +19,7 @@ There are different options to set up and run Beyla: 3. [As a Kubernetes sidecar container]({{< relref "./kubernetes.md" >}}) For information on configuration options and data export modes, see the [Configure Beyla]({{< relref "../configure/_index.md" >}}) documentation. 
+ +**Note**: If you will be using Beyla to generate traces, please make sure you've read our documentation section on configuring +the [Routes Decorator]({{< relref "../configure/options#routes-decorator" >}}). Since Beyla is auto-instrumenting your application without any +special language level support, configuring the low cardinality routes decorator is very important for optimal results. diff --git a/go.mod b/go.mod index 3c86adffd..5f0211f21 100644 --- a/go.mod +++ b/go.mod @@ -44,6 +44,7 @@ require ( ) require ( + github.com/AlessandroPomponio/go-gibberish v0.0.0-20191004143433-a2d4156f0396 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/sonic v1.9.1 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect diff --git a/go.sum b/go.sum index 9ee76a3f9..1b15c3360 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/AlessandroPomponio/go-gibberish v0.0.0-20191004143433-a2d4156f0396 h1:cKIHT8I2mrmw/VgdyNeACP/AvetK8AgGsiRfOC3ZjmQ= +github.com/AlessandroPomponio/go-gibberish v0.0.0-20191004143433-a2d4156f0396/go.mod h1:2VCDG9kHYQ5vfYUqeoB7foVlcvIvB7rp9LxTELLD1qU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= diff --git a/pkg/internal/pipe/instrumenter_test.go b/pkg/internal/pipe/instrumenter_test.go index 1f7ee11f5..7c2488c2c 100644 --- a/pkg/internal/pipe/instrumenter_test.go +++ b/pkg/internal/pipe/instrumenter_test.go @@ -133,7 +133,7 @@ func TestRouteConsolidation(t *testing.T) { return func(out chan<- []request.Span) { out <- newRequest("svc-1", 1, "GET", "/user/1234", "1.1.1.1:3456", 200) out <- newRequest("svc-1", 2, "GET", "/products/3210/push", "1.1.1.1:3456", 200) - 
out <- newRequest("svc-1", 3, "GET", "/attach", "1.1.1.1:3456", 200) // undefined route: won't report as route + out <- newRequest("svc-1", 3, "GET", "/attach", "1.1.1.1:3456", 200) // closing prematurely the input node would finish the whole graph processing // and OTEL exporters could be closed, so we wait. time.Sleep(testTimeout) diff --git a/pkg/internal/transform/route/classifier.json b/pkg/internal/transform/route/classifier.json new file mode 100644 index 000000000..e9f2d5206 --- /dev/null +++ b/pkg/internal/transform/route/classifier.json @@ -0,0 +1 @@ +{"Occurrences":[[-8.569137312930899,-3.9369332597631863,-3.220670162697391,-3.0482479869676102,-6.052279063336297,-4.69956099775001,-3.9941585968087816,-6.710407217596661,-3.2453041060602184,-7.060740255010108,-4.512283359624297,-2.4997201529644935,-3.642636781640966,-1.5707462805725019,-7.978468801653891,-3.8936418102220776,-9.821900281426267,-2.3025283782801376,-2.348366425382398,-1.9448651421947813,-4.539158126663701,-3.871849760115083,-4.706359120463831,-6.560313338465017,-3.649725323207633,-6.641954302926283,-2.7134701747591117],[-2.5528619980785,-5.139226208055755,-6.049719822245583,-6.219404795035026,-1.173596307609444,-8.563954087128105,-8.805116143944993,-8.494961215641153,-3.3328454702735173,-5.004532700251055,-8.805116143944993,-2.1390850632227165,-6.121607051758899,-6.808562262070924,-2.1459387811703974,-8.312639658847198,-8.900426323749317,-2.719375008855968,-3.788438535392774,-4.703224376087508,-2.137350056136624,-6.320209494156992,-7.676650892127201,-8.900426323749317,-2.3611294272462486,-8.900426323749317,-4.738423113053401],[-2.08946313988477,-9.398284978640158,-3.8466182187208457,-7.678499009037193,-1.7391136109740999,-8.792149175069843,-9.580606535434113,-1.9093388132314668,-2.9331775988939013,-9.485296355629789,-3.343650033348159,-3.276523430584318,-8.515895798441685,-8.838669190704735,-1.6000355228278762,-9.485296355629789,-6.386023403134956,-3.377666241879603,-5.838186314392146,-2.390910
634629309,-3.2184408726543063,-9.485296355629789,-8.838669190704735,-9.580606535434113,-4.621264535725407,-8.299672689972049,-3.8668737299247438],[-3.7200475908408235,-7.418434550575756,-7.88681348409449,-4.564865474086475,-1.9699465394791131,-6.796896902393934,-5.430448837645974,-6.754155354016663,-2.4627220077183942,-6.277375571660389,-7.666270714480338,-4.558326450319418,-5.5188785108919935,-5.946484744877372,-3.117009722931763,-8.380471304238116,-8.136274343726074,-3.6243165394601604,-3.6811521539816408,-7.1936663035345445,-3.994198378682783,-5.568764165402863,-7.077327311450395,-9.928033812954128,-4.613843097249801,-9.745712256160173,-0.5394468422260206],[-3.0956911156796725,-6.3279563530093395,-3.711394117529345,-2.414309096615009,-3.7280101535107066,-4.555095809949321,-4.9582369569836615,-6.340824177620733,-4.465115884463944,-8.043262266521952,-7.012917948982845,-3.4395652463282116,-3.7489856294972617,-2.389153595060143,-5.281198278652138,-4.426598784036354,-6.265131628349503,-1.976271246918681,-2.519225987547763,-3.7331106440131854,-6.05129367580717,-4.115226028040922,-4.719249301196115,-4.451178867151557,-4.535588241035776,-7.731754615658102,-1.1291364595675442],[-2.7634422911455703,-7.9114774546442685,-7.529542843946299,-8.494623739989885,-2.451100566435661,-2.926120215794328,-7.612234559791412,-8.53718335440868,-2.4505074466080714,-9.033620240722573,-8.72823859117139,-3.748243385450271,-8.839464226281615,-7.66534438510536,-1.9043799278633868,-8.305381740351358,-9.370092477343785,-2.3558173549839756,-5.929674382528349,-3.315653131074415,-3.5058932812816797,-9.370092477343785,-7.801476559429941,-9.370092477343785,-6.135343303319295,-9.370092477343785,-0.9976707550554196],[-2.681117318742622,-8.560252680876685,-8.083328608786376,-6.8862762473050125,-1.9631827738890724,-7.924263914156688,-4.61975020412667,-2.2213641819441556,-2.8612966495974126,-9.148039345778804,-8.560252680876685,-3.35807917488155,-6.075346031088684,-3.7399711152716475,-2.831958278425978,-7
.954116877306369,-9.052729165974478,-2.5526687518888194,-4.062915199691808,-4.943346726387837,-3.494498087559349,-9.148039345778804,-7.579423427864958,-9.148039345778804,-5.882279935011752,-8.617411094716633,-1.0302198528344138],[-1.8949866865832032,-7.38720654177459,-8.05310407988716,-7.542278456121169,-0.7286353651447546,-7.858315754328075,-9.589971299486425,-8.779041083270096,-1.9951372153459488,-10.100796923252416,-7.885223207247999,-6.635061020452689,-6.261344610659105,-6.705170586639715,-2.5617698674284206,-9.253499062865211,-10.100796923252416,-4.5720294302077304,-6.222675469499951,-3.7645613367085256,-4.622243506401445,-9.541181135316993,-7.3816968859636205,-10.28311848004637,-5.024581983510178,-10.187808300242045,-2.36230880075777],[-3.712244886548001,-4.717475282130211,-2.783984370515817,-3.2216013768067646,-3.1683365236496233,-3.9038254558612397,-3.680790547058952,-9.119304544100272,-6.422989599216483,-10.323277348426208,-5.240838322200969,-3.0795261367137394,-3.173687619686372,-1.3126793991869439,-2.664381175855551,-4.9236074876178835,-7.895529112478156,-3.409705684122631,-2.051579832828198,-2.1011937452905483,-6.511074678280273,-3.816746183294981,-9.672689782285058,-6.168308164387673,-10.410288725415837,-5.511770729440288,-3.788398987959084],[-2.3427609160575655,-6.1024094410597085,-6.037870919922137,-6.507874549167873,-1.4153520955994334,-6.245510284700382,-6.17140231254666,-6.325552992373918,-5.51462277615759,-6.507874549167873,-6.245510284700382,-6.412564369363548,-6.325552992373918,-6.507874549167873,-1.2783714986201964,-6.412564369363548,-6.325552992373918,-6.1024094410597085,-6.1024094410597085,-6.245510284700382,-1.1002544477293865,-6.507874549167873,-6.245510284700382,-6.507874549167873,-6.507874549167873,-6.507874549167873,-4.859215923580492],[-3.6194933584945135,-7.047703539402737,-6.062419936041631,-7.671857848475732,-1.2114318817004575,-6.285563487355841,-6.824559988088527,-3.6311485020862624,-1.7851984157338756,-7.814958692116405,-7.4895362
91681777,-3.88766821455747,-6.036102627724257,-2.3543916630889337,-3.8417682258102714,-7.546694705521725,-7.9820127767795706,-5.48206824962703,-3.0008999219496366,-6.6910285954640045,-3.73872587983735,-7.2444138336487915,-5.438265626968637,-8.077322956583895,-4.682814563072537,-8.077322956583895,-1.4288566755523215],[-2.269183388314388,-6.573297799694782,-5.745479365449931,-2.8596367214634224,-1.7841347050080587,-4.144047900909572,-6.844091654118041,-7.7437746495925355,-2.175115871017978,-9.701519256294851,-4.956587127931601,-2.065933606191924,-5.060338632783727,-6.492693767280152,-2.4508837443961715,-5.604400767190025,-9.701519256294851,-5.648286082315182,-3.879460040714278,-3.864517937852428,-3.820986269894151,-5.091361528795721,-5.466205750947557,-9.883840813088806,-2.3336002683489516,-8.602906967626742,-2.0434097729831913],[-1.7539942375247688,-3.6841198980845076,-6.559311098803656,-8.439623965373157,-1.3654444296574595,-6.4075846625879045,-9.337565558579115,-8.238953269911006,-2.4389427631602505,-9.17051147391595,-8.902247487321269,-6.206031743866062,-3.641082386404525,-5.662416297277081,-2.2280575251437695,-2.6972144960398214,-9.432875738383439,-5.501050105659114,-3.5381974599631496,-6.907147094075184,-3.4685254837670296,-9.250554181589486,-8.071899185247839,-9.432875738383439,-3.424799925470261,-9.432875738383439,-1.8968318511898539],[-3.41157713428474,-6.728285208389976,-3.0909427430720093,-1.740264825689735,-2.5015190512398306,-4.817371901371758,-2.113210530516431,-6.807749379744223,-3.295792549094097,-6.252798951069294,-4.917797884336954,-4.632769841879079,-5.98877585293416,-4.66665175664129,-2.876759518067293,-7.603753945743875,-6.864465609186075,-7.187239001449126,-3.052336215755031,-2.2647479268985964,-4.9001580949910455,-5.348959516586179,-7.14773655847288,-7.454376544669275,-4.546351845156791,-8.807726750069811,-1.4656789800236865],[-5.082241389138844,-5.142393631993602,-4.276598353328924,-4.085216817689867,-5.7599683850109145,-2.174773615995364,-5.28
8621572218711,-6.090174110360315,-4.4715723308194235,-6.90687468303798,-4.432658655412265,-3.2277431063139637,-2.8212923556513845,-1.7681088393713913,-3.5156224239092224,-3.9518016568316168,-8.986316224717816,-2.1664590794141954,-3.3712131841519417,-3.1224483347145418,-2.2101893882141153,-3.5450716407123704,-3.140479750467313,-6.663111844521034,-5.51344438505264,-7.704225641127928,-2.2129736790077486],[-2.136265349667144,-7.648749930275526,-6.522163789565011,-8.503165258431594,-1.734614577170007,-6.50168525822147,-7.785325465281278,-3.642577960578998,-2.654272848600284,-8.454375094262163,-7.8613113722592,-2.3851781986138283,-6.363099094935324,-7.294204912594619,-2.116201767527298,-2.7907534570773387,-9.196312438991539,-1.7868731941606955,-3.848728831140585,-3.2942257030347744,-3.1971277688684197,-9.196312438991539,-7.0800569241889875,-9.196312438991539,-4.987152202340858,-9.196312438991539,-2.9363486388485507],[-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-0.057170565024993084,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-6.182291496945648,-5.540437610773254],[-2.5722807952437394,-5.99205725679638,-4.319316764551239,-3.7284294070555273,-1.4225483817556492,-5.357231686671419,-4.331760927128789,-6.033757985995324,-2.364541745819138,-9.405183209323809,-4.850464566831816,-4.673556599383159,-3.74500437214974,-3.916576116332641,-2.322164557359624,-5.413979406021222,-9.0405400957359,-3.636862213530037,-2.9016138569514562,-3.2345687965875074,-3.9816438780223904,-4.905818082232694,-6.257588586460572,-10.13915238440401,-3.258597025348527,-8.070182142591468,-1.7281348556024394],[-3.2090119684802527,-6.334899523154303,
-4.093299064417374,-7.59203503906228,-2.1586345703035548,-6.182995193953419,-7.849503332917564,-2.915257814589145,-2.7754710419906368,-9.550291023939893,-4.551024457450397,-4.707131510786311,-4.589272322367449,-6.243245073400844,-2.9689285157149676,-3.853927304671137,-7.00903143760076,-8.243133983378726,-2.8108514947509753,-2.1177557637833035,-3.256550330868065,-7.865503674264006,-5.380597024476005,-10.383200146874996,-5.201416596582911,-9.913196517629261,-0.9911300598753203],[-3.1682706209020797,-8.217706195099385,-5.878899838671022,-9.525219678366163,-2.3388918749787213,-7.172402459885784,-8.478432457562638,-1.1074718716074006,-2.3662341497302157,-10.456777882371107,-9.689522729657439,-4.418539855249544,-6.032391973858083,-7.21960886445558,-2.3367153961191565,-8.16191483547097,-10.719142146838596,-3.4624215669877803,-3.6797447588047447,-4.0251990917417855,-3.9002180815630756,-9.555991337032916,-5.18259559155831,-10.131355481936478,-4.204725796166784,-7.984774637419013,-1.5835253210583506],[-3.6907063474960813,-3.7341336409281,-3.2454721262149633,-4.020608712629846,-3.281297978688059,-5.0172437658127595,-3.1933271849501734,-7.7963898885987,-3.73111886626001,-9.35453450664525,-6.208229374611885,-2.257675168572427,-3.4112977818505237,-2.089862646254399,-6.096437968623768,-3.100705695069777,-8.949069398537086,-1.905375903432984,-1.9722545836234537,-1.9648676140617645,-9.274491798971713,-6.7831953510849425,-9.35453450664525,-7.31765257938421,-7.3735330377786665,-5.267158613739243,-3.2700350935700784],[-2.4677551327310105,-8.46601472297182,-8.561324902776146,-7.868177722216201,-0.5192363285514866,-8.561324902776146,-8.298960638308655,-8.46601472297182,-1.7429474349452256,-8.46601472297182,-7.919471016603751,-5.443374996497906,-8.561324902776146,-4.566800675836256,-2.7892609207035393,-8.561324902776146,-8.561324902776146,-6.268790145635601,-4.407140340198028,-7.973538237874027,-6.289199017266808,-8.379003345982191,-8.030696651713976,-8.46601472297182,-5.258107929474194,-
8.561324902776146,-3.1228109057348257],[-1.596798460957614,-7.6001421705956735,-7.640964165115928,-5.344648685135478,-1.8921021056868312,-6.839336341561913,-8.110967794361665,-1.6228272590921338,-1.7628783045649916,-8.87310784640856,-7.001305669506969,-5.467159861987808,-8.179960665848615,-3.2183655671770013,-2.524218636471301,-8.293289351155618,-9.209580083029774,-4.57000847032435,-4.2425484264156506,-5.65423202154036,-7.560921457442392,-9.209580083029774,-7.307472556632853,-9.209580083029774,-6.877436187794184,-9.02725852623582,-2.172728230713227],[-2.254780968033424,-6.898209866138606,-2.027603216646053,-6.492744758030441,-2.462642464536694,-5.833499129146177,-6.898209866138606,-4.310445830910898,-2.0731012597852527,-6.898209866138606,-6.898209866138606,-6.42820623689287,-6.715888309344651,-6.898209866138606,-4.4558628307694015,-1.4955324842663262,-6.3675816150764355,-4.636446767664815,-5.766807754647505,-1.8703897472882491,-4.37248122183035,-4.348764695213034,-6.802899686334281,-4.473407140420311,-5.175443268397502,-6.898209866138606,-2.5557039896270073],[-3.872874353532438,-6.07983833755968,-5.605956228985375,-6.1495716755743555,-2.9181087352006836,-5.833140189836589,-6.998723104610883,-7.317176835729417,-3.8266394412494407,-8.857621876676566,-7.45070822835394,-4.745382824577915,-4.447858487031085,-5.448125692199715,-2.2255103117197566,-4.737230605516364,-9.03994343347052,-5.751541545953709,-3.132676045163294,-4.230201081753655,-7.038463433260397,-7.65364907235063,-6.230540738108023,-7.70494236673818,-8.16447469611662,-7.731610613820342,-0.3817890191036424],[-2.531179599331457,-6.00314605188182,-5.907835872077495,-4.694813232231641,-0.9292230185496455,-5.907835872077495,-6.00314605188182,-3.199785670975285,-2.3395844057521735,-6.00314605188182,-5.666673815260607,-3.8868905370792675,-4.0016660516716955,-5.009894278871537,-1.7362497244615698,-6.00314605188182,-6.00314605188182,-6.00314605188182,-5.820824495087865,-5.907835872077495,-3.118345339035111,-5.666673815
260607,-5.907835872077495,-6.00314605188182,-4.568061526592498,-3.7731316517226094,-3.1699327078256037],[-2.154456318300654,-3.132028909232904,-3.2042402732214343,-3.554775966080049,-3.8320798875631166,-3.2625149066892174,-4.1318426237305275,-2.7847122791384975,-2.7534204117779435,-5.68607428491371,-5.271518205104294,-3.779792629468334,-3.3524684982622075,-3.8123859357235683,-2.644532475328425,-3.3676057306367433,-6.237872536056438,-3.6809218434868227,-2.7030074975159986,-1.86142386740762,-4.4723885304095425,-4.918696830481552,-2.8042850405884527,-7.783949528282384,-4.702039558487341,-8.486442571260568,-3.2910629924454486]],"Positions":{"100":3,"101":4,"102":5,"103":6,"104":7,"105":8,"106":9,"107":10,"108":11,"109":12,"110":13,"111":14,"112":15,"113":16,"114":17,"115":18,"116":19,"117":20,"118":21,"119":22,"120":23,"121":24,"122":25,"32":26,"97":0,"98":1,"99":2},"Threshold":0.018782003473122023} \ No newline at end of file diff --git a/pkg/internal/transform/route/cluster.go b/pkg/internal/transform/route/cluster.go new file mode 100644 index 000000000..eccf39997 --- /dev/null +++ b/pkg/internal/transform/route/cluster.go @@ -0,0 +1,121 @@ +package route + +import ( + "embed" + "encoding/json" + "fmt" + + "github.com/AlessandroPomponio/go-gibberish/gibberish" + "github.com/AlessandroPomponio/go-gibberish/structs" + lru "github.com/hashicorp/golang-lru/v2" +) + +var classifier *structs.GibberishData + +const maxSegments = 10 + +var words, _ = lru.New[string, bool](8192) + +//go:embed classifier.json +var dataFile embed.FS + +func loadKnowledgeBase() (*structs.GibberishData, error) { + content, err := dataFile.ReadFile("classifier.json") + if err != nil { + return nil, fmt.Errorf("LoadKnowledgeBase: unable to read knowledge base content: %w", err) + } + + var data structs.GibberishData + err = json.Unmarshal(content, &data) + if err != nil { + return nil, fmt.Errorf("LoadKnowledgeBase: unable to unmarshal knowledge base content: %w", err) + } + + return &data, nil +} 
+ +func InitAutoClassifier() error { + var err error + classifier, err = loadKnowledgeBase() + if err != nil { + return err + } + + return nil +} + +//nolint:cyclop +func ClusterPath(path string) string { + if path == "" { + return path + } + + p := []byte(path) + sPos := 0 + sFwd := 0 + + skip := false + nSegments := 0 + for _, c := range p { + if c == '/' { + nSegments++ + if skip { + p[sPos] = '*' + sPos++ + } else if sFwd > sPos { + if !okWord(string(p[sPos:sFwd])) { + p[sPos] = '*' + sPos++ + } else { + sPos = sFwd + } + } + + if nSegments >= maxSegments { + break + } + + p[sPos] = '/' + sPos++ + sFwd = sPos + skip = false + } else if !skip { + p[sFwd] = c + sFwd++ + if !isAlpha(c) { + skip = true + } + } + } + + if skip { + p[sPos] = '*' + sPos++ + } else if sFwd > sPos { + if !okWord(string(p[sPos:sFwd])) { + p[sPos] = '*' + sPos++ + } else { + sPos = sFwd + } + } + + return string(p[:sPos]) +} + +func okWord(w string) bool { + _, ok := words.Get(w) + if ok { + return ok + } + if gibberish.IsGibberish(w, classifier) { + return false + } + + words.Add(w, true) + return true +} + +func isAlpha(c byte) bool { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '-' || c == '_' +} diff --git a/pkg/internal/transform/route/cluster_test.go b/pkg/internal/transform/route/cluster_test.go new file mode 100644 index 000000000..33fdc2d86 --- /dev/null +++ b/pkg/internal/transform/route/cluster_test.go @@ -0,0 +1,29 @@ +package route + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestURLClustering(t *testing.T) { + err := InitAutoClassifier() + assert.NoError(t, err) + assert.Equal(t, "", ClusterPath("")) + assert.Equal(t, "/users/*/*/*/job/*", ClusterPath("/users/fdklsd/j4elk/23993/job/2")) + assert.Equal(t, "*", ClusterPath("123")) + assert.Equal(t, "/*", ClusterPath("/123")) + assert.Equal(t, "*/", ClusterPath("123/")) + assert.Equal(t, "*/*", ClusterPath("123/ljgdflgjf")) + assert.Equal(t, "/*", ClusterPath("/**")) + 
assert.Equal(t, "/u/*", ClusterPath("/u/2")) + assert.Equal(t, "/products/*/org/*", ClusterPath("/products/1/org/3")) + assert.Equal(t, "/products//org/*", ClusterPath("/products//org/3")) + assert.Equal(t, "/attach", ClusterPath("/attach")) + assert.Equal(t, "/usuarios/*/*/*/trabajo/*", ClusterPath("/usuarios/fdklsd/j4elk/23993/trabajo/2")) + assert.Equal(t, "/Benutzer/*/*/*/Arbeit/*", ClusterPath("/Benutzer/fdklsd/j4elk/23993/Arbeit/2")) + assert.Equal(t, "/utilisateurs/*/*/*/tache/*", ClusterPath("/utilisateurs/fdklsd/j4elk/23993/tache/2")) + assert.Equal(t, "/products/", ClusterPath("/products/")) + assert.Equal(t, "/user-space/", ClusterPath("/user-space/")) + assert.Equal(t, "/user_space/", ClusterPath("/user_space/")) +} diff --git a/pkg/internal/transform/routes.go b/pkg/internal/transform/routes.go index 740f64bb6..0527331ac 100644 --- a/pkg/internal/transform/routes.go +++ b/pkg/internal/transform/routes.go @@ -20,6 +20,8 @@ const ( UnmatchPath = UnmatchType("path") // UnmatchWildcard sets the route field to a generic asterisk symbol UnmatchWildcard = UnmatchType("wildcard") + // UnmatchHeuristic detects the route field using a heuristic + UnmatchHeuristic = UnmatchType("heuristic") UnmatchDefault = UnmatchWildcard ) @@ -38,12 +40,29 @@ func RoutesProvider(rc *RoutesConfig) (node.MiddleFunc[[]request.Span, []request // set default value for Unmatch action var unmatchAction func(span *request.Span) switch rc.Unmatch { - case UnmatchWildcard, "": // default + case UnmatchWildcard, "": unmatchAction = setUnmatchToWildcard + + if len(rc.Patterns) == 0 { + slog.With("component", "RoutesProvider"). + Warn("No route match patterns configured. " + + "Without route definitions Beyla will not be able to generate a low cardinality " + + "route for trace span names. For optimal experience, please define your application " + + "HTTP route patterns or enable the route 'heuristic' mode. 
" + + "For more information please see the documentation at: " + + "https://grafana.com/docs/grafana-cloud/monitor-applications/beyla/configure/options/#routes-decorator. " + + "If your application is only using gRPC you can ignore this warning.") + } case UnmatchUnset: unmatchAction = leaveUnmatchEmpty case UnmatchPath: unmatchAction = setUnmatchToPath + case UnmatchHeuristic: + err := route.InitAutoClassifier() + if err != nil { + return nil, err + } + unmatchAction = classifyFromPath default: slog.With("component", "RoutesProvider"). Warn("invalid 'unmatch' value in configuration, defaulting to '"+string(UnmatchDefault)+"'", @@ -75,3 +94,9 @@ func setUnmatchToPath(str *request.Span) { str.Route = str.Path } } + +func classifyFromPath(s *request.Span) { + if s.Route == "" && (s.Type == request.EventTypeHTTP || s.Type == request.EventTypeHTTPClient) { + s.Route = route.ClusterPath(s.Path) + } +} diff --git a/pkg/internal/transform/routes_test.go b/pkg/internal/transform/routes_test.go index 8cbcd4142..e33b2f75b 100644 --- a/pkg/internal/transform/routes_test.go +++ b/pkg/internal/transform/routes_test.go @@ -69,3 +69,70 @@ func TestUnmatchedEmpty(t *testing.T) { Path: "/some/path", }}, testutil.ReadChannel(t, out, testTimeout)) } + +func TestUnmatchedAuto(t *testing.T) { + for _, tc := range []UnmatchType{UnmatchHeuristic} { + t.Run(string(tc), func(t *testing.T) { + router, err := RoutesProvider(&RoutesConfig{Unmatch: tc, Patterns: []string{"/user/:id"}}) + require.NoError(t, err) + in, out := make(chan []request.Span, 10), make(chan []request.Span, 10) + defer close(in) + go router(in, out) + in <- []request.Span{{Path: "/user/1234"}} + assert.Equal(t, []request.Span{{ + Path: "/user/1234", + Route: "/user/:id", + }}, testutil.ReadChannel(t, out, testTimeout)) + in <- []request.Span{{Path: "/some/path", Type: request.EventTypeHTTP}} + assert.Equal(t, []request.Span{{ + Path: "/some/path", + Route: "/some/path", + Type: request.EventTypeHTTP, + }}, 
testutil.ReadChannel(t, out, testTimeout)) + in <- []request.Span{{Path: "/customer/1/job/2", Type: request.EventTypeHTTP}} + assert.Equal(t, []request.Span{{ + Path: "/customer/1/job/2", + Route: "/customer/*/job/*", + Type: request.EventTypeHTTP, + }}, testutil.ReadChannel(t, out, testTimeout)) + in <- []request.Span{{Path: "/customer/lfdsjd/job/erwejre", Type: request.EventTypeHTTPClient}} + assert.Equal(t, []request.Span{{ + Path: "/customer/lfdsjd/job/erwejre", + Route: "/customer/*/job/*", + Type: request.EventTypeHTTPClient, + }}, testutil.ReadChannel(t, out, testTimeout)) + }) + } +} + +func BenchmarkRoutesProvider_Wildcard(b *testing.B) { + benchProvider(b, UnmatchWildcard) +} + +func BenchmarkRoutesProvider_Heuristic(b *testing.B) { + benchProvider(b, UnmatchHeuristic) +} + +func benchProvider(b *testing.B, unmatch UnmatchType) { + router, err := RoutesProvider(&RoutesConfig{Unmatch: unmatch, Patterns: []string{ + "/users/{id}", + "/users/{id}/product/{pid}", + }}) + if err != nil { + b.Fatal(err) + } + inCh, outCh := make(chan []request.Span, 10), make(chan []request.Span, 10) + // 40% of unmatched routes + benchmarkInput := []request.Span{ + {Type: request.EventTypeHTTP, Path: "/users/123"}, + {Type: request.EventTypeHTTP, Path: "/users/123/product/456"}, + {Type: request.EventTypeHTTP, Path: "/users"}, + {Type: request.EventTypeHTTP, Path: "/products/34322"}, + {Type: request.EventTypeHTTP, Path: "/users/123/delete"}, + } + go router(inCh, outCh) + for i := 0; i < b.N; i++ { + inCh <- benchmarkInput + <-outCh + } +} diff --git a/test/integration/configs/instrumenter-config-no-route.yml b/test/integration/configs/instrumenter-config-no-route.yml new file mode 100644 index 000000000..ad298c3ad --- /dev/null +++ b/test/integration/configs/instrumenter-config-no-route.yml @@ -0,0 +1,6 @@ +routes: + unmatch: heuristic +otel_metrics_export: + endpoint: http://otelcol:4318 +otel_traces_export: + endpoint: http://jaeger:4318 diff --git 
a/test/integration/red_test.go b/test/integration/red_test.go index 181da6ba9..ccd4d6ebc 100644 --- a/test/integration/red_test.go +++ b/test/integration/red_test.go @@ -288,3 +288,215 @@ func testREDMetricsGRPC(t *testing.T) { } }) } + +func testREDMetricsForHTTPLibraryNoRoute(t *testing.T, url, svcName string) { + path := "/basic/" + rndStr() + + // Call the instrumented service 3 times, forcing it to: + // - take at least 30ms to respond + // - return a 404 code + for i := 0; i < 3; i++ { + doHTTPGet(t, url+path+"?delay=30ms&status=404", 404) + doHTTPGet(t, url+"/echo", 203) + doHTTPGet(t, url+"/echoCall", 204) + } + + // Eventually, Prometheus would make this query visible + pq := prom.Client{HostPort: prometheusHostPort} + var results []prom.Result + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_server_duration_seconds_count{` + + `http_method="GET",` + + `http_status_code="404",` + + `service_namespace="integration-test",` + + `service_name="` + svcName + `",` + + `http_route="/basic/*",` + + `http_target="` + path + `"}`) + require.NoError(t, err) + // check duration_count has at least 3 calls and all the arguments + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + if len(results) > 0 { + res := results[0] + addr := net.ParseIP(res.Metric["net_sock_peer_addr"]) + assert.NotNil(t, addr) + } + }) + + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_server_request_size_bytes_count{` + + `http_method="GET",` + + `http_status_code="404",` + + `service_namespace="integration-test",` + + `service_name="` + svcName + `",` + + `http_route="/basic/*",` + + `http_target="` + path + `"}`) + require.NoError(t, err) + // check request_size_count has at least 3 calls and all the arguments + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + if len(results) > 0 { + res := 
results[0] + addr := net.ParseIP(res.Metric["net_sock_peer_addr"]) + assert.NotNil(t, addr) + } + }) + + // Make sure we see /echo + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_server_duration_seconds_count{` + + `http_method="GET",` + + `http_status_code="203",` + + `service_namespace="integration-test",` + + `http_route="/echo",` + + `service_name="` + svcName + `"}`) + require.NoError(t, err) + // check duration_count has at least 3 calls + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + }) + + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_server_request_size_bytes_count{` + + `http_method="GET",` + + `http_status_code="203",` + + `service_namespace="integration-test",` + + `http_route="/echo",` + + `service_name="` + svcName + `"}`) + require.NoError(t, err) + // check request_size_count has at least 3 calls + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + }) + + // Make sure we see the /echoBack server route + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_server_duration_seconds_count{` + + `http_method="GET",` + + `http_status_code="203",` + + `service_namespace="integration-test",` + + `http_route="/echoBack",` + + `service_name="` + svcName + `"}`) + require.NoError(t, err) + // check duration_count has at least 3 calls + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + }) + + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_server_request_size_bytes_count{` + + `http_method="GET",` + + `http_status_code="203",` + + `service_namespace="integration-test",` + + `http_route="/echoBack",` + + `service_name="` + svcName + `"}`) + require.NoError(t, err) + // check request_size_count has at least 3 calls + 
enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + }) + + // Make sure we see the HTTP client-side metrics + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_client_duration_seconds_count{` + + `http_method="GET",` + + `http_status_code="203",` + + `service_namespace="integration-test",` + + `service_name="` + svcName + `"}`) + require.NoError(t, err) + // check duration_count has at least 3 calls + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + }) + + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`http_client_request_size_bytes_count{` + + `http_method="GET",` + + `http_status_code="203",` + + `service_namespace="integration-test",` + + `service_name="` + svcName + `"}`) + require.NoError(t, err) + // check request_size_count has at least 3 calls + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + }) + + test.Eventually(t, testTimeout, func(t require.TestingT) { + var err error + results, err = pq.Query(`rpc_client_duration_seconds_count{` + + `rpc_grpc_status_code="0",` + + `service_name="` + svcName + `",` + + `service_namespace="integration-test",` + + `rpc_method="/routeguide.RouteGuide/GetFeature"}`) + require.NoError(t, err) + // check duration_count has at least 3 calls + enoughPromResults(t, results) + val := totalPromCount(t, results) + assert.LessOrEqual(t, 3, val) + }) + + // check duration_sum is at least 90ms (3 * 30ms) + var err error + results, err = pq.Query(`http_server_duration_seconds_sum{` + + `http_method="GET",` + + `http_status_code="404",` + + `service_name="` + svcName + `",` + + `service_namespace="integration-test",` + + `http_route="/basic/*",` + + `http_target="` + path + `"}`) + require.NoError(t, err) + enoughPromResults(t, results) + res := results[0] + require.Len(t, res.Value, 2) + sum, err := 
strconv.ParseFloat(fmt.Sprint(res.Value[1]), 64) + require.NoError(t, err) + assert.Less(t, sum, 1.0) + assert.Greater(t, sum, (90 * time.Millisecond).Seconds()) + addr := net.ParseIP(res.Metric["net_sock_peer_addr"]) + assert.NotNil(t, addr) + + // check request_size_sum is at least 114B (3 * 38B) + results, err = pq.Query(`http_server_request_size_bytes_sum{` + + `http_method="GET",` + + `http_status_code="404",` + + `service_name="` + svcName + `",` + + `service_namespace="integration-test",` + + `http_route="/basic/*",` + + `http_target="` + path + `"}`) + require.NoError(t, err) + enoughPromResults(t, results) + res = results[0] + require.Len(t, res.Value, 2) + sum, err = strconv.ParseFloat(fmt.Sprint(res.Value[1]), 64) + require.NoError(t, err) + assert.GreaterOrEqual(t, sum, 114.0) + addr = net.ParseIP(res.Metric["net_sock_peer_addr"]) + assert.NotNil(t, addr) +} + +func testREDMetricsHTTPNoRoute(t *testing.T) { + for _, testCaseURL := range []string{ + instrumentedServiceGorillaURL, + } { + t.Run(testCaseURL, func(t *testing.T) { + waitForTestComponents(t, testCaseURL) + testREDMetricsForHTTPLibraryNoRoute(t, testCaseURL, "testserver") + }) + } +} diff --git a/test/integration/suites_test.go b/test/integration/suites_test.go index 1a6cbee7c..059e7292b 100644 --- a/test/integration/suites_test.go +++ b/test/integration/suites_test.go @@ -366,3 +366,14 @@ func TestSuiteNodeClientTLS(t *testing.T) { require.NoError(t, compose.Close()) t.Run("BPF pinning folder unmounted", testBPFPinningUnmounted) } + +func TestSuiteNoRoutes(t *testing.T) { + compose, err := docker.ComposeSuite("docker-compose.yml", path.Join(pathOutput, "test-suite.log")) + require.NoError(t, err) + compose.Env = append(compose.Env, "INSTRUMENTER_CONFIG_SUFFIX=-no-route") + require.NoError(t, compose.Up()) + t.Run("RED metrics", testREDMetricsHTTPNoRoute) + t.Run("BPF pinning folder mounted", func(t *testing.T) { testBPFPinningMountedWithCount(t, 2) }) + require.NoError(t, compose.Close()) +
t.Run("BPF pinning folder unmounted", testBPFPinningUnmounted) +} diff --git a/vendor/github.com/AlessandroPomponio/go-gibberish/AUTHORS.md b/vendor/github.com/AlessandroPomponio/go-gibberish/AUTHORS.md new file mode 100644 index 000000000..53e2f051c --- /dev/null +++ b/vendor/github.com/AlessandroPomponio/go-gibberish/AUTHORS.md @@ -0,0 +1,5 @@ +# AUTHORS + +- [Rob Neuhaus](https://github.com/rrenaud), original author +- [Alessandro Pomponio](https://github.com/AlessandroPomponio) +- [Federico Domeniconi](https://github.com/domef) diff --git a/vendor/github.com/AlessandroPomponio/go-gibberish/LICENSE b/vendor/github.com/AlessandroPomponio/go-gibberish/LICENSE new file mode 100644 index 000000000..eb531ff98 --- /dev/null +++ b/vendor/github.com/AlessandroPomponio/go-gibberish/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Rob Renaud + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/vendor/github.com/AlessandroPomponio/go-gibberish/analysis/analysis.go b/vendor/github.com/AlessandroPomponio/go-gibberish/analysis/analysis.go new file mode 100644 index 000000000..662302468 --- /dev/null +++ b/vendor/github.com/AlessandroPomponio/go-gibberish/analysis/analysis.go @@ -0,0 +1,118 @@ +// Package analysis contains the functions needed to +// analyze lines. +package analysis + +import ( + "fmt" + "math" + "strings" + + "github.com/AlessandroPomponio/go-gibberish/consts" + "github.com/AlessandroPomponio/go-gibberish/structs" +) + +// AverageTransitionProbability returns the probability of +// generating the input string digraph by digraph according +// to the occurrences matrix. +func AverageTransitionProbability(line string, occurrences [][]float64, position map[rune]int) (float64, error) { + + logProb := 0.0 + transitionCt := 0.0 + + for _, pair := range GetDigraphs(line) { + + firstPosition, firstRuneFound := position[pair.First] + if !firstRuneFound { + return -1, fmt.Errorf("AverageTransitionProbability: unable to find the position of the rune %s", string(pair.First)) + } + + secondPosition, secondRuneFound := position[pair.Second] + if !secondRuneFound { + return -1, fmt.Errorf("AverageTransitionProbability: unable to find the position of the rune %s", string(pair.Second)) + } + + logProb += occurrences[firstPosition][secondPosition] + transitionCt++ + + } + + if transitionCt == 0 { + transitionCt = 1 + } + + return math.Exp(logProb / transitionCt), nil + +} + +// GetDigraphs returns pairs of adjacent runes, after +// normalizing the input line.
+func GetDigraphs(line string) []structs.Digraph { + + runes := Normalize(line) + if len(runes) == 0 { + return []structs.Digraph{} + } + + digraphs := make([]structs.Digraph, len(runes)-1) + for i := 0; i < len(runes)-1; i++ { + digraphs[i] = structs.Digraph{First: runes[i], Second: runes[i+1]} + } + + return digraphs + +} + +// Normalize returns the subset of runes in the line +// that are in the accepted characters. This helps +// keeping the model relatively small by ignoring +// punctuation, symbols, etc. +func Normalize(line string) []rune { + + line = strings.ToLower(line) + result := make([]rune, 0, len(line)) + + for _, r := range line { + + if strings.ContainsRune(consts.AcceptedCharacters, r) { + result = append(result, r) + } + + } + + return result + +} + +// MaxForSlice returns the maximum value in a +// float64 slice. +func MaxForSlice(slice []float64) float64 { + + max := -math.MaxFloat64 + for _, item := range slice { + + if item > max { + max = item + } + + } + + return max + +} + +// MinForSlice returns the minimum value in +// a float64 slice. +func MinForSlice(slice []float64) float64 { + + min := math.MaxFloat64 + for _, item := range slice { + + if item < min { + min = item + } + + } + + return min + +} diff --git a/vendor/github.com/AlessandroPomponio/go-gibberish/consts/accepted_characters.go b/vendor/github.com/AlessandroPomponio/go-gibberish/consts/accepted_characters.go new file mode 100644 index 000000000..7bfa0d942 --- /dev/null +++ b/vendor/github.com/AlessandroPomponio/go-gibberish/consts/accepted_characters.go @@ -0,0 +1,9 @@ +// Package consts contains constants. +package consts + +const ( + + // AcceptedCharacters is a string with all the letters + // in the English alphabet, plus a space. 
+ AcceptedCharacters = "abcdefghijklmnopqrstuvwxyz " +) diff --git a/vendor/github.com/AlessandroPomponio/go-gibberish/gibberish/gibberish.go b/vendor/github.com/AlessandroPomponio/go-gibberish/gibberish/gibberish.go new file mode 100644 index 000000000..2521d39de --- /dev/null +++ b/vendor/github.com/AlessandroPomponio/go-gibberish/gibberish/gibberish.go @@ -0,0 +1,15 @@ +// Package gibberish contains methods to tell whether +// the input is gibberish or not. +package gibberish + +import ( + "github.com/AlessandroPomponio/go-gibberish/analysis" + "github.com/AlessandroPomponio/go-gibberish/structs" +) + +// IsGibberish returns true if the input string is likely +// to be gibberish +func IsGibberish(input string, data *structs.GibberishData) bool { + value, err := analysis.AverageTransitionProbability(input, data.Occurrences, data.Positions) + return value <= data.Threshold && err == nil +} diff --git a/vendor/github.com/AlessandroPomponio/go-gibberish/persistence/persistence.go b/vendor/github.com/AlessandroPomponio/go-gibberish/persistence/persistence.go new file mode 100644 index 000000000..c37f75719 --- /dev/null +++ b/vendor/github.com/AlessandroPomponio/go-gibberish/persistence/persistence.go @@ -0,0 +1,52 @@ +// Package persistence contains functions needed to serialize +// and deserialize the model data. +package persistence + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + + "github.com/AlessandroPomponio/go-gibberish/structs" +) + +// WriteKnowledgeBase writes the gibberish data model to disk. 
+func WriteKnowledgeBase(data *structs.GibberishData, outputFileName string) error { + + toWrite, err := json.Marshal(data) + if err != nil { + return fmt.Errorf("WriteKnowledgeBase: unable to marshal training data: %s", err) + } + + err = ioutil.WriteFile(outputFileName, toWrite, 0644) + if err != nil { + return fmt.Errorf("WriteKnowledgeBase: unable to save knowledge file on disk: %s", err) + } + + return nil + +} + +// LoadKnowledgeBase loads the gibberish data model from disk. +func LoadKnowledgeBase(fileName string) (*structs.GibberishData, error) { + + file, err := os.Open(fileName) + if err != nil { + return nil, fmt.Errorf("LoadKnowledgeBase: unable to open knowledge base: %s", err) + } + defer file.Close() + content, err := ioutil.ReadAll(file) + if err != nil { + return nil, fmt.Errorf("LoadKnowledgeBase: unable to read knowledge base content: %s", err) + } + + var data structs.GibberishData + err = json.Unmarshal(content, &data) + if err != nil { + return nil, fmt.Errorf("LoadKnowledgeBase: unable to unmarshal knowledge base content: %s", err) + } + + return &data, nil + +} diff --git a/vendor/github.com/AlessandroPomponio/go-gibberish/structs/structs.go b/vendor/github.com/AlessandroPomponio/go-gibberish/structs/structs.go new file mode 100644 index 000000000..53a48c825 --- /dev/null +++ b/vendor/github.com/AlessandroPomponio/go-gibberish/structs/structs.go @@ -0,0 +1,18 @@ +// Package structs contains the definition +// of the structures used. +package structs + +// Digraph represents a two-dimensional +// n-gram. +type Digraph struct { + First rune + Second rune +} + +// GibberishData contains the data needed +// in order to perform gibberish detection.
+type GibberishData struct { + Occurrences [][]float64 + Positions map[rune]int + Threshold float64 +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 9d50cde61..d103fc35c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,3 +1,10 @@ +# github.com/AlessandroPomponio/go-gibberish v0.0.0-20191004143433-a2d4156f0396 +## explicit; go 1.13 +github.com/AlessandroPomponio/go-gibberish/analysis +github.com/AlessandroPomponio/go-gibberish/consts +github.com/AlessandroPomponio/go-gibberish/gibberish +github.com/AlessandroPomponio/go-gibberish/persistence +github.com/AlessandroPomponio/go-gibberish/structs # github.com/beorn7/perks v1.0.1 ## explicit; go 1.11 github.com/beorn7/perks/quantile