diff --git a/Beda.go b/Beda.go
new file mode 100644
index 0000000..6c0d881
--- /dev/null
+++ b/Beda.go
@@ -0,0 +1,290 @@
+package beda
+
+// NewStringDiff returns a pointer to a freshly allocated StringDiff whose S1
+// and S2 fields hold s1 and s2 respectively.
+func NewStringDiff(s1, s2 string) *StringDiff {
+	sd := new(StringDiff)
+	sd.S1 = s1
+	sd.S2 = s2
+	return sd
+}
+
+// StringDiff holds the two strings that the comparison methods of this
+// package (LevenshteinDistance, TrigramCompare, JaroDistance and
+// JaroWinklerDistance) operate on.
+//
+// For background on how these metrics differ, read
+// https://medium.com/@appaloosastore/string-similarity-algorithms-compared-3f7b4d12f0ff
+type StringDiff struct {
+	// S1 is the first string of the pair.
+	S1 string
+	// S2 is the second string of the pair.
+	S2 string
+}
+
+// LevenshteinDistance returns the Levenshtein distance between s1 and s2 by
+// wrapping them in a StringDiff and delegating to its LevenshteinDistance
+// method. The Levenshtein distance is the minimum number of single-character
+// edits required to change one word into the other, so the result is a
+// non-negative integer that grows with the length of the strings.
+//
+// Read https://github.com/mhutter/string-similarity and
+// https://en.wikipedia.org/wiki/Levenshtein_distance
+func LevenshteinDistance(s1, s2 string) int {
+	return NewStringDiff(s1, s2).LevenshteinDistance()
+}
+
+// LevenshteinDistance is the minimum number of single-character edits
+// required to change one word into the other, so the result is a positive
+// integer, sensitive to string length .
+// Which make it more difficult to draw pattern.
+//
+// Read https://github.com/mhutter/string-similarity and
+// https://en.wikipedia.org/wiki/Levenshtein_distance
+func (sd *StringDiff) LevenshteinDistance() int {
+ s := []byte(sd.S1)
+ t := []byte(sd.S2)
+ m := len(s)
+ n := len(t)
+ // for all i and j, d[i,j] will hold the Levenshtein distance between
+ // the first i characters of s and the first j characters of t
+ // note that d has (m+1)*(n+1) values
+ d := make([][]byte, m+1)
+ for i := range d {
+ d[i] = make([]byte, n+1)
+ }
+ // source prefixes can be transformed into empty string by
+ // dropping all characters
+ for i := 1; i <= m; i++ {
+ d[i][0] = byte(i)
+ }
+ // target prefixes can be reached from empty source prefix
+ // by inserting every character
+ for j := 1; j <= n; j++ {
+ d[0][j] = byte(j)
+ }
+
+ for j := 0; j < n; j++ {
+ for i := 0; i < m; i++ {
+ var substitutionCost byte
+ if s[i] == t[j] {
+ substitutionCost = 0
+ } else {
+ substitutionCost = 1
+ }
+ d[i+1][j+1] = byte(minimum(int(d[i][j+1]+1), // deletion
+ int(d[i+1][j]+1), // insertion
+ int(d[i][j]+substitutionCost))) // substitution
+ }
+ }
+ return int(d[m][n])
+}
+
+// trigram wraps a short run of bytes (three of them when produced by
+// maketrigrams) taken from an input string.
+type trigram struct {
+	Data []byte
+}
+
+// trigramuniqueset is a slice-backed collection that keeps at most one
+// trigram per distinct byte content, in insertion order.
+type trigramuniqueset struct {
+	Set []*trigram
+}
+
+// Add appends t to the set unless a trigram with the same bytes is already
+// stored, in which case the set is left untouched.
+func (tus *trigramuniqueset) Add(t *trigram) {
+	known := false
+	for idx := 0; idx < len(tus.Set) && !known; idx++ {
+		known = tus.Set[idx].Equals(t)
+	}
+	if !known {
+		tus.Set = append(tus.Set, t)
+	}
+}
+
+// Equals reports whether t and that hold the same bytes in the same order.
+func (t *trigram) Equals(that *trigram) bool {
+	size := len(t.Data)
+	if size != len(that.Data) {
+		return false
+	}
+	for pos := 0; pos < size; pos++ {
+		if t.Data[pos] != that.Data[pos] {
+			return false
+		}
+	}
+	return true
+}
+
+// maketrigrams pads d with one leading and one trailing space and returns
+// every window of three consecutive bytes of the padded data, in order of
+// appearance. An empty d yields an empty, non-nil slice. All returned
+// trigrams slice the same padded buffer.
+func maketrigrams(d []byte) []*trigram {
+	grams := make([]*trigram, 0)
+	if len(d) == 0 {
+		return grams
+	}
+
+	padded := append([]byte(" "), d...)
+	padded = append(padded, " "...)
+
+	// end is the exclusive upper bound of the current three-byte window.
+	for end := 3; end <= len(padded); end++ {
+		grams = append(grams, &trigram{Data: padded[end-3 : end]})
+	}
+	return grams
+}
+
+// TrigramCompare returns the trigram similarity of s1 and s2 by wrapping
+// them in a StringDiff and delegating to its TrigramCompare method.
+//
+// A trigram is a case of n-gram, a contiguous sequence of n (three, in this
+// case) items from a given sample. Here a string is the sample and a
+// character is an item.
+func TrigramCompare(s1, s2 string) float32 {
+	return NewStringDiff(s1, s2).TrigramCompare()
+}
+
+// TrigramCompare returns the trigram similarity of S1 and S2 as a value
+// between 0 (no trigram in common) and 1 (same set of trigrams).
+//
+// A trigram is a case of n-gram, a contiguous sequence of n (three, in this
+// case) items from a given sample. Here a string is the sample and a byte is
+// an item. Each string is split into trigrams by maketrigrams, and the
+// result is the number of distinct trigrams found in both strings divided
+// by the number of distinct trigrams found in either of them.
+//
+// Every distinct trigram is counted once, so repeated trigrams cannot push
+// the score above 1. Two strings that both produce no trigrams are reported
+// as identical (1) instead of dividing zero by zero.
+//
+// Read https://github.com/milk1000cc/trigram/blob/master/lib/trigram.rb
+// Read http://search.cpan.org/dist/String-Trigram/Trigram.pm
+// Read https://en.wikipedia.org/wiki/N-gram
+func (sd *StringDiff) TrigramCompare() float32 {
+	sGrams := maketrigrams([]byte(sd.S1))
+	tGrams := maketrigrams([]byte(sd.S2))
+	if len(sGrams) == 0 && len(tGrams) == 0 {
+		return 1
+	}
+
+	// Collapse each side to its distinct trigrams.
+	inFirst := make(map[string]struct{}, len(sGrams))
+	for _, g := range sGrams {
+		inFirst[string(g.Data)] = struct{}{}
+	}
+	inSecond := make(map[string]struct{}, len(tGrams))
+	for _, g := range tGrams {
+		inSecond[string(g.Data)] = struct{}{}
+	}
+
+	matching := 0
+	for g := range inFirst {
+		if _, ok := inSecond[g]; ok {
+			matching++
+		}
+	}
+	// Size of the union: shared trigrams would otherwise be counted twice.
+	unique := len(inFirst) + len(inSecond) - matching
+
+	return float32(float64(matching) / float64(unique))
+}
+
+func minimum(args ...int) int {
+ var min int
+ for i, v := range args {
+ if i == 0 || v < min {
+ min = v
+ }
+ }
+ return min
+}
+
+func nonmatching(a, b []byte) int {
+ ret := 0
+ var s, l []byte
+ if len(a) > len(b) {
+ l = a
+ s = b
+ } else {
+ l = b
+ s = a
+ }
+ ret += len(l) - len(s)
+ for i, ca := range s {
+ if l[i] != ca {
+ ret++
+ }
+ }
+ return ret
+}
+
+// matching counts the bytes of the shorter slice (a when both have the same
+// length) that occur anywhere in the other slice. Every position of the
+// shorter slice is counted on its own, so a repeated byte counts once per
+// occurrence.
+func matching(a, b []byte) int {
+	short, long := a, b
+	if len(a) > len(b) {
+		short, long = b, a
+	}
+
+	// present[c] records whether byte value c occurs in the longer slice.
+	var present [256]bool
+	for _, c := range long {
+		present[c] = true
+	}
+
+	found := 0
+	for _, c := range short {
+		if present[c] {
+			found++
+		}
+	}
+	return found
+}
+
+// JaroDistance returns the Jaro score of s1 and s2 by wrapping them in a
+// StringDiff and delegating to its JaroDistance method.
+func JaroDistance(s1, s2 string) float32 {
+	return NewStringDiff(s1, s2).JaroDistance()
+}
+
+// JaroDistance returns the Jaro similarity of S1 and S2 as a value between
+// 0 (nothing in common) and 1 (identical).
+//
+// Two bytes match when they are equal and their positions differ by no more
+// than max(len(S1), len(S2))/2 - 1; every byte takes part in at most one
+// match. With m matches and t transpositions (half the number of matched
+// bytes that line up with a different byte of the other string) the result
+// is
+//
+//	(m/len(S1) + m/len(S2) + (m-t)/m) / 3
+//
+// Two empty strings are reported as identical (1). If only one string is
+// empty, or no bytes match, the result is 0. The comparison is byte-wise,
+// so a multi-byte UTF-8 character counts as several symbols.
+//
+// Read https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+func (sd *StringDiff) JaroDistance() float32 {
+	s := []byte(sd.S1)
+	t := []byte(sd.S2)
+	if len(s) == 0 && len(t) == 0 {
+		return 1
+	}
+	if len(s) == 0 || len(t) == 0 {
+		return 0
+	}
+
+	// Bytes further apart than window positions are never matched.
+	longest := len(s)
+	if len(t) > longest {
+		longest = len(t)
+	}
+	window := longest/2 - 1
+	if window < 0 {
+		window = 0
+	}
+
+	sMatched := make([]bool, len(s))
+	tMatched := make([]bool, len(t))
+	matches := 0
+	for i := range s {
+		lo := i - window
+		if lo < 0 {
+			lo = 0
+		}
+		hi := i + window + 1
+		if hi > len(t) {
+			hi = len(t)
+		}
+		// Pair s[i] with the first unused equal byte inside the window.
+		for j := lo; j < hi; j++ {
+			if tMatched[j] || s[i] != t[j] {
+				continue
+			}
+			sMatched[i] = true
+			tMatched[j] = true
+			matches++
+			break
+		}
+	}
+	if matches == 0 {
+		return 0
+	}
+
+	// Walk the matched bytes of both strings in order; each pair that
+	// differs is half a transposition.
+	outOfOrder := 0
+	j := 0
+	for i := range s {
+		if !sMatched[i] {
+			continue
+		}
+		for !tMatched[j] {
+			j++
+		}
+		if s[i] != t[j] {
+			outOfOrder++
+		}
+		j++
+	}
+
+	m := float32(matches)
+	tt := float32(outOfOrder) / 2
+	s1 := float32(len(s))
+	s2 := float32(len(t))
+
+	return (1.0 / 3.0) * ((m / s1) + (m / s2) + ((m - tt) / m))
+}
+
+// JaroWinklerDistance returns the Jaro-Winkler score of s1 and s2 by
+// wrapping them in a StringDiff and delegating to its JaroWinklerDistance
+// method. The score uses a prefix scale which gives more favourable ratings
+// to strings that match from the beginning.
+//
+// p is the constant scaling factor for how much the score is adjusted
+// upwards for having a common prefix. The standard value for this constant
+// in Winkler's work is p=0.1.
+func JaroWinklerDistance(s1, s2 string, p float32) float32 {
+	return NewStringDiff(s1, s2).JaroWinklerDistance(p)
+}
+
+// JaroWinklerDistance returns the Jaro score of S1 and S2 adjusted upwards
+// for a shared prefix, which gives more favourable ratings to strings that
+// match from the beginning:
+//
+//	dw = dj + p*l*(1 - dj)
+//
+// where dj is the value of JaroDistance and l is the length, in bytes, of
+// the common prefix of S1 and S2, counted up to a maximum of 4.
+//
+// p is the constant scaling factor for how much the score is adjusted
+// upwards for having a common prefix. The standard value for this constant
+// in Winkler's work is p=0.1; with a prefix of at most 4 bytes, p must not
+// exceed 0.25 or the result can become larger than 1.
+//
+// Read https://github.com/flori/amatch
+// Read https://fr.wikipedia.org/wiki/Distance_de_Jaro-Winkler
+// Read https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+func (sd *StringDiff) JaroWinklerDistance(p float32) float32 {
+	// maxPrefix is the longest common prefix that is rewarded.
+	const maxPrefix = 4
+
+	dj := sd.JaroDistance()
+
+	// The prefix cannot be longer than the shorter string or the cap.
+	limit := len(sd.S1)
+	if len(sd.S2) < limit {
+		limit = len(sd.S2)
+	}
+	if limit > maxPrefix {
+		limit = maxPrefix
+	}
+
+	sim := 0
+	for sim < limit && sd.S1[sim] == sd.S2[sim] {
+		sim++
+	}
+
+	dw := dj + ((p * float32(sim)) * (1.0 - dj))
+
+	return dw
+}
diff --git a/Beda_test.go b/Beda_test.go
new file mode 100644
index 0000000..d560e20
--- /dev/null
+++ b/Beda_test.go
@@ -0,0 +1,122 @@
+package beda
+
+import "testing"
+
+// TestLehvenstein is one table entry for TestLevenshteinDistance: a pair of
+// input strings and the distance expected for them.
+type TestLehvenstein struct {
+	S1 string
+	S2 string
+	D  int
+}
+
+// TestLevenshteinDistance checks StringDiff.LevenshteinDistance against a
+// table of string pairs and their expected distances.
+func TestLevenshteinDistance(t *testing.T) {
+	testData := []*TestLehvenstein{
+		{S1: "abc", S2: "abd", D: 1},
+		{S1: "abc", S2: "abc", D: 0},
+		{S1: "abc", S2: "ade", D: 2},
+		{S1: "abc", S2: "def", D: 3},
+		{S1: "abc", S2: "abca", D: 1},
+		{S1: "abc", S2: "abcabc", D: 3},
+		{S1: "abc", S2: "ab", D: 1},
+		{S1: "abc", S2: "", D: 3},
+	}
+
+	for idx := range testData {
+		td := testData[idx]
+		sd := NewStringDiff(td.S1, td.S2)
+		if sd.LevenshteinDistance() == td.D {
+			continue
+		}
+		t.Error("Distance between", td.S1, "and", td.S2, "expected to", td.D, "but", sd.LevenshteinDistance())
+	}
+}
+
+// TestTrigram is one table entry for TestTrigramCompare: a pair of input
+// strings and the similarity expected for them.
+type TestTrigram struct {
+	S1 string
+	S2 string
+	D  float32
+}
+
+func TestTrigramCompare(t *testing.T) {
+ testData := make([]*TestTrigram, 0)
+ testData = append(testData, &TestTrigram{
+ S1: "Twitter v1",
+ S2: "Twitter v2",
+ D: 0.6666667,
+ }, &TestTrigram{
+ S1: "Twitter v1",
+ S2: "Twitter v1",
+ D: 1,
+ })
+ for _, td := range testData {
+ sd := NewStringDiff(td.S1, td.S2)
+ if sd.TrigramCompare() != td.D {
+ t.Error("trigram Compare between", td.S1, "and", td.S2, "expected to", td.D, "but", sd.TrigramCompare())
+ }
+ }
+}
+
+// TestJaroDistancce is one table entry shared by TestJaroDistance and
+// TestJaroWinklerDistance: a pair of input strings and the score expected
+// for them.
+type TestJaroDistancce struct {
+	S1 string
+	S2 string
+	DJ float32
+}
+
+func TestJaroDistance(t *testing.T) {
+ testData := make([]*TestJaroDistancce, 0)
+ testData = append(testData, &TestJaroDistancce{
+ S1: "martha",
+ S2: "marhta",
+ DJ: 0.9444444,
+ }, &TestJaroDistancce{
+ S1: "martha",
+ S2: "martha",
+ DJ: 1,
+ })
+ for _, td := range testData {
+ sd := NewStringDiff(td.S1, td.S2)
+ if sd.JaroDistance() != td.DJ {
+ t.Error("Jaro Distance between", td.S1, "and", td.S2, "expected to", td.DJ, "but", sd.JaroDistance())
+ }
+ }
+}
+
+func TestJaroWinklerDistance(t *testing.T) {
+ testData := make([]*TestJaroDistancce, 0)
+ testData = append(testData, &TestJaroDistancce{
+ S1: "martha",
+ S2: "marhta",
+ DJ: 0.96111107,
+ }, &TestJaroDistancce{
+ S1: "martha",
+ S2: "martha",
+ DJ: 1,
+ })
+ for _, td := range testData {
+ sd := NewStringDiff(td.S1, td.S2)
+ if sd.JaroWinklerDistance(0.1) != td.DJ {
+ t.Error("Jaro Distance between", td.S1, "and", td.S2, "expected to", td.DJ, "but", sd.JaroWinklerDistance(0.1))
+ }
+ }
+}
diff --git a/CODE_OF_CONDUCTS.md b/CODE_OF_CONDUCTS.md
new file mode 100644
index 0000000..d7c30db
--- /dev/null
+++ b/CODE_OF_CONDUCTS.md
@@ -0,0 +1,74 @@
+## Code of Conduct
+
+### Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of experience,
+nationality, personal appearance, race, religion, or sexual identity and
+orientation.
+
+### Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+ address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+### Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+### Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+### Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at `oss@hyperjump.tech`. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+### Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at [http://contributor-covenant.org/version/1/4][version]
+
+[homepage]: http://contributor-covenant.org
+[version]: http://contributor-covenant.org/version/1/4/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..9eaff05
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,32 @@
+# Contributing
+
+When contributing to this repository, please first discuss the change you wish to make via issue,
+email, or any other method with the owners of this repository before making a change.
+
+Please note that we have a code of conduct; please follow it in all your interactions with the project.
+
+## Fork Process
+
+1. Ensure that you have Go (version 1.13 or later) installed on your system.
+2. Fork this project into your own GitHub account.
+3. Clone the forked `beda` repository from your account.
+4. Enter the cloned directory.
+5. Add the original `hyperjumptech/beda` repository as a remote named `upstream`.
+6. Now you can work on your fork.
+7. Remember to pull from your upstream often: `git pull upstream master`
+
+## Pull Request Process
+
+1. Make sure you always have the most recent update from your upstream: `git pull upstream master`
+2. Resolve all conflicts, if any.
+3. Make sure `make test` always succeeds (you won't be able to create a pull request if it fails; circle-ci, travis-ci and azure-devops will make sure of this).
+4. Push your code to the master branch of your forked repository.
+5. Create a pull request.
+    * Go to `github.com/hyperjumptech/beda`
+    * Select the `Pull Request` tab
+    * Click the "New pull request" button
+    * Click "compare across forks"
+    * Set the head repository to your fork and the base repository to `hyperjumptech/beda`
+    * Hit the "Create pull request" button
+    * Fill in all the information needed to help us understand your pull request.
+
diff --git a/LICENSE-2.0.txt b/LICENSE-2.0.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE-2.0.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..99ce77d
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,13 @@
+Copyright 2019 hyperjump.tech
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7afa4b4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+# Enable Go modules for every command run by the recipes below. A plain
+# make variable is not passed to recipe shells unless it is exported.
+export GO111MODULE=on
+
+.PHONY: build test test-coverage
+
+# Compile every package in the module.
+build:
+	go build ./...
+
+# Build, run the tests with coverage counting (written to coverage.out),
+# then lint; golint makes the target fail when it reports a problem.
+test: build
+	go test ./... -v -covermode=count -coverprofile=coverage.out
+	golint -set_exit_status .
+
+# Run the tests, then open the HTML view of coverage.out.
+test-coverage: test
+	go tool cover -html=coverage.out
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cd73392
--- /dev/null
+++ b/README.md
@@ -0,0 +1,210 @@
+# BEDA
+
+[data:image/s3,"s3://crabby-images/bb68c/bb68c3df7a7a4bea9e826266026b4eabef574e16" alt="Build Status"](https://travis-ci.org/hyperjumptech/beda)
+[data:image/s3,"s3://crabby-images/17683/1768380119c0c89a3a7922cc565ecc789708079d" alt="License"](https://opensource.org/licenses/Apache-2.0)
+
+## Get BEDA
+
+```
+go get github.com/hyperjumptech/beda
+```
+
+## Introduction
+
+**BEDA** is a Go library for detecting differences or similarities between two words or strings.
+Sometimes you want to detect whether a string is "the same as" or "somewhat similar to" another string.
+Suppose your system wants to detect when a user puts a bad word in their user name, or
+to forbid users from using unwanted words in their posts. You need to implement some *not so easy*
+algorithm to do this task.
+
+**BEDA** contains implementations of several algorithms for detecting word differences. They are:
+
+1. Levenshtein Distance : A string metric for measuring the difference between two sequences. [Wikipedia](https://en.wikipedia.org/wiki/Levenshtein_distance)
+2. Trigram or n-gram : A contiguous sequence of n items from a given sample of text or speech. [Wikipedia](https://en.wikipedia.org/wiki/N-gram)
+3. Jaro & Jaro Winkler Distance : A string metric measuring an edit distance between two sequences. [Wikipedia](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance)
+
+**BEDA** is the Indonesian word for "different".
+
+## Usage
+
+```go
+import "github.com/hyperjumptech/beda"
+
+sd := beda.NewStringDiff("The First String", "The Second String")
+lDist := sd.LevenshteinDistance()
+tDiff := sd.TrigramCompare()
+jDiff := sd.JaroDistance()
+jwDiff := sd.JaroWinklerDistance(0.1)
+
+fmt.Printf("Levenshtein Distance is %d \n", lDist)
+fmt.Printf("Trigram Compare is %f \n", tDiff)
+fmt.Printf("Jaro Distance is %f \n", jDiff)
+fmt.Printf("Jaro Winkler Distance is %f \n", jwDiff)
+```
+
+## Algorithms and APIs
+
+String comparison is not so easy.
+There are a couple of algorithms for doing this comparison, and each of them yields a different result.
+Thus one may be better suited to a given purpose than another.
+
+To understand how, when, and which algorithm will benefit your string comparison quest,
+please read [String similarity algorithms compared](https://medium.com/@appaloosastore/string-similarity-algorithms-compared-3f7b4d12f0ff).
+Read it through; it will help you a lot.
+
+```go
+type StringDiff struct {
+ S1 string
+ S2 string
+}
+```
+
+### Levenshtein Distance
+
+LevenshteinDistance is the minimum number of single-character edits
+required to change one word into the other, so the result is a non-negative
+integer. The algorithm is sensitive to string length, which makes it more difficult to draw patterns from the result.
+
+Reading :
+
+- [https://github.com/mhutter/string-similarity](https://github.com/mhutter/string-similarity)
+- [https://en.wikipedia.org/wiki/Levenshtein_distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
+
+API :
+
+```go
+func LevenshteinDistance(s1, s2 string) int
+func (sd *StringDiff) LevenshteinDistance() int
+```
+
+`s1` is the first string to compare
+`s2` is the second string to compare
+The closer return value to 0 means the more similar the two words.
+
+Example :
+
+```go
+sd := beda.NewStringDiff("abcd", "bc")
+lDist := sd.LevenshteinDistance()
+fmt.Printf("Distance is %d \n", lDist) // prints : Distance is 2
+```
+
+or
+
+```go
+fmt.Printf("Distance is %d \n", beda.LevenshteinDistance("abcd", "bc"))
+```
+
+### TriGram Compare
+
+TrigramCompare is a case of n-gram, a contiguous sequence of n (three, in this case) items from a given sample.
+In our case, an application name is a sample and a character is an item.
+
+Reading:
+
+- [https://github.com/milk1000cc/trigram/blob/master/lib/trigram.rb](https://github.com/milk1000cc/trigram/blob/master/lib/trigram.rb)
+- [http://search.cpan.org/dist/String-Trigram/Trigram.pm](http://search.cpan.org/dist/String-Trigram/Trigram.pm)
+- [https://en.wikipedia.org/wiki/N-gram](https://en.wikipedia.org/wiki/N-gram)
+
+API :
+
+```go
+func TrigramCompare(s1, s2 string) float32
+func (sd *StringDiff) TrigramCompare() float32
+```
+
+`s1` is the first string to compare
+`s2` is the second string to compare
+The closer the result is to 1 (one), the more similar the two words are in their sequences of trigrams; 1 means they are 100% similar.
+
+Example :
+
+```go
+sd := beda.NewStringDiff("martha", "marhta")
+diff := sd.TrigramCompare()
+fmt.Printf("Differences is %f \n", diff)
+```
+
+or
+
+```go
+fmt.Printf("Distance is %f \n", beda.TrigramCompare("martha", "marhta"))
+```
+
+### Jaro Distance
+
+JaroDistance measures how similar two words are, based on the number of
+characters they have in common and the number of transpositions between
+those characters.
+
+API :
+
+```go
+func JaroDistance(s1, s2 string) float32
+func (sd *StringDiff) JaroDistance() float32
+```
+
+`s1` is the first string to compare
+`s2` is the second string to compare
+The closer the result is to 1 (one), the more similar the two words are; 1 means they are 100% similar.
+
+Example :
+
+```go
+sd := beda.NewStringDiff("martha", "marhta")
+diff := sd.JaroDistance()
+fmt.Printf("Differences is %f \n", diff)
+```
+
+or
+
+```go
+fmt.Printf("Distance is %f \n", beda.JaroDistance("martha", "marhta"))
+```
+
+### Jaro Winkler Distance
+
+JaroWinklerDistance uses a prefix scale which gives more
+favourable ratings to strings that match from the beginning
+for a set prefix length
+
+Reading :
+
+- [https://github.com/flori/amatch](https://github.com/flori/amatch)
+- [https://fr.wikipedia.org/wiki/Distance_de_Jaro-Winkler](https://fr.wikipedia.org/wiki/Distance_de_Jaro-Winkler)
+- [https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance)
+
+API :
+
+```go
+func JaroWinklerDistance(s1, s2 string, p float32) float32
+func (sd *StringDiff) JaroWinklerDistance(p float32) float32
+```
+
+or
+
+```go
+fmt.Printf("Distance is %f \n", beda.JaroWinklerDistance("martha", "marhta", 0.1))
+```
+
+`s1` is the first string to compare
+`s2` is the second string to compare
+`p` argument is constant scaling factor for how much the score is adjusted upwards for having common prefixes.
+The standard value for this constant in Winkler’s work is `p = 0.1`
+
+The closer the result is to 1 (one), the more similar the two words are; 1 means they are 100% similar.
+
+Example :
+
+```go
+sd := beda.NewStringDiff("martha", "marhta")
+diff := sd.JaroWinklerDistance(0.1)
+fmt.Printf("Differences is %f \n", diff)
+```
+
+# Tasks and Help Wanted
+
+Yes. We need contributors to make **BEDA** even better and more useful to the Open Source community.
+
+If you really want to help us, simply `Fork` the project and open a Pull Request.
+Please read our [Contribution Manual](CONTRIBUTING.md) and [Code of Conduct](CODE_OF_CONDUCTS.md).
\ No newline at end of file
diff --git a/coverage.out b/coverage.out
new file mode 100644
index 0000000..e75cdac
--- /dev/null
+++ b/coverage.out
@@ -0,0 +1,72 @@
+mode: count
+github.com/hyperjumptech/beda/Beda.go:4.47,9.2 1 14
+github.com/hyperjumptech/beda/Beda.go:26.45,29.2 2 0
+github.com/hyperjumptech/beda/Beda.go:38.49,47.19 6 8
+github.com/hyperjumptech/beda/Beda.go:52.2,52.26 1 8
+github.com/hyperjumptech/beda/Beda.go:57.2,57.26 1 8
+github.com/hyperjumptech/beda/Beda.go:61.2,61.25 1 8
+github.com/hyperjumptech/beda/Beda.go:74.2,74.21 1 8
+github.com/hyperjumptech/beda/Beda.go:47.19,49.3 1 32
+github.com/hyperjumptech/beda/Beda.go:52.26,54.3 1 24
+github.com/hyperjumptech/beda/Beda.go:57.26,59.3 1 24
+github.com/hyperjumptech/beda/Beda.go:61.25,62.26 1 24
+github.com/hyperjumptech/beda/Beda.go:62.26,64.20 2 72
+github.com/hyperjumptech/beda/Beda.go:69.4,71.36 1 72
+github.com/hyperjumptech/beda/Beda.go:64.20,66.5 1 18
+github.com/hyperjumptech/beda/Beda.go:66.10,68.5 1 54
+github.com/hyperjumptech/beda/Beda.go:84.46,85.28 1 40
+github.com/hyperjumptech/beda/Beda.go:90.2,90.30 1 22
+github.com/hyperjumptech/beda/Beda.go:85.28,86.18 1 202
+github.com/hyperjumptech/beda/Beda.go:86.18,88.4 1 18
+github.com/hyperjumptech/beda/Beda.go:93.46,94.35 1 402
+github.com/hyperjumptech/beda/Beda.go:97.2,97.27 1 402
+github.com/hyperjumptech/beda/Beda.go:102.2,102.13 1 36
+github.com/hyperjumptech/beda/Beda.go:94.35,96.3 1 0
+github.com/hyperjumptech/beda/Beda.go:97.27,98.24 1 496
+github.com/hyperjumptech/beda/Beda.go:98.24,100.4 1 366
+github.com/hyperjumptech/beda/Beda.go:105.40,107.17 2 4
+github.com/hyperjumptech/beda/Beda.go:110.2,115.33 5 4
+github.com/hyperjumptech/beda/Beda.go:119.2,119.12 1 4
+github.com/hyperjumptech/beda/Beda.go:107.17,109.3 1 0
+github.com/hyperjumptech/beda/Beda.go:115.33,118.3 2 40
+github.com/hyperjumptech/beda/Beda.go:126.44,129.2 2 0
+github.com/hyperjumptech/beda/Beda.go:139.48,146.25 7 2
+github.com/hyperjumptech/beda/Beda.go:154.2,155.25 2 2
+github.com/hyperjumptech/beda/Beda.go:158.2,158.25 1 2
+github.com/hyperjumptech/beda/Beda.go:161.2,163.35 2 2
+github.com/hyperjumptech/beda/Beda.go:146.25,147.26 1 20
+github.com/hyperjumptech/beda/Beda.go:147.26,148.19 1 200
+github.com/hyperjumptech/beda/Beda.go:148.19,151.5 1 18
+github.com/hyperjumptech/beda/Beda.go:155.25,157.3 1 20
+github.com/hyperjumptech/beda/Beda.go:158.25,160.3 1 20
+github.com/hyperjumptech/beda/Beda.go:166.31,168.25 2 72
+github.com/hyperjumptech/beda/Beda.go:173.2,173.12 1 72
+github.com/hyperjumptech/beda/Beda.go:168.25,169.24 1 216
+github.com/hyperjumptech/beda/Beda.go:169.24,171.4 1 123
+github.com/hyperjumptech/beda/Beda.go:176.35,179.21 3 4
+github.com/hyperjumptech/beda/Beda.go:186.2,187.23 2 4
+github.com/hyperjumptech/beda/Beda.go:192.2,192.12 1 4
+github.com/hyperjumptech/beda/Beda.go:179.21,182.3 2 0
+github.com/hyperjumptech/beda/Beda.go:182.8,185.3 2 4
+github.com/hyperjumptech/beda/Beda.go:187.23,188.17 1 24
+github.com/hyperjumptech/beda/Beda.go:188.17,190.4 1 4
+github.com/hyperjumptech/beda/Beda.go:195.32,197.21 2 4
+github.com/hyperjumptech/beda/Beda.go:204.2,205.23 2 4
+github.com/hyperjumptech/beda/Beda.go:213.2,213.12 1 4
+github.com/hyperjumptech/beda/Beda.go:197.21,200.3 2 0
+github.com/hyperjumptech/beda/Beda.go:200.8,203.3 2 4
+github.com/hyperjumptech/beda/Beda.go:205.23,206.24 1 24
+github.com/hyperjumptech/beda/Beda.go:206.24,207.16 1 68
+github.com/hyperjumptech/beda/Beda.go:207.16,209.10 2 24
+github.com/hyperjumptech/beda/Beda.go:219.42,222.2 2 0
+github.com/hyperjumptech/beda/Beda.go:227.46,238.2 8 4
+github.com/hyperjumptech/beda/Beda.go:247.60,250.2 2 0
+github.com/hyperjumptech/beda/Beda.go:263.62,269.21 6 2
+github.com/hyperjumptech/beda/Beda.go:276.2,276.22 1 2
+github.com/hyperjumptech/beda/Beda.go:287.2,289.11 2 2
+github.com/hyperjumptech/beda/Beda.go:269.21,272.3 2 0
+github.com/hyperjumptech/beda/Beda.go:272.8,275.3 2 2
+github.com/hyperjumptech/beda/Beda.go:276.22,277.16 1 9
+github.com/hyperjumptech/beda/Beda.go:277.16,279.15 2 8
+github.com/hyperjumptech/beda/Beda.go:279.15,280.10 1 1
+github.com/hyperjumptech/beda/Beda.go:282.9,283.9 1 1
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..be0da7a
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/hyperjumptech/beda
+
+go 1.13