Skip to content

Commit daf374c

Browse files
authored
Merge pull request #357 from CrushN8r/remSpCh
Remove Special Characters Function
2 parents 5e16eb0 + 7a39fac commit daf374c

File tree

2 files changed

+48
-2
lines changed

2 files changed

+48
-2
lines changed

chipper/pkg/wirepod/ttr/kgsim.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ import (
99
"regexp"
1010
"strings"
1111
"time"
12+
"unicode"
13+
14+
"golang.org/x/text/transform"
15+
"golang.org/x/text/unicode/norm"
1216

1317
"github.com/fforchino/vector-go-sdk/pkg/vector"
1418
"github.com/fforchino/vector-go-sdk/pkg/vectorpb"
@@ -63,11 +67,49 @@ func Remember(user, ai openai.ChatCompletionMessage, esn string) {
6367
PlaceChat(currentChat)
6468
}
6569

70+
func isMn(r rune) bool {
71+
return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
72+
}
73+
6674
func removeSpecialCharacters(str string) string {
75+
76+
// these two lines create a transformation that decomposes characters, removes non-spacing marks (like diacritics), and then recomposes the characters, effectively removing special characters
77+
t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
78+
result, _, _ := transform.String(t, str)
79+
80+
// Define the regular expression to match special characters
6781
re := regexp.MustCompile(`[&^*#@]`)
68-
return removeEmojis(re.ReplaceAllString(str, ""))
82+
83+
// Replace special characters with an empty string
84+
result = removeEmojis(re.ReplaceAllString(result, ""))
85+
86+
// Replace special characters with ASCII
87+
// * COPY/PASTE TO ADD MORE CHARACTERS:
88+
// result = strings.ReplaceAll(result, "", "")
89+
result = strings.ReplaceAll(result, "‘", "'")
90+
result = strings.ReplaceAll(result, "’", "'")
91+
result = strings.ReplaceAll(result, "“", "\"")
92+
result = strings.ReplaceAll(result, "”", "\"")
93+
result = strings.ReplaceAll(result, "—", "-")
94+
result = strings.ReplaceAll(result, "–", "-")
95+
result = strings.ReplaceAll(result, "…", "...")
96+
result = strings.ReplaceAll(result, "\u00A0", " ")
97+
result = strings.ReplaceAll(result, "•", "*")
98+
result = strings.ReplaceAll(result, "¼", "1/4")
99+
result = strings.ReplaceAll(result, "½", "1/2")
100+
result = strings.ReplaceAll(result, "¾", "3/4")
101+
result = strings.ReplaceAll(result, "×", "x")
102+
result = strings.ReplaceAll(result, "÷", "/")
103+
result = strings.ReplaceAll(result, "ç", "c")
104+
result = strings.ReplaceAll(result, "©", "(c)")
105+
result = strings.ReplaceAll(result, "®", "(r)")
106+
result = strings.ReplaceAll(result, "™", "(tm)")
107+
result = strings.ReplaceAll(result, "@", "(a)")
108+
result = strings.ReplaceAll(result, " AI ", " A. I. ")
109+
return result
69110
}
70111

112+
71113
func removeEmojis(input string) string {
72114
// a mess, but it works!
73115
re := regexp.MustCompile(`[\x{1F600}-\x{1F64F}]|[\x{1F300}-\x{1F5FF}]|[\x{1F680}-\x{1F6FF}]|[\x{1F1E0}-\x{1F1FF}]|[\x{2600}-\x{26FF}]|[\x{2700}-\x{27BF}]|[\x{1F900}-\x{1F9FF}]|[\x{1F004}]|[\x{1F0CF}]|[\x{1F18E}]|[\x{1F191}-\x{1F251}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]|[\x{1F004}-\x{1F0CF}]|[\x{1F191}-\x{1F251}]|[\x{2B50}]`)

chipper/pkg/wirepod/ttr/kgsim_cmds.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ func ModelIsSupported(cmd LLMCommand, model string) bool {
142142
}
143143

144144
func CreatePrompt(origPrompt string, model string) string {
145-
prompt := origPrompt + "\n\n" + "The user input might not be spelt/punctuated correctly as it is coming from speech-to-text software. Do not include special characters in your answer. This includes the following characters (not including the quotes): '& ^ * # @ -'. DON'T INCLUDE THESE. DON'T MAKE LISTS WITH FORMATTING. THINK OF THE SPEECH-TO-TEXT ENGINE. If you want to use a hyphen, Use it like this: 'something something -- something -- something something'."
145+
prompt := origPrompt + "\n\n" + "Keep in mind, user input comes from speech-to-text software, so respond accordingly. No special characters, especially these: & ^ * # @ - . No lists. No formatting."
146146
if vars.APIConfig.Knowledge.CommandsEnable {
147147
prompt = prompt + "\n\n" + "You are running ON an Anki Vector robot. You have a set of commands. If you include an emoji, I will make you start over. If you want to use a command but it doesn't exist or your desired parameter isn't in the list, avoid using the command. The format is {{command||parameter}}. You can embed these in sentences. Example: \"User: How are you feeling? | Response: \"{{playAnimationWI||sad}} I'm feeling sad...\". Square brackets ([]) are not valid.\n\nUse the playAnimation or playAnimationWI commands if you want to express emotion! You are very animated and good at following instructions. Animation takes precendence over words. You are to include many animations in your response.\n\nHere is every valid command:"
148148
for _, cmd := range ValidLLMCommands {
@@ -268,6 +268,10 @@ func DoPlaySound(sound string, robot *vector.Vector) error {
268268
}
269269

270270
func DoSayText(input string, robot *vector.Vector) error {
271+
272+
// just before vector speaks
273+
removeSpecialCharacters(input)
274+
271275
// TODO
272276
if (vars.APIConfig.STT.Language != "en-US" && vars.APIConfig.Knowledge.Provider == "openai") || os.Getenv("USE_OPENAI_VOICE") == "true" {
273277
err := DoSayText_OpenAI(robot, input)

0 commit comments

Comments
 (0)