Skip to content

Commit

Permalink
do not exit when token refresh fails while pod is running
Browse files Browse the repository at this point in the history
extend cmd line support to accept multiple arguments

Signed-off-by: Henry Avetisyan <[email protected]>
  • Loading branch information
havetisyan committed Sep 12, 2024
1 parent 5555a83 commit da4485b
Show file tree
Hide file tree
Showing 12 changed files with 430 additions and 324 deletions.
81 changes: 52 additions & 29 deletions libs/go/sia/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ func RoleKey(rotateKey bool, roleKey, svcKey string) (*rsa.PrivateKey, error) {
}
}

func GetRoleCertificates(ztsUrl string, opts *options.Options) (int, int) {
func GetRoleCertificates(ztsUrl string, opts *options.Options) (int, []string) {

//initialize our return state to success
failures := 0
failures := make([]string, 0)

for _, role := range opts.Roles {
var roleRequest = new(zts.RoleCertificateRequest)
Expand All @@ -99,7 +99,7 @@ func GetRoleCertificates(ztsUrl string, opts *options.Options) (int, int) {
client, err := util.ZtsClient(ztsUrl, opts.ZTSServerName, svcKeyFile, svcCertFile, opts.ZTSCACertFile)
if err != nil {
log.Printf("unable to initialize ZTS Client with url %s for role %s, err: %v\n", ztsUrl, role.Name, err)
failures += 1
failures = append(failures, role.Name)
continue
}
client.AddCredentials("User-Agent", opts.Version)
Expand All @@ -112,7 +112,7 @@ func GetRoleCertificates(ztsUrl string, opts *options.Options) (int, int) {
}
if err != nil {
log.Printf("unable to read private key role %s, err: %v\n", role.Name, err)
failures += 1
failures = append(failures, role.Name)
continue
}

Expand All @@ -134,7 +134,7 @@ func GetRoleCertificates(ztsUrl string, opts *options.Options) (int, int) {
csr, err := util.GenerateRoleCertCSR(key, roleCertReqOptions)
if err != nil {
log.Printf("unable to generate CSR for %s, err: %v\n", role.Name, err)
failures += 1
failures = append(failures, role.Name)
continue
}
roleRequest.Csr = csr
Expand All @@ -155,18 +155,18 @@ func GetRoleCertificates(ztsUrl string, opts *options.Options) (int, int) {
roleCert, err := client.PostRoleCertificateRequestExt(roleRequest)
if err != nil {
log.Printf("PostRoleCertificateRequest failed for %s, err: %v\n", role.Name, err)
failures += 1
failures = append(failures, role.Name)
continue
}
roleKeyBytes := util.PrivatePem(key)
err = util.SaveRoleCertKey([]byte(roleKeyBytes), []byte(roleCert.X509Certificate), role.RoleKeyFilename, role.RoleCertFilename, svcKeyFile, role.Name, role.Uid, role.Gid, role.FileMode, opts.GenerateRoleKey, opts.RotateKey, opts.BackupDir, opts.FileDirectUpdate)
if err != nil {
log.Printf("Unable to save role cert key for role %s, err: %v\n", role.Name, err)
failures += 1
failures = append(failures, role.Name)
continue
}
}
log.Printf("SIA processed %d (failures %d) role certificate requests\n", len(opts.Roles), failures)
log.Printf("SIA processed %d (failures %d) role certificate requests\n", len(opts.Roles), len(failures))
return len(opts.Roles), failures
}

Expand Down Expand Up @@ -601,7 +601,14 @@ func SetupAgent(opts *options.Options, siaMainDir, siaLinkDir string) {
}
}

func RunAgent(siaCmd, ztsUrl string, opts *options.Options) {
func RunAgent(siaCmds, ztsUrl string, opts *options.Options) {
cmds := strings.Split(siaCmds, ",")
for _, cmd := range cmds {
runAgentCommand(cmd, ztsUrl, opts)
}
}

func runAgentCommand(siaCmd, ztsUrl string, opts *options.Options) {

//make sure the meta endpoint is configured by the caller
if opts.MetaEndPoint == "" {
Expand All @@ -626,19 +633,25 @@ func RunAgent(siaCmd, ztsUrl string, opts *options.Options) {
switch cmd {
case "rolecert":
count, failures := GetRoleCertificates(ztsUrl, opts)
if failures != 0 && !skipErrors {
log.Fatalf("unable to fetch %d out of %d requested role certificates\n", failures, count)
if len(failures) != 0 {
util.ExecuteScript(opts.RunAfterCertsErrParts, strings.Join(failures, ","), false)
if !skipErrors {
log.Fatalf("unable to fetch %d out of %d requested role certificates\n", len(failures), count)
}
}
if count != 0 {
util.ExecuteScript(opts.RunAfterParts, opts.RunAfterFailExit)
util.ExecuteScript(opts.RunAfterCertsOkParts, "", opts.RunAfterFailExit)
}
case "token":
if tokenOpts != nil {
err := fetchAccessToken(tokenOpts)
if err != nil && !skipErrors {
log.Fatalf("Unable to fetch access tokens, err: %v\n", err)
if err != nil {
util.ExecuteScript(opts.RunAfterTokensErrParts, err.Error(), false)
if !skipErrors {
log.Fatalf("Unable to fetch access tokens, err: %v\n", err)
}
}
util.ExecuteScript(opts.RunAfterTokensParts, opts.RunAfterFailExit)
util.ExecuteScript(opts.RunAfterTokensOkParts, "", opts.RunAfterFailExit)
} else {
log.Print("unable to fetch access tokens, invalid or missing configuration")
}
Expand All @@ -647,14 +660,14 @@ func RunAgent(siaCmd, ztsUrl string, opts *options.Options) {
if err != nil {
log.Fatalf("Unable to register identity, err: %v\n", err)
}
util.ExecuteScript(opts.RunAfterParts, opts.RunAfterFailExit)
util.ExecuteScript(opts.RunAfterCertsOkParts, "", opts.RunAfterFailExit)
log.Printf("identity registered for services: %s\n", svcs)
case "rotate", "refresh":
err = RefreshInstance(ztsUrl, opts)
if err != nil {
log.Fatalf("Refresh identity failed, err: %v\n", err)
}
util.ExecuteScript(opts.RunAfterParts, opts.RunAfterFailExit)
util.ExecuteScript(opts.RunAfterCertsOkParts, "", opts.RunAfterFailExit)
log.Printf("Identity successfully refreshed for services: %s\n", svcs)
case "init":
err := RegisterInstance(ztsUrl, opts, false)
Expand All @@ -663,16 +676,22 @@ func RunAgent(siaCmd, ztsUrl string, opts *options.Options) {
}
log.Printf("identity registered for services: %s\n", svcs)
count, failures := GetRoleCertificates(ztsUrl, opts)
if failures != 0 && !skipErrors {
log.Fatalf("unable to fetch %d out of %d requested role certificates\n", failures, count)
if len(failures) != 0 {
util.ExecuteScript(opts.RunAfterCertsErrParts, strings.Join(failures, ","), false)
if !skipErrors {
log.Fatalf("unable to fetch %d out of %d requested role certificates\n", len(failures), count)
}
}
util.ExecuteScript(opts.RunAfterParts, opts.RunAfterFailExit)
util.ExecuteScript(opts.RunAfterCertsOkParts, "", opts.RunAfterFailExit)
if tokenOpts != nil {
err := fetchAccessToken(tokenOpts)
if err != nil && !skipErrors {
log.Fatalf("Unable to fetch access tokens, err: %v\n", err)
if err != nil {
util.ExecuteScript(opts.RunAfterTokensErrParts, err.Error(), false)
if !skipErrors {
log.Fatalf("Unable to fetch access tokens, err: %v\n", err)
}
}
util.ExecuteScript(opts.RunAfterTokensParts, opts.RunAfterFailExit)
util.ExecuteScript(opts.RunAfterTokensOkParts, "", opts.RunAfterFailExit)
}
default:
// we're going to iterate through our configured services.
Expand Down Expand Up @@ -729,6 +748,7 @@ func RunAgent(siaCmd, ztsUrl string, opts *options.Options) {
errors <- fmt.Errorf("refresh identity failed: %v\n", err)
return
} else {
util.ExecuteScriptWithoutBlock(opts.RunAfterCertsErrParts, svcs, false)
log.Printf("refresh identity failed for svcs %s, error: %v\n", svcs, err)
log.Printf("refresh will be retried in %d minutes, failure %d of %d\n", opts.RefreshInterval, failedRefreshCount, opts.FailCountForExit)
}
Expand All @@ -741,15 +761,18 @@ func RunAgent(siaCmd, ztsUrl string, opts *options.Options) {
if tokenOpts != nil {
err := accessTokenRequest(tokenOpts)
if err != nil {
errors <- fmt.Errorf("Unable to fetch access tokens after identity refresh, err: %v\n", err)
util.ExecuteScriptWithoutBlock(opts.RunAfterTokensErrParts, err.Error(), false)
} else {
util.ExecuteScriptWithoutBlock(opts.RunAfterTokensParts, opts.RunAfterFailExit)
util.ExecuteScriptWithoutBlock(opts.RunAfterTokensOkParts, "", opts.RunAfterFailExit)
}
} else {
log.Print("token config does not exist - do not refresh tokens")
}
GetRoleCertificates(ztsUrl, opts)
util.ExecuteScriptWithoutBlock(opts.RunAfterParts, opts.RunAfterFailExit)
_, failures := GetRoleCertificates(ztsUrl, opts)
if len(failures) != 0 {
util.ExecuteScriptWithoutBlock(opts.RunAfterCertsErrParts, strings.Join(failures, ","), false)
}
util.ExecuteScriptWithoutBlock(opts.RunAfterCertsOkParts, "", opts.RunAfterFailExit)
util.NotifySystemdReadyForCommand(cmd, "systemd-notify-all")

if opts.SDSUdsPath != "" {
Expand Down Expand Up @@ -799,9 +822,9 @@ func RunAgent(siaCmd, ztsUrl string, opts *options.Options) {
log.Printf("refreshing access-token..")
err := accessTokenRequest(tokenOpts)
if err != nil {
errors <- fmt.Errorf("refresh access-token task got error: %v\n", err)
util.ExecuteScriptWithoutBlock(opts.RunAfterTokensErrParts, err.Error(), false)
} else {
util.ExecuteScriptWithoutBlock(opts.RunAfterTokensParts, opts.RunAfterFailExit)
util.ExecuteScriptWithoutBlock(opts.RunAfterTokensOkParts, "", opts.RunAfterFailExit)
}
case <-stop:
errors <- nil
Expand Down
2 changes: 1 addition & 1 deletion libs/go/sia/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ func TestRoleCertificateRequest(test *testing.T) {
}

_, failures := GetRoleCertificates("http://127.0.0.1:5084/zts/v1", opts)
if failures != 0 {
if len(failures) != 0 {
test.Errorf("Unable to get role certificate: %v", err)
return
}
Expand Down
Loading

0 comments on commit da4485b

Please sign in to comment.