diff --git a/internal/provider/resource_openai_vector_store_file.go b/internal/provider/resource_openai_vector_store_file.go index 00ee20e..9f5bf80 100644 --- a/internal/provider/resource_openai_vector_store_file.go +++ b/internal/provider/resource_openai_vector_store_file.go @@ -5,7 +5,9 @@ import ( "encoding/json" "fmt" "strings" + "time" + "github.com/hashicorp/terraform-plugin-log/tflog" "github.com/hashicorp/terraform-plugin-sdk/v2/diag" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" ) @@ -232,9 +234,13 @@ func resourceOpenAIVectorStoreFileCreate(ctx context.Context, d *schema.Resource return diag.Errorf("Error parsing response: %s", err.Error()) } + tflog.Debug(ctx, fmt.Sprintf("Vector store file created successfully: %s", string(responseBytes))) + // Set ID and other attributes if id, ok := response["id"]; ok && id != nil { - d.SetId(id.(string)) + fileIDFromResponse := id.(string) + d.SetId(fileIDFromResponse) + tflog.Info(ctx, fmt.Sprintf("Vector store file ID set to: %s", fileIDFromResponse)) } else { return diag.Errorf("Response missing required 'id' field") } @@ -257,7 +263,85 @@ func resourceOpenAIVectorStoreFileCreate(ctx context.Context, d *schema.Resource } } - return resourceOpenAIVectorStoreFileRead(ctx, d, m) + // Wait for the file to be available in the vector store with retry logic. + // This addresses eventual consistency issues where the OpenAI API returns + // "No file found" errors immediately after file creation (issue #35). + return resourceOpenAIVectorStoreFileReadWithRetry(ctx, d, m, 5) +} + +// containsRetriableError checks if an error message indicates a retriable error. +// Uses case-insensitive matching to catch "404 Not Found", "No file found", etc. 
+func containsRetriableError(message string) bool { + lowerMsg := strings.ToLower(message) + return strings.Contains(lowerMsg, "no file found") || strings.Contains(lowerMsg, "not found") +} + +// resourceOpenAIVectorStoreFileReadWithRetry attempts to read the vector store file with retry logic +// to handle eventual consistency issues with the OpenAI API. +// +// When a vector store file is created, the OpenAI API may temporarily return "file not found" +// errors due to eventual consistency delays in their backend. This is especially common when +// creating multiple files simultaneously. +// +// Retry Behavior: +// - Makes at most maxRetries read attempts in total, including the first (the create path passes 5) +// - Waits between attempts with exponential backoff; 5 attempts means 4 waits (1s, 2s, 4s, 8s in the tests, ~15s total) +// - Only retries on "not found" errors (case-insensitive) +// - Returns immediately on other errors (unauthorized, rate limit, etc.) +// - Logs retry attempts for debugging +// +// Parameters: +// - ctx: Context for logging +// - d: Resource data +// - m: Provider metadata containing OpenAI client +// - maxRetries: Maximum number of read attempts (must be >= 1) +// +// Returns: +// - nil diagnostics on success +// - diagnostics with error details if all retries are exhausted or non-retriable error occurs +func resourceOpenAIVectorStoreFileReadWithRetry(ctx context.Context, d *schema.ResourceData, m interface{}, maxRetries int) diag.Diagnostics { + // Validate maxRetries configuration + if maxRetries <= 0 { + return diag.Errorf("maxRetries must be at least 1 for vector store file read retries") + } + + var lastErr diag.Diagnostics + + for attempt := 0; attempt < maxRetries; attempt++ { + if attempt > 0 { + // Exponential backoff: 1s, 2s, 4s, 8s, 16s + backoffDuration := time.Duration(1< 500*time.Millisecond { + t.Errorf("Expected to complete quickly, took %v", elapsed) + } + }) + + t.Run("SuccessAfterThreeRetries", func(t *testing.T) { + callCount := 0 + startTime := time.Now() + + // Create a mock read function that fails twice then 
succeeds + mockRead := func(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + callCount++ + if callCount < 3 { + return diag.Errorf("Error reading vector store file: API error: No file found with id 'file-123' in vector store 'vs-456'") + } + return nil // Success on 3rd attempt + } + + result := simulateRetryLogic(mockRead, 5) + + elapsed := time.Since(startTime) + + // Assertions + if result.HasError() { + t.Errorf("Expected no error after retries, got: %v", result) + } + if callCount != 3 { + t.Errorf("Expected 3 calls (1 initial + 2 retries), got %d", callCount) + } + // Should have waited: 1s + 2s = 3s (approximately) + // Using a range to account for test execution time + if elapsed < 2*time.Second || elapsed > 4*time.Second { + t.Errorf("Expected ~3s elapsed (1s + 2s backoff), got %v", elapsed) + } + }) + + t.Run("MaxRetriesExhausted", func(t *testing.T) { + callCount := 0 + maxRetries := 5 + + // Create a mock read function that always fails with retriable error + mockRead := func(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + callCount++ + return diag.Errorf("Error reading vector store file: API error: No file found with id 'file-123' in vector store 'vs-456'") + } + + result := simulateRetryLogic(mockRead, maxRetries) + + // Assertions + if !result.HasError() { + t.Error("Expected error after max retries exhausted") + } + if callCount != maxRetries { + t.Errorf("Expected %d calls (max retries), got %d", maxRetries, callCount) + } + }) + + t.Run("NonRetriableErrorFailsImmediately", func(t *testing.T) { + callCount := 0 + startTime := time.Now() + + // Create a mock read function that fails with non-retriable error + mockRead := func(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + callCount++ + return diag.Errorf("Error reading vector store file: API error: Unauthorized") + } + + result := simulateRetryLogic(mockRead, 5) + + elapsed := time.Since(startTime) + + 
// Assertions + if !result.HasError() { + t.Error("Expected error for unauthorized") + } + if callCount != 1 { + t.Errorf("Expected 1 call (no retries for non-retriable error), got %d", callCount) + } + // Should fail immediately without waiting + if elapsed > 500*time.Millisecond { + t.Errorf("Expected to fail immediately, took %v", elapsed) + } + }) + + t.Run("ExponentialBackoffTiming", func(t *testing.T) { + callCount := 0 + attemptTimes := []time.Time{} + + // Create a mock read function that always fails + mockRead := func(ctx context.Context, d *schema.ResourceData, m interface{}) diag.Diagnostics { + callCount++ + attemptTimes = append(attemptTimes, time.Now()) + return diag.Errorf("Error reading vector store file: API error: No file found with id 'file-123' in vector store 'vs-456'") + } + + simulateRetryLogic(mockRead, 5) + + // Verify exponential backoff: 1s, 2s, 4s, 8s + expectedBackoffs := []time.Duration{ + 0 * time.Second, // First attempt (no wait) + 1 * time.Second, // Wait 1s before 2nd attempt + 2 * time.Second, // Wait 2s before 3rd attempt + 4 * time.Second, // Wait 4s before 4th attempt + 8 * time.Second, // Wait 8s before 5th attempt + } + + if len(attemptTimes) != 5 { + t.Fatalf("Expected 5 attempts, got %d", len(attemptTimes)) + } + + // Check timing between attempts (with tolerance) + tolerance := 200 * time.Millisecond + for i := 1; i < len(attemptTimes); i++ { + actual := attemptTimes[i].Sub(attemptTimes[i-1]) + expected := expectedBackoffs[i] + diff := actual - expected + + if diff < -tolerance || diff > tolerance { + t.Errorf("Attempt %d: expected ~%v backoff, got %v (diff: %v)", + i, expected, actual, diff) + } + } + }) +} + +// simulateRetryLogic simulates the retry logic from resourceOpenAIVectorStoreFileReadWithRetry +// This is a simplified version for testing purposes +func simulateRetryLogic(readFunc mockReadFunc, maxRetries int) diag.Diagnostics { + if maxRetries <= 0 { + return diag.Errorf("maxRetries must be at least 1 for 
vector store file read retries") + } + + ctx := context.Background() + d := &schema.ResourceData{} + var lastErr diag.Diagnostics + + for attempt := 0; attempt < maxRetries; attempt++ { + if attempt > 0 { + // Exponential backoff: 1s, 2s, 4s, 8s, 16s + backoffDuration := time.Duration(1<