Feature/rteco 814 implement build info collection for hugging face#368
Conversation
…ction-for-Hugging-Face
| installCmd.Stdout = os.Stdout | ||
| installCmd.Stderr = os.Stderr | ||
| if err := installCmd.Run(); err != nil { | ||
| // If --user fails, try with --break-system-packages as fallback |
There was a problem hiding this comment.
this flag comes with risks and can also break os packages using python, hope we understand the consequences before moving forward with this
There was a problem hiding this comment.
Addressed it.
| installCmd := exec.Command(pythonPath, "-m", "pip", "install", "huggingface_hub", "--user", "--quiet") | ||
| installCmd.Stdout = os.Stdout | ||
| installCmd.Stderr = os.Stderr |
There was a problem hiding this comment.
for accesing standard error or output easily we can use .Output method that encapsulates this logic and handles error properly.
There was a problem hiding this comment.
Addressed it.
| installCmd.Stderr = os.Stderr | ||
| if err := installCmd.Run(); err != nil { | ||
| // If --user fails, try with --break-system-packages as fallback | ||
| log.Debug("User install failed, trying with --break-system-packages") |
There was a problem hiding this comment.
There was a problem hiding this comment.
Addressed it.
| func GetRepoKeyFromHFEndpoint() (string, error) { | ||
| endpoint := os.Getenv("HF_ENDPOINT") | ||
| if endpoint == "" { | ||
| return "", errorutils.CheckErrorf("HF_ENDPOINT environment variable is not set") |
There was a problem hiding this comment.
is there any other fallback mechanism that we can use apart from env?
There was a problem hiding this comment.
No, its from huggingface itself.
| // Fall back to Python module mode | ||
| pythonPath, pythonErr := GetPythonPath() | ||
| if pythonErr != nil { | ||
| return "", nil, errorutils.CheckErrorf("neither huggingface-cli nor hf found in PATH, and Python is not available: %w", pythonErr) |
There was a problem hiding this comment.
can we improve the log? we can start with informing that huggingface-cli and hf not found, searching for python, later if that fails, we can log that python executable not found
There was a problem hiding this comment.
Addressed it.
| cmd := exec.Command(hfCliPath, cmdArgs...) | ||
| cmd.Stdout = os.Stdout | ||
| cmd.Stderr = os.Stderr |
There was a problem hiding this comment.
can we remove standard output and error?
There was a problem hiding this comment.
Addressed it.
| if hfd.buildConfiguration == nil { | ||
| return nil | ||
| } | ||
| isCollectBuildInfo, err := hfd.buildConfiguration.IsCollectBuildInfo() | ||
| if err != nil { | ||
| return errorutils.CheckError(err) | ||
| } | ||
| args := map[string]interface{}{ | ||
| "repo_id": hfd.repoId, | ||
| "revision": hfd.revision, | ||
| "etag_timeout": hfd.etagTimeout, | ||
| if !isCollectBuildInfo { | ||
| return nil | ||
| } | ||
| if hfd.repoType != "" { | ||
| args["repo_type"] = hfd.repoType | ||
| } else { | ||
| args["repo_type"] = "model" | ||
| log.Info("Collecting build info for executed huggingface ", hfd.name, "command") | ||
| buildName, err := hfd.buildConfiguration.GetBuildName() | ||
| if err != nil { | ||
| return errorutils.CheckError(err) | ||
| } | ||
| argsJSON, err := json.Marshal(args) | ||
| buildNumber, err := hfd.buildConfiguration.GetBuildNumber() | ||
| if err != nil { | ||
| return errorutils.CheckErrorf("failed to marshal arguments to JSON: %w", err) | ||
| } | ||
| pythonCmd := BuildPythonDownloadCmd(string(argsJSON)) | ||
| log.Debug("Executing Python function to download ", args["repo_type"], ": ", hfd.repoId) | ||
| cmd := exec.Command(pythonPath, "-c", pythonCmd) | ||
| cmd.Dir = scriptDir | ||
| output, err := cmd.CombinedOutput() | ||
| if len(output) == 0 { | ||
| if err != nil { | ||
| return errorutils.CheckErrorf("Python script produced no output and exited with error: %w", err) | ||
| } | ||
| return errorutils.CheckErrorf("Python script produced no output. The script may not be executing correctly.") | ||
| return errorutils.CheckError(err) | ||
| } | ||
| var result Response | ||
| if jsonErr := json.Unmarshal(output, &result); jsonErr != nil { | ||
| if err != nil { | ||
| return errorutils.CheckErrorf("failed to execute Python script: %w, output: %s", err, string(output)) | ||
| } | ||
| return errorutils.CheckErrorf("failed to parse Python script output: %w, output: %s", jsonErr, string(output)) | ||
| project := hfd.buildConfiguration.GetProject() | ||
| buildInfoService := buildUtils.CreateBuildInfoService() | ||
| build, err := buildInfoService.GetOrCreateBuildWithProject(buildName, buildNumber, project) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to create build info: %w", err) | ||
| } | ||
| if !result.Success { | ||
| return errorutils.CheckErrorf("%s", result.Error) | ||
| buildInfo, err := build.ToBuildInfo() | ||
| if err != nil { | ||
| return fmt.Errorf("failed to build info: %w", err) |
There was a problem hiding this comment.
seems like repititive code from huggingface_upload.py
There was a problem hiding this comment.
Addressed it.
| revisionPattern = hfd.revision + "_*" | ||
| multipleDirsInSearchResults = true | ||
| } | ||
| aqlQuery := fmt.Sprintf(`{"repo": "%s", "path": {"$match": "%s/%s/%s/*"}}`, |
There was a problem hiding this comment.
are this files not available locally?
| Aql: servicesUtils.Aql{ItemsFind: aqlQuery}, | ||
| }, | ||
| } | ||
| reader, err := serviceManager.SearchFiles(searchParams) |
There was a problem hiding this comment.
let's be careful for this extra call as well here - https://github.com/jfrog/jfrog-client-go/blob/63e06cde72c48e96c735271eee7d32e6d04e5a37/artifactory/services/utils/searchutil.go#L213
| Sha256: resultItem.Sha256, | ||
| }, | ||
| }) | ||
| if latestCreatedDir != resultItem.Path { |
There was a problem hiding this comment.
what does this condition signify?
There was a problem hiding this comment.
Actually, there are multiple folders in huggingface repo with same name but with timestamp difference. So, need to pick latest one, so this condition breaks on 2nd folder items.
…ction-for-Hugging-Face

What: [Added build info collection logic for hugging face download and upload command
Depend on other PR: oNo,
Testing: Done, On artifactory instance