Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix encoding of non-ascii contents written to parameter files. #18972

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/actions/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:filetype",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/util:shell_escaper",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:var_int",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.google.devtools.build.lib.util.GccParamFileEscaper;
import com.google.devtools.build.lib.util.ShellEscaper;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.util.StringUtil;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
Expand All @@ -33,6 +34,7 @@
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.stream.StreamSupport;

/**
* Support for parameter file generation (as used by gcc and other tools, e.g.
Expand Down Expand Up @@ -170,6 +172,10 @@ private static void writeContentUtf8(OutputStream outputStream, Iterable<String>
byte[] bytes = stringUnsafe.getByteArray(line);
if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1 && isAscii(bytes)) {
outputStream.write(bytes);
} else if (!StringUtil.decodeBytestringUtf8(line).equals(line)) {
// We successfully decoded line from utf8 - meaning it was already encoded as utf8.
// We do not want to double-encode.
outputStream.write(bytes);
} else {
ByteBuffer encodedBytes = encoder.encode(CharBuffer.wrap(line));
outputStream.write(
Expand Down
25 changes: 25 additions & 0 deletions src/test/shell/integration/unicode_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,28 @@ write_file_rule = rule(
"is_executable": attr.bool(),
},
)

def _run_executable_with_param_file_impl(ctx):
args = ctx.actions.args()
args.use_param_file("%s", use_always = True)
args.add(ctx.attr.content)
ctx.actions.run(
inputs = [],
outputs = [ctx.outputs.out],
arguments = [args, ctx.outputs.out.path],
executable = ctx.executable.executable,
)

run_executable_with_param_file_rule = rule(
implementation = _run_executable_with_param_file_impl,
doc = "Writes `content` to a param file and passes the file to the executable",
attrs = {
"out": attr.output(mandatory = True),
"content": attr.string(mandatory = True),
"executable": attr.label(
allow_files = True,
executable = True,
cfg = "exec",
),
},
)
21 changes: 21 additions & 0 deletions src/test/shell/integration/unicode_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,25 @@ function test_unicode_action_write_content {
>>"${TEST_log}" 2>&1 || fail "Output not as expected"
}

function test_unicode_action_run_param_file {
local test_name="action_run_param_file"
bazel build --genrule_strategy=local --spawn_strategy=local \
"//:${test_name}" >& "$TEST_log" \
|| fail "expected build to succeed"

quoted_unicode_test_expected="'$(cat unicode_test_expected.txt)'"

echo "Expected: ${quoted_unicode_test_expected}"

cat_output=$(cat "${PRODUCT_NAME}-bin/${test_name}.out")
assert_equals "${cat_output}" \
"${quoted_unicode_test_expected}" \
|| fail "Output not as expected"

param_file_output=$(cat "${PRODUCT_NAME}-bin/${test_name}.out-0.params")
assert_equals "${param_file_output}" \
"${quoted_unicode_test_expected}" \
|| fail "Output not as expected"
}

run_suite "Integration tests for ${PRODUCT_NAME}'s unicode i/o in actions"
18 changes: 17 additions & 1 deletion src/test/shell/integration/unicode_test_BUILD
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# BUILD file for unicode_test
load(":unicode_test.bzl", "run_executable_rule", "write_file_rule")
load(":unicode_test.bzl", "run_executable_rule", "write_file_rule", "run_executable_with_param_file_rule")

# In Russian and Bengali: "Down with mojibake! We want unicode!"
non_ascii_string = "Долой кракозябры! আমরা ইউনিকোড চাই!"
Expand Down Expand Up @@ -31,3 +31,19 @@ write_file_rule(
content = non_ascii_string,
out = "action_write_content.out",
)

run_executable_with_param_file_rule(
name = "action_run_param_file",
executable = "cat_param_file.sh",
content = non_ascii_string,
out = "action_run_param_file.out",
)

write_file_rule(
name = "cat_param_file",
content = """#!/bin/bash
cat "$1" >> "$2";
""",
out = "cat_param_file.sh",
is_executable = True,
)
Loading