diff --git a/base/builtin/env.c b/base/builtin/env.c index 770aea3f..bda3e385 100644 --- a/base/builtin/env.c +++ b/base/builtin/env.c @@ -37,9 +37,8 @@ extern int return_val; printf("%s", s->str); return $R_CONT(c$cont, B_None); } -void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { - log_info("read_stdin: %p", stream->data); +void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { if (nread < 0){ if (nread == UV_EOF) { uv_close((uv_handle_t *)stream, NULL); @@ -50,11 +49,9 @@ void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { cb->$class->__asyn__(cb, to$bytesD_len(buf->base, nread)); } } - - if (buf->base) - acton_free(buf->base); } -$R B_EnvD_stdin_installG_local (B_Env self, $Cont c$cont, $action cb) { + +$R B_EnvD__on_stdin_bytesG_local (B_Env self, $Cont c$cont, $action cb) { // This should be the only call in env that does IO stuff, so it is safe to // pin affinity here (and not earlier).. pin_actor_affinity(); @@ -64,6 +61,7 @@ void read_stdin(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { uv_read_start((uv_stream_t*)tty, alloc_buffer, read_stdin); return $R_CONT(c$cont, B_None); } + $R B_EnvD_exitG_local (B_Env self, $Cont c$cont, B_int n) { return_val = from$int(n); rts_shutdown(); diff --git a/base/src/__builtin__.act b/base/src/__builtin__.act index 0a0d37fa..b59e1185 100644 --- a/base/src/__builtin__.act +++ b/base/src/__builtin__.act @@ -927,6 +927,38 @@ def type(a: value) -> str: """ NotImplemented +actor StringDecoder(cb_out: action(str) -> None, encoding: ?str="utf-8", on_error: ?action(str, bytes) -> None): + """Bytes to string decoder + + Decodes bytes to string using the provided encoding. If no encoding is given + UTF-8 is used. The decoder is stateful in order to buffer incomplete multi- + byte characters. 
+ """ + MAX_UNICODE_CHAR_SIZE = 4 + var buf: bytes = b"" + + if encoding is not None: + if encoding.lower() != "utf-8": + raise ValueError("Only utf-8 encoding is supported") + + def decode(input: bytes) -> None: + buf += input + # Attempt to decode all of buf. If it fails we are likely in the middle + # of a multi-byte character so we try again by removing the last bytes + # iteratively until we succeed. UTF-8 has up to 4 bytes per character. + for i in range(1, MAX_UNICODE_CHAR_SIZE+1): + try: + s = buf[:-i].decode() + buf = buf[-i:] + cb_out(s) + return + except ValueError: + pass + if on_error is not None: + on_error("Invalid UTF-8", buf) + else: + raise ValueError("Invalid UTF-8: %s" % str(buf)) + ## Environment ################################################ class WorldCap(): @@ -968,10 +1000,64 @@ actor Env (wc: WorldCap, sc: SysCap, args: list[str]): argv = args nr_wthreads: int = 0 + action def getenv(name: str) -> ?str: + """Get the value of an environment variable""" + res = getenvb(name.encode()) + if res is not None: + return res.decode() + return None + + action def getenvb(name: bytes) -> ?bytes: + """Get the value of an environment variable""" + NotImplemented + + action def setenv(n: str, v: str) -> None: + """Set the value of an environment variable""" + setenvb(n.encode(), v.encode()) + + action def setenvb(n: bytes, v: bytes) -> None: + """Set the value of an environment variable""" + NotImplemented + + action def unsetenv(n: str) -> None: + """Unset an environment variable""" + unsetenvb(n.encode()) + + action def unsetenvb(n: bytes) -> None: + """Unset an environment variable""" + NotImplemented + action def stdout_write(s: str) -> None: NotImplemented - action def stdin_install(cb: action(str) -> None) -> None: + action def stdin_install(on_stdin: ?action(str) -> None, encoding: ?str=None, on_error: ?action(str, bytes) -> None, on_stdin_bytes: ?action(bytes) -> None) -> None: + if on_stdin is None and on_stdin_bytes is None: + raise 
ValueError("At least one of on_stdin or on_stdin_bytes must be set") + elif on_stdin_bytes is not None: + if encoding is not None: + raise ValueError("encoding must not be set when on_stdin_bytes is set, it is only used for decoding stdin bytes to string") + if on_error is not None: + raise ValueError("on_error must not be set when on_stdin_bytes is set, it is only used for decoding error when decoding stdin bytes to string") + _on_stdin_bytes(on_stdin_bytes) + elif on_stdin is not None: + if encoding is None: + # If no encoding is given, attempt to discover the encoding used + # Default to utf-8 if we're unable to discover the encoding + encoding = "utf-8" + # Read encoding from the LANG environment variable + lang_env = getenv("LANG") + if lang_env is not None: + try: + encoding = lang_env.split(".")[1].lower() + except: + pass + # If stdin is attached to a terminal, attempt to discover the + # encoding used by the terminal by inspecting the LANG environment + # variable. + sd = StringDecoder(on_stdin, encoding, on_error) + _on_stdin_bytes(sd.decode) + + action def _on_stdin_bytes(cb: action(bytes) -> None) -> None: NotImplemented action def exit(n: int): diff --git a/base/src/__builtin__.ext.c b/base/src/__builtin__.ext.c index 6c266d52..1d8aed43 100644 --- a/base/src/__builtin__.ext.c +++ b/base/src/__builtin__.ext.c @@ -25,3 +25,54 @@ B_str B_BaseExceptionD__name (B_BaseException self) { B_str B_type(B_value a) { return to$str(unmangle_name(a->$class->$GCINFO)); } + +$R B_EnvD_getenvbG_local (B_Env self, $Cont C_cont, B_bytes name) { + // uv_os_getenv is not threadsafe but our Env actor forces serial execution + + // Try to use a small fixed size buffer + size_t len = 256; + char smallval[256]; + char *value = smallval; + + const char* env_var = fromB_bytes(name); + + // First, query the required buffer size by passing NULL as the buffer + int r = uv_os_getenv(env_var, value, &len); + if (r == UV_ENOENT) { + // The environment variable does not exist + return 
$R_CONT(C_cont, B_None); + } else if (r == UV_ENOBUFS) { + // Allocate the buffer and actually get the environment variable value + value = (char*)acton_malloc(len); + r = uv_os_getenv(env_var, value, &len); + } + if (r < 0) { + char *s; + asprintf(&s, "Failed to read the environment variable %s: %s", env_var, uv_strerror(r)); + $RAISE((B_BaseException)B_RuntimeErrorG_new(to$str(s))); + } + return $R_CONT(C_cont, to$bytes(value)); +} + +$R B_EnvD_setenvbG_local (B_Env self, $Cont C_cont, B_bytes name, B_bytes value) { + const char* env_var = fromB_bytes(name); + const char* env_val = fromB_bytes(value); + int r = uv_os_setenv(env_var, env_val); + if (r < 0) { + char *s; + asprintf(&s, "Failed to set the environment variable %s: %s", env_var, uv_strerror(r)); + $RAISE((B_BaseException)B_RuntimeErrorG_new(to$str(s))); + } + return $R_CONT(C_cont, B_None); +} + +$R B_EnvD_unsetenvbG_local (B_Env self, $Cont C_cont, B_bytes name) { + const char* env_var = fromB_bytes(name); + int r = uv_os_unsetenv(env_var); + if (r < 0) { + char *s; + asprintf(&s, "Failed to unset the environment variable %s: %s", env_var, uv_strerror(r)); + $RAISE((B_BaseException)B_RuntimeErrorG_new(to$str(s))); + } + return $R_CONT(C_cont, B_None); +} diff --git a/docs/acton-by-example/src/SUMMARY.md b/docs/acton-by-example/src/SUMMARY.md index 79c11555..ed12b061 100644 --- a/docs/acton-by-example/src/SUMMARY.md +++ b/docs/acton-by-example/src/SUMMARY.md @@ -34,6 +34,9 @@ - [Explicit types](types/explicit.md) - [Security](security.md) - [Capabilities](security/capabilities.md) +- [Environment](environment.md) + - [Environment variables](environment/variables.md) + - [Reading stdin input](environment/stdin.md) - [Standard library](stdlib.md) - [Regular Expression](stdlib/re.md) diff --git a/docs/acton-by-example/src/environment.md b/docs/acton-by-example/src/environment.md new file mode 100644 index 00000000..6edb7e12 --- /dev/null +++ b/docs/acton-by-example/src/environment.md @@ -0,0 +1,4 @@ +# 
Environment + +The environment of an Acton application is the outside world. Any useful application typically needs to interact with the environment in some way, like reading arguments or taking input from stdin and printing output. + diff --git a/docs/acton-by-example/src/environment/stdin.md b/docs/acton-by-example/src/environment/stdin.md new file mode 100644 index 00000000..b5634b5c --- /dev/null +++ b/docs/acton-by-example/src/environment/stdin.md @@ -0,0 +1,39 @@ +# Reading stdin input + +Read input from stdin by installing a handler for stdin data. The data passed to the handler is a `str`: +```python +actor main(env): + def interact(input): + print("Got some input:", input) + + env.stdin_install(interact) +``` + + +It is possible to specify the encoding and an on_error() callback which is invoked if there is a problem decoding the data. When encoding is not specified (default `None`), an attempt is made to discover the encoding by reading the `LANG` environment variable. If no encoding is discovered, the default is to use `utf-8`. + +```python +actor main(env): + def interact(input): + print("Got some input:", input) + + def on_stdin_error(err, data): + print("Some error with decoding the input data:", err) + print("Raw bytes data:", data) + + env.stdin_install(on_stdin=interact, encoding="utf-8", on_error=on_stdin_error) +``` + +You can read the raw data in `bytes` form by installing a bytes handler instead: + +```python +actor main(env): + def interact(bytes_input): + # Note how the input might contain parts (some bytes) of a multi-byte + # Unicode character in which case decoding will fail + print("Got some input:", bytes_input.decode()) + + env.stdin_install(on_stdin_bytes=interact) +``` + +This allows reading binary data and more explicit control over how to decode the data. 
diff --git a/docs/acton-by-example/src/environment/variables.md b/docs/acton-by-example/src/environment/variables.md new file mode 100644 index 00000000..798e350e --- /dev/null +++ b/docs/acton-by-example/src/environment/variables.md @@ -0,0 +1,23 @@ +# Environment variables + +It is possible to read, set and unset environment variables. The standard functions `env.getenv`, `env.setenv` and `env.unsetenv` all assume `str` input and output, which is a convenience based on the assumption that all data is encoded using UTF-8. POSIX systems actually treat both environment variable names and values as raw bytes. To access the environment as bytes and handle decoding explicitly, use `env.getenvb`, `env.setenvb` and `env.unsetenvb`. + +Source: +```python +actor main(env): + env_user = env.getenv("USER") + if env_user is not None: + print("User:", env_user) + env.setenv("FOO", "bar") + env.unsetenv("LANG") + foo_env = env.getenv("FOO") + if foo_env is not None: + print("FOO:", foo_env) + env.exit(0) +``` + +Output: +```sh +User: myuser +FOO: bar +```