diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 60e12bcec..282e679a2 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -395,6 +395,19 @@ *** xref:develop:connect/cookbooks/rag.adoc[] *** xref:develop:connect/cookbooks/jira.adoc[] +* xref:sql:index.adoc[Redpanda SQL] +// ** quickstart.adoc +** xref:sql:get-started/what-is-redpanda-sql.adoc[Overview] +*** xref:sql:get-started/oltp-vs-olap.adoc[] +*** xref:sql:get-started/redpanda-sql-vs-postgresql.adoc[] +** xref:sql:connect-to-sql/index.adoc[Connect to Redpanda SQL] +*** xref:sql:connect-to-sql/language-clients/psycopg2.adoc[] +*** xref:sql:connect-to-sql/language-clients/java-jdbc.adoc[] +*** xref:sql:connect-to-sql/language-clients/php-pdo.adoc[] +*** xref:sql:connect-to-sql/language-clients/dotnet-dapper.adoc[] +** xref:sql:troubleshoot/index.adoc[Troubleshoot] +*** xref:sql:troubleshoot/degraded-state-handling.adoc[] + * xref:develop:index.adoc[Develop] ** xref:develop:kafka-clients.adoc[] ** xref:get-started:create-topic.adoc[Create a Topic] @@ -553,6 +566,209 @@ * xref:get-started:partner-integration.adoc[] * xref:reference:index.adoc[Reference] +** xref:reference:sql/index.adoc[Redpanda SQL Reference] +*** xref:reference:sql/sql-statements/index.adoc[Statements] +**** xref:reference:sql/sql-statements/select.adoc[] +**** xref:reference:sql/sql-statements/copy-to/index.adoc[COPY TO] +***** xref:reference:sql/sql-statements/copy-to/copy-to.adoc[] +***** xref:reference:sql/sql-statements/copy-to/copy-to-csv.adoc[] +***** xref:reference:sql/sql-statements/copy-to/copy-to-with-delimiter.adoc[] +***** xref:reference:sql/sql-statements/copy-to/copy-to-with-endpoint.adoc[] +***** xref:reference:sql/sql-statements/copy-to/copy-to-with-header.adoc[] +***** xref:reference:sql/sql-statements/copy-to/copy-to-with-null.adoc[] +***** xref:reference:sql/sql-statements/copy-to/copy-to-stdout.adoc[] +**** xref:reference:sql/sql-statements/describe.adoc[] +**** 
xref:reference:sql/sql-statements/set-show.adoc[] +**** xref:reference:sql/sql-statements/show-tables.adoc[] +**** xref:reference:sql/sql-statements/show-nodes.adoc[] +**** xref:reference:sql/sql-statements/keywords.adoc[] +*** xref:reference:sql/sql-clauses/index.adoc[Clauses] +**** xref:reference:sql/sql-clauses/from/index.adoc[FROM] +***** xref:reference:sql/sql-clauses/from/from.adoc[] +***** xref:reference:sql/sql-clauses/from/join.adoc[] +***** xref:reference:sql/sql-clauses/from/left-join.adoc[] +***** xref:reference:sql/sql-clauses/from/outer-join.adoc[] +***** xref:reference:sql/sql-clauses/from/right-join.adoc[] +**** xref:reference:sql/sql-clauses/where.adoc[] +**** xref:reference:sql/sql-clauses/group-by.adoc[] +**** xref:reference:sql/sql-clauses/having.adoc[] +**** xref:reference:sql/sql-clauses/order-by.adoc[] +**** xref:reference:sql/sql-clauses/limit.adoc[] +**** xref:reference:sql/sql-clauses/offset.adoc[] +**** xref:reference:sql/sql-clauses/set-operations/index.adoc[Set Operations] +***** xref:reference:sql/sql-clauses/set-operations/except.adoc[] +***** xref:reference:sql/sql-clauses/set-operations/intersect.adoc[] +***** xref:reference:sql/sql-clauses/set-operations/union.adoc[] +**** xref:reference:sql/sql-clauses/with.adoc[] +**** xref:reference:sql/sql-clauses/over-window.adoc[] +*** xref:reference:sql/sql-data-types/index.adoc[Data Types] +**** xref:reference:sql/sql-data-types/data-type-operators.adoc[] +**** xref:reference:sql/sql-data-types/numeric-type/index.adoc[Numeric] +***** xref:reference:sql/sql-data-types/numeric-type/numeric.adoc[] +***** xref:reference:sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc[] +**** xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[] +**** xref:reference:sql/sql-data-types/timestamp-with-time-zone.adoc[] +**** xref:reference:sql/sql-data-types/date.adoc[] +**** xref:reference:sql/sql-data-types/time-type/index.adoc[Time] +***** 
xref:reference:sql/sql-data-types/time-type/time.adoc[] +***** xref:reference:sql/sql-data-types/time-type/time-operators.adoc[] +**** xref:reference:sql/sql-data-types/interval.adoc[] +**** xref:reference:sql/sql-data-types/bool.adoc[] +**** xref:reference:sql/sql-data-types/text.adoc[] +**** xref:reference:sql/sql-data-types/json.adoc[] +**** xref:reference:sql/sql-data-types/array.adoc[] +*** xref:reference:sql/sql-functions/index.adoc[Functions] +**** xref:reference:sql/sql-functions/boolean-functions/index.adoc[Boolean] +***** xref:reference:sql/sql-functions/boolean-functions/if-function.adoc[] +***** xref:reference:sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc[] +***** xref:reference:sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc[] +**** xref:reference:sql/sql-functions/math-functions/index.adoc[Math] +***** xref:reference:sql/sql-functions/math-functions/abs.adoc[] +***** xref:reference:sql/sql-functions/math-functions/bitwise-shift-left.adoc[] +***** xref:reference:sql/sql-functions/math-functions/bitwise-shift-right.adoc[] +***** xref:reference:sql/sql-functions/math-functions/cbrt.adoc[] +***** xref:reference:sql/sql-functions/math-functions/ceil.adoc[] +***** xref:reference:sql/sql-functions/math-functions/exp.adoc[] +***** xref:reference:sql/sql-functions/math-functions/floor.adoc[] +***** xref:reference:sql/sql-functions/math-functions/greatest.adoc[] +***** xref:reference:sql/sql-functions/math-functions/least.adoc[] +***** xref:reference:sql/sql-functions/math-functions/ln.adoc[] +***** xref:reference:sql/sql-functions/math-functions/log.adoc[] +***** xref:reference:sql/sql-functions/math-functions/power.adoc[] +***** xref:reference:sql/sql-functions/math-functions/random.adoc[] +***** xref:reference:sql/sql-functions/math-functions/round.adoc[] +***** xref:reference:sql/sql-functions/math-functions/sign.adoc[] +***** xref:reference:sql/sql-functions/math-functions/sin.adoc[] +***** 
xref:reference:sql/sql-functions/math-functions/sqrt.adoc[] +***** xref:reference:sql/sql-functions/math-functions/to-char-from-number.adoc[] +**** xref:reference:sql/sql-functions/string-functions/index.adoc[String] +***** xref:reference:sql/sql-functions/string-functions/concat.adoc[] +***** xref:reference:sql/sql-functions/string-functions/ends-with.adoc[] +***** xref:reference:sql/sql-functions/string-functions/length.adoc[] +***** xref:reference:sql/sql-functions/string-functions/lower.adoc[] +***** xref:reference:sql/sql-functions/string-functions/position.adoc[] +***** xref:reference:sql/sql-functions/string-functions/replace.adoc[] +***** xref:reference:sql/sql-functions/string-functions/starts-with.adoc[] +***** xref:reference:sql/sql-functions/string-functions/strpos.adoc[] +***** xref:reference:sql/sql-functions/string-functions/substr.adoc[] +***** xref:reference:sql/sql-functions/string-functions/substring.adoc[] +***** xref:reference:sql/sql-functions/string-functions/upper.adoc[] +***** xref:reference:sql/sql-functions/string-functions/regex/index.adoc[Regex] +****** xref:reference:sql/sql-functions/string-functions/regex/regexp-replace.adoc[] +****** xref:reference:sql/sql-functions/string-functions/regex/regexp-match.adoc[] +****** xref:reference:sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc[] +**** xref:reference:sql/sql-functions/timestamp-functions/index.adoc[Timestamp] +***** xref:reference:sql/sql-functions/timestamp-functions/current-timestamp.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/date-trunc.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/extract.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/format-timestamp.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/timestamp-micros.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/timestamp-millis.adoc[] +***** 
xref:reference:sql/sql-functions/timestamp-functions/timestamp-seconds.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/timestamp-trunc.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/to-char.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/to-timestamp.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/unix-macros.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/unix-micros.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/unix-millis.adoc[] +***** xref:reference:sql/sql-functions/timestamp-functions/unix-seconds.adoc[] +**** xref:reference:sql/sql-functions/trigonometric-functions/index.adoc[Trigonometric] +**** xref:reference:sql/sql-functions/json-functions/index.adoc[JSON] +***** xref:reference:sql/sql-functions/json-functions/json-array-extract.adoc[] +***** xref:reference:sql/sql-functions/json-functions/json-array-length.adoc[] +***** xref:reference:sql/sql-functions/json-functions/json-extract-path.adoc[] +***** xref:reference:sql/sql-functions/json-functions/json-extract-path-text.adoc[] +**** xref:reference:sql/sql-functions/aggregate-functions/index.adoc[Aggregate] +***** xref:reference:sql/sql-functions/aggregate-functions/avg.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/bool-and.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/bool-or.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/count.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/distinct.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/for-max.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/for-min.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/max.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/min.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/sum.adoc[] +***** 
xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc[Ordered-Set] +****** xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc[] +***** xref:reference:sql/sql-functions/aggregate-functions/statistics/index.adoc[Statistics] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/corr.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-count.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/variance.adoc[] +****** 
xref:reference:sql/sql-functions/aggregate-functions/statistics/var-pop.adoc[] +****** xref:reference:sql/sql-functions/aggregate-functions/statistics/var-samp.adoc[] +**** xref:reference:sql/sql-functions/window-functions/index.adoc[Window] +***** xref:reference:sql/sql-functions/window-functions/avg.adoc[] +***** xref:reference:sql/sql-functions/window-functions/bool-and.adoc[] +***** xref:reference:sql/sql-functions/window-functions/bool-or.adoc[] +***** xref:reference:sql/sql-functions/window-functions/count.adoc[] +***** xref:reference:sql/sql-functions/window-functions/cume-dist.adoc[] +***** xref:reference:sql/sql-functions/window-functions/dense-rank.adoc[] +***** xref:reference:sql/sql-functions/window-functions/first-value.adoc[] +***** xref:reference:sql/sql-functions/window-functions/lag.adoc[] +***** xref:reference:sql/sql-functions/window-functions/last-value.adoc[] +***** xref:reference:sql/sql-functions/window-functions/lead.adoc[] +***** xref:reference:sql/sql-functions/window-functions/max.adoc[] +***** xref:reference:sql/sql-functions/window-functions/min.adoc[] +***** xref:reference:sql/sql-functions/window-functions/nth-value.adoc[] +***** xref:reference:sql/sql-functions/window-functions/ntile.adoc[] +***** xref:reference:sql/sql-functions/window-functions/percent-rank.adoc[] +***** xref:reference:sql/sql-functions/window-functions/rank.adoc[] +***** xref:reference:sql/sql-functions/window-functions/row-number.adoc[] +***** xref:reference:sql/sql-functions/window-functions/sum.adoc[] +**** xref:reference:sql/sql-functions/other-functions/index.adoc[Other] +***** xref:reference:sql/sql-functions/other-functions/coalesce.adoc[] +***** xref:reference:sql/sql-functions/other-functions/col-description.adoc[] +***** xref:reference:sql/sql-functions/other-functions/current-database.adoc[] +***** xref:reference:sql/sql-functions/other-functions/current-schema.adoc[] +***** xref:reference:sql/sql-functions/other-functions/has-schema-privillege.adoc[] 
+***** xref:reference:sql/sql-functions/other-functions/nullif.adoc[] +***** xref:reference:sql/sql-functions/other-functions/obj-description.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-backend-pid.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-encoding-to-char.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-constraintdef.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-expr.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-indexdef.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-get-userbyid.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-relation-is-publishable.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-size-pretty.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-table-is-visible.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-table-size.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-total-relation-size.adoc[] +***** xref:reference:sql/sql-functions/other-functions/pg-typeof.adoc[] +***** xref:reference:sql/sql-functions/other-functions/shobj-description.adoc[] +*** xref:reference:sql/schema.adoc[] +*** xref:reference:sql/comment-support.adoc[] +*** xref:reference:sql/transactions.adoc[] +*** xref:reference:sql/system-catalogs/index.adoc[System Catalogs] +**** xref:reference:sql/system-catalogs/catalogs/pg_attrdef.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_attribute.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_class.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_constraint.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_depend.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_description.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_index.adoc[] +**** 
xref:reference:sql/system-catalogs/catalogs/pg_namespace.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_settings.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_statio_user_tables.adoc[] +**** xref:reference:sql/system-catalogs/catalogs/pg_type.adoc[] + ** xref:reference:tiers/index.adoc[Cloud Tiers and Regions] *** xref:reference:tiers/serverless-regions.adoc[] *** xref:reference:tiers/byoc-tiers.adoc[] diff --git a/modules/reference/pages/sql/comment-support.adoc b/modules/reference/pages/sql/comment-support.adoc new file mode 100644 index 000000000..dbfb456fc --- /dev/null +++ b/modules/reference/pages/sql/comment-support.adoc @@ -0,0 +1,61 @@ += Comment Support +:description: Redpanda SQL fully supports comments in your queries. +:page-topic-type: reference + +Redpanda SQL fully supports comments in your queries. Comments provide a way to add explanatory notes and improve the readability of queries, making it easier for developers and stakeholders to understand complex queries. + +There are two types of comments in Redpanda SQL: *single-line* and *multi-line (block)*. + +== Single line comments + +A single-line comment in Redpanda SQL starts with two consecutive hyphens (--) and extends to the end of the line. These comments are used to annotate specific parts of a query, providing brief explanations or notes to assist in understanding the query. + +*Syntax:* + +[source,sql] +---- +-- This is an example single line comment +---- + +== Multi-line (block) comments + +Redpanda SQL also supports multi-line comments, often referred to as block comments. These comments begin with `/*` and end with `*/`, allowing for multi-line explanations or temporarily disabling sections of the query. + +*Syntax:* + +[source,sql] +---- +/* +This is an example multi-line comment. +It can span multiple lines and is useful for providing detailed explanations. 
+*/ +---- + +== Comment placement + +In Redpanda SQL, single-line comments should always be placed at the end of the line they refer to, whereas multi-line comments can be positioned anywhere within the query. + +*Example - Comment on Single Line:* + +[source,sql] +---- +SELECT column1, column2 -- This is an example single line comment +FROM table_name; +---- + +*Example - Comment on Multiple Lines:* + +[source,sql] +---- +SELECT /* comment 1 */ column1, column2 +FROM table_name /* comment 2 */ +WHERE column3 = 42 /* comment 3 */ ; +---- + +== Best practices for commenting + +To maximize the benefits of comments in Redpanda SQL queries, follow these best practices: + +* *Be concise.* Write clear and concise comments that provide meaningful insights into the specific parts of the query. +* *Update comments during code changes.* Whenever the query is modified, update the associated comments to reflect the changes accurately. +* *Avoid over-commenting.* While comments are helpful, excessive commenting can clutter the code and reduce readability. diff --git a/modules/reference/pages/sql/index.adoc b/modules/reference/pages/sql/index.adoc new file mode 100644 index 000000000..35483761f --- /dev/null +++ b/modules/reference/pages/sql/index.adoc @@ -0,0 +1,12 @@ += SQL Reference +:description: This section provides information about the syntax and semantics of SQL queries, clauses, data types, and functions that Redpanda SQL supports. + +This section provides information about the syntax and semantics of SQL queries, clauses, data types, and functions that Redpanda SQL supports. The information in this section is divided into groups according to the kind of operation they perform as follows: + +* *xref:reference:sql/sql-statements/index.adoc[SQL Statements].* Learn how to create a request for data or information from one or more database tables using supported statements. 
+* *xref:reference:sql/sql-clauses/index.adoc[SQL Clauses].* Learn how to write user-friendly queries and analyze data using different constraints and conditions. +* *xref:reference:sql/sql-data-types/index.adoc[SQL Data Types].* Learn how to implement supported data types to run your operations, such as text, timestamp, numeric, and many more. +* *xref:reference:sql/sql-functions/index.adoc[SQL Functions].* See how you can combine statements, data types, and other references into specific functions for particular tasks. +* *xref:reference:sql/schema.adoc[Schema].* Learn about a logical container that holds database objects and relationships of data in a database. +* *xref:reference:sql/comment-support.adoc[Comment Support].* Add comments in your queries for better documentation and collaboration. +* *xref:reference:sql/transactions.adoc[Transactions].* Learn more about managing your transactions. diff --git a/modules/reference/pages/sql/schema.adoc b/modules/reference/pages/sql/schema.adoc new file mode 100644 index 000000000..305e17e8f --- /dev/null +++ b/modules/reference/pages/sql/schema.adoc @@ -0,0 +1,245 @@ += Schema Definition +:description: A schema is a collection of tables, views, indexes, sequences, data types, operators, and functions that organizes database objects into logical groups so their names do not collide. +:page-topic-type: reference + +== What is a schema? + +Have you ever wondered how to work with your fellows in one database without interfering with each other? Is it possible to organize the database objects into logical groups which do not collide with the other objects’ names? + +We can do those things with *Schema*: + +A *schema* is a collection of tables. A schema also contains views, indexes, sequences, data types, operators, and functions. We support multiple schemas. For example, you can have a database named `oxla` and have multiple schemas based on your needs, like `auth`, `model`, `business`, etc. 
+ == Default schema in Redpanda SQL + By default, the `public` schema is used in Redpanda SQL. When unqualified `table_name` is used, that `table_name` is equivalent to `public.table_name`. It also applies to `CREATE`, `DROP`, and `SELECT TABLE` statements. + [NOTE] ==== Furthermore, you can create multiple schemas per your needs. ==== + == Schema usage scenarios + === Create a schema + The basic syntax of creating a schema is as follows: + [source,sql] ---- CREATE SCHEMA [IF NOT EXISTS] schema_name; ---- + * `schema_name` is the schema name you are going to create. + * `IF NOT EXISTS` is an optional parameter to avoid errors if the schema already exists. + === Create a table in schema + The syntax to create a table in a specified schema is as follows: + [source,sql] ---- CREATE TABLE schema_name.table_name( +... +); ---- + * `schema_name` is the schema that you have created. + * `table_name` is the table name you are going to create. + === Select a table in schema + After creating the table and inserting some data, display all rows with the syntax below: + [source,sql] ---- SELECT * FROM schema_name.table_name; ---- + * `schema_name` is the name of the schema. + * `table_name` is the name of the table you want to display. + === Drop the schema + *Option 1*: To drop an empty schema where no objects remain in it, use the command below: + [source,sql] ---- DROP SCHEMA [IF EXISTS] schema_name; ---- + * `schema_name` is the name of the schema you are going to drop. + * `IF EXISTS` is an optional parameter to avoid errors if the schema does not exist. + *Option 2*: Tables reside in a schema, so it is impossible to drop a schema without also dropping the tables. With the command below, you will also drop the schema with the tables. + [source,sql] ---- DROP SCHEMA schema_name CASCADE; ---- + == Examples + === Create schema + . 
First, connect to Redpanda SQL and create a schema as shown below: ++ +[source,sql] +---- +CREATE SCHEMA oxlarefs; +---- + +. Next, create a table in the above schema with the following details: ++ +[source,sql] +---- +CREATE TABLE oxlarefs.functions( + id int, + function_name text, + active bool +); + +INSERT INTO oxlarefs.functions(id, function_name, active) +VALUES +('1111', 'Numeric', 'TRUE'), +('2222', 'Text', 'TRUE'), +('3333', 'Timestamp', 'TRUE'), +('4444', 'JSON', 'TRUE'), +('5555', 'Boolean', 'TRUE'); +---- + +. You can verify and show the table made with the command below: ++ +[source,sql] +---- +SELECT * FROM oxlarefs.functions; +---- + +. You will get the following result: ++ +[source,sql] +---- ++------+---------------+---------+ +| id | function_name | active | ++------+---------------+---------+ +| 1111 | Numeric | t | +| 2222 | Text | t | +| 3333 | Timestamp | t | +| 4444 | JSON | t | +| 5555 | Boolean | t | ++------+---------------+---------+ +---- + +=== Create schema using IF NOT EXISTS + +To avoid errors when the schema already exists, use the `IF NOT EXISTS` option. Here is how it works: + +==== Example without IF NOT EXISTS + +. First, create the schema without using the `IF NOT EXISTS` option. ++ +[source,sql] +---- +CREATE SCHEMA oxladb; +---- ++ +Output: ++ +[source,sql] +---- +CREATE SCHEMA +---- + +. If you attempt to create the schema again without using `IF NOT EXISTS`, it will result in an error. ++ +[source,sql] +---- +CREATE SCHEMA oxladb; +---- ++ +Output: ++ +[source,sql] +---- +ERROR: Schema: oxladb already exists +---- + +==== Example with IF NOT EXISTS + +Now, create the schema using the `IF NOT EXISTS` option to avoid the error. + +[source,sql] +---- +CREATE SCHEMA IF NOT EXISTS oxladb; +---- + +Using `IF NOT EXISTS` allows the query to create a schema even if it already exists. + +[source,sql] +---- +CREATE +---- + +=== Drop schema + +Use the command below to delete the schema and also the tables in it. 
+ [source,sql] ---- DROP SCHEMA oxlarefs CASCADE; ---- + If there is no table or other object created inside the schema, you can use the following command to drop the schema. + [source,sql] ---- DROP SCHEMA oxlarefs; ---- + === Drop schema using IF EXISTS + ==== Example without IF EXISTS + . First, drop the schema without using the `IF EXISTS` option. ++ [source,sql] ---- DROP SCHEMA oxladb; ---- ++ Output: ++ [source,sql] ---- DROP ---- + . If you attempt to drop the schema again without using `IF EXISTS`, it will result in an error. ++ [source,sql] ---- DROP SCHEMA oxladb; ---- ++ Output: ++ [source,sql] ---- ERROR: schema "oxladb" does not exist ---- + ==== Example with IF EXISTS + Now, drop the schema using the `IF EXISTS` option. + [source,sql] ---- DROP SCHEMA IF EXISTS oxladb; ---- + Using `IF EXISTS` allows the query to succeed even if the schema does not exist. + [source,sql] ---- DROP ---- diff --git a/modules/reference/pages/sql/sql-clauses/from/from.adoc b/modules/reference/pages/sql/sql-clauses/from/from.adoc new file mode 100644 index 000000000..0cebf79e6 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/from.adoc @@ -0,0 +1,253 @@ += FROM +:description: The FROM clause is used to specify which table or joins are required for the query/statement (e.g., SELECT statement) to return or obtain data. +:page-topic-type: reference + +The `FROM` clause is used to specify which table or joins are required for the query/statement (e.g., `SELECT` statement) to return or obtain data. + +== Syntax + +There must be at least one table listed in the `FROM` clause. 
See the following syntax: + +[source,sql] +---- +query FROM table_name; +---- + +If two or more tables are listed in the `FROM` clause, these tables are joined using xref:reference:sql/sql-clauses/from/join.adoc[JOIN], xref:reference:sql/sql-clauses/from/right-join.adoc[RIGHT JOIN], xref:reference:sql/sql-clauses/from/left-join.adoc[LEFT JOIN], or xref:reference:sql/sql-clauses/from/outer-join.adoc[OUTER JOIN], depending on the operations to be queried as seen in the syntax below: + +[source,sql] +---- +FROM table1_name +[ { JOIN + | LEFT JOIN + | RIGHT JOIN + | OUTER JOIN } table2_name +ON table1_name.column1 = table2_name.column1 ] +---- + +[NOTE] +==== +The examples below are executed in the `public` schema, the default schema in Redpanda SQL. You can also create, insert, and display a table from other schemas - +click xref:reference:sql/schema.adoc[here] for more info. +==== + +== Example + +We’ll start by looking at how to use the `FROM` clause with only a single table. + +There is a *client* table, and we want to know the client’s name and the city where the company is based. + +[source,sql] +---- +CREATE TABLE client ( + client_id int, + client_name text, + client_origin text +); +INSERT INTO client + (client_id, client_name, client_origin) +VALUES + (181891,'Oxla','Poland'), + (181892,'Google','USA'), + (181893,'Samsung','South Korea'); +---- + +[source,sql] +---- +SELECT * FROM client; +---- + +It will create a table as shown below: + +[source,sql] +---- ++------------+--------------+------------------+ +| client_id | client_name | client_origin | ++------------+--------------+------------------+ +| 181891 | Oxla | Poland | +| 181892 | Google | USA | +| 181893 | Samsung | South Korea | ++------------+--------------+------------------+ +---- + +. Run the following query: ++ +[source,sql] +---- +SELECT client_name, client_origin FROM client; +---- + +. 
You will get a list of the client’s data for a successful result: ++ +[source,sql] +---- ++--------------+------------------+ +| client_name | client_origin | ++--------------+------------------+ +| Oxla | Poland | +| Google | USA | +| Samsung | South Korea | ++--------------+------------------+ +---- + +[TIP] +==== +If two or more tables are listed in the FROM clause, please refer to these sections for more examples related to this: +xref:reference:sql/sql-clauses/from/join.adoc[JOIN], +xref:reference:sql/sql-clauses/from/right-join.adoc[RIGHT JOIN], +xref:reference:sql/sql-clauses/from/left-join.adoc[LEFT JOIN], or +xref:reference:sql/sql-clauses/from/outer-join.adoc[OUTER JOIN]. +==== + +== FROM - sub queries + +FROM clause is also used to specify a sub-query expression. The relation created from the sub-query is then used as a new relation on the other query. + +[NOTE] +==== +More than one table can be defined by separating it with a comma *(,)*. +==== + +=== Syntax + +Here is an example of the sub-query syntax that uses a FROM clause: + +[source,sql] +---- +SELECT X.column1, X.column2, X.column3 +FROM table_2 as X, table_1 as Y +WHERE conditions (X.column, Y.column); +---- + +. The sub-query in the first `FROM` clause will select the columns from the specific table using a new temporary relation (`SELECT X.column1, X.column2, X.column3 FROM` ). +. Set the tables into a new temporary relation (`table_2 as X, table_1 as Y`). +. Next, the query is evaluated, selecting only those rows from the temporary relation that fulfill the conditions stated in the `WHERE` clause. + +=== Example + +We want to find a product whose price exceeds all categories’ average budget. 
+ +[source,sql] +---- +CREATE TABLE product ( + id int, + product text, + category text, + price int +); +INSERT INTO product + (id, product, category, price) +VALUES + (445747,'Court vision women’s shoes nike','Shoes', 8000), + (445641,'Disney kids h&m','Shirt', 6500), + (477278,'Defacto adidas','Hat', 8500), + (481427,'Sophie shopping bag','Bag', 6500), + (411547,'Candy skirt zara','Skirt', 6500), + (488198,'Slim cut skirt hush puppies','Skirt', 7600); +---- + +[source,sql] +---- +SELECT * FROM product; +---- + +It will create a table as shown below: + +[source,sql] +---- ++---------+----------------------------------+-----------+--------+ +| id | product | category | price | ++---------+----------------------------------+-----------+--------+ +| 445747 | Court vision women’s shoes nike | Shoes | 8000 | +| 445641 | Disney kids h&m | Shirt | 6500 | +| 477278 | Defacto adidas | Hat | 8500 | +| 481427 | Sophie shopping bag | Bag | 6500 | +| 411547 | Candy skirt zara | Skirt | 6500 | +| 488198 | Slim cut skirt hush puppies | Skirt | 7600 | ++---------+----------------------------------+-----------+--------+ +---- + +.... + **category table** +.... + +[source,sql] +---- +CREATE TABLE category ( + categoryName text, + budget int +); +INSERT INTO category + (categoryName, budget) +VALUES + ('Shoes', 7000), + ('Shirt', 9000), + ('Bag', 8000), + ('Skirt', 7500), + ('Hat', 7000); +---- + +[source,sql] +---- +SELECT * FROM category; +---- + +It will create a table as shown below: + +[source,sql] +---- ++---------------+----------+ +| categoryName | budget | ++---------------+----------+ +| Shoes | 7000 | +| Shirt | 9000 | +| Bag | 8000 | +| Skirt | 7500 | +| Hat | 7000 | ++---------------+----------+ +---- + +''''' + +. Run the following query to know and ensure the average value of all category’s budgets: ++ +[source,sql] +---- +select avg(budget) as avgBudget from category; +---- + +. The average budget of all categories from the *category* table is 7700. 
++ +[source,sql] +---- ++--------------------+ +| avgbudget | ++--------------------+ +| 7700.000000000000 | ++--------------------+ +---- + +. Now, run the following query: ++ +* We specify the *product* table as *P* and the budget’s average value from the *category* table as C. +* We will display the product’s name, category, and price. +* We set the conditions where the product’s price exceeds the budget’s average value. ++ +[source,sql] +---- +select P.product, P.category, P.price from +(select avg(budget) as avgBudget from category) as C, product as P +where P.price > C.avgBudget; +---- ++ +➡️ The output will display “court vision women’s shoes nike” and “Defacto adidas” as the products with a price of more than 7700. ++ +[source,sql] +---- ++------------------------------------+-----------+----------+ +| product | category | price | ++------------------------------------+-----------+----------+ +| court vision women`s shoes nike | shoes | 8000 | +| Defacto adidas | hat | 8500 | ++------------------------------------+-----------+----------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/from/index.adoc b/modules/reference/pages/sql/sql-clauses/from/index.adoc new file mode 100644 index 000000000..7cca9b355 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/index.adoc @@ -0,0 +1,3 @@ += FROM +:description: Reference for the FROM clause and JOIN variants in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-clauses/from/join.adoc b/modules/reference/pages/sql/sql-clauses/from/join.adoc new file mode 100644 index 000000000..091acc2b7 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/join.adoc @@ -0,0 +1,152 @@ += JOIN +:description: JOIN clause is used to create a new table by combining records and using common fields between two tables in a database. 
+:page-topic-type: reference
+
+The `JOIN` clause is used to create a new table by combining records and using common fields between two tables in a database.
+
+[TIP]
+====
+We support table aliasing used in the `JOIN` clause.
+====
+
+== Syntax
+
+The following is the syntax of the `JOIN` clause:
+
+[source,sql]
+----
+SELECT table_1.column_1, table_2.column_2...
+FROM table_1
+JOIN table_2
+ON table_1.common_field = table_2.common_field
+----
+
+. `SELECT table_1.column_1, table_2.column_2...` will select the columns to be displayed from both tables.
+. `FROM table_1 JOIN table_2` represents the joined tables.
+. `ON table_1.common_field = table_2.common_field` compares each row of table_1 with each row of table_2 to find all pairs of rows that meet the join-common field.
+. When the join-common field is met, column values for each matched pair of rows from table_1 and table_2 are combined into a result row.
+
+=== Table alias
+
+You can use table aliasing to refer to the table’s name. An alias is a temporary name given to a table, column, or expression in a query.
+
+The results will stay the same, but an alias can make the query easier to write.
+
+[source,sql]
+----
+SELECT left.column_1, right.column_2...
+FROM table_1 as left
+JOIN table_2 as right
+ON left.common_field = right.common_field
+----
+
+== Examples
+
+Before we move on, let us assume two tables:
+
+*movies table*
+
+[source,sql]
+----
+CREATE TABLE movies (
+    movie_id int,
+    movie_name text,
+    category_id int
+);
+INSERT INTO movies
+    (movie_id, movie_name, category_id)
+VALUES
+    (201011, 'The Avengers', 181893),
+    (200914, 'Avatar', 181894),
+    (201029, 'Shutter Island', 181891),
+    (201925, 'Tune in Your Love', 181892);
+----
+
+[source,sql]
+----
+SELECT * FROM movies;
+----
+
+It will create a table as shown below:
+
+[source,sql]
+----
++------------+-----------------------+--------------+
+| movie_id   | movie_name            | category_id  |
++------------+-----------------------+--------------+
+| 201011     | The Avengers          | 181893       |
+| 200914     | Avatar                | 181894       |
+| 201029     | Shutter Island        | 181891       |
+| 201925     | Tune in Your Love     | 181892       |
++------------+-----------------------+--------------+
+----
+
+*categories table*
+
+[source,sql]
+----
+CREATE TABLE categories (
+    id int,
+    category_name text
+);
+INSERT INTO categories
+    (id, category_name)
+VALUES
+    (181891, 'Psychological Thriller'),
+    (181892, 'Romance'),
+    (181893, 'Fantasy'),
+    (181894, 'Science Fiction'),
+    (181895, 'Action');
+----
+
+[source,sql]
+----
+SELECT * FROM categories;
+----
+
+It will create a table as shown below:
+
+[source,sql]
+----
++-----------+--------------------------+
+| id        | category_name            |
++-----------+--------------------------+
+| 181891    | Psychological Thriller   |
+| 181892    | Romance                  |
+| 181893    | Fantasy                  |
+| 181894    | Science Fiction          |
+| 181895    | Action                   |
++-----------+--------------------------+
+----
+
+'''''
+
+. Based on the above tables, we can write a `JOIN` query as follows:
++
+[source,sql]
+----
+SELECT a.movie_name, c.category_name
+FROM movies AS a
+JOIN categories AS c
+ON a.category_id = c.id;
+----
+
+. 
The above query will give the following result:
++
+[source,sql]
+----
++-----------------------+---------------------------+
+| movie_name            | category_name             |
++-----------------------+---------------------------+
+| Shutter Island        | Psychological Thriller    |
+| Tune in Your Love     | Romance                   |
+| The Avengers          | Fantasy                   |
+| Avatar                | Science Fiction           |
++-----------------------+---------------------------+
+----
++
+The JOIN checks each row of the *category_id* column in the first table (*movies*) with the value in the *id* column of each row in the second table (*categories*).
++
+If the values are equal, it will create a new row that contains columns from both tables (*category_name*) and adds the new row *(movie_name)* to the result set.
++
+Below is the Venn diagram based on the example:
diff --git a/modules/reference/pages/sql/sql-clauses/from/left-join.adoc b/modules/reference/pages/sql/sql-clauses/from/left-join.adoc
new file mode 100644
index 000000000..e910e98c9
--- /dev/null
+++ b/modules/reference/pages/sql/sql-clauses/from/left-join.adoc
@@ -0,0 +1,175 @@
+= LEFT JOIN
+:description: The LEFT JOIN returns all matching records from the left table combined with the right table.
+:page-topic-type: reference
+
+The `LEFT JOIN` returns *all* matching records from the left table combined with the right table. Even if there are no matching records in the right table, the `LEFT JOIN` will still return a row in the result, but with NULL in each column from the right table.
+
+[NOTE]
+====
+`LEFT JOIN` is also known as `LEFT OUTER JOIN`.
+====
+
+== Syntax
+
+[TIP]
+====
+We support table aliasing used in the `LEFT JOIN` clause.
+====
+
+[source,sql]
+----
+SELECT column_1, column_2...
+FROM table_1
+LEFT JOIN table_2
+ON table_1.matching_field = table2.matching_field;
+----
+
+In the above syntax:
+
+. `SELECT column_1, column_2...` defines the *columns* from both tables where we want the data to be selected.
+. 
`FROM table_1` defines the *left table* as the main table in the FROM clause. +. `LEFT JOIN table_2` defines the *right table* as the table the main table joins. +. `ON table_1.matching_field = table2.matching_field` sets the join condition after the *ON* keyword with the matching field between the two tables. + +=== Table alias + +You can use an alias to refer to the table’s name. The results will stay the same. It only helps to write the query easier. + +[source,sql] +---- +SELECT A.column_1, B.column_2... +FROM table_1 A //table_1 as A +LEFT JOIN table_2 B //table_2 as B +ON A.matching_field = B.matching_field; +---- + +== Example + +*item table* + +[source,sql] +---- +CREATE TABLE item ( + item_no int NOT NULL, + item_name text +); + +INSERT INTO item + (item_no,item_name) +VALUES + (111,'Butter'), + (113,'Tea'), + (116,'Bread'), + (119,'Coffee'); +---- + +[source,sql] +---- +SELECT * FROM item; +---- + +It will create a table as shown below: + +[source,sql] +---- ++-----------+----------------+ +| item_no | item_name | ++-----------+----------------+ +| 111 | Butter | +| 113 | Tea | +| 116 | Bread | +| 119 | Coffee | ++-----------+----------------+ +---- + +*invoice table* + +[source,sql] +---- +CREATE TABLE invoice ( + inv_no int NOT NULL, + item int, + sold_qty int, + sold_price int +); + +INSERT INTO invoice + (inv_no, item, sold_qty, sold_price) +VALUES + (020219,111,3,9000), + (020220,116,6,30000), + (020221,116,2,10000), + (020222,116,1,5000), + (020223,119,5,20000), + (020224,119,4,16000); +---- + +[source,sql] +---- +SELECT * FROM invoice; +---- + +It will create a table as shown below: + +[source,sql] +---- ++----------+---------+-----------+-------------+ +| inv_no | item | sold_qty | sold_price | ++----------+---------+-----------+-------------+ +| 20219 | 111 | 3 | 9000 | +| 20220 | 116 | 6 | 30000 | +| 20221 | 116 | 2 | 10000 | +| 20222 | 116 | 1 | 5000 | +| 20223 | 119 | 5 | 20000 | +| 20224 | 119 | 4 | 16000 | 
++----------+---------+-----------+-------------+
+----
+
+'''''
+
+{empty}1) Based on the above tables, we can write a `LEFT JOIN` query as follows:
+
+[source,sql]
+----
+SELECT item_no, item_name, sold_qty, sold_price
+FROM item
+LEFT JOIN invoice
+ON item.item_no = invoice.item;
+----
+
+* The *item* = left table, and the *invoice* = right table.
+* Then it combines the values from the *item* table using the *item_no* and matches the records using the *item* column of each row from the *invoice* table.
+* If the records are equal, a new row will be created with `item_no`, *`item_name`*, and `sold_qty`, `sold_price` columns as defined in the `SELECT` clause.
+* *ELSE* it will create a new row with a `NULL` value from the right table *(invoice)*.
+
+{empty}2) The above query will give the following result:
+
+[source,sql]
+----
++-----------+-------------+------------+---------------+
+| item_no   | item_name   | sold_qty   | sold_price    |
++-----------+-------------+------------+---------------+
+| 111       | Butter      | 3          | 9000          |
+| 113       | Tea         | null       | null          |
+| 116       | Bread       | 6          | 30000         |
+| 116       | Bread       | 2          | 10000         |
+| 116       | Bread       | 1          | 5000          |
+| 119       | Coffee      | 5          | 20000         |
+| 119       | Coffee      | 4          | 16000         |
++-----------+-------------+------------+---------------+
+----
+
+Based on the data from the *item* and *invoice* tables:
+
+* The result matches the total item stored in the *item* table: *4 items.*
+* The result will display all the item’s data from the *left table (item table)*, even if there is 1 item that hasn’t been sold.
+* The item id: `111` matches the item `butter` and has been sold for 3pcs/9000.
+* The item id: `113` matches the item `tea` but has never been sold. Thus the sold_qty & sold_price columns are filled with: null.
+* The item id: `116` matches the item `Bread` and has been sold three times, for 6pcs/30000, 2pcs/10000, and 1pc/5000.
+* The item id: `119` matches the item `Coffee` and has been sold two times, for 5pcs/20000 and 4pcs/16000.
+ +[TIP] +==== +An *item* can have zero or many invoices. An *invoice* belongs to zero or one *item*. +==== +The following Venn diagram illustrates the `LEFT JOIN`: diff --git a/modules/reference/pages/sql/sql-clauses/from/outer-join.adoc b/modules/reference/pages/sql/sql-clauses/from/outer-join.adoc new file mode 100644 index 000000000..9bab1956c --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/from/outer-join.adoc @@ -0,0 +1,223 @@ += OUTER JOIN +:description: The OUTER JOIN or FULL OUTER JOIN returns all the records from the selected fields between the two tables (left table & right table) whether the join +:page-topic-type: reference + +The `OUTER JOIN` *or* `FULL OUTER JOIN` returns all the records from the selected fields between the two tables (left table & right table) whether the join condition is met or not. + +== Inner join vs. outer join + +The most significant difference between an `INNER JOIN` and an `OUTER JOIN` is that the `INNER JOIN` only returns the information from both tables which are common and related to each other. The OUTER JOIN will return all rows (matched/unmatched) from both tables. + +[TIP] +==== +We support table aliasing used in the OUTER JOIN clause. +==== + +== Syntax + +[source,sql] +---- +SELECT column_1, column_2... +FROM table_1 +FULL OUTER JOIN table_2 +ON table_1.matching_field = table2.matching_field; +---- + +In the above syntax: + +. `SELECT column_1, column_2...` defines the *columns* from both tables where we want to display data. +. `FROM table_1` represents the *left table* with table_1 in the FROM clause. +. `FULL OUTER JOIN table_2` represents the *right table* with table_2 in the FULL OUTER JOIN condition. +. `ON table_1.matching_field = table2.matching_field` sets the join condition after the *ON* keyword with the matching field between the two tables. + +=== Table alias + +You can use an alias to refer to the table’s name. The results will stay the same. It only helps to write the query easier. 
+ +[source,sql] +---- +SELECT A.column_1, B.column_2... +FROM table_1 A //table_1 as A +FULL OUTER JOIN table_2 B //table_2 as B +ON A.matching_field = B.matching_field; +---- + +[NOTE] +==== +If there are no matched records from the joined tables, the `NULL` values will return in every column of the table that doesn’t have the matching record. +==== + +== Example + +*departments table* + +[source,sql] +---- +CREATE TABLE departments ( + department_id int, + department_name text +); +INSERT INTO departments (department_id,department_name) +VALUES + (1001, 'Sales'), + (1002, 'Marketing'), + (1003, 'HR'), + (1004, 'Project'), + (1005, 'Product'); +---- + +[source,sql] +---- +SELECT * FROM departments; +---- + +It will create a *departments* table as shown below: + +[source,sql] +---- ++----------------+------------------+ +| department_id | department_name | ++----------------+------------------+ +| 1001 | Sales | +| 1002 | Marketing | +| 1003 | HR | +| 1004 | Project | +| 1005 | Product | ++----------------+------------------+ +---- + +*employee table* + +[source,sql] +---- +CREATE TABLE employee ( + employee_id int, + employee_name text, + dept_id int +); +INSERT INTO employee ( + employee_id, + employee_name, + dept_id +) +VALUES + (2001,'Tony Stark', 1002), + (2002,'Christian Bale', 1002), + (2003,'Anne Hailey', 1003), + (2004,'Wilson Cliff', 1004), + (2005,'Susan Oh', 1001), + (2006,'Julian Robert', 1001), + (2007,'Gilbert Tom', null); +---- + +[source,sql] +---- +SELECT * FROM employee; +---- + +It will create an *employee* table as shown below: + +[source,sql] +---- ++--------------+-------------------+------------+ +| employee_id | employee_name | dept_id | ++--------------+-------------------+------------+ +| 2001 | Tony Stark | 1002 | +| 2002 | Christian Bale | 1002 | +| 2003 | Anne Hailey | 1003 | +| 2004 | Wilson Cliff | 1004 | +| 2005 | Susan Oh | 1001 | +| 2006 | Julian Robert | 1001 | +| 2007 | Gilbert Tom | null | 
++--------------+-------------------+------------+ +---- + +''''' + +=== FULL OUTER JOIN + +{empty}1) Based on the above tables, we can write an `OUTER JOIN` query as follows: + +[source,sql] +---- +SELECT employee_name, department_name +FROM departments +FULL OUTER JOIN employee +ON departments.department_id = employee.dept_id; +---- + +{empty}2) The result will show every department with an employee and the employee who works under a specific department. + +{empty}3) It also includes every department that does not have any employees and the employees who do not belong to a specific department. + +[source,sql] +---- ++-------------------+-------------------+ +| employee_name | department_name | ++-------------------+-------------------+ +| Julian Robert | Sales | +| Susan Oh | Sales | +| Christian Bale | Marketing | +| Tony Stark | Marketing | +| Anne Hailey | HR | +| Wilson Cliff | Project | +| Gilbert Tom | null | +| null | Product | ++-------------------+-------------------+ +---- + +The following Venn diagram illustrates the FULL OUTER JOIN: + +''''' + +=== `FULL OUTER JOIN` with `WHERE` clause + +*a) Employee* + +. We can look up the department that does not have any employees by adding a `WHERE` clause and `NULL` as the following query: ++ +[source,sql] +---- +SELECT employee_name, department_name +FROM departments +FULL OUTER JOIN employee +ON departments.department_id = employee.dept_id +WHERE employee_name IS NULL; +---- + +. 
The result will indicate that the *Product* department doesn’t have any employees 👨🏻‍💼
++
+[source,sql]
+----
++------------------+--------------------+
+| employee_name    | department_name    |
++------------------+--------------------+
+| null             | Product            |
++------------------+--------------------+
+----
++
+*b) Department*
++
+{empty}1) Let’s find out the employee who doesn’t belong to any department by adding a WHERE clause and NULL as the following query:
++
+[source,sql]
+----
+SELECT employee_name, department_name
+FROM employee
+FULL OUTER JOIN departments
+ON employee.dept_id = departments.department_id
+WHERE department_name IS NULL;
+----
++
+{empty}2) The result will show that *Gilbert Tom* doesn’t belong to any department 👨🏻‍💼
++
+[source,sql]
+----
++------------------+--------------------+
+| employee_name    | department_name    |
++------------------+--------------------+
+| Gilbert Tom      | null               |
++------------------+--------------------+
+----
++
+The following Venn diagram illustrates how the FULL OUTER JOIN works for the department and employee with a null value:
diff --git a/modules/reference/pages/sql/sql-clauses/from/right-join.adoc b/modules/reference/pages/sql/sql-clauses/from/right-join.adoc
new file mode 100644
index 000000000..f934e4e9a
--- /dev/null
+++ b/modules/reference/pages/sql/sql-clauses/from/right-join.adoc
@@ -0,0 +1,169 @@
+= RIGHT JOIN
+:description: The RIGHT JOIN returns all matching records from the right table combined with the left table.
+:page-topic-type: reference
+
+The `RIGHT JOIN` returns *all* matching records from the right table combined with the left table. Even if there are no matching records in the left table, the `RIGHT JOIN` will still return a row in the result, but with `NULL` in each column from the left table.
+
+[TIP]
+====
+We support table aliasing used in the `RIGHT JOIN` clause.
+====
+
+== Syntax
+
+[source,sql]
+----
+SELECT column_1, column_2...
+FROM table_1
+RIGHT JOIN table_2
+ON table_1.matching_field = table_2.matching_field;
+----
+
+In the above syntax:
+
+. `SELECT column_1, column_2...` defines the *columns* from both tables where we want to display data.
+. `FROM table_1`, defines the *left table* with table_1 in the FROM clause.
+. `RIGHT JOIN table_2` defines the *right table* with table_2 in the RIGHT JOIN condition.
+. `ON table_1.matching_field = table_2.matching_field` sets the join condition after the *ON* keyword with the matching field between the two tables.
+
+=== Table alias
+
+You can use an alias to refer to the table’s name. The results will stay the same. It only makes the query easier to write.
+
+[source,sql]
+----
+SELECT A.column_1, B.column_2...
+FROM table_1 A //table_1 as A
+RIGHT JOIN table_2 B //table_2 as B
+ON A.matching_field = B.matching_field;
+----
+
+== Example
+
+*customer table*
+
+[source,sql]
+----
+CREATE TABLE customer (
+    id int NOT NULL,
+    customer_name text
+);
+
+INSERT INTO customer
+    (id, customer_name)
+VALUES
+    (201011,'James'),
+    (200914,'Harry'),
+    (201029,'Ellie'),
+    (201925,'Mary');
+----
+
+[source,sql]
+----
+SELECT * FROM customer;
+----
+
+It will create a table as shown below:
+
+[source,sql]
+----
++-----------+----------------+
+| id        | customer_name  |
++-----------+----------------+
+| 201011    | James          |
+| 200914    | Harry          |
+| 201029    | Ellie          |
+| 201925    | Mary           |
++-----------+----------------+
+----
+
+*orders table*
+
+[source,sql]
+----
+CREATE TABLE orders (
+    order_id int NOT NULL,
+    order_date date,
+    order_amount int,
+    customer_id int
+);
+
+INSERT INTO orders
+    (order_id, order_date, order_amount, customer_id)
+VALUES
+    (181893,'2021-10-08',3000,201029),
+    (181894,'2021-11-18',2000,201029),
+    (181891,'2021-09-10',9000,201011),
+    (181892,'2021-10-10',7000,201925),
+    (181897,'2022-05-27',6700,null),
+    (181899,'2021-07-22',4500,201011);
+----
+
+[source,sql]
+----
+SELECT * FROM orders;
+----
+
+It will create a table as shown below:
+
+[source,sql] +---- ++------------+------------------+---------------+-------------+ +| order_id | order_date | order_amount | customer_id | ++------------+------------------+---------------+-------------+ +| 181893 | 2021-10-08 | 3000 | 201029 | +| 181894 | 2021-11-18 | 2000 | 201029 | +| 181891 | 2021-09-10 | 9000 | 201011 | +| 181892 | 2021-10-10 | 7000 | 201925 | +| 181897 | 2022-05-27 | 6700 | null | +| 181899 | 2021-07-22 | 4500 | 201011 | ++------------+------------------+---------------+-------------+ +---- + +''''' + +. Based on the above tables, we can write a `RIGHT JOIN` query as follows: ++ +[source,sql] +---- +SELECT customer_name, order_date, order_amount +FROM customer +RIGHT JOIN orders +ON customer.id = orders.customer_id; +---- ++ +* The **customer**= left table and the *orders* = right table. +* Then it combines the values from the *orders* table using the *customer_id* and matches the records using the *id* column from the *customer* table. +* If the records are equal, a new row will be created with `customer_name` and `order_amount` columns as defined in the `SELECT` clause. +* *ELSE* will still create a new row with a `NULL` value from the left table (*customer*). + +. The above query will give the following result: ++ +[source,sql] +---- ++------------------+----------------+-----------------+ +| customer_name | order_date | order_amount | ++------------------+----------------+-----------------+ +| James | 2021-09-10 | 9000 | +| James | 2021-07-22 | 4500 | +| Ellie | 2021-10-08 | 3000 | +| Ellie | 2021-11-18 | 2000 | +| Mary | 2021-10-10 | 7000 | +| null | 2022-05-27 | 6700 | ++------------------+----------------+-----------------+ +---- ++ +Based on the data from the *customer* and *orders* tables: ++ +* The order id: `181893` matches the customer: `Ellie.` +* The order id: `181894` matches the customer: `Ellie`. +* The order id: `181891` matches the customer: `James`. +* The order id: `181899` matches the customer: `James`. 
+* The order id: `181892` matches the customer: `Mary`. +* The order id: `181897` doesn’t match with any customer. Thus the customer_name column is filled with: `null`. + +[NOTE] +==== +A *customer* can have zero or many *orders*. An item from *orders* belongs to zero or one *customer*. +==== +The following Venn diagram illustrates the `RIGHT JOIN`: diff --git a/modules/reference/pages/sql/sql-clauses/group-by.adoc b/modules/reference/pages/sql/sql-clauses/group-by.adoc new file mode 100644 index 000000000..eef35267b --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/group-by.adoc @@ -0,0 +1,294 @@ += GROUP BY +:description: The GROUP BY clause returns a group of records from a table or multiple tables with the same values as the specified columns. +:page-topic-type: reference + +The `GROUP BY` clause returns a group of records from a table or multiple tables with the same values as the specified columns. + +The result of the `GROUP BY` clause returns a single row for each value of the column. + +[NOTE] +==== +You can use xref:reference:sql/sql-functions/aggregate-functions/index.adoc[aggregate functions] such as `COUNT()`, `MAX()`, `MIN()`, `SUM()`, etc., to perform the operations on the grouped values in the `SELECT` statement. +==== + +== Syntax + +[WARNING] +==== +Ensure the column you are using to group is available in the column list. +==== + +The basic syntax of the `GROUP BY` clause is as follows − + +[source,sql] +---- +SELECT +column_1, column_2, aggregate_function(column_3) +FROM +table_name +GROUP BY +column_1, column_2,...; +---- + +Let’s explore the above syntax: + +* `SELECT column_1, column_2, aggregate_function(column_3)` defines the columns you want to group (`column_1, column_2`) and the column that you want to apply an aggregate function to (`column_3`). +* `FROM table_name` defines the table where the data comes from. +* `GROUP BY column_1, column_2,...;` lists the columns that you want to group in the `GROUP BY` clause. 
+ +[NOTE] +==== +The column specified in the `SELECT` command must also appear in the `GROUP BY` clause. +==== + +=== Syntax with `WHERE` clause + +Please take note that the `GROUP BY` clause must precisely appear after the `WHERE` clause, as shown below: + +[source,sql] +---- +SELECT +column_1, column_2, aggregate_function(column_3) +FROM +table_name +WHERE +conditions +GROUP BY +column_1, column_2,...; +---- + +== Examples + +Let’s assume that we have two tables here, the customer table and the orders table: + +*customer table* + +[source,sql] +---- +CREATE TABLE customer ( + cust_id int, + cust_name text +); +INSERT INTO customer + (cust_id, cust_name) +VALUES + (11001, 'Maya'), + (11003, 'Ricky'), + (11009, 'Sean'), + (11008, 'Chris'), + (11002, 'Emily'), + (11005, 'Rue'), + (11007, 'Tom'), + (11006, 'Casey'); +---- + +[source,sql] +---- +SELECT * FROM customer; +---- + +It will create a table as shown below: + +[source,sql] +---- ++-----------+------------+ +| cust_id | cust_name | ++-----------+------------+ +| 11001 | Maya | +| 11003 | Ricky | +| 11009 | Sean | +| 11008 | Chris | +| 11002 | Emily | +| 11005 | Rue | +| 11007 | Tom | +| 11006 | Casey | ++-----------+------------+ +---- + +[width="100%",cols="100%",options="header",] +|=== +|orders table +|=== + +[source,sql] +---- +CREATE TABLE orders ( + order_id int, + order_date date, + order_prod text, + order_qty int, + order_price int, + cust_id int +); +INSERT INTO orders + (order_id, order_date, order_prod, order_qty, order_price, cust_id) +VALUES + (999191, '2021-01-08','Butter', 1, 4000, 11001), + (999192, '2021-09-30','Sugar', 1, 10000, 11002), + (999193, '2021-04-17','Sugar', 1, 10000, 11009), + (999194, '2021-08-29','Flour', 4, 20000, 11006), + (999195, '2021-05-04','Sugar', 2, 20000, 11008), + (999196, '2021-07-27','Butter', 2, 8000, 11006), + (999197, '2021-10-30','Flour', 2, 10000, 11001), + (999198, '2021-12-18','Flour', 2, 10000, 11007); +---- + +[source,sql] +---- +SELECT * FROM orders; 
+---- + +It will create a table as shown below: + +[source,sql] +---- ++------------+--------------+--------------+-------------+---------------+-----------+ +| order_id | order_date | order_prod | order_qty | order_price | cust_id | ++------------+--------------+--------------+-------------+---------------+-----------+ +| 999191 | 2021-01-08 | Butter | 1 | 4000 | 11001 | +| 999192 | 2021-09-30 | Sugar | 1 | 10000 | 11002 | +| 999193 | 2021-04-17 | Sugar | 1 | 10000 | 11009 | +| 999194 | 2021-08-29 | Flour | 4 | 20000 | 11006 | +| 999195 | 2021-05-04 | Sugar | 2 | 20000 | 11008 | +| 999196 | 2021-07-27 | Butter | 2 | 8000 | 11006 | +| 999197 | 2021-10-30 | Flour | 2 | 10000 | 11001 | +| 999198 | 2021-12-18 | Flour | 2 | 10000 | 11007 | ++------------+--------------+--------------+-------------+---------------+-----------+ +---- + +=== Basic `GROUP BY` + +Here we will get all product names by grouping them using the products ordered from the *orders* table: + +[source,sql] +---- +SELECT order_prod +FROM orders +GROUP BY order_prod; +---- + +The query above will return the output as below: + +[source,sql] +---- ++--------------+ +| order_prod | ++--------------+ +| flour | +| sugar | +| butter | ++--------------+ +---- + +=== `GROUP BY` on multiple columns + +The following example uses multiple columns in the `GROUP BY` clause: + +[source,sql] +---- +SELECT order_id, order_prod +FROM orders +GROUP BY order_id, order_prod; +---- + +The above query will create the following result: + +[source,sql] +---- ++-----------+--------------+ +| order_id | order_prod | ++-----------+--------------+ +| 999194 | flour | +| 999191 | butter | +| 999196 | flour | +| 999192 | sugar | +| 999195 | butter | +| 999198 | sugar | +| 999193 | flour | +| 999197 | sugar | ++-----------+--------------+ +---- + +=== `GROUP BY` with aggregate functions + +For this example, we will calculate the total amount each customer has paid for their orders. 
We will use one of the aggregate functions, i.e., the `SUM()` function. + +[source,sql] +---- +SELECT cust_id, SUM (order_price) +FROM orders +GROUP BY cust_id; +---- + +The query above will return the output as shown below: + +[source,sql] +---- ++-----------+----------+ +| cust_id | sum | ++-----------+----------+ +| 11009 | 10000 | +| 11007 | 10000 | +| 11006 | 28000 | +| 11002 | 10000 | +| 11001 | 14000 | +| 11008 | 20000 | ++-----------+----------+ +---- + +=== `GROUP BY` with `JOIN` condition + +Unlike the previous example, the following query joins the orders table with the customer table and groups customers by their names. Here we will use `COUNT()` as the aggregate function to count the number of products each customer has purchased. + +[source,sql] +---- +SELECT C.cust_name, COUNT (order_prod) +FROM orders O +JOIN customer C ON O.cust_id = C.cust_id +GROUP BY C.cust_name; +---- + +The above command will create the following result: + +[source,sql] +---- ++------------+---------+ +| cust_name | count | ++------------+---------+ +| Tom | 1 | +| Chris | 1 | +| Casey | 2 | +| Maya | 2 | +| Sean | 1 | +| Emily | 1 | ++------------+---------+ +---- + +=== `GROUP BY` with date data type + +The `order_date` column uses a `DATE` data type. In this example, we will group the order’s quantity and total price by dates using the `DATE()` function. 
+
+[source,sql]
+----
+SELECT DATE(order_date), order_qty, SUM(order_price)
+FROM orders
+GROUP BY order_qty, DATE(order_date);
+----
+
+The above query will generate the following result:
+
+[source,sql]
+----
++---------------+------------+---------+
+| date          | order_qty  | sum     |
++---------------+------------+---------+
+| 2021-07-27    | 2          | 8000    |
+| 2021-08-29    | 4          | 20000   |
+| 2021-04-17    | 1          | 10000   |
+| 2021-09-30    | 1          | 10000   |
+| 2021-05-04    | 2          | 20000   |
+| 2021-01-08    | 1          | 4000    |
+| 2021-12-18    | 2          | 10000   |
+| 2021-10-30    | 2          | 10000   |
++---------------+------------+---------+
+----
diff --git a/modules/reference/pages/sql/sql-clauses/having.adoc b/modules/reference/pages/sql/sql-clauses/having.adoc
new file mode 100644
index 000000000..ea9f4e1ed
--- /dev/null
+++ b/modules/reference/pages/sql/sql-clauses/having.adoc
@@ -0,0 +1,251 @@
+= HAVING
+:description: The HAVING clause specifies a search condition by using an aggregate function.
+:page-topic-type: reference
+
+The `HAVING` clause specifies a search condition by using an xref:reference:sql/sql-functions/aggregate-functions/index.adoc[aggregate function]. It will filter out the records returned from a `GROUP BY` clause that do not fulfill a specified condition.
+
+== Differences between WHERE and HAVING clause
+
+The following table will illustrate the differences between the `HAVING` and `WHERE` clause:
+
+[width="100%",cols="51%,49%",options="header",]
+|===
+|*WHERE* |*HAVING*
+|The `GROUP BY` clause appears after the WHERE clause. |The `GROUP BY` clause appears before the HAVING clause.
+|The `WHERE` clause can’t work with an aggregate function. |The `HAVING` clause can work with an aggregate function.
+|The `WHERE` clause filters particular records. |The `HAVING` clause filters the group of records.
+|===
+
+== Syntax
+
+The basic syntax of the `HAVING` clause is as follows:
+
+[source,sql]
+----
+SELECT column_1, column_2,...
+FROM table_name +GROUP BY column_name(s) +HAVING condition_aggregate_function +---- + +Let’s explore the above syntax: + +* `SELECT column_1, column_2,...` selects the columns you want to display. +* `FROM table_name` selects the table where the data comes from. +* `GROUP BY column_name(s)` lists the columns you want to group in the GROUP BY clause. +* `HAVING condition_aggregate_function` provides the condition for filtering rows, which the `GROUP BY` clause forms. The condition can use an aggregate function, such as `SUM()`, `COUNT()`, `MIN()`, and so on. + +== Examples + +Let’s assume that we have two tables here, the student table and the score table: + +*student table* + +[source,sql] +---- +CREATE TABLE student ( + stud_id int, + stud_name text +); +INSERT INTO student + (stud_id, stud_name) +VALUES + (992831192, 'Mary'), + (992811191, 'Bobby'), + (992311195, 'Sean'), + (998311193, 'Harry'), + (998311194, 'William'), + (928311197, 'Kate'), + (928311190, 'Tom'), + (928311199, 'Sully'), + (998311196, 'Susan'); +---- + +[source,sql] +---- +SELECT * FROM student; +---- + +It will create a table as shown below: + +[source,sql] +---- ++------------+------------+ +| stud_id | stud_name | ++------------+------------+ +| 992831192 | Mary | +| 992811191 | Bobby | +| 992311195 | Sean | +| 998311193 | Harry | +| 998311194 | William | +| 928311197 | Kate | +| 928311190 | Tom | +| 928311199 | Sully | +| 998311196 | Susan | ++------------+------------+ +---- + +*score table* + +[source,sql] +---- +CREATE TABLE score ( + score_id int, + subject text, + score_val int, + stud_id int, + score_stat text +); +INSERT INTO score + (score_id, subject, score_val, stud_id, score_stat) +VALUES + (12221, 'Math', 90, 992811191, 'PASSED'), + (12222, 'Biology', 90, 992811191, 'PASSED'), + (12223, 'Art', 80, 992831192, 'PASSED'), + (12224, 'History', 70, 928311197, 'FAILED'), + (12225, 'Pyshics', 75, 928311190, 'FAILED'), + (12226, 'Art', 85, 928311197, 'PASSED'), + (12227, 'Biology', 90, 
998311196, 'PASSED'), + (12228, 'Biology', 70, 928311199, 'FAILED'), + (12229, 'Pyshics', 80, 998311194, 'PASSED'), + (12231, 'Math', 80, 998311193, 'PASSED'), + (12232, 'History', 90, 992811191, 'PASSED'), + (12233, 'Math', 70, 998311194, 'FAILED'), + (12234, 'Math', 80, 928311190, 'PASSED'); +---- + +[source,sql] +---- +SELECT * FROM score; +---- + +It will create a table as shown below: + +[source,sql] +---- ++-----------+----------+------------+------------+-------------+ +| score_id | subject | score_val | stud_id | score_stat | ++-----------+----------+------------+------------+-------------+ +| 12221 | Math | 90 | 992811191 | PASSED | +| 12222 | Biology | 90 | 992811191 | PASSED | +| 12223 | Art | 80 | 992831192 | PASSED | +| 12224 | History | 70 | 928311197 | FAILED | +| 12225 | Pyshics | 75 | 928311190 | FAILED | +| 12226 | Art | 85 | 928311197 | PASSED | +| 12227 | Biology | 90 | 998311196 | PASSED | +| 12228 | Biology | 70 | 928311199 | FAILED | +| 12229 | Pyshics | 80 | 998311194 | PASSED | +| 12231 | Math | 80 | 998311193 | PASSED | +| 12232 | History | 90 | 992811191 | PASSED | +| 12233 | Math | 70 | 998311194 | FAILED | +| 12234 | Math | 80 | 928311190 | PASSED | ++-----------+----------+------------+------------+-------------+ +---- + +=== `HAVING` clause with `AVG` function + +The following example uses an `AVG` aggregate function to filter the student ID with the subject which has an average score of more than 80: + +[source,sql] +---- +SELECT subject +FROM score +GROUP BY subject +HAVING AVG (score_val) > 80; +---- + +The above query will give the following result: + +[source,sql] +---- ++-----------+ +| subject | ++-----------+ +| Art | +| Biology | ++-----------+ +---- + +=== `HAVING` clause with `COUNT` function + +The following query lists the number of score statuses that have more than 2 “*PASSED*” values: + +[source,sql] +---- +SELECT COUNT(score_id), subject +FROM score +GROUP BY subject +HAVING COUNT(score_stat = 'PASSED') > 2; +---- + 
+The above query returns *Math* and *Biology*:
+
+[source,sql]
+----
++--------+--------------+
+| count  | subject      |
++--------+--------------+
+| 4      | Math         |
+| 3      | Biology      |
++--------+--------------+
+----
+
+[NOTE]
+====
+`COUNT(expression)` counts every row where the expression is non-NULL, regardless of whether it evaluates to true, so in this example it returns the total number of rows per subject. To count only the rows where `score_stat = 'PASSED'`, use a filtered aggregate such as `COUNT(*) FILTER (WHERE score_stat = 'PASSED')`.
+====
+
+=== `HAVING` clause with `MAX` function
+
+Let’s assume that the minimum score criteria is *75*. Here we will find the maximum score of each subject with the condition that it should be more than *75*.
+
+[source,sql]
+----
+SELECT subject, MAX(score_val)
+FROM score
+GROUP BY subject
+HAVING MAX(score_val)>75;
+----
+
+The returned result will have the maximum score of each subject, as shown below:
+
+[source,sql]
+----
++-----------+--------+
+| subject   | max    |
++-----------+--------+
+| Math      | 90     |
+| History   | 90     |
+| Physics   | 80     |
+| Art       | 85     |
+| Biology   | 90     |
++-----------+--------+
+----
+
+=== `HAVING` with `JOIN` condition
+
+Assume that you want to know which students have failed in their subject.
+
+You can combine the *student* table with the *score* table using the `JOIN` clause and apply a condition on the `score_stat` column where the values should be equal to *FAILED*, as shown in the following query:
+
+[source,sql]
+----
+SELECT stud_name, subject, score_val, score_stat
+FROM student A
+JOIN score C ON A.stud_id = C.stud_id
+GROUP BY stud_name, subject, score_val, score_stat
+HAVING score_stat = 'FAILED';
+----
+
+* The `JOIN` clause will combine the two tables.
+* Then, the `GROUP BY` clause will filter all records from both tables based on the specified columns.
+* The `HAVING` clause, then, will filter the records returned from the `GROUP BY` clause according to the specified condition.
+ +It will deliver the successful result as shown below: + +[source,sql] +---- ++------------+------------+------------+--------------+ +| stud_name | subject | score_val | score_stat | ++------------+------------+------------+--------------+ +| Kate | History | 70 | FAILED | +| Sully | Biology | 70 | FAILED | +| Tom | Physics | 75 | FAILED | +| William | Math | 70 | FAILED | ++------------+------------+------------+--------------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/index.adoc b/modules/reference/pages/sql/sql-clauses/index.adoc new file mode 100644 index 000000000..53db5cd7c --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/index.adoc @@ -0,0 +1,21 @@ += SQL CLAUSES +:description: SQL clauses help define how data is retrieved, filtered and manipulated. + +SQL clauses help define how data is retrieved, filtered and manipulated. They provide a structured way to specify what data to include, how to organize it and what conditions must be met for rows to be included in the result set. 
+
+Redpanda SQL supports the following clauses:
+
+[width="100%",cols="<40%,<60%",options="header",]
+|===
+|Clause Name |Description
+|xref:reference:sql/sql-clauses/from/from.adoc[FROM] |Defines the source table(s) or view(s) for the query
+|xref:reference:sql/sql-clauses/where.adoc[WHERE] |Filters rows based on specified conditions
+|xref:reference:sql/sql-clauses/group-by.adoc[GROUP BY] |Groups rows sharing common values in specified columns for aggregation
+|xref:reference:sql/sql-clauses/having.adoc[HAVING] |Filters grouped rows based on aggregate conditions
+|xref:reference:sql/sql-clauses/order-by.adoc[ORDER BY] |Sorts the result set by specified columns in ascending or descending order
+|xref:reference:sql/sql-clauses/limit.adoc[LIMIT] |Restricts the number of rows returned by the query
+|xref:reference:sql/sql-clauses/offset.adoc[OFFSET] |Skips a specified number of rows before returning results
+|xref:reference:sql/sql-clauses/set-operations/index.adoc[SET OPERATIONS] |Combines or compares results from multiple `SELECT` statements, using `UNION`, `INTERSECT`, and `EXCEPT`
+|xref:reference:sql/sql-clauses/with.adoc[WITH] |Creates temporary named result sets (Common Table Expressions) for reuse within queries
+|xref:reference:sql/sql-clauses/over-window.adoc[OVER] |Specifies the window of rows over which window functions operate
+|===
diff --git a/modules/reference/pages/sql/sql-clauses/limit.adoc b/modules/reference/pages/sql/sql-clauses/limit.adoc
new file mode 100644
index 000000000..117b9928e
--- /dev/null
+++ b/modules/reference/pages/sql/sql-clauses/limit.adoc
@@ -0,0 +1,209 @@
+= LIMIT
+:description: LIMIT is an optional clause that can be combined with SELECT statements used for retrieving records from one or more tables.
+:page-topic-type: reference
+
+`LIMIT` is an optional clause that can be combined with `SELECT` statements used for retrieving records from one or more tables. 
It basically specifies the number of records a query should return after filtering the data.
+
+== Syntax
+
+There are two versions available for the `LIMIT` clause syntax:
+
+.Version 1
+[source,sql]
+----
+SELECT column_list
+FROM table_name
+ORDER BY sort_expression
+LIMIT row_count
+----
+
+.Version 2
+[source,sql]
+----
+SELECT column_list
+FROM table_name
+ORDER BY sort_expression
+FETCH NEXT row_count ROWS ONLY
+----
+The parameters and arguments for each version of the syntax are described below:
+
+* `column_list`: The columns or calculations that you wish to retrieve.
+* `table_name`: The tables that you want to retrieve records from.
+
+[NOTE]
+==== 
+It is possible to have more than one table in the `FROM` clause.
+====
+* `ORDER BY`: An expression used to sort the results in the order you wish to return them, either ascending *(ASC)* or descending *(DESC)*.
+* `LIMIT row_count`: It specifies a limited number of rows to be returned based on *row_count*.
+
+=== Special cases
+
+. If the `row_count` value is *NULL,* the query returns the same result set as it would without the `LIMIT` clause.
+. If `row_count` is *zero*, the statement returns an empty set.
+
+== Examples
+
+Let’s take some examples of the `LIMIT` clause. 
+
+Here we are creating one new table called *comporders* using the `CREATE TABLE` command and inserting some values into the table using the `INSERT` command:
+
+[source,sql]
+----
+CREATE TABLE comporders
+(
+    order_id int,
+    cust_name text,
+    prod_name text,
+    prod_price float,
+    status text
+);
+
+INSERT INTO comporders
+VALUES
+(1002, 'Mike', 'Lenovo IdeaPad Flex 5', 600, 'PAID'),
+(1003, 'Sean', 'Acer Aspire 3', 450, 'PAID'),
+(1004, 'Victor', 'Microsoft Surface Laptop Go 2', 500, 'PENDING'),
+(1005, 'Lewis', 'Lenovo Duet 5i', 700, 'PENDING'),
+(1006, 'David', 'Acer Swift 3', 640, 'PAID'),
+(1007, 'Meghan', 'Lenovo IdeaPad Duet 5 Chromebook', 750, 'PAID'),
+(1008, 'Harry', 'Apple iPad Air', 449, 'PENDING'),
+(1009, 'Steve', 'Microsoft Surface Go 3', 680, 'PENDING'),
+(1010, 'Omar', 'HP Victus 16', 800,'PAID');
+----
+
+To verify that the values have been inserted successfully, retrieve the result set using the command below:
+
+[source,sql]
+----
+SELECT * FROM comporders;
+----
+
+[source,sql]
+----
++-----------+------------+----------------------------------+-------------+----------+
+| order_id  | cust_name  | prod_name                        | prod_price  | status   |
++-----------+------------+----------------------------------+-------------+----------+
+| 1002      | Mike       | Lenovo IdeaPad Flex 5            | 600         | PAID     |
+| 1003      | Sean       | Acer Aspire 3                    | 450         | PAID     |
+| 1004      | Victor     | Microsoft Surface Laptop Go 2    | 500         | PENDING  |
+| 1005      | Lewis      | Lenovo Duet 5i                   | 700         | PENDING  |
+| 1006      | David      | Acer Swift 3                     | 640         | PAID     |
+| 1007      | Meghan     | Lenovo IdeaPad Duet 5 Chromebook | 750         | PAID     |
+| 1008      | Harry      | Apple iPad Air                   | 449         | PENDING  |
+| 1009      | Steve      | Microsoft Surface Go 3           | 680         | PENDING  |
+| 1010      | Omar       | HP Victus 16                     | 800         | PAID     |
++-----------+------------+----------------------------------+-------------+----------+
+----
+
+=== Use `LIMIT` with the `ORDER BY` expression
+
+This example uses the `LIMIT` clause to get the first four orders sorted by `order_id`:
+
+[source,sql]
+----
+SELECT order_id, prod_name, prod_price +FROM comporders +ORDER BY order_id +LIMIT 4; +---- + +The above query will give the following result: + +[source,sql] +---- ++-----------+-------------------------------+-------------+ +| order_id | prod_name | prod_price | ++-----------+-------------------------------+-------------+ +| 1002 | Lenovo IdeaPad Flex 5 | 600 | +| 1003 | Acer Aspire 3 | 450 | +| 1004 | Microsoft Surface Laptop Go 2 | 500 | +| 1005 | Lenovo Duet 5i | 700 | ++-----------+-------------------------------+-------------+ +---- + +=== Use `LIMIT` with ASC/DESC + +You can use the `LIMIT` clause to select rows with the highest or lowest values from a table. + +. To get the top 5 most expensive orders, you sort orders by the product price in descending order *(DESC)* and use the `LIMIT` clause to get the first 5 orders. ++ +The following query depicts the idea: ++ +[source,sql] +---- +SELECT * FROM comporders +ORDER BY prod_price DESC +LIMIT 5; +---- ++ +The result of the query is as follows: ++ +[source,sql] +---- ++-----------+------------+----------------------------------+-------------+----------+ +| order_id | cust_name | prod_name | prod_price | status | ++-----------+------------+----------------------------------+-------------+----------+ +| 1010 | Omar | HP Victus 16 | 800 | PAID | +| 1007 | Meghan | Lenovo IdeaPad Duet 5 Chromebook | 750 | PAID | +| 1005 | Lewis | Lenovo Duet 5i | 700 | PENDING | +| 1009 | Steve | Microsoft Surface Go 3 | 680 | PENDING | +| 1006 | David | Acer Swift 3 | 640 | PAID | ++-----------+------------+----------------------------------+-------------+----------+ +---- + +. We will fetch the top 5 cheapest orders this time. You sort orders by the product price in ascending order *(ASC)* and use the `LIMIT` clause to get the first 5 orders. 
++ +The following query depicts the idea: ++ +[source,sql] +---- +SELECT * FROM comporders +ORDER BY prod_price ASC +LIMIT 5; +---- ++ +We will get the below output: ++ +[source,sql] +---- ++-----------+------------+----------------------------------+-------------+----------+ +| order_id | cust_name | prod_name | prod_price | status | ++-----------+------------+----------------------------------+-------------+----------+ +| 1008 | Harry | Apple iPad Air | 449 | PENDING | +| 1003 | Sean | Acer Aspire 3 | 450 | PAID | +| 1004 | Victor | Microsoft Surface Laptop Go 2 | 500 | PENDING | +| 1002 | Mike | Lenovo IdeaPad Flex 5 | 600 | PAID | +| 1006 | David | Acer Swift 3 | 640 | PAID | ++-----------+------------+----------------------------------+-------------+----------+ +---- + +=== Use `LIMIT` with `OFFSET` + +In this example, we will use `LIMIT` and `OFFSET` clauses to get 5 orders using the below query: + +[source,sql] +---- +SELECT * FROM comporders +LIMIT 5 OFFSET 2; +---- + +After implementing the above command, we will get the below output: + +[source,sql] +---- ++-----------+------------+----------------------------------+-------------+----------+ +| order_id | cust_name | prod_name | prod_price | status | ++-----------+------------+----------------------------------+-------------+----------+ +| 1004 | Victor | Microsoft Surface Laptop Go 2 | 500 | PENDING | +| 1005 | Lewis | Lenovo Duet 5i | 700 | PENDING | +| 1006 | David | Acer Swift 3 | 640 | PAID | +| 1007 | Meghan | Lenovo IdeaPad Duet 5 Chromebook | 750 | PAID | +| 1008 | Harry | Apple iPad Air | 449 | PENDING | ++-----------+------------+----------------------------------+-------------+----------+ +---- + +The result above shows that: + +* The orders with `order_id`= *1002 & 1003* aren’t displayed because we put the `OFFSET` value with 2. So the first 2 lines are ignored. +* The orders with `order_id`= *1009 & 1010* aren’t displayed because the `LIMIT` value is 5, which will display only 5 rows. 
diff --git a/modules/reference/pages/sql/sql-clauses/offset.adoc b/modules/reference/pages/sql/sql-clauses/offset.adoc new file mode 100644 index 000000000..ea2f38e83 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/offset.adoc @@ -0,0 +1,103 @@ += OFFSET +:description: The OFFSET is a clause that skips some records from the result set. +:page-topic-type: reference + +== *overview* + +The `OFFSET` is a clause that skips some records from the result set. + +== *syntax* + +The basic syntax of the `OFFSET` clause is shown below: + +[source,sql] +---- +SELECT columns +FROM table_name +OFFSET num; +---- + +The parameters and arguments from the syntax are: + +* `columns`: the columns to be fetched. +* `table_name`: a table from which the records will be fetched. +* `OFFSET`: a clause that will skip a subset of records. +** `num`: the number of records to be skipped. + +== *example* + +*1.* Here, we are creating one new table called *oxlafunctions* using the `CREATE TABLE` command and inserting some values into the table using the `INSERT` command: + +[source,sql] +---- +CREATE TABLE oxlafunctions +( + func_name text, + func_sub text +); + +INSERT INTO oxlafunctions +VALUES +('Numeric', 'ABS'), +('Numeric', 'CEIL'), +('Text', 'LENGTH'), +('Numeric', 'SQRT'), +('Boolean', 'IF'), +('Text', 'STRPOS'), +('Numeric', 'FLOOR'), +('Text', 'CONCAT'), +('Text', 'LOWER'); +---- + +*2.* To verify that the values have been inserted successfully, retrieve the result set using the command below: + +[source,sql] +---- +SELECT * FROM oxlafunctions; +---- + +[source,sql] +---- ++------------+------------+ +| func_name | func_sub | ++------------+------------+ +| Numeric | ABS | +| Numeric | CEIL | +| Text | LENGTH | +| Numeric | SQRT | +| Boolean | IF | +| Text | STRPOS | +| Numeric | FLOOR | +| Text | CONCAT | +| Text | LOWER | ++------------+------------+ +---- + +*3.* Use the *LIMIT* clause in conjunction with the *OFFSET* clause to skip a subset of records: + +[source,sql] 
+---- +SELECT * FROM oxlafunctions +ORDER BY func_name +LIMIT 5 OFFSET 2; +---- + +In the above query: + +* The *“LIMIT 5”* clause is used to fetch only five records. +* The *“OFFSET 2”* clause is used to skip the first two records before retrieving the result set of the limit clause. + +*4.* You will get the following output: + +[source,sql] +---- ++------------+------------+ +| func_name | func_sub | ++------------+------------+ +| Boolean | IF | +| Numeric | SQRT | +| Numeric | CEIL | +| Numeric | ABS | +| Numeric | FLOOR | ++------------+------------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/order-by.adoc b/modules/reference/pages/sql/sql-clauses/order-by.adoc new file mode 100644 index 000000000..598712578 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/order-by.adoc @@ -0,0 +1,272 @@ += ORDER BY +:description: The ORDER BY clause is used to sort rows of the result received from a SELECT statement, which retrieves records from one or more tables. +:page-topic-type: reference + +The `ORDER BY` clause is used to sort rows of the result received from a `SELECT` statement, which retrieves records from one or more tables. + +== Syntax + +The following illustrates the syntax of the `ORDER BY` clause: + +[source,sql] +---- +SELECT columns +FROM table_name +ORDER BY sort_expression1 [ASC | DESC]; +---- + +=== Parameters + +* `columns`: columns that you wish to retrieve +* `table_name`: table that you want to retrieve records from. +* `ORDER BY`: expression used to order the results +* `ASC` or `DESC`: optional parameter to specify the order in which the results should be returned, either ascending or descending. Default is set to `ASC` + +== Examples + +We will use the table called *salaryemp* as an example. 
In order to create the table, please run the query below:
+
+[source,sql]
+----
+CREATE TABLE salaryemp
+(
+    emp_id int,
+    emp_name text,
+    emp_div text,
+    emp_sal int
+);
+
+INSERT INTO salaryemp
+VALUES
+(1002, 'Mike', 'Marketing', 6000),
+(1003, 'Sean', 'Marketing', 6500),
+(1004, 'Victor', 'Finance', 7000),
+(1005, 'Lewis', 'Sales', 5500),
+(1006, 'David', 'Marketing', 8000),
+(1007, 'Meghan', 'Finance', 7500),
+(1008, 'Harry', 'Operations', 4500),
+(1009, 'Steve', 'Marketing', 6800),
+(1010, 'Omar', 'Finance', 8000),
+(1011, 'David', 'Sales', 8200);
+----
+
+To verify that the values have been inserted successfully, retrieve the results by executing the following code:
+
+[source,sql]
+----
+SELECT * FROM salaryemp;
+----
+
+[source,sql]
+----
++-----------+------------+----------------+-------------+
+| emp_id    | emp_name   | emp_div        | emp_sal     |
++-----------+------------+----------------+-------------+
+| 1002      | Mike       | Marketing      | 6000        |
+| 1003      | Sean       | Marketing      | 6500        |
+| 1004      | Victor     | Finance        | 7000        |
+| 1005      | Lewis      | Sales          | 5500        |
+| 1006      | David      | Marketing      | 8000        |
+| 1007      | Meghan     | Finance        | 7500        |
+| 1008      | Harry      | Operations     | 4500        |
+| 1009      | Steve      | Marketing      | 6800        |
+| 1010      | Omar       | Finance        | 8000        |
+| 1011      | David      | Sales          | 8200        |
++-----------+------------+----------------+-------------+
+----
+
+=== Use `ORDER BY` in ascending order
+
+This example uses the `ORDER BY` clause to sort employees by their division:
+
+[source,sql]
+----
+SELECT emp_name, emp_div
+FROM salaryemp
+ORDER BY emp_div;
+----
+
+The above query will provide you with the following output:
+
+[source,sql]
+----
++------------+----------------+
+| emp_name   | emp_div        |
++------------+----------------+
+| Victor     | Finance        |
+| Omar       | Finance        |
+| Meghan     | Finance        |
+| Mike       | Marketing      |
+| Sean       | Marketing      |
+| David      | Marketing      |
+| Steve      | Marketing      |
+| Harry      | Operations     |
+| Lewis      | Sales          |
+| David      | Sales          |
++------------+----------------+
+----
+
+=== Use `ORDER 
BY` in descending order
+
+The following statement selects the employee name and employee salary from the *salaryemp* table and sorts the records in the `emp_sal` column in descending order:
+
+[source,sql]
+----
+SELECT * FROM salaryemp
+ORDER BY emp_sal DESC;
+----
+
+The result of the query is as follows:
+
+[source,sql]
+----
++-----------+------------+----------------+-------------+
+| emp_id    | emp_name   | emp_div        | emp_sal     |
++-----------+------------+----------------+-------------+
+| 1011      | David      | Sales          | 8200        |
+| 1006      | David      | Marketing      | 8000        |
+| 1010      | Omar       | Finance        | 8000        |
+| 1007      | Meghan     | Finance        | 7500        |
+| 1004      | Victor     | Finance        | 7000        |
+| 1009      | Steve      | Marketing      | 6800        |
+| 1003      | Sean       | Marketing      | 6500        |
+| 1002      | Mike       | Marketing      | 6000        |
+| 1005      | Lewis      | Sales          | 5500        |
+| 1008      | Harry      | Operations     | 4500        |
++-----------+------------+----------------+-------------+
+----
+
+=== Use `ORDER BY` with both ASC & DESC parameters
+
+The following statement selects all records from the *salaryemp* table and sorts the rows by employee salary in ascending order and employee division in descending order:
+
+[source,sql]
+----
+SELECT * FROM salaryemp
+ORDER BY emp_sal ASC, emp_div DESC;
+----
+
+After implementing the above command, we will get the following output:
+
+[source,sql]
+----
++-----------+------------+----------------+-------------+
+| emp_id    | emp_name   | emp_div        | emp_sal     |
++-----------+------------+----------------+-------------+
+| 1008      | Harry      | Operations     | 4500        |
+| 1005      | Lewis      | Sales          | 5500        |
+| 1002      | Mike       | Marketing      | 6000        |
+| 1003      | Sean       | Marketing      | 6500        |
+| 1009      | Steve      | Marketing      | 6800        |
+| 1004      | Victor     | Finance        | 7000        |
+| 1007      | Meghan     | Finance        | 7500        |
+| 1006      | David      | Marketing      | 8000        |
+| 1010      | Omar       | Finance        | 8000        |
+| 1011      | David      | Sales          | 8200        |
++-----------+------------+----------------+-------------+
+----
+
+=== Use `ORDER BY` with `TEXT` data types
+
+In this example we are going to create 
to small tables with above mentioned data types: + +[source,sql] +---- +CREATE TABLE strings +( + column1 text +); + +INSERT INTO strings +VALUES ('A'), ('B'), ('a'), ('b'); + +CREATE TABLE texts +( + column1 TEXT +); + +INSERT INTO texts +VALUES ('A'), ('B'), ('a'), ('b'); +---- + +When using the `ORDER BY` clause with these types of data, records with uppercase letters will be sorted lexicographically first, followed by records with lowercase letters. + +[source,sql] +---- +SELECT * FROM strings ORDER BY column1; +SELECT * FROM texts ORDER BY column1; +---- + +[source,sql] +---- + column1 +--------- + A + B + a + b +---- + +=== Use `ORDER BY` with `INTERVAL` data type + +For this example, we’ll create a new table called `interval_data`: + +[source,sql] +---- +CREATE TABLE interval_data ( + duration INTERVAL +); + +INSERT INTO interval_data (duration) +VALUES + (INTERVAL '1 month 30 days 20 hours'), + (INTERVAL '2 months 20 hours'), + (INTERVAL '1 month 30 days 19 hours'), + (INTERVAL '2 months 1 hours'); +---- + +`ORDER BY` on `INTERVAL` column will sort the values by their leading most significant time unit. In this case `months`. First are all `1 month` values, then all `2 months` values. + +[source,sql] +---- +SELECT * FROM interval_data ORDER BY duration; +---- + +[source,sql] +---- + duration +------------------------ + 1 mon 30 days 19:00:00 + 1 mon 30 days 20:00:00 + 2 mons 01:00:00 + 2 mons 20:00:00 +---- + +It works the same for other time units, such as `hours` and `days`. 
+ +[source,sql] +---- +INSERT INTO interval_data (duration) +VALUES + (INTERVAL '24 hours 5 minutes'), + (INTERVAL '1 day 5 minutes'), + (INTERVAL '1 day 2 minutes'); +---- + +[source,sql] +---- +SELECT * FROM interval_data ORDER BY duration; +---- + +[source,sql] +---- + duration +------------------------ + 24:05:00 + 1 day 00:02:00 + 1 day 00:05:00 + 1 mon 30 days 19:00:00 + 1 mon 30 days 20:00:00 + 2 mons 01:00:00 + 2 mons 20:00:00 +---- diff --git a/modules/reference/pages/sql/sql-clauses/over-window.adoc b/modules/reference/pages/sql/sql-clauses/over-window.adoc new file mode 100644 index 000000000..dcd5d8ab7 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/over-window.adoc @@ -0,0 +1,165 @@ += OVER / WINDOW +:description: All window functions utilise a set of clauses specific for them, some of which are mandatory while others are optional. +:page-topic-type: reference + +All window functions utilise a set of clauses specific for them, some of which are mandatory while others are optional. + +== OVER clause + +When it comes to required ones, there is the `OVER` clause, which defines a window or user-specified set of rows within a query result set. It is a mandatory element of window functions, defining the window specification and differentiating them from other SQL functions. + +=== Syntax + +The syntax for this clause looks as follows: + +[source,sql] +---- +OVER (PARTITION BY rows1 ORDER BY rows2) +---- + +where, the `PARTITION BY` clause is a list of `expressions` interpreted in much the same fashion as the elements of a `GROUP BY` clause, with major exception that they are always simple expressions and never the name or number of an output column. Another difference is that these expressions can contain aggregate function calls, which are not allowed in a regular `GROUP BY` clause (they are allowed here because windowing occurs after grouping and aggregation) + +`[ PARTITION BY expression [, ...] 
]` (optional window partition) + +The `ORDER BY` clause used in the `OVER` clause above is a list of `expressions` interpreted in much the same fashion as the elements of a statement-level `ORDER BY` clause, except that the expressions are always taken as simple expressions and never the name or number of an output column. + +`[ ORDER BY expression [ ASC | DESC | USING operator ] [ NULLS { FIRST | LAST } ] [, ...] ]` (optional window ordering) + +== WINDOW clause + +In terms of window functions’ optional clauses, there is the `WINDOW` clause that defines one or more named window specification, as a `window_name` and `window_definition` pair. + +=== Syntax + +The syntax for this clause looks as follows: + +[source,sql] +---- +WINDOW window_name AS (window_definition) [, ...] +---- + +where `window_name` is a name that can be referenced from the `OVER` clauses or subsequent `window definition`. There are a few important things to keep in mind here: + +* The `window_definition` may use an `existing_window_name` to refer to a previous `window_definition` in the `WINDOW` clause, but the previous `window_definition` must not specify a `frame` clause +* The `window_definition` copies the `PARTITION BY` clause and `ORDER BY` clause from previous `window_definition`, but it cannot specify its own `PARTION BY` clause, and can specify an `ORDER BY` clause if the previous `window_definition` does not have one. + +`[ existing_window_name ] [ PARTITION BY clause ] [ ORDER BY clause ] [ frame clause ]` (all arguments are optional) + +[NOTE] +==== +The `window_definition` without arguments defines a window with all rows without partition and ordering +==== +The `frame` clause referenced above defines the window frame for window functions that depend on the frame (not all do). The window frame is a set of related rows for each row of the query (called the current row). 
+ +* `{ RANGE | ROWS | GROUPS } frame_start [ frame_exclusion ]` +* `{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end [ frame_exclusion ]` + +[NOTE] +==== +The `frame` clause of the window specification is limited to the `ROWS` clause without `frame exclusion` one +==== +There are a couple of things, to keep in mind here: + +* `frame_start` and `frame_end` can be one of: `UNBOUNDED PRECEDING`, `offset PRECEDING`, `CURRENT ROW`, `offset FOLLOWING`, `UNBOUNDED FOLLOWING`. +* If `frame_end` is omitted it defaults to `CURRENT ROW`. Restrictions here are as follows: +** `frame_start` cannot be `UNBOUNDED FOLLOWING` +** `frame_end` cannot be `UNBOUNDED PRECEDING` +** `frame_end` choice cannot appear earlier in the above list of `frame_start` and `frame_end` options than the `frame_start` choice does + +In `ROWS` mode, `CURRENT ROW` means that the frame starts or ends with the current row, the offset is an integer indicating that the frame starts or ends that many rows before or after the current row. 
+
+[NOTE]
+====
+Beware that the `ROWS` mode can produce unpredictable results if the `ORDER BY` ordering does not order the rows uniquely
+====
+
+== Examples
+
+For the needs of this section, we will create the `winsales` table that stores details about some sales transactions:
+
+[source,sql]
+----
+CREATE TABLE winsales(
+  salesid int,
+  dateid date,
+  sellerid int,
+  buyerid text,
+  qty int,
+  qty_shipped int);
+INSERT INTO winsales VALUES
+  (30001, '8/2/2003', 3, 'b', 10, 10),
+  (10001, '12/24/2003', 1, 'c', 10, 10),
+  (10005, '12/24/2003', 1, 'a', 30, null),
+  (40001, '1/9/2004', 4, 'a', 40, null),
+  (10006, '1/18/2004', 1, 'c', 10, null),
+  (20001, '2/12/2004', 2, 'b', 20, 20),
+  (40005, '2/12/2004', 4, 'a', 10, 10),
+  (20002, '2/16/2004', 2, 'c', 20, 20),
+  (30003, '4/18/2004', 3, 'b', 15, null),
+  (30004, '4/18/2004', 3, 'b', 20, null),
+  (30007, '9/7/2004', 3, 'c', 30, null);
+----
+
+=== OVER clause in window functions with window definition, PARTITION BY and ORDER BY clauses
+
+In this example, we will focus on executing a window function with the `OVER` clause, window definition and `PARTITION BY` and `ORDER BY` clauses:
+
+[source,sql]
+----
+SELECT *,
+  SUM(qty) OVER (PARTITION BY sellerid) AS seller_qty
+FROM winsales
+ORDER BY sellerid, salesid;
+----
+
+Here’s the output for the above code:
+
+[source,sql]
+----
+ salesid |   dateid   | sellerid | buyerid | qty | qty_shipped | seller_qty
+---------+------------+----------+---------+-----+-------------+------------
+   10001 | 2003-12-24 |        1 | c       |  10 |          10 |         50
+   10005 | 2003-12-24 |        1 | a       |  30 |             |         50
+   10006 | 2004-01-18 |        1 | c       |  10 |             |         50
+   20001 | 2004-02-12 |        2 | b       |  20 |          20 |         40
+   20002 | 2004-02-16 |        2 | c       |  20 |          20 |         40
+   30001 | 2003-08-02 |        3 | b       |  10 |          10 |         75
+   30003 | 2004-04-18 |        3 | b       |  15 |             |         75
+   30004 | 2004-04-18 |        3 | b       |  20 |             |         75
+   30007 | 2004-09-07 |        3 | c       |  30 |             |         75
+   40001 | 2004-01-09 |        4 | a       |  40 |             |         50
+   40005 | 2004-02-12 |        4 | a       |  10 |          10 |         50
+(11 rows)
+----
+
+=== OVER 
clause in window functions with window name, PARTITION BY and ORDER BY clauses
+
+In this example, we will focus on executing a window function with the `OVER` clause, window name and `PARTITION BY` and `ORDER BY` clauses:
+
+[source,sql]
+----
+SELECT *,
+  SUM(qty) OVER seller AS seller_qty
+FROM winsales WINDOW seller AS (PARTITION BY sellerid)
+ORDER BY sellerid, salesid;
+----
+
+When executing the code above, we will get the following output:
+
+[source,sql]
+----
+ salesid |   dateid   | sellerid | buyerid | qty | qty_shipped | seller_qty
+---------+------------+----------+---------+-----+-------------+------------
+   10001 | 2003-12-24 |        1 | c       |  10 |          10 |         50
+   10005 | 2003-12-24 |        1 | a       |  30 |             |         50
+   10006 | 2004-01-18 |        1 | c       |  10 |             |         50
+   20001 | 2004-02-12 |        2 | b       |  20 |          20 |         40
+   20002 | 2004-02-16 |        2 | c       |  20 |          20 |         40
+   30001 | 2003-08-02 |        3 | b       |  10 |          10 |         75
+   30003 | 2004-04-18 |        3 | b       |  15 |             |         75
+   30004 | 2004-04-18 |        3 | b       |  20 |             |         75
+   30007 | 2004-09-07 |        3 | c       |  30 |             |         75
+   40001 | 2004-01-09 |        4 | a       |  40 |             |         50
+   40005 | 2004-02-12 |        4 | a       |  10 |          10 |         50
+(11 rows)
+----
diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/except.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/except.adoc
new file mode 100644
index 000000000..7c9dd705e
--- /dev/null
+++ b/modules/reference/pages/sql/sql-clauses/set-operations/except.adoc
@@ -0,0 +1,264 @@
+= EXCEPT
+:description: The EXCEPT combines the result sets of two or more tables and retrieves rows specific to the first SELECT statement but not present in the subsequent ones.
+:page-topic-type: reference
+
+== EXCEPT
+
+The `EXCEPT` combines the result sets of two or more tables and retrieves rows specific to the first `SELECT` statement but not present in the subsequent ones.
+
+=== Syntax
+
+The syntax for the `EXCEPT` is as follows:
+
+[source,sql]
+----
+SELECT value1, value2, ... value_n
+FROM table1
+EXCEPT
+SELECT value1, value2, ... 
value_n +FROM table2; +---- + +The parameters from the syntax are explained below: + +* `value1, value2, ... value_n`: The columns you want to retrieve. +* `table1, table2`: The tables from which you wish to retrieve records. + +=== Example + +Let’s assume you have two tables: `vehicles` and `vehicles1`. You want to find the vehicle which was present in 2021 but is not present in 2022: + +[source,sql] +---- +CREATE TABLE vehicles ( + vhc_id INT, + vhc_name TEXT +); + +CREATE TABLE vehicles1 ( + vhc_id INT, + vhc_name TEXT +); + +INSERT INTO vehicles VALUES +(1, 'Truck'), +(2, 'Car'), +(3, 'Motorcycle'); + +INSERT INTO vehicles1 VALUES +(2, 'Car'), +(3, 'Bus'), +(4, 'Motorcycle'); +---- + +Display the tables with the query below: + +[source,sql] +---- +SELECT * FROM vehicles; +SELECT * FROM vehicles1; +---- + +[source,sql] +---- +vhc_id | vhc_name +--------+------------ + 1 | Truck + 2 | Car + 3 | Motorcycle + + vhc_id | vhc_name +--------+------------ + 2 | Car + 3 | Bus + 4 | Motorcycle +---- + +Using the `EXCEPT` to find employees present in 2021 but not in 2022: + +[source,sql] +---- +SELECT vhc_name FROM vehicles +EXCEPT +SELECT vhc_name FROM vehicles1; +---- + +The result will include the names of employees who were present in 2021 but are not present in 2022: + +[source,sql] +---- +vhc_name +---------- + Truck +---- + +From the diagram below, we learn that the result is a list of vehicle names present in the first table (`vehicles`) but not found in the second table (`vehicles1`). In this case, the result is the vehicle name "`Truck.`" + +== EXCEPT ALL + +=== Overview + +The `EXCEPT ALL` allows you to find rows specific to the first `SELECT` statement while preserving duplicate entries. + +=== Syntax + +The syntax for the `EXCEPT ALL` is similar to `EXCEPT`: + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +EXCEPT ALL +SELECT value1, value2, ... 
value_n +FROM table2; +---- + +The parameters from the syntax are explained below: + +* `value1, value2, ... value_n`: The columns you want to retrieve. +* `table1, table2`: The tables from which you wish to retrieve records. + +[NOTE] +==== +The data types of corresponding columns in the `SELECT` queries must be compatible. +==== + +=== Example #1 + +You aim to identify customers who have bought products from one marketplace but have not purchased from another. Start by creating the tables and populating them with relevant data. + +[source,sql] +---- +CREATE TABLE marketplace1_transactions ( + customer_id INT, + product_id INT, + amount FLOAT +); + +CREATE TABLE marketplace2_transactions ( + customer_id INT, + product_id INT, + amount FLOAT +); + +INSERT INTO marketplace1_transactions VALUES +(101, 1, 100.00), +(102, 2, 150.00), +(103, 3, 200.00), +(104, 1, 120.00); + +INSERT INTO marketplace2_transactions VALUES +(102, 3, 180.00), +(103, 2, 160.00), +(105, 4, 90.00), +(106, 1, 110.00); +---- + +Display the tables using the query below: + +[source,sql] +---- +SELECT * FROM marketplace1_transactions; +SELECT * FROM marketplace2_transactions; +---- + +[source,sql] +---- +customer_id | product_id | amount +-------------+------------+-------- + 101 | 1 | 100 + 102 | 2 | 150 + 103 | 3 | 200 + 104 | 1 | 120 + + customer_id | product_id | amount +-------------+------------+-------- + 102 | 3 | 180 + 103 | 2 | 160 + 105 | 4 | 90 + 106 | 1 | 110 +---- + +Using the `EXCEPT ALL` to find customers who have purchased products from one marketplace but not from the other: + +[source,sql] +---- +SELECT customer_id FROM marketplace1_transactions +EXCEPT ALL +SELECT customer_id FROM marketplace2_transactions; +---- + +This result will show a `customer_id` who has only transacted in the first marketplace and has not engaged in any corresponding transactions in the second marketplace. 
+ +[source,sql] +---- +customer_id +------------- + 104 + 101 +---- + +The diagram below shows a list of customer-product pairs found in the first marketplace (`marketplace1_transactions`) but missing in the second marketplace (`marketplace2_transactions`). + +=== Example #2 + +Let’s create two tables, `left_array_values` and `right_array_values`, to hold sets of values. + +[source,sql] +---- +CREATE TABLE left_array_values ( + value INT +); + +CREATE TABLE right_array_values ( + value INT +); + +INSERT INTO left_array_values VALUES (1), (1), (3); +INSERT INTO right_array_values VALUES (1), (2); +---- + +View the contents of the two arrays before performing the comparison. + +[source,sql] +---- +SELECT * FROM left_array_values; +SELECT * FROM right_array_values; +---- + +Upon execution, the tables will appear as follows: + +[source,sql] +---- +value +------- + 1 + 1 + 3 + + value +------- + 1 + 2 +---- + +We will now use the `EXCEPT ALL` operation to compare the values within the arrays, focusing on unique elements while retaining duplicate entries. + +[source,sql] +---- +SELECT value +FROM left_array_values +EXCEPT ALL +SELECT value +FROM right_array_values; +---- + +The `EXCEPT ALL` operation processes each element individually from both inputs at a time. The comparison occurs element-wise, leading to the inclusion of both 1 and 3 in the final result. + +[source,sql] +---- +value +------- + 3 + 1 +---- diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/index.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/index.adoc new file mode 100644 index 000000000..c02bcf9bc --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/set-operations/index.adoc @@ -0,0 +1,10 @@ += Overview +:description: Set operations are operations used to manipulate and analyze sets. + +Set operations are operations used to manipulate and analyze sets. It includes the following operations: + +. 
xref:reference:sql/sql-clauses/set-operations/union.adoc[*Union*]: Combines two or more sets to create a new set containing all unique elements from the input sets. +. xref:reference:sql/sql-clauses/set-operations/intersect.adoc[*Intersect*]: Yields a new set with elements common to all input sets. +. xref:reference:sql/sql-clauses/set-operations/except.adoc[*Except*]: Generates a set containing elements from the first set that are not present in the second set. ++ +These operations allow for comparisons, combinations, and distinctions among sets in various contexts. diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/intersect.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/intersect.adoc new file mode 100644 index 000000000..c73891802 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/set-operations/intersect.adoc @@ -0,0 +1,227 @@ += INTERSECT +:description: The INTERSECT combines the result sets of two or more SELECT statements, retrieving only the common rows between them. +:page-topic-type: reference + +== INTERSECT + +The `INTERSECT` combines the result sets of two or more `SELECT` statements, retrieving only the common rows between them. Unlike `UNION`, which combines all rows and removes duplicates, `INTERSECT` focuses on returning rows that appear in all `SELECT` statements. + +=== Syntax + +The syntax for the `INTERSECT` is as follows: + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +INTERSECT +SELECT value1, value2, ... value_n +FROM table2; +---- + +The parameters from the syntax are explained below: + +* `value1, value2, ... value_n`: The columns you want to retrieve. You can also use `SELECT * FROM` to retrieve all columns. +* `table1, table2`: The tables from which you wish to retrieve records. + +[NOTE] +==== +The data types of corresponding columns must be compatible. 
+==== + +=== Example + +Suppose you have two tables: `customers_old` and `customers_new`, containing customer data for different periods. You want to find the customers who are present in both tables: + +[source,sql] +---- +CREATE TABLE customers_old ( + customer_id INT, + customer_name TEXT +); + +CREATE TABLE customers_new ( + customer_id INT, + customer_name TEXT +); + +INSERT INTO customers_old VALUES +(1, 'Alice'), +(2, 'Bob'), +(3, 'Charlie'); + +INSERT INTO customers_new VALUES +(2, 'Bob'), +(3, 'Charlie'), +(4, 'David'); +---- + +Viewing the inserted values: + +[source,sql] +---- +SELECT * FROM customers_old; +SELECT * FROM customers_new; +---- + +[source,sql] +---- +customer_id | customer_name +-------------+--------------- + 1 | Alice + 2 | Bob + 3 | Charlie + + customer_id | customer_name +-------------+--------------- + 2 | Bob + 3 | Charlie + 4 | David +---- + +Now, let’s combine common customers using the `INTERSECT`: + +[source,sql] +---- +SELECT customer_name FROM customers_old +INTERSECT +SELECT customer_name FROM customers_new; +---- + +The result will include only the names that appear in both tables: + +[source,sql] +---- +customer_name +--------------- + Bob + Charlie +---- + +The picture displays a list of customer names that appear in both tables. Only "`Bob`" and "`Charlie`" are found in both tables and shown as INTERSECT’s final result. + +== INTERSECT ALL + +=== Overview + +The `INTERSECT ALL` retrieves all common rows between two or more tables, including duplicates. + +This means that if a row appears multiple times in any of the `SELECT` statements, it will be included in the final result set multiple times. + +=== Syntax + +The syntax for `INTERSECT ALL` is similar to `INTERSECT`: + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM tables +INTERSECT ALL +SELECT value1, value2, ... value_n +FROM tables; +---- + +The parameters from the syntax are explained below: + +* `value1, value2, ... 
value_n`: The columns you wish to retrieve. You can also retrieve all the values using the `SELECT * FROM` query. +* `table1, table2`: The tables from which you want to retrieve records. + +[NOTE] +==== +The data types of corresponding columns in the `SELECT` queries must be compatible. +==== + +=== Example + +Let’s create three tables of products from different years. You want to find the common products among all three categories, including duplicates. + +[source,sql] +---- +CREATE TABLE products_electronics2021 ( + product_id INT, + product_name TEXT +); + +CREATE TABLE products_electronics2022 ( + product_id INT, + product_name TEXT +); + +CREATE TABLE products_electronics2023 ( + product_id INT, + product_name TEXT +); + +INSERT INTO products_electronics2021 VALUES +(1, 'Laptop'), +(2, 'Phone'), +(3, 'Tablet'), +(4, 'Headphones'); + +INSERT INTO products_electronics2022 VALUES +(2, 'TV'), +(3, 'Printer'), +(4, 'Monitor'), +(5, 'Phone'); + +INSERT INTO products_electronics2023 VALUES +(3, 'Laptop'), +(4, 'Phone'), +(5, 'Oven'), +(6, 'AC'); +---- + +Display the tables using the query below: + +[source,sql] +---- +SELECT * FROM products_electronics2021; +SELECT * FROM products_electronics2022; +SELECT * FROM products_electronics2023; +---- + +[source,sql] +---- +product_id | product_name +------------+-------------- + 1 | Laptop + 2 | Phone + 3 | Tablet + 4 | Headphones + + product_id | product_name +------------+-------------- + 2 | TV + 3 | Printer + 4 | Monitor + 5 | Phone + + product_id | product_name +------------+-------------- + 3 | Laptop + 4 | Phone + 5 | Oven + 6 | AC +---- + +Then, combine common products from all three categories using the `INTERSECT ALL`: + +[source,sql] +---- +SELECT product_name FROM products_electronics2021 +INTERSECT ALL +SELECT product_name FROM products_electronics2022 +INTERSECT ALL +SELECT product_name FROM products_electronics2023; +---- + +The result will include the products that are common among all three categories, 
including duplicates: + +[source,sql] +---- +product_name +-------------- + Phone +---- + +The illustration shows a list of product names common to all three years, including duplicates. In this case, the result is the product name "`Phone,`" which appears across all three tables. diff --git a/modules/reference/pages/sql/sql-clauses/set-operations/union.adoc b/modules/reference/pages/sql/sql-clauses/set-operations/union.adoc new file mode 100644 index 000000000..d3b27fb88 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/set-operations/union.adoc @@ -0,0 +1,212 @@ += UNION +:description: The UNION combines the result sets of 2 or more select statements, removing duplicate rows between the tables. +:page-topic-type: reference + +== UNION + +The `UNION` combines the result sets of 2 or more select statements, removing duplicate rows between the tables. + +=== Syntax + +Below is the syntax of the `UNION`: + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM table1 +UNION +SELECT value1, value2, ... value_n +FROM table2; +---- + +The parameters from the syntax are explained below: + +* `value1, value2, ... value_n`: The columns you wish to retrieve. You can also retrieve all the values using the `SELECT * FROM` query. +* `table1, table2`: The tables that you wish to retrieve records from. + +[TIP] +==== +*Things to consider:* + 1. The data types of corresponding columns in the `SELECT` queries must be compatible. + 2. The order of columns is flexible as long as the columns in consecutive places are pairwise compatible. For example, you can do `SELECT col1, col2 FROM table1 UNION SELECT col2, col1 FROM table2`. +==== + +=== Example + +Let’s consider an example of the `UNION`. Assume we have a table called `employees` and another table called `contractors`. 
We want to retrieve a combined list of names from both tables, excluding duplicates: + +[source,sql] +---- +CREATE TABLE employees ( + emp_id INT, + emp_name TEXT +); + +CREATE TABLE contractors ( + contractor_id INT, + contractor_name TEXT +); + +INSERT INTO employees VALUES +(1, 'John'), +(2, 'Alice'), +(3, 'Bob'); + +INSERT INTO contractors VALUES +(101, 'Alice'), +(102, 'Eve'), +(103, 'Tom'); +---- + +Verifying inserted values by using the `SELECT` statement: + +[source,sql] +---- +SELECT * FROM employees; +SELECT * FROM contractors; +---- + +[source,sql] +---- +emp_id | emp_name +--------+---------- + 1 | John + 2 | Alice + 3 | Bob + + contractor_id | contractor_name +---------------+----------------- + 101 | Alice + 102 | Eve + 103 | Tom +---- + +Let’s combine the values from the tables: + +[source,sql] +---- +SELECT emp_name FROM employees +UNION +SELECT contractor_name FROM contractors; +---- + +You will get the values of both tables, and there won’t be any duplicate values. + +[source,sql] +---- +emp_name +---------- + Alice + Bob + Eve + John + Tom +---- + +The diagram below shows that the duplicate name "`Alice`" is represented only once in the output, fulfilling the requirement to avoid duplicate entries. + +== UNION ALL + +=== Overview + +The `UNION ALL` combines the result sets of 2 or more select statements, returning all rows from the query and not removing duplicate rows between the tables. + +=== Syntax + +Below is the syntax of the `UNION ALL`: + +[source,sql] +---- +SELECT value1, value2, ... value_n +FROM tables +UNION ALL +SELECT value1, value2, ... value_n +FROM tables; +---- + +The parameters from the syntax are explained below: + +* `value1, value2, ... value_n`: The columns you wish to retrieve. You can also retrieve all the values using the `SELECT * FROM` query. +* `table1, table2`: The tables that you wish to retrieve records from. + +[TIP] +==== +*Things to consider:* + 1. 
The data types of corresponding columns in the `SELECT` queries must be compatible. + 2. The order of columns is flexible as long as the columns in consecutive places are pairwise compatible. +==== + +=== Example + +Suppose you have two separate tables, `sales_2022` and `sales_2023`, containing sales data for different years. You want to combine the sales data from both tables to get a complete list of sales transactions without removing duplicates. + +[source,sql] +---- +CREATE TABLE sales_2022 ( + transaction_id INT, + product_name TEXT, + sale_amount INT +); + +CREATE TABLE sales_2023 ( + transaction_id INT, + product_name TEXT, + sale_amount INT +); + +INSERT INTO sales_2022 VALUES +(1, 'Product A', 1000), +(2, 'Product B', 500), +(3, 'Product C', 750); + +INSERT INTO sales_2023 VALUES +(4, 'Product A', 1200), +(5, 'Product D', 800), +(6, 'Product E', 950); +---- + +Verifying inserted values by using the `SELECT` statement: + +[source,sql] +---- +SELECT * FROM sales_2022; +SELECT * FROM sales_2023; +---- + +[source,sql] +---- +transaction_id | product_name | sale_amount +----------------+--------------+------------- + 1 | Product A | 1000 + 2 | Product B | 500 + 3 | Product C | 750 + + transaction_id | product_name | sale_amount +----------------+--------------+------------- + 4 | Product A | 1200 + 5 | Product D | 800 + 6 | Product E | 950 +---- + +Let’s combine all values from the tables by using the `UNION ALL`: + +[source,sql] +---- +SELECT product_name, sale_amount FROM sales_2022 UNION ALL SELECT product_name, sale_amount FROM sales_2023; +---- + +In this case, it will display all the values of the first table followed by all the contents of the second table. 
+ +[source,sql] +---- +product_name | sale_amount +--------------+------------- + Product A | 1000 + Product B | 500 + Product C | 750 + Product A | 1200 + Product D | 800 + Product E | 950 +---- + +The diagram illustrates that with the `UNION ALL`, all values are displayed, including the duplicate ones. diff --git a/modules/reference/pages/sql/sql-clauses/where.adoc b/modules/reference/pages/sql/sql-clauses/where.adoc new file mode 100644 index 000000000..369dfb1d3 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/where.adoc @@ -0,0 +1,238 @@ += WHERE +:description: The WHERE clause returns a specific value from a table or multiple tables based on specified conditions. +:page-topic-type: reference + +The `WHERE` clause returns a specific value from a table or multiple tables based on specified conditions. It will filter out records you do not want to be included and only returns the exact result when the condition is fulfilled. + +== Syntax + +The basic syntax of the WHERE clause is as follows − + +[source,sql] +---- +SELECT column1, column2, ... +FROM table_name +WHERE [condition] +---- + +Let’s explore the above syntax: + +* `SELECT column1, column2, ...` defines the columns where the records will be displayed. +* `FROM table_name` sets the table name where the records will be taken from. +* `WHERE [condition]`specifies the search condition using comparison or logical operators (e.g., `>`, `=`, `LIKE`) + +[TIP] +==== +It starts with the `FROM` clause *->* then it executes the `WHERE` condition *->* after that, it will `SELECT` the specified columns. 
+==== + +== Examples + +Let’s assume that we have a table salary with records as follows: + +[source,sql] +---- +CREATE TABLE salary ( + empid int, + empname text, + empdept text, + empaddress text, + empsalary int +); +INSERT INTO salary + (empid, empname, empdept, empaddress, empsalary) +VALUES + (2001,'Paul','HR', 'California', null ), + (2002,'Brandon','Product', 'Norway', 15000), + (2003,'Bradley','Marketing', 'Texas', null), + (2004,'Lisa','Marketing', 'Houston', 10000), + (2005,'Emily','Marketing', 'Texas', 20000), + (2006,'Bobby','Finance', 'Seattle', 20000), + (2007,'Parker','Project', 'Texas', 45000); +---- + +[source,sql] +---- +SELECT * FROM salary; +---- + +It will create a table as shown below: + +[source,sql] +---- ++--------+-----------+------------+-------------+------------+ +| empid | empname | empdept | empaddress | empsalary | ++--------+-----------+------------+-------------+------------+ +| 2001 | Paul | HR | California | null | +| 2002 | Brandon | Product | Norway | 15000 | +| 2003 | Bradley | Marketing | Texas | null | +| 2004 | Lisa | Marketing | Houston | 10000 | +| 2005 | Emily | Marketing | Texas | 20000 | +| 2006 | Bobby | Finance | Seattle | 20000 | +| 2007 | Parker | Project | Texas | 45000 | ++--------+-----------+------------+-------------+------------+ +---- + +=== WHERE clause with `=` operator + +Here we will be using the “equal” operator to look up the employee who works in the Marketing department: + +[source,sql] +---- +SELECT empname, empdept +FROM salary +WHERE empdept = 'Marketing'; +---- + +The above command will create the following result: + +[source,sql] +---- ++------------+-------------+ +| empname | empdept | ++------------+-------------+ +| Bradley | Marketing | +| Emily | Marketing | +| Lisa | Marketing | ++------------+-------------+ +---- + +[WARNING] +==== +The value defined in the `WHERE` clause’s condition is *case-sensitive*, so ensure that you specify the correct and precise value. 
+==== + +=== WHERE clause with `!=` operator + +Here we will be using the “not equal” operator to look up the employee who doesn’t live in Texas: + +[source,sql] +---- +SELECT empname, empdept, empaddress +FROM salary +WHERE empaddress != 'Texas'; +---- + +[NOTE] +==== +We can use the `<>` operator for another “not equal” operator. +==== +The above query will give the following result: + +[source,sql] +---- ++------------+------------+--------------+ +| empname | empdept | empaddress | ++------------+------------+--------------+ +| Paul | HR | California | +| Brandon | Product | Norway | +| Lisa | Marketing | Houston | +| Bobby | Finance | Seattle | ++------------+------------+--------------+ +---- + +[WARNING] +==== +The value defined in the `WHERE` clause's condition is *case-sensitive*. If you set `texas` it will return all records from the salary table. +==== + +=== WHERE clause with `>` operator + +Here we will be using the “greater than” operator to figure out who has a salary above 20000: + +[source,sql] +---- +SELECT empname, empdept, empsalary +FROM salary +WHERE empsalary > 20000; +---- + +[NOTE] +==== +We can use the `<` operator for a “less than” condition. +==== +The output will let us know that Parker has a salary greater than 20000: + +[source,sql] +---- ++------------+------------+-------------+ +| empname | empdept | empsalary | ++------------+------------+-------------+ +| Parker | Project | 45000 | ++------------+------------+-------------+ +---- + +=== WHERE clause with `<=` operator + +Here we will be using the “less than or equal to” operator to see who has a salary less than or equal to 15000: + +[source,sql] +---- +SELECT empname, empdept, empsalary +FROM salary +WHERE empsalary <= '15000'; +---- + +[NOTE] +==== +We can use the `>=` operator for a “greater than or equal to” condition. 
+==== +The output will let us know that Brandon has a salary equal to 15000 and Lisa has a salary of less than 15000: + +[source,sql] +---- ++------------+------------+-------------+ +| empname | empdept | empsalary | ++------------+------------+-------------+ +| Brandon | Product | 15000 | +| Lisa | Marketing | 10000 | ++------------+------------+-------------+ +---- + +=== WHERE clause with `LIKE` operator + +Here we will use the “like” operator to retrieve the employee whose first name starts with *Br*. + +[source,sql] +---- +SELECT * FROM salary +WHERE empname LIKE 'Br%'; +---- + +[NOTE] +==== +Do the reverse to get the result based on the last string, `%string`. +==== +We will get an output where the above query fetches **Br**andon & **Br**adley. + +[source,sql] +---- ++---------+------------+--------------+--------------+-----------+ +| empid | empname | empdept | empaddress | empsalary | ++---------+------------+-------------+--------------+------------+ +| 2002 | Brandon | Product | Norway | 15000 | +| 2003 | Bradley | Marketing | Texas | null | ++---------+------------+-------------+--------------+------------+ +---- + +=== WHERE clause with `IS NULL` operator + +Here we will use the “is null” operator to search for the employee who doesn’t have a salary value. It will return `true` and display the result set if a value is `NULL`; otherwise, it will return `false` with no result set. 
+ +[source,sql] +---- +SELECT * FROM salary +WHERE empsalary IS NULL; +---- + +The above command will create the following result: + +[source,sql] +---- ++---------+------------+-------------+--------------+------------+ +| empid | empname | empdept | empaddress | empsalary | ++---------+------------+-------------+--------------+------------+ +| 2001 | Paul | HR | California | null | +| 2003 | Bradley | Marketing | Texas | null | ++---------+------------+-------------+--------------+------------+ +---- diff --git a/modules/reference/pages/sql/sql-clauses/with.adoc b/modules/reference/pages/sql/sql-clauses/with.adoc new file mode 100644 index 000000000..76ac87077 --- /dev/null +++ b/modules/reference/pages/sql/sql-clauses/with.adoc @@ -0,0 +1,90 @@ += WITH +:description: The WITH clause provides a way to define auxiliary statements (referred to by their alias names) that can be used within more complex queries. +:page-topic-type: reference + +The `WITH` clause provides a way to define auxiliary statements (referred to by their alias names) that can be used within more complex queries. They are also known as Common Table Expressions (CTEs). + +== Syntax + +The `WITH` clause precedes the primary statement it is attached to and contains a list of auxiliary statements with corresponding aliases. + +[source,sql] +---- +WITH [with_statement_alias AS (with_statement_body)]+ primary_statement; +---- + +* *`primary_statement`*: has to be one of the following: `SELECT`, `INSERT`, `UPDATE`, `DELETE` +* *`with_statement_body`*: has to be a `SELECT` statement (it can refer to aliases defined earlier in the query) + +== Semantic + +Currently, Redpanda SQL only supports not materialised CTEs (e.g. each auxiliary query alias is replaced with its corresponding body at the early stages of the query processing). 
The following query: + +[source,sql] +---- +WITH a AS (SELECT 77), b AS (SELECT * FROM a) SELECT * FROM b +---- + +is effectively turned into: + +[source,sql] +---- +SELECT * FROM (SELECT * FROM (SELECT 77) AS a) AS b +---- + +Used auxiliary query gets the same alias (`AS b` part) as in the `WITH` clause. It can be changed by explicitly setting a new alias upon usage. + +[source,sql] +---- +WITH b AS (SELECT 1 AS c1) SELECT b.c1, b1.c1 FROM b CROSS JOIN b AS b1; +---- + +== Usage + +Not materialised `WITH` clauses are useful when you want to refactor some complex query to make it more readable. You can extract subqueries or even reuse them in several places, having only one definition. Thanks to code insertion, each use of a query will be optimized separately, specifically for the usage of its results by the parent query. For example: + +[source,sql] +---- +WITH math_grades AS (SELECT g_date, semester_id, grade FROM grades WHERE subject="Math") +SELECT * FROM +(SELECT AVG(grade) FROM math_grades WHERE semester_id=2137) AS avg_semester_grades, +(SELECT AVG(grade) FROM math_grades WHERE g_date >= (CURRENT_TIMESTAMP() - INTERVAL '1 y')) AS avg_year_grades +---- + +Both subqueries use the same auxiliary `math_grades` query, but each of them filters it using different keys. This way, both scans will only read a part of the table. If materialized CTE was used (which we don’t support yet), the query engine would need to scan the whole table first and then filter the result twice, for each subquery. + +== Alias context + +You can’t create more than one CTE with the same alias within a single `WITH` clause. However, if you create nested `SELECT` statements, each of them can have their own `WITH` clauses, creating their own contexts for defined aliases. 
+ +[NOTE] +==== +The same alias can be defined in more than one context +==== +[source,sql] +---- +WITH a AS ( # <-- creates context 1 + SELECT 1 +) +SELECT * FROM ( + WITH a AS (SELECT 2) # <-- creates context 2 + SELECT * FROM a # <-- uses context 2 +) CROSS JOIN a; # <-- uses context 1 +---- + +By executing the query above, you will receive `2, 1` as an output. + +When referencing an alias we use the context, which was defined at the nested query level. If it does not define the referenced alias, we move up one level and repeat searching for an alias definition. + +[source,pgsql] +---- +WITH a AS ( + SELECT 1 +) +SELECT * FROM ( + WITH b as (SELECT 2) + SELECT * FROM b +) CROSS JOIN b; # <-- error +---- + +That query returns `ERROR: relation "b" does not exist`, as `b` is not defined in this context or any of the above. diff --git a/modules/reference/pages/sql/sql-data-types/array.adoc b/modules/reference/pages/sql/sql-data-types/array.adoc new file mode 100644 index 000000000..d4d088fa5 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/array.adoc @@ -0,0 +1,184 @@ += Array +:description: In Redpanda SQL, an array allows you to represent a collection of elements that have the same data type (any built-in data type can be used). +:page-topic-type: reference + +In Redpanda SQL, an array allows you to represent a collection of elements that have the same data type (any built-in data type can be used). + +[NOTE] +==== +Currently, the implementation is limited only to single-dimensional arrays +==== + +== Array type declaration + +An array type can be declared by appending square brackets to the data type of its elements: + +[source,sql] +---- +CREATE TABLE movie_night ( + event_date DATE NOT NULL, + movies_planned TEXT[5] NOT NULL +); +---- + +The syntax above allows you to specify the size of the array. However, it does not enforce any limits and the behavior will be the same for arrays of unspecified length. 
There is also another way to declare an array, by prepending the `ARRAY` keyword after the data type of the elements: + +[source,sql] +---- +CREATE TABLE movie_night ( + event_date DATE NOT NULL, + movies_planned TEXT ARRAY NOT NULL +); +---- + +== Array values + +You can create array literals by using the `ARRAY` keyword and combining it with the array’s values enclosed in square brackets and separated by commas: + +[source,sql] +---- +ARRAY[ value1 , value2 , ... ] +---- + +Such a literal can be used with, e.g. `SELECT` or `INSERT INTO` statements: + +[source,sql] +---- +SELECT ARRAY['10:14:25'::time, '22:58:11'::time]; + ?column? +--------------------- + {10:14:25,22:58:11} +(1 row) + +INSERT INTO movie_night VALUES +('2024-12-01', ARRAY['Inception', 'Interstellar', 'The Prestige']); +INSERT 0 1 + +SELECT * FROM movie_night; + event_date | movies_planned +------------+----------------------------------------- + 2024-12-01 | {Inception,Interstellar,"The Prestige"} +(1 row) +---- + +You can also use a string representation of an array as another available option for array’s values syntax. It requires the elements’ values to be enclosed in curly braces and separated by commas: + +[source,sql] +---- +'{ value1 , value2 , ... }' +---- + +Such an array value representation can be used in e.g. `INSERT INTO` statements with the `VALUES` clause: + +[source,sql] +---- +INSERT INTO movie_night VALUES ('2024-12-15', '{The Matrix, John Wick}'); +INSERT 0 1 + +SELECT * FROM movie_night; +event_date | movies_planned +------------+----------------------------------------- +2024-12-01 | {Inception,Interstellar,"The Prestige"} +2024-12-15 | {"The Matrix","John Wick"} +(2 rows) +---- + +Any element can be enclosed in double quotes and this is required, if the value contains commas or curly braces: + +[source,sql] +---- +SELECT '{"{\"key1\": 1, \"key2\": \"value\"}", NULL, true}'::json[]; + ?column? 
+----------------------------------------------- + {"{\"key1\":1,\"key2\":\"value\"}",NULL,true} +(1 row) +---- + +[NOTE] +==== +In the example above, the double quotes which are a part of the JSON value are required to be escaped with a backslash, so that they are not mistaken with the double quote, which marks the end of the element +==== + +== Access arrays + +You can retrieve a single element from an array using the array subscript operator. When it comes to array values indexing, the elements of an n-length array start at index `1` and end at index `n`: + +[source,sql] +---- +SELECT movies_planned, + movies_planned[1] AS first_movie, + movies_planned[3] AS third_movie +FROM movie_night; + movies_planned | first_movie | third_movie +-----------------------------------------+-------------+-------------- + {Inception,Interstellar,"The Prestige"} | Inception | The Prestige + {"The Matrix","John Wick"} | The Matrix | +(2 rows) +---- + +[NOTE] +==== +If the index exceeds the length of an array, the returned value will be `NULL` +==== +Arrays can also be accessed by using array slices. An array slice is denoted by writing `lower_bound:upper_bound`. The bounds can be omitted, in which case the slice is unbounded from a given side: + +[source,sql] +---- +SELECT movies_planned[:] as "unbounded slice", + movies_planned[1:2] AS "[1:2] slice", + movies_planned[2:] AS "[2:] slice" +FROM movie_night; + unbounded slice | [1:2] slice | [2:] slice +-----------------------------------------+----------------------------+------------------------------- + {Inception,Interstellar,"The Prestige"} | {Inception,Interstellar} | {Interstellar,"The Prestige"} + {"The Matrix","John Wick"} | {"The Matrix","John Wick"} | {"John Wick"} +(2 rows) +---- + +== Limitations + +=== Field size limit + +In Redpanda SQL, the field size limit for variable-size types is 32MB and this limit applies to arrays as well. 
If a value exceeds the given limit, an error is returned: + +[source,sql] +---- +CREATE TABLE tb (array_column bigint[]); +CREATE + +COPY tb FROM '/.oxla/long_array_value.csv'; +ERROR: Error in row 1, column array_column value exceeds size of 33554432 +---- + +=== Unsupported SQL clauses + +Array columns cannot be used as the key columns in `ORDER BY`, `GROUP BY` or `JOIN` operations. It is also impossible to use the array columns as a part of the index of a table. For all the operations mentioned above, an appropriate error message will be returned: + +[source,sql] +---- +SELECT * FROM movie_night ORDER BY movies_planned; +ERROR: could not identify an ordering operator for type text[] +---- + +Arrays can still be used in `ORDER BY` or `JOIN` operations, if the array column is not the key: + +[source,sql] +---- +SELECT * FROM movie_night ORDER BY event_date ASC; + event_date | movies_planned +------------+----------------------------------------- + 2024-12-01 | {Inception,Interstellar,"The Prestige"} + 2024-12-15 | {"The Matrix","John Wick"} +(2 rows) +---- + +=== Unsupported SQL statements + +Specific SQL statements currently do not support arrays. These include: + +* `INSERT INTO` with `SELECT`: Arrays cannot be directly imported using an `INSERT INTO` with a `SELECT` statement. Instead, we encourage you to either use the `COPY FROM CSV` command or the `INSERT INTO` statement with the `VALUES` keyword +* `UPDATE` and `DELETE`: Updating or deleting records from a table, which contains array columns is not supported +* `COPY TO`: Exporting data from array columns using the `COPY TO` command is not available +* `CREATE INDEX`: Index on a table cannot be created on an array column. + +Any effort to use such operations with arrays will result in an error. For now, these limitations should be considered when designing tables that include array columns. 
diff --git a/modules/reference/pages/sql/sql-data-types/bool.adoc b/modules/reference/pages/sql/sql-data-types/bool.adoc new file mode 100644 index 000000000..1142a5c15 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/bool.adoc @@ -0,0 +1,153 @@ += Bool +:description: A BOOL is a data type mainly used for expressions that will return only two possible values, true and false. +:page-topic-type: reference + +== Overview + +A `BOOL` is a data type mainly used for expressions that will return only two possible values, `true` and `false`. + +[NOTE] +==== +Bool is stored as a bitmap in `u64` values. +==== +[WARNING] +==== +*BOOLEAN* is an alias for the *BOOL* data type. You can create a table using *BOOLEAN*. However, it will be stored and processed equivalently to *BOOL*. +==== + +== Format + +* `FALSE` +* `TRUE` + +== Examples + +Below are a few examples of using a bool data type: + +=== Create a table + +A librarian will create a *borrowBook* table that he will use to store book borrowing data. The table comprises the borrowed ID, the book name, the borrower, and the book’s returned status, which uses the *bool* data type. 
+ +[source,sql] +---- +CREATE TABLE borrowBook ( + borrowID INT, + bookName TEXT, + borrower TEXT, + returnedStat BOOL NOT NULL +); +INSERT INTO borrowBook (borrowID,bookName, borrower, returnedStat) +VALUES + (101, 'The Silent Patient', 'Mike', TRUE), + (201, 'Malibu Rising', 'Jean', TRUE), + (301, 'The Guest List', 'Mark', FALSE), + (401, 'The Four Winds', 'Cliff', TRUE), + (501, 'The Vanishing Half: A Novel', 'Sarah', TRUE), + (601, 'Red, White & Royal Blue', 'Anna', FALSE), + (701, 'The Duke and I', 'Blake', FALSE), + (801, 'The Lord of the Rings', 'Sandra', FALSE); +---- + +The *borrowBook* table has been successfully created after executing the above query: + +[source,sql] +---- +COMPLETE +INSERT 0 8 +---- + +=== Display the table + +Run the `SELECT` statement to get all records from the *borrowBook* table: + +[source,sql] +---- +SELECT * FROM borrowBook; +---- + +It will return the result as displayed below: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | ++-----------+---------------------------------+------------+---------------+ +| 101 | The Silent Patient | Mike | t | +| 201 | Malibu Rising | Jean | t | +| 301 | The Guest List | Mark | f | +| 401 | The Four Winds | Cliff | t | +| 501 | The Vanishing Half: A Novel | Sarah | t | +| 601 | Red, White & Royal Blue | Anna | f | +| 701 | The Duke and I | Blake | f | +| 801 | The Lord of the Rings | Sandra | f | ++-----------+---------------------------------+------------+---------------+ +---- + +=== List of the returned books + +In the below example, the following statement is used to retrieve all the *books* that have already been returned: + +[source,sql] +---- +SELECT * FROM borrowbook +WHERE returnedstat= 'true'; +---- + +We will get the following results: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | 
++-----------+---------------------------------+------------+---------------+ +| 101 | The Silent Patient | Mike | t | +| 201 | Malibu Rising | Jean | t | +| 401 | The Four Winds | Cliff | t | +| 501 | The Vanishing Half: A Novel | Sarah | t | ++-----------+---------------------------------+------------+---------------+ +---- + +=== List of the unreturned books + +Now, we will acquire all of the book records that haven’t been returned yet by running the `SELECT` statement with a specified `WHERE` condition as `false`: + +[source,sql] +---- +SELECT * FROM borrowbook +WHERE returnedstat= 'false'; +---- + +We will get the following results: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | ++-----------+---------------------------------+------------+---------------+ +| 301 | The Guest List | Mark | f | +| 601 | Red, White & Royal Blue | Anna | f | +| 701 | The Duke and I | Blake | f | +| 801 | The Lord of the Rings | Sandra | f | ++-----------+---------------------------------+------------+---------------+ +---- + +=== Check a book’s return status + +In this example, we are going to figure out the returned status of the book *“The Lord of the Rings”* by executing the `SELECT` statement with a specified column in the `WHERE` clause: + +[source,sql] +---- +SELECT * FROM borrowbook +WHERE bookname = 'The Lord of the Rings'; +---- + +The above query will filter all records based on the specified conditions, and we know that Sandra hasn’t returned the book yet: + +[source,sql] +---- ++-----------+---------------------------------+------------+---------------+ +| borrowid | bookname | borrower | returnedstat | ++-----------+---------------------------------+------------+---------------+ +| 801 | The Lord of the Rings | Sandra | f | ++-----------+---------------------------------+------------+---------------+ +---- diff --git 
a/modules/reference/pages/sql/sql-data-types/data-type-operators.adoc b/modules/reference/pages/sql/sql-data-types/data-type-operators.adoc new file mode 100644 index 000000000..99df5bfa2 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/data-type-operators.adoc @@ -0,0 +1,20 @@ += Data Type Operators +:description: The Operator data type is any parsed expression that returns a value. +:page-topic-type: reference + +The Operator data type is any parsed expression that returns a value. An operator is used in the form of a special symbol or function. + +The following table shows a list of logical operators that Redpanda SQL supports: + +[width="100%",cols="8%,14%,7%,56%,15%",options="header",] +|=== +|*Type* |*Name* |*Operator* |*Description* |*Example* +|Relational |Equal to |`=` |This shows that the value of one item is *equal* to another item’s value. |`cust_name = 'Mike'` +|Relational |Greater than |`>` |This shows that the value of one item is *greater* than another item’s value. |`stock_value > 10` +|Relational |Less than |`<` |This shows that the value of one item is *less* than another item’s value. |`stock_value < 20` +|Relational |Not equal to |`<>` or `!=` |Indicates that the value of one item is *not equal* to the other item’s value. |`subj_score != 'FAILED'` +|Relational |Greater than or equal to |`>=` |Indicates that the value of one item is *greater than or equal to* the other item’s value. |`prod_price >= 3000` +|Relational |Less than or equal to |`<=` |Indicates that the value of one item is *less than or equal to* the other item’s value. |`prod_price <= 9000` +|Logical |Not |`NOT` |It shows a record if the condition(s) is NOT TRUE. |`NOT true = false` +|Logical |Is null |`IS NULL` |Used to check for empty values (`NULL` values). 
|`WHERE empsalary IS NULL;` +|=== diff --git a/modules/reference/pages/sql/sql-data-types/date.adoc b/modules/reference/pages/sql/sql-data-types/date.adoc new file mode 100644 index 000000000..a70d1c60f --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/date.adoc @@ -0,0 +1,77 @@ += Date +:description: The DATE data type is used to store and insert date values. +:page-topic-type: reference + +The `DATE` data type is used to store and insert date values. + +[NOTE] +==== +The date value is stored without the time zone. +==== + +== Structure + +The date type contains three components: year, month, and day. It’s represented in a 32-bit integer. Here is the breakdown: + +* *Day component:* 5 bits store the number of days within a month. Its value is in the range `<1, 31>`. +* *Month component*: 4 bits store the month of the year. Its value is in the range `<1, 12>`. +* *Year component*: 23 bits store the number of years. Its value is from range `<0, 2^23 - 1>`. + +== Format + +[source,sql] +---- +YYYY-MM-DD +---- + +* `YYYY` - Four-digit year +* `MM` - One / two-digit month +* `DD` - One / two-digit day + +== Example + +In this example, we will create an *emp_submission* table that consists of the candidate ID, candidate name, the submitted department, and a submission date with a `DATE` data type. 
+ +[source,sql] +---- +CREATE TABLE emp_submission ( + candidate_ID INT, + candidate_Name TEXT, + sub_dept TEXT, + sub_date DATE +); + +INSERT INTO emp_submission (candidate_ID, candidate_Name, sub_dept, sub_date) +VALUES +(8557411, 'Kumar', 'HR', '2022-05-01'), +(8557421, 'Ricky', 'HR', '2022-01-09'), +(8557451, 'Alice', 'Finance', '2022-08-02'), +(8557461, 'Angel', 'Product', '2012-04-16'), +(8557431, 'Joan', 'Finance', '2022-02-02'), +(8557471, 'Cody', 'Product', '2022-03-20'), +(8557491, 'Liam', 'Product', '2022-06-15'); +---- + +Now that the data has been inserted, let’s execute the `SELECT` statement below: + +[source,sql] +---- +SELECT * FROM emp_submission; +---- + +The following is the result of the `SELECT` statement where the values in the `sub_date` column have `DATE` data type: + +[source,sql] +---- ++---------------+------------------+------------+---------------+ +| candidate_id | candidate_name | sub_dept | sub_date | ++---------------+------------------+------------+---------------+ +| 8557411 | Kumar | HR | 2022-05-01 | +| 8557421 | Ricky | HR | 2022-01-09 | +| 8557451 | Alice | Finance | 2022-08-02 | +| 8557461 | Angel | Product | 2012-04-16 | +| 8557431 | Joan | Finance | 2022-02-02 | +| 8557471 | Cody | Product | 2022-03-20 | +| 8557491 | Liam | Product | 2022-06-15 | ++---------------+------------------+------------+---------------+ +---- diff --git a/modules/reference/pages/sql/sql-data-types/index.adoc b/modules/reference/pages/sql/sql-data-types/index.adoc new file mode 100644 index 000000000..7c9116ce1 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/index.adoc @@ -0,0 +1,35 @@ += SQL DATA TYPES +:description: Redpanda SQL supports a wide range of data types, each designed to handle specific types of data efficiently. + +Redpanda SQL supports a wide range of data types, each designed to handle specific types of data efficiently. 
+ +The following table summarizes the data types supported by Redpanda SQL: + +[width="100%",cols="<48%,<29%,<23%",options="header",] +|=== +|Data Type |Definition |Format +|xref:reference:sql/sql-data-types/numeric-type/numeric#int-type.adoc[INT] |32-bit signed integer |one or more digits "`0`" to "`9`" +|xref:reference:sql/sql-data-types/numeric-type/numeric#bigint-type.adoc[BIGINT] |64-bit signed integer |large numeric/decimal value +|xref:reference:sql/sql-data-types/numeric-type/numeric#real-type.adoc[REAL] |32-bit floating point number |`float(n)` +|xref:reference:sql/sql-data-types/numeric-type/numeric#double-precision-type.adoc[DOUBLE PRECISION] |64-bit floating point number |`decimal(p, s)` +|xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[TIMESTAMP WITHOUT TIME ZONE] |Time and date values without a time zone |`YYYY-MM-DD [HH:MM:SS[.SSSSSS]]` +|xref:reference:sql/sql-data-types/timestamp-with-time-zone.adoc[TIMESTAMP WITH TIME ZONE] |Date and time values, including the time zone information |`YYYY-MM-DD HH:MM:SS.SSSSSS{plus}TZ` +|xref:reference:sql/sql-data-types/date.adoc[DATE] |Date value |`YYYY-MM-DD` +|xref:reference:sql/sql-data-types/time-type/time.adoc[TIME] |Time values without any date information |`HH:MM:SS[.SSSSSS]` +|xref:reference:sql/sql-data-types/interval.adoc[INTERVAL] |Encodes a span of time |`year-month (YYYY-MM); day-time (DD HH:MM:SS)` +|xref:reference:sql/sql-data-types/bool.adoc[BOOL] |Boolean value |`True` or `False` +|xref:reference:sql/sql-data-types/text.adoc[TEXT] |UTF8 encoded string with Unicode support |'`text`' +|xref:reference:sql/sql-data-types/json.adoc[JSON] |A value in JSON standard format |`variable_name JSON` +|xref:reference:sql/sql-data-types/array.adoc[ARRAY] |An array of a specific data type |`'{value1, value2, value3}'::data_type[]` +|=== + +[WARNING] +==== +*Overflow Risks* +When performing operations on numeric or temporal types, please be aware that overflows can lead to *undefined 
behavior*, resulting in unexpected values or errors. Ensure input values are within the allowed range for each numeric type to prevent overflows. This can occur during arithmetic operations or function execution (e.g. `AVG()`), where the result does not fit the result type. Using larger data types such as `BIGINT` can help mitigate overflow risks. +==== +[NOTE] +==== +*Casting Considerations* +Explicit casting between types can cause data *loss* due to altered precision or magnitude, such as truncating fractional seconds in `TIME` or silently clipping out-of-range values. Please verify input ranges to prevent unintended data loss. +==== diff --git a/modules/reference/pages/sql/sql-data-types/interval.adoc b/modules/reference/pages/sql/sql-data-types/interval.adoc new file mode 100644 index 000000000..54ed0fd35 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/interval.adoc @@ -0,0 +1,177 @@ += Interval +:description: The Interval data type represents periods between dates or times, which can be precisely calculated and expressed through various units. +:page-topic-type: reference + +The Interval data type represents periods between dates or times, which can be precisely calculated and expressed through various units. Those can be combined and include additional options for different interval calculations. + +In this doc, you’ll find more about the *interval syntax*, learn what are *supported units and abbreviations*, browse through *examples* and finally find out how to *extract data from intervals*. + +== Syntax + +The syntax for specifying an interval is as follows: + +[source,sql] +---- +SELECT INTERVAL 'quantity unit [quantity unit...] [direction]' [OPTION] +---- + +[cols="1,3",options="header"] +|=== +|Parameter |Description + +|`quantity` +|The value representing the number of units. + +|`unit` +a|Year, month, day, hour, minute, etc. Abbreviations, short forms, and dash format are supported. Plural forms are also acceptable (e.g. 
months, days, weeks). + +|`direction` +|An optional parameter: *ago* or empty string. + +|`OPTION` +|Additional options when parsing interval. +|=== + +== Supported units and abbreviations + +[cols=",",options="header",] +|=== +|*Unit* |*Abbreviations* +|Millennium |- +|Century |- +|Decade |- +|Year |`y`, `yr`, `yrs` +|Month |- +|Week |- +|Day |`d` +|Hour |`h`, `hr`, `hrs` +|Minute |`min`, `mins`, `m` +|Second |`s`, `sec`, `secs` +|Millisecond |`ms` +|Microsecond |- +|=== + +== Options for interval parsing + +* `YEAR`, `MONTH`, `DAY`, `HOUR`, `MINUTE`, `SECOND` +* `YEAR TO MONTH`, `DAY TO HOUR`, `DAY TO MINUTE`, `DAY TO SECOND`, `HOUR TO MINUTE`, `HOUR TO SECOND`, `MINUTE TO SECOND` + +== Examples + +=== Select interval with multiple units + +In this example, we’ll calculate the interval by combining multiple units of time. + +[source,sql] +---- +SELECT INTERVAL '5 years 4 months 2 weeks 3 days 5 hours 10 minutes 25 seconds' as "Interval"; +---- + +[source,sql] +---- + Interval +--------------------------------- + 5 years 4 mons 17 days 05:10:25 +(1 row) +---- + +=== Use abbreviations + +This example shows how to use abbreviated units for time intervals. + +[source,sql] +---- +SELECT INTERVAL '10 yr 8 months 2 weeks 6 days 5 hrs 10 min 20 s as "Interval"; +---- + +[source,sql] +---- + Interval +---------------------------------- + 10 years 8 mons 20 days 05:10:20 +(1 row) +---- + +=== Use dash format + +Here you’ll find out how to use the dash format for specifying intervals. + +[source,sql] +---- +SELECT INTERVAL '1-2 3 DAYS 04:05:06.070809' as "Interval"; +---- + +[source,sql] +---- + Interval +-------------------------------------- + 1 year 2 mons 3 days 04:05:06.070809 +(1 row) +---- + +=== Parse intervals using specific units + +By running the code below, the output will show everything up to minutes and ignore seconds and miliseconds. 
+ +[source,sql] +---- +SELECT INTERVAL '1-2 5 DAYS 07:08:06.040809' MINUTE as "Interval"; +---- + +[source,sql] +---- + Interval +------------------------------- + 1 year 2 mons 5 days 07:08:00 +(1 row) +---- + +=== Display specific range only + +Executing the query below will result only years and months being displayed excluding days, hours, minutes, and seconds from the input. + +[source,sql] +---- +SELECT INTERVAL '2-4 5 DAYS 04:05:06.070809' YEAR TO MONTH as "Interval"; +---- + +[source,sql] +---- + Interval +---------------- + 2 years 4 mons +(1 row) +---- + +=== Extract data from interval + +In order to extract the interval numbers from the timestamp, you can use the *EXTRACT()* function the following way: + +[source,sql] +---- +SELECT EXTRACT (field FROM interval) +---- + +* `field`: supports time units, such as `YEAR`, `MONTH`, `DAY`, `HOUR`, etc. +* `interval`: specified timestamp. + +[source,sql] +---- +SELECT EXTRACT (MINUTE +FROM INTERVAL '2 hours 30 minutes'); +---- + +As the output of the above query, only the minutes part will be returned. + +[source,sql] +---- + extract +------------ + 30 +(1 row) +---- + +[NOTE] +==== +If you query a field that is not specified in the timestamp, you will get `0` as an output. +==== diff --git a/modules/reference/pages/sql/sql-data-types/json.adoc b/modules/reference/pages/sql/sql-data-types/json.adoc new file mode 100644 index 000000000..766440be8 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/json.adoc @@ -0,0 +1,80 @@ += JSON +:description: JSON stands for JavaScript Object Notation. +:page-topic-type: reference + +== *overview* + +JSON stands for *JavaScript Object Notation*. It is an open standard format with key-value pairs to transport data between a server and a web application. 
+ +== Syntax + +The JSON data type in Redpanda SQL has the following syntax: + +[source,sql] +---- +variable_name JSON +---- + +== Examples + +=== Create a table + +First, create the *orders table* using the below command: + +[source,sql] +---- +CREATE TABLE orders ( + orders_Detail JSON +); +---- + +This will create a table with the `orders_Detail`column to store key-value pairs of data. + +=== Insert data + +Next, insert data into the orders table as follows: + +[source,sql] +---- +INSERT INTO orders (orders_Detail) +VALUES +('{ "customer": "Dean Smith", "items": {"product": "cup","qty": 2}}'), +('{ "customer": "Sissy Kate", "items": {"product": "knife","qty": 1}}'), +('{ "customer": "Emma Stone", "items": {"product": "spoon","qty": 4}}'), +('{ "customer": "Chris Bale", "items": {"product": "fork","qty": 5}}'), +('{ "customer": "Mike Stuart", "items": {"product": "spatula","qty": 2}}'); +---- + +This will insert data values where `orders_Detail`has the following keys: + +* `customer`: it will store a customer’s data who purchased the product. +* `items`: it will store the order details, `product` & `qty`. + +=== Retrieve data + +Use the `SELECT` command to retrieve the orders table’s data. 
+ +[source,sql] +---- +SELECT * FROM orders; +---- + +You will get the following output: + +[source,sql] +---- ++--------------------------------------------------------------------------+ +| orders_detail | ++--------------------------------------------------------------------------+ +| {"customer":"Dean Smith","items":{"qty":2.000000,"product":"cup"}} | +| {"customer":"Sissy Kate","items":{"product":"knife","qty":1.000000}} | +| {"customer":"Emma Stone","items":{"qty":4.000000,"product":"spoon"}} | +| {"customer":"Chris Bale","items":{"product":"fork","qty":5.000000}} | +| {"customer":"Mike Stuart","items":{"qty":2.000000,"product":"spatula"}} | ++--------------------------------------------------------------------------+ +---- + +[TIP] +==== +It is normal for the JSON type’s result to look disordered. +==== diff --git a/modules/reference/pages/sql/sql-data-types/numeric-type/index.adoc b/modules/reference/pages/sql/sql-data-types/numeric-type/index.adoc new file mode 100644 index 000000000..1951c000e --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/numeric-type/index.adoc @@ -0,0 +1,3 @@ += Numeric Types +:description: Reference for numeric data types in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc new file mode 100644 index 000000000..a7a33318b --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric-data-type-aliases.adoc @@ -0,0 +1,91 @@ += Numeric Data Type - Aliases +:description: We allow aliases that can be used interchangeably with the primary data types. +:page-topic-type: reference + +We allow aliases that can be used interchangeably with the primary data types. However, while these aliases can be used, they will be mapped to their corresponding primary data types during data processing. 
+ +Here, we’ll discuss the numeric data type aliases: + +== INTEGER alias + +The `INTEGER` alias is an alternative name for the `INT` data type. For example, the following two queries are functionally the same: + +[source,sql] +---- +CREATE TABLE ExampleTable ( + id INTEGER, +); + +-- Functionally the same as the previous table +CREATE TABLE AnotherTable ( + id INT, +); +---- + +[WARNING] +==== +It's important to note that even though `INTEGER` is used, the data is stored and treated as `INT`. +==== + +== LONG alias + +The `LONG` alias is often used to represent larger integer values. For example: + +[source,sql] +---- +CREATE TABLE LargeValues ( + value LONG, +); + +-- Functionally the same as the previous table +CREATE TABLE LargeValuesEquivalent ( + value BIGINT, +); +---- + +[WARNING] +==== +Any usage of `LONG` is stored and treated as `BIGINT`. +==== + +== FLOAT alias + +The `FLOAT` alias corresponds to the `REAL` data type. For example: + +[source,sql] +---- +CREATE TABLE FloatExample ( + price FLOAT, +); + +-- Functionally the same as the previous table +CREATE TABLE FloatEquivalent ( + price REAL, +); +---- + +[WARNING] +==== +When you use `FLOAT`, it's stored and treated as `REAL`. +==== + +== DOUBLE alias + +The `DOUBLE` alias is used to define `DOUBLE PRECISION` floating-point numbers. For example: + +[source,sql] +---- +CREATE TABLE DoubleExample ( + measurement DOUBLE, +); + +-- Functionally the same as the previous table +CREATE TABLE DoubleEquivalent ( + measurement DOUBLE PRECISION, +); +---- + +[WARNING] +==== +When you use `DOUBLE`, it's stored and treated as `DOUBLE PRECISION`. 
+==== diff --git a/modules/reference/pages/sql/sql-data-types/numeric-type/numeric.adoc b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric.adoc new file mode 100644 index 000000000..09d3377c1 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/numeric-type/numeric.adoc @@ -0,0 +1,304 @@ += Numeric +:description: The INT data type represents whole numbers without decimal points. +:page-topic-type: reference + +== Int type + +The `INT` data type represents whole numbers without decimal points. It is a 32-bit signed integer with a range from -2147483648 to 2147483647. + +=== Format + +[source,sql] +---- +column_name INT +---- + +=== Example + +The following is an example of how to create a column using an `INT` type. + +[source,sql] +---- +CREATE TABLE cities ( + city_id INT, + cityname TEXT, + population INT +); +INSERT INTO cities (city_id, cityname, population) +VALUES +(8557411, 'New York', 8419000), +(8557421, 'London', 8982000), +(8557451, 'Hongkong', 7482000), +(8557491, 'Seoul', 9776000); +---- + +Now, run the following query to display the table. + +[source,sql] +---- +SELECT * FROM cities; +---- + +It will result in a table show below. + +[source,sql] +---- + city_id | cityname | population +---------+----------+------------ + 8557411 | New York | 8419000 + 8557421 | London | 8982000 + 8557451 | Hongkong | 7482000 + 8557491 | Seoul | 9776000 +(4 rows) +---- + +== Bigint type + +The `BIGINT` data type stores large whole numbers that exceed the `INT` range. It is a 64-bit signed integer with a range from -9223372036854775808 to 9223372036854775807. 
+ +=== Format + +[source,sql] +---- +column_name BIGINT +---- + +=== Example + +The following is an example of how to create a column using the `BIGINT` type: + +[source,sql] +---- +CREATE TABLE galaxies ( + galaxy_name TEXT, + star BIGINT +); +INSERT INTO galaxies (galaxy_name, star) +VALUES +('Milky Way', 100000000000), +('Cigar', 30000000000), +('Andromeda', 1000000000000), +('Cosmos', 2000000000000000000); +---- + +Now, run the following query to display the table: + +[source,sql] +---- +SELECT * FROM galaxies; +---- + +You will get the following output: + +[source,sql] +---- + galaxy_name | star +-------------+--------------------- + Milky Way | 100000000000 + Cigar | 30000000000 + Andromeda | 1000000000000 + Cosmos | 2000000000000000000 +(4 rows) +---- + +== Real type + +The `REAL` data type is a 32-bit floating-point number compliant with the IEEE 754 binary32 format. + +=== Format + +[source,sql] +---- +column_name REAL +---- + +=== Example + +*1. Create a Table* + +Here, we are creating a table with a `REAL` column type. + +[source,sql] +---- +CREATE TABLE numbers ( + column_1 REAL +); + +INSERT into numbers (column_1) +VALUES (1.234568); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbers; +---- + +The stored value is shown below. + +[source,sql] +---- + column_1 +---------- + 1.234568 +(1 row) +---- + +*2. Rounding* + +Rounding might happen if the precision of an input number is too high. + +[source,sql] +---- +CREATE TABLE numbers1 ( +column_1 REAL +); + +INSERT into numbers1 (column_1) +VALUES (1.2345689); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbers1; +---- + +The table below shows the value after rounding. + +[source,sql] +---- + column_1 +---------- + 1.234569 +(1 row) +---- + +*3. Create a Table With Numbers Exceeding the Range* + +The `REAL` type only stores 32-bit floating-point numbers. In this example, we input the numbers that exceed the range. 
+ +[source,sql] +---- +CREATE TABLE numbers2 ( + column_1 REAL +); + +INSERT into numbers2 (column_1) +VALUES (1.2345682991822); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbers2; +---- + +The final output will only return numbers that match the range. + +[source,sql] +---- + column_1 +----------- + 1.2345684 +(1 row) +---- + +== Double precision type + +The `DOUBLE PRECISION` data type is a 64-bit floating-point number compliant with the IEEE 754 binary64 format. + +=== Format + +[source,sql] +---- +column_name DOUBLE PRECISION +---- + +=== Example + +*1. Create a Table* + +Here, we are creating a table with a `DOUBLE PRECISION` type column. + +[source,sql] +---- +CREATE TABLE numbersdouble ( + column_1 DOUBLE PRECISION +); + +INSERT into numbersdouble (column_1) +VALUES (1.234568817283122); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbersdouble; +---- + +The output is shown below. + +[source,sql] +---- + column_1 +------------------- + 1.234568817283122 +(1 row) +---- + +*2. Rounding* + +Rounding might happen if the precision of an input number is too high. + +[source,sql] +---- +CREATE TABLE numbersdouble1 ( + column_1 DOUBLE PRECISION +); + +INSERT into numbersdouble1 (column_1) +VALUES (1.234568817283122773); +---- + +Display the table with the following query. + +[source,sql] +---- +SELECT * FROM numbersdouble1; +---- + +The table below shows the value after rounding. + +[source,sql] +---- + column_1 +-------------------- + 1.2345688172831228 +(1 row) +---- + +== Scientific notation support + +Redpanda SQL now supports scientific notation for floating-point types. This feature allows you to use expressions like 1.1e{plus}3, 1e-20, 1.1e02 and similar in your queries. + +*Example* + +[source,sql] +---- +SELECT 1.1e+3, 1e-20, 1.1e02; +---- + +*_Output_* + +[source,sql] +---- + ?column? | ?column? | ?column? 
+----------+----------+---------- + 1100 | 1e-20 | 110 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-data-types/text.adoc b/modules/reference/pages/sql/sql-data-types/text.adoc new file mode 100644 index 000000000..a8626fa0b --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/text.adoc @@ -0,0 +1,126 @@ += Text +:description: The text data type is a UTF8-encoded text with Unicode support, which stores a sequence of characters (text). +:page-topic-type: reference + +The text data type is a UTF8-encoded text with Unicode support, which stores a sequence of characters (text). + +== Examples + +Let’s create an employee table with a text data type in each column: + +[source,sql] +---- +CREATE TABLE employee ( + employeeName text, + employeeDept text, + employeeRole text +); +INSERT INTO employee (employeeName, employeeDept, employeeRole) +VALUES ('John','Finance','Staff'), + ('Maya','Product','Staff'), + ('Jane','Finance','Staff'), + ('Phil','HR','Manager'); +---- + +[TIP] +==== +Insert the text value between the single quotes *' '*. +==== +The created table is shown below: + +[source,sql] +---- ++---------------+---------------+---------------+ +| employeename | employeedept | employeerole | ++---------------+---------------+---------------+ +| John | Finance | Staff | +| Maya | Product | Staff | +| Jane | Finance | Staff | +| Phil | HR | Manager | ++---------------+---------------+---------------+ +---- + +== Text with SUBSTR function + +The `substr()` function extracts a specific number of characters from a text. + +=== Syntax + +[source,sql] +---- +substr( text, start_position, length ) +---- + +Let’s analyze the above syntax: + +* `text`is the specified text. +* `start_position` is used as the starting position, specifying the part from which the substring will be returned. It is written as an int value. +* `length` is used to determine the number of characters to be extracted. It can be one or more characters. 
+ +[NOTE] +==== +The first position in the `text` is 1. +==== + +=== Example + +Insert a value into the text column. + +[source,sql] +---- +SELECT substr('Watermelon',6,5) AS "Fruit"; +---- + +The updated table is shown below: + +[source,sql] +---- ++-------------+ +| Fruit | ++-------------+ +| melon | ++-------------+ +---- + +== Text with LENGTH function + +The `length()` function returns the number of characters in a text. + +[NOTE] +==== +The number of characters might be different from the byte length. +==== + +=== Syntax + +The length function will take a text as a parameter. + +[source,sql] +---- +LENGTH (text); +---- + +=== Example + +Insert a value into the text column. + +[source,sql] +---- +SELECT LENGTH ('UNITED STATES'); +---- + +The updated table is shown below. + +[source,sql] +---- ++---------+ +| f | ++---------+ +| 13 | ++---------+ +---- + +[NOTE] +==== +The `length()` function will also count spaces. +==== diff --git a/modules/reference/pages/sql/sql-data-types/time-type/index.adoc b/modules/reference/pages/sql/sql-data-types/time-type/index.adoc new file mode 100644 index 000000000..a96a32040 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/time-type/index.adoc @@ -0,0 +1,3 @@ += Time Types +:description: Reference for time data types and operators in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-data-types/time-type/time-operators.adoc b/modules/reference/pages/sql/sql-data-types/time-type/time-operators.adoc new file mode 100644 index 000000000..c71a26f1b --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/time-type/time-operators.adoc @@ -0,0 +1,379 @@ += Time operators +:description: Time operators in Redpanda SQL allow you to perform various operations on dates, times, and intervals. +:page-topic-type: reference + +Time operators in Redpanda SQL allow you to perform various operations on dates, times, and intervals. 
Here’s a guide to using these operators: + +== DATE {plus} INTEGER + +Add a specific number of days to a date. + +*Example* + +[source,sql] +---- +select date '2022-03-15' + 14 as "result"; +---- + +The result will be 14 days after '`2022-03-15`'. + +[source,sql] +---- + result +------------ + 2022-03-29 +---- + +=== INTEGER {plus} DATE + +Adding and multiplying time operators can also be done in reverse order. For example, we add a number of days to a date in the format of `Integer {plus} Date`. + +[source,sql] +---- +select 14 + date '2022-03-15' AS "result"; +---- + +The result will be the same, which is 14 days after '`2022-03-15`' is '`2022-03-29`'. + +[source,sql] +---- + result +------------ + 2022-03-29 +---- + +== DATE {plus} INTERVAL + +Add a specified interval to a date. + +*Example* + +[source,sql] +---- +select date '2022-03-15' + interval '3 months' as "result"; +---- + +The result will be the date three months after '`2022-03-15`'. + +[source,sql] +---- + result +---------------------------- + 2022-06-15 00:00:00.000000 +---- + +== DATE - INTEGER + +Subtract a certain number of days from a date. + +*Example* + +[source,sql] +---- +select date '2022-03-15' - 7 as "result"; +---- + +The result will be 7 days before '`2022-03-15`'. + +[source,sql] +---- + result +------------ + 2022-03-08 +---- + +== DATE - INTERVAL + +Subtract a specified interval from a date. + +*Example* + +[source,sql] +---- +select date '2022-03-15' - interval '2 hour' as "result"; +---- + +The result will be the timestamp with two hours before '`2022-03-15`'. + +[source,sql] +---- + result +---------------------------- + 2022-03-14 22:00:00.000000 +---- + +== DATE - DATE + +Subtract dates. + +*Example* + +[source,sql] +---- +select date '2023-03-15' - date '2023-01-10' as "result"; +---- + +The number of days elapsed between '`2023-03-15`' and '`2023-01-10`' is 64 days. + +[source,sql] +---- + result +-------- + 64 +---- + +== DATE {plus} TIME + +Add a time-of-day to a date. 
+ +*Example* + +[source,sql] +---- +select date '2010-05-20' + time '02:00' as "result"; +---- + +The result will be a timestamp with the specified time added to the given date. + +[source,sql] +---- + result +---------------------------- + 2010-05-20 02:00:00.000000 +---- + +== TIME {plus} INTERVAL + +Add a certain interval to a given time. + +*Example* + +[source,sql] +---- +select time '12:30' + interval '1 hour' as "result"; +---- + +The result will be the time 1 hour after '`12:30`'. + +[source,sql] +---- + result +----------------- + 13:30:00.000000 +---- + +== TIME - INTERVAL + +Subtract a specified interval from a given time. + +*Example* + +[source,sql] +---- +select time '18:45' - interval '45 minutes' as "result"; +---- + +The result will be the time 18:00. + +[source,sql] +---- + result +----------------- + 18:00:00.000000 +---- + +== TIME - TIME + +Get a time difference by subtracting one time from another. + +*Example* + +[source,sql] +---- +select time '10:00' - TIME '08:20' as "result"; +---- + +In this example, the time difference between the two provided times is 1 hour and 40 minutes. + +[source,sql] +---- + result +----------------- + 01:40:00.000000 +---- + +== TIMESTAMP {plus} INTERVAL + +Add a timestamp and an interval. + +*Example* + +[source,sql] +---- +select timestamp '2021-01-05 12:00:00' + interval '5 days' as "result"; +---- + +The result will be a new timestamp, adding 5 days to '`2021-01-05 12:00:00`'. + +[source,sql] +---- + result +---------------------------- + 2021-01-10 12:00:00.000000 +---- + +== TIMESTAMP - INTERVAL + +Subtract an interval from a timestamp. + +*Example* + +[source,sql] +---- +select timestamp '2022-01-04 12:00:00' - interval '3 days' as "result"; +---- + +In this example, it subtracts 3 days from '`2022-01-04 12:00:00`'. + +[source,sql] +---- + result +---------------------------- + 2022-01-01 12:00:00.000000 +---- + +== TIMESTAMP - TIMESTAMP + +Get an interval by subtracting one timestamp from another. 
+ +*Example* + +[source,sql] +---- +select timestamp '2022-01-05 18:30:00' - timestamp '2022-01-01 12:00:00' as "result"; +---- + +It gives the interval between the two timestamps, 102 hours and 30 minutes. + +[source,sql] +---- + result +------------------ + 102:30:00.000000 +---- + +== INTERVAL {plus} INTERVAL + +Add intervals. + +*Example* + +[source,sql] +---- +select interval '2 months 2 days' + interval '6 days' as "result"; +---- + +It adds 6 days to 2 days, resulting in a total of 2 months and 8 days. + +[source,sql] +---- + result +--------------- + 2 mons 8 days +---- + +== INTERVAL - INTERVAL + +Subtract intervals. + +*Example* + +[source,sql] +---- +select interval '2 months' - interval '20 days' as "result"; +---- + +It subtracts 20 days from 2 months. + +[source,sql] +---- + result +----------------- + 2 mons -20 days +---- + +== INTERVAL * INTEGER + +Multiply an interval by an integer. + +*Example* + +[source,sql] +---- +select interval '2 hours' * 3 as "result"; +---- + +It multiplies '`2 hours`' by 3, the result is 6 hours. + +[source,sql] +---- + result +----------------- + 06:00:00.000000 +---- + +== INTERVAL * DOUBLE PRECISION + +Multiply an interval by a scalar. + +*Example* + +[source,sql] +---- +select interval '2 hours' * 1.5 as "result"; +---- + +It multiplies '`2 hours`' by 1.5, and returns 3 hours. + +[source,sql] +---- + result +----------------- + 03:00:00.000000 +---- + +== INTERVAL / NUMBER + +Divide an interval by an integer or scalar. + +=== Divide by an integer + +[source,sql] +---- +select interval '1 hour' / 2 as "result"; +---- + +It divides '`1 hour`' by 2, and returns 30 minutes. + +[source,sql] +---- + result +----------------- + 00:30:00.000000 +---- + +=== Divide by a scalar + +[source,sql] +---- +select interval '2 hours' / 1.5 as "result"; +---- + +It divides '`2 hours`' by 1.5, and returns 1 hour 20 minutes. 
+ +[source,sql] +---- + result +----------------- + 01:20:00.000000 +---- diff --git a/modules/reference/pages/sql/sql-data-types/time-type/time.adoc b/modules/reference/pages/sql/sql-data-types/time-type/time.adoc new file mode 100644 index 000000000..43b98f288 --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/time-type/time.adoc @@ -0,0 +1,68 @@ += Time +:description: The TIME data type in Redpanda SQL stores time values without any date information. +:page-topic-type: reference + +The `TIME` data type in Redpanda SQL stores time values without any date information. It represents a specific time of day, independent of any time zone or date. + +== Format + +The format for the TIME data type is as follows: + +[source,sql] +---- +HH:MM:SS[.SSSSSS] +---- + +* `HH`: One or two-digit hour (valid values from 00 to 23). +* `MM`: One or two-digit minutes (valid values from 00 to 59). +* `SS`: One or two-digit seconds (valid values from 00 to 59). +* `[.SSSSSS]` : Optional fractional seconds, with up to six decimal places (microsecond precision). + +== Examples + +=== Create a schedule table + +Let’s create a table to manage employee schedules, containing their names and the time they are scheduled to start work. The TIME data type will be used for the `start_time` column. + +[source,sql] +---- +CREATE TABLE employee_schedule ( + employee_name TEXT, + start_time TIME +); + +INSERT INTO employee_schedule (employee_name, start_time) +VALUES +('John Doe', '08:30:00'), +('Jane Smith', '09:00:00'), +('Michael Johnson', '10:15:00'); +---- + +The table has been successfully created after executing the above query: + +[source,sql] +---- +COMPLETE +INSERT 0 3 +---- + +=== View the employee schedule + +To view all employee schedules in the `employee_schedule` table, we can use the `SELECT` statement. 
+ +[source,sql] +---- +SELECT * FROM employee_schedule; +---- + +The output will display the employee names and their corresponding scheduled start times: + +[source,sql] +---- + employee_name | start_time +-----------------+----------------- + John Doe | 08:30:00.000000 + Jane Smith | 09:00:00.000000 + Michael Johnson | 10:15:00.000000 +(3 rows) +---- diff --git a/modules/reference/pages/sql/sql-data-types/timestamp-with-time-zone.adoc b/modules/reference/pages/sql/sql-data-types/timestamp-with-time-zone.adoc new file mode 100644 index 000000000..d44357e6f --- /dev/null +++ b/modules/reference/pages/sql/sql-data-types/timestamp-with-time-zone.adoc @@ -0,0 +1,155 @@ += Timestamp with Time Zone +:description: Redpanda SQL provides you with two data types for handling timestamps: +:page-topic-type: reference + +Redpanda SQL provides you with two data types for handling timestamps: + +. xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[*Timestamp without Time Zone*]: It allows you to store both date and time. +. *Timestamp with Time Zone*: It stores date and time values but does not store time zone information within the database. Instead, it processes the time zone information during operations. +.. During *INSERT* operation, the time zone is ignored. The date and time are stored without considering the time zone. +.. During *the SELECT* operation, the time zone information from the user’s session is also ignored. The data is returned exactly as it is stored without adjusting the time zone. + +[WARNING] +==== +*Important Note:* + Keep in mind that all user sessions have a local timezone associated with them, affecting how timestamps `with time zone` values are displayed. + + The timezone information *is not stored in the database*. Consequently, every time a user requests a value of this type, Redpanda SQL converts from UTC to the user's local timezone before displaying it. 
+====
[WARNING]
====
*Important Note:*
 Redpanda SQL relies on the timezone information served by the host machine's operating system. It must be kept up to date in order to ensure correct timestamp conversions, date calculations, and compliance with regional time changes such as daylight saving adjustments.
====
On this page, you will learn about the timestamp with time zone data type.

== Format

The `timestamp with time zone` data type has the following format:

[source,sql]
----
YYYY-MM-DD HH:MM:SS.SSSSSS+TZ
----

* `YYYY`: Four-digit year
* `MM`: One / two-digit month
* `DD`: One / two-digit day
* `HH`: One / two-digit hour (valid values from 00 to 23)
* `MM`: One / two-digit minutes (valid values from 00 to 59)
* `SS`: One / two-digit seconds (valid values from 00 to 59)
* `[.SSSSSS]`: Up to six fractional digits (microsecond precision)
* `{plus}TZ`: Time zone offset in the format {plus}/-HH:MM (e.g., {plus}05:30, -08:00)

== Examples

=== Create a table

Let’s create a table named `events_log` that contains a timestamp without time zone column and a timestamp with time zone column. The values in the `event_timestamp_tz` column are in the “Europe/Moscow” timezone.

[source,sql]
----
CREATE TABLE events_log (
    event_name TEXT,
    event_timestamp TIMESTAMP WITHOUT TIME ZONE,
    event_timestamp_tz TIMESTAMP WITH TIME ZONE
);
INSERT INTO events_log (event_name, event_timestamp, event_timestamp_tz)
VALUES
    ('Event 1', '2023-07-27 12:30:00', '2023-07-27 12:30:00+03:00'),
    ('Event 2', '2023-07-27 08:45:00', '2023-07-27 08:45:00+03:00'),
    ('Event 3', '2023-07-27 20:15:00', '2023-07-27 20:15:00+03:00');
----

The table has been successfully created after executing the above query:

[source,sql]
----
COMPLETE
INSERT 0 3
----

=== Display the table

Run the `SELECT` statement to get all records of the table:

[source,sql]
----
SELECT event_timestamp, event_timestamp_tz
FROM events_log;
----

It will return the result as displayed below. 
We can see that the `event_timestamp_tz` is converted to UTC timezone. + +[source,sql] +---- + event_timestamp | event_timestamp_tz +----------------------------+--------------------------------- + 2023-07-27 12:30:00.000000 | 2023-07-27 09:30:00.000000+0000 + 2023-07-27 08:45:00.000000 | 2023-07-27 05:45:00.000000+0000 + 2023-07-27 20:15:00.000000 | 2023-07-27 17:15:00.000000+0000 +(3 rows) +---- + +=== Order table by timestamp + +Let’s assume we want to sort the events based on the `event_timestamp` column and display the corresponding UTC in the `event_timestamp_tz` column. Run the following query: + +[source,sql] +---- +SELECT + event_timestamp, + event_timestamp_tz, + event_timestamp AT TIME ZONE 'UTC' AS utc_time +FROM + events_log +ORDER BY + event_timestamp; +---- + +We’ll retrieve the `event_timestamp` and `event_timestamp_tz` columns and calculate the corresponding UTC time using the `AT TIME ZONE 'UTC'` operator. + +We then order the results based on the `event_timestamp` column, giving us a sorted list of events with their corresponding local and UTC times. + +[source,sql] +---- + event_timestamp | event_timestamp_tz | utc_time +----------------------------+---------------------------------+--------------------------------- + 2023-07-27 08:45:00.000000 | 2023-07-27 05:45:00.000000+0000 | 2023-07-27 08:45:00.000000+0000 + 2023-07-27 12:30:00.000000 | 2023-07-27 09:30:00.000000+0000 | 2023-07-27 12:30:00.000000+0000 + 2023-07-27 20:15:00.000000 | 2023-07-27 17:15:00.000000+0000 | 2023-07-27 20:15:00.000000+0000 +(3 rows) +---- + +== AT TIME ZONE operator + +The `AT TIME ZONE` operator in timestamp with time zone converts the given timestamp with time zone to the new time zone, with no time zone designation. + +*Syntax:* + +[source,sql] +---- +SELECT TIMESTAMP WITH TIME ZONE 'timestamp' AT TIME ZONE 'TIME_ZONE'; +---- + +* `timestamp`: The date and time value with the time zone. +* `TIME_ZONE`: The target time zone to which the timestamp will be converted. 
+The user’s timezone is fixed to UTC.

*Example:*

In this example, we will convert a specified timestamp with time zone into the UTC timezone.

[source,sql]
----
SELECT TIMESTAMP WITH TIME ZONE '2023-03-04 10:29:90-05' AT TIME ZONE 'UTC';
----

The result will be a timestamp without a time zone.

[source,sql]
----
              f
----------------------------
 2023-03-04 15:30:30.000000
(1 row)
----
diff --git a/modules/reference/pages/sql/sql-data-types/timestamp-without-time-zone.adoc b/modules/reference/pages/sql/sql-data-types/timestamp-without-time-zone.adoc
new file mode 100644
index 000000000..21a1992de
--- /dev/null
+++ b/modules/reference/pages/sql/sql-data-types/timestamp-without-time-zone.adoc
@@ -0,0 +1,210 @@
+= Timestamp Without Time Zone
:description: The timestamp data type stores time and date values without a time zone.
:page-topic-type: reference

== Overview

The timestamp data type stores *time* and *date* values without a time zone. It represents a fixed point in time that is independent of any time zone.

== Format

[source,sql]
----
YYYY-MM-DD [HH:MM:SS[.SSSSSS]]
----

* `YYYY`: Four-digit year
* `MM`: One / two-digit month
* `DD`: One / two-digit day
* `HH`: One / two-digit hour (valid values from 00 to 23)
* `MM`: One / two-digit minutes (valid values from 00 to 59)
* `SS`: One / two-digit seconds (valid values from 00 to 59)
* `[.SSSSSS]`: Up to six fractional digits (microsecond precision)

[NOTE]
====
Fractional digits are the digits after the decimal point ( . )
====

== Examples

=== Create a table

Here, we will create a *visitors* table to store visitor data in an office building. It consists of the visitor’s name, the purpose of the visit, the company, and the time and date of the visit, which uses the *Timestamp* data type. 
+ +[source,sql] +---- +CREATE TABLE visitors ( + visitorName TEXT, + visitPurp TEXT, + visitComp TEXT, + visitDate TIMESTAMP WITHOUT TIME ZONE +); +INSERT INTO visitors (visitorName, visitPurp, visitComp, visitDate) +VALUES + ('Peter', 'Interview', 'Apple', '2022-01-10 09:12:40'), + ('Will', 'Meeting', 'McKesson', '2022-01-29 11:28:02'), + ('Max', 'Meeting', 'McKesson', '2022-02-11 10:19:10'), + ('Dustin', 'Meeting', 'CVS Health', '2022-03-18 14:24:08'), + ('Lizzy', 'Meeting', 'CVS Health', '2022-04-23 13:10:09'), + ('Evy', 'Interview', 'Apple', '2022-05-01 08:45:50'); +---- + +The *visitors* table has been successfully created after executing the above query: + +[source,sql] +---- +COMPLETE +INSERT 0 6 +---- + +=== Display the table + +Run the `SELECT` statement to get all records of the *visitors* table: + +[source,sql] +---- +SELECT * FROM visitors; +---- + +It will return the result set as displayed below: + +[source,sql] +---- ++--------------+--------------+---------------+-----------------------+ +| visitorName | visitPurp | visitComp | visitDate | ++--------------+--------------+---------------+-----------------------+ +| Peter | Interview | Apple | 2022-01-10 09:12:40 | +| Will | Meeting | McKesson | 2022-01-29 11:28:02 | +| Max | Meeting | McKesson | 2022-02-11 10:19:10 | +| Dustin | Meeting | CVS Health | 2022-03-18 14:24:08 | +| Lizzy | Meeting | CVS Health | 2022-04-23 13:10:09 | +| Evy | Interview | Apple | 2022-05-01 08:45:50 | ++--------------+--------------+---------------+-----------------------+ +---- + +=== Look for a specific timestamp + +In the below example, the following statement is used to get records with a specified timestamp: + +[source,sql] +---- +SELECT * FROM visitors +WHERE visitDate = '2022-04-23 13:10:09'; +---- + +We will get the following successful results: + +[source,sql] +---- ++--------------+--------------+---------------+-----------------------+ +| visitorName | visitPurp | visitComp | visitDate | 
++--------------+--------------+---------------+-----------------------+
| Lizzy        | Meeting      | CVS Health    | 2022-04-23 13:10:09   |
+--------------+--------------+---------------+-----------------------+
----

=== Insert a value that exceeds the standard format

The time portion of a timestamp has a standard format; for example, *seconds* are only valid for values from 00 to 59.

The example below will insert a new record into the visitors table with a seconds value of `60`, which exceeds the standard seconds format.

[source,sql]
----
INSERT INTO visitors (visitorName, visitPurp, visitComp, visitDate)
VALUES
    ('Jolly', 'Survey', 'Apple', '2022-01-10 09:12:60');
----

[source,sql]
----
INSERT 0 1

Query returned successfully in 135 msec.
----

Verify the result by running the `select` statement below:

[source,sql]
----
SELECT * FROM visitors
WHERE visitorName = 'Jolly';
----

Notice that the seconds value of `60` was normalized: the seconds are displayed as `00`, and 1 minute was added to the minutes value.

[source,sql]
----
+--------------+--------------+---------------+-----------------------+
| visitorName  | visitPurp    | visitComp     | visitDate             |
+--------------+--------------+---------------+-----------------------+
| Jolly        | Survey       | Apple         | 2022-01-10 09:13:00   |
+--------------+--------------+---------------+-----------------------+
----

== AT TIME ZONE operator

The `AT TIME ZONE` operator enables us to convert the input timestamp to the target time zone specified in the query. Additionally, the timestamp you inputted will always be presented in the user’s local timezone (currently set as UTC).

[WARNING]
====
It's important to note that the result type of this operator is different. It produces a timestamp with a time zone. 
+==== + +=== Syntax + +To use the `AT TIME ZONE` operator, you can follow this syntax: + +[source,sql] +---- +SELECT TIMESTAMP 'input_timestamp' AT TIME ZONE 'TIME_ZONE'; +---- + +Here’s what each element means: + +* `input_timestamp`: This represents the date and time value you want to convert. The user’s time zone is fixed to UTC. +* `TIME_ZONE`: The target time zone to which the timestamp will be converted. + +=== Example 1 + +Suppose we have a timestamp, and we want to convert it into the MST time zone: + +[source,sql] +---- +SELECT TIMESTAMP '2001-02-16 10:28:30' AT TIME ZONE 'MST'; +---- + +The result will be a timestamp with the time zone adjusted to MST: + +[source,sql] +---- + f +--------------------------------- + 2001-02-16 17:28:30.000000+0000 +(1 row) +---- + +=== Example 2 + +Let’s consider from the xref:reference:sql/sql-data-types/timestamp-without-time-zone.adoc[visitors] table, we wish to retrieve a list of visit dates in the MST time zone. We can achieve this using the following query: + +[source,sql] +---- +SELECT visitDate, visitDate AT TIME ZONE 'MST' as "visitDateMST" FROM visitors; +---- + +With this query, we obtain a list of two columns: `visitDate` displays the timestamps without a time zone, and `visitDateMST` stores the timestamps converted to the MST time zone. 
+ +[source,sql] +---- + visitdate | visitDateMST +----------------------------+--------------------------------- + 2022-01-10 09:12:40.000000 | 2022-01-10 16:12:40.000000+0000 + 2022-01-29 11:28:02.000000 | 2022-01-29 18:28:02.000000+0000 + 2022-02-11 10:19:10.000000 | 2022-02-11 17:19:10.000000+0000 + 2022-03-18 14:24:08.000000 | 2022-03-18 21:24:08.000000+0000 + 2022-04-23 13:10:09.000000 | 2022-04-23 20:10:09.000000+0000 + 2022-05-01 08:45:50.000000 | 2022-05-01 15:45:50.000000+0000 +(6 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/avg.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/avg.adoc new file mode 100644 index 000000000..82c904c26 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/avg.adoc @@ -0,0 +1,145 @@ += AVG +:description: The AVG() function lets you calculate the average value of records. +:page-topic-type: reference + +The `AVG()` function lets you calculate the average value of records. The input and return types we support can be seen in the table below: + +[cols=",",options="header",] +|=== +|Input type |Return type +|`INTEGER` |`DOUBLE PRECISION` +|`BIGINT` |`DOUBLE PRECISION` +|`REAL` |`DOUBLE PRECISION` +|`DOUBLE PRECISION` |`DOUBLE PRECISION` +|=== + +[NOTE] +==== +If the input type is 32-bit, then the result will be 64-bit +==== +*Special cases:* Returns NaN if the input contains a NaN. 
+ +== Examples + +In this example, we will use an *orders* table that stores details of the purchase transactions: + +[source,sql] +---- +CREATE TABLE orders ( + orderid int, + custname text, + orderproduct text, + ordertotal real +); +INSERT INTO orders (orderid, custname, orderproduct, ordertotal) +VALUES +(9557411, 'Maya', 'Jeans', 10.5), +(9557421, 'Aaron', 'T-Shirt', 9.2), +(9557451, 'Alex', 'Hat', 10.8), +(9557311, 'Will', 'Hat', 8.5), +(9557321, 'Will', 'T-Shirt', 12.15), +(9557351, 'Maya', 'T-Shirt', 9.5), +(9557221, 'Maya', 'Jeans', 11.02), +(9557251, 'Alex', 'Jeans', 11.09), +(9557231, 'Aaron', 'Hat', 14.56), +(9557281, 'Aaron', 'Hat', 12.15), +(9557291, 'Will', 'T-Shirt', 13.1); +---- + +[source,sql] +---- +SELECT * FROM orders; +---- + +The above query will show the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+ +| orderid | custname | orderproduct | ordertotal | ++----------+-----------+---------------+-------------+ +| 9557411 | Maya | Jeans | 10.5 | +| 9557421 | Aaron | T-Shirt | 9.2 | +| 9557451 | Alex | Hat | 10.8 | +| 9557311 | Will | Hat | 8.5 | +| 9557321 | Will | T-Shirt | 12.15 | +| 9557351 | Maya | T-Shirt | 9.5 | +| 9557221 | Maya | Jeans | 11.02 | +| 9557251 | Alex | Jeans | 11.09 | +| 9557231 | Aaron | Hat | 14.56 | +| 9557281 | Aaron | Hat | 12.15 | +| 9557291 | Will | T-Shirt | 13.1 | ++----------+-----------+---------------+-------------+ +---- + +=== AVG() with a single expression + +In the first example, we want to calculate the average amount of all orders that customers have paid: + +[source,sql] +---- +SELECT AVG(ordertotal) AS "Order Total Average" +FROM orders; +---- + +It will return the following output: + +[source,sql] +---- ++---------------------+ +| Order Total Average | ++---------------------+ +| 11.142727331681685 | ++---------------------+ +---- + +=== AVG() with a GROUP BY clause + +The following example uses the `AVG()` function and `GROUP BY` clause to calculate the 
average amount paid by each customer:

* First, the `GROUP BY` clause divides orders into groups based on customers.
* Then, the `AVG` function is applied to each group.

[source,sql]
----
SELECT custname AS "Customer", AVG (ordertotal) AS "Total Price Average"
FROM orders
GROUP BY custname;
----

It will display the output as shown below:

[source,sql]
----
+-----------+----------------------+
| Customer  | Total Price Average  |
+-----------+----------------------+
| Aaron     | 11.96999994913737    |
| Alex      | 10.945000171661377   |
| Will      | 11.25                |
| Maya      | 10.34000015258789    |
+-----------+----------------------+
----

You can use a cast operator like `::NUMERIC(10,2)` to round the result to two decimal places:

[source,sql]
----
SELECT custname AS "Customer", AVG (ordertotal)::NUMERIC(10,2) AS "Total Price Average"
FROM orders
GROUP BY custname;
----

The result is rounded to two decimal places:

[source,sql]
----
+-----------+----------------------+
| Customer  | Total Price Average  |
+-----------+----------------------+
| Aaron     | 11.97                |
| Alex      | 10.95                |
| Will      | 11.25                |
| Maya      | 10.34                |
+-----------+----------------------+
----
diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-and.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-and.adoc
new file mode 100644
index 000000000..0b7ff22f9
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-and.adoc
@@ -0,0 +1,111 @@
+= BOOL_AND
:description: The BOOL_AND() function calculates all the boolean values in the aggregated group, which will have these results:
:page-topic-type: reference

The `BOOL_AND()` function evaluates all the boolean values in the aggregated group, which will have these results:

* `true` if all the values are `true` for every row.
* `false` if at least one row in the group is `false`.

The input and the return type must be `BOOL`. 
+ +[NOTE] +==== +`NULL` values are not aggregated, so it will return `NULL` if there are zero input rows. +==== + +== Examples + +In this example, we will use a payment table that stores details of the orders, whether the order has been paid or unpaid by the customer: + +[source,sql] +---- +CREATE TABLE payment ( + orderid int, + custname text, + orderproduct text, + ordertotal real, + paid boolean +); +INSERT INTO payment (orderid, custname, orderproduct, ordertotal, paid) +VALUES +(9557411, 'Maya', 'Jeans', 10.5, true), +(9557421, 'Aaron', 'T-Shirt', 9.2, true), +(9557451, 'Alex', 'Hat', 10.8, true), +(9557311, 'Will', 'Hat', 8.5, true), +(9557321, 'Will', 'T-Shirt', 12.15, true), +(9557351, 'Maya', 'T-Shirt', 9.5, true), +(9557221, 'Maya', 'Jeans', 11.02, true), +(9557251, 'Alex', 'Jeans', 11.09, true), +(9557231, 'Aaron', 'Hat', 14.56, false), +(9557281, 'Aaron', 'Hat', 12.15, true), +(9557291, 'Will', 'T-Shirt', 13.1, true); +---- + +[source,sql] +---- +SELECT * FROM payment; +---- + +The above query will show the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+-------+ +| orderid | custname | orderproduct | ordertotal | paid | ++----------+-----------+---------------+-------------+-------+ +| 9557411 | Maya | Jeans | 10.5 | t | +| 9557421 | Aaron | T-Shirt | 9.2 | t | +| 9557451 | Alex | Hat | 10.8 | t | +| 9557311 | Will | Hat | 8.5 | t | +| 9557321 | Will | T-Shirt | 12.15 | t | +| 9557351 | Maya | T-Shirt | 9.5 | t | +| 9557221 | Maya | Jeans | 11.02 | t | +| 9557251 | Alex | Jeans | 11.09 | t | +| 9557231 | Aaron | Hat | 14.56 | f | +| 9557281 | Aaron | Hat | 12.15 | t | +| 9557291 | Will | T-Shirt | 13.1 | t | ++----------+-----------+---------------+-------------+-------+ +---- + +=== `BOOL_AND` with a false result + +We will find out if all customers have paid for their orders using the query below: + +[source,sql] +---- +SELECT BOOL_AND(paid) AS "final_result" FROM payment; +---- + +In the `BOOL_AND` 
function, if there is at least one `FALSE` value, the overall result will be `FALSE`. The final output shows that there is an order that hasn’t been paid. + +[source,sql] +---- ++--------------+ +| final_result | ++--------------+ +| f | ++--------------+ +---- + +=== `BOOL_AND` with a true result + +We will find out if Maya has paid for her orders using the query below: + +[source,sql] +---- +SELECT BOOL_AND(paid) AS Maya_Paid +FROM payment +WHERE custname ='Maya'; +---- + +In the `BOOL_AND` function, if all values are `TRUE`, then the overall result will be `TRUE`. The final output shows that Maya has paid all her orders. + +[source,sql] +---- ++-----------+ +| maya_paid | ++-----------+ +| t | ++-----------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-or.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-or.adoc new file mode 100644 index 000000000..2593f6926 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/bool-or.adoc @@ -0,0 +1,111 @@ += BOOL_OR +:description: The BOOL_OR() function calculates all the boolean values in the aggregated group, which will have these results: +:page-topic-type: reference + +The `BOOL_OR()` function calculates all the boolean values in the aggregated group, which will have these results: + +* `false` if all the values are `false` for every row. +* `true` if at least one row in the group is true. + +The input and the return type must be in `BOOL`. + +[NOTE] +==== +`NULL` values are not aggregated, so it will return `NULL` if there are zero input rows. 
+==== + +== Examples + +In this example, we will use a payment table that stores details of the orders, whether the order has been paid or unpaid by the customer: + +[source,sql] +---- +CREATE TABLE payment ( + orderid int, + custname text, + orderproduct text, + ordertotal real, + paid boolean +); +INSERT INTO payment (orderid, custname, orderproduct, ordertotal, paid) +VALUES +(9557411, 'Maya', 'Jeans', 10.5, false), +(9557421, 'Aaron', 'T-Shirt', 9.2, false), +(9557451, 'Alex', 'Hat', 10.8, false), +(9557311, 'Will', 'Hat', 8.5, true), +(9557321, 'Will', 'T-Shirt', 12.15, false), +(9557351, 'Maya', 'T-Shirt', 9.5, true), +(9557221, 'Maya', 'Jeans', 11.02, false), +(9557251, 'Alex', 'Jeans', 11.09, false), +(9557231, 'Aaron', 'Hat', 14.56, false), +(9557281, 'Aaron', 'Hat', 12.15, false), +(9557291, 'Will', 'T-Shirt', 13.1, false); +---- + +[source,sql] +---- +SELECT * FROM payment; +---- + +The above query will show the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+--------+ +| orderid | custname | orderproduct | ordertotal | paid | ++----------+-----------+---------------+-------------+--------+ +| 9557411 | Maya | Jeans | 10.5 | f | +| 9557421 | Aaron | T-Shirt | 9.2 | f | +| 9557451 | Alex | Hat | 10.8 | f | +| 9557311 | Will | Hat | 8.5 | t | +| 9557321 | Will | T-Shirt | 12.15 | f | +| 9557351 | Maya | T-Shirt | 9.5 | t | +| 9557221 | Maya | Jeans | 11.02 | f | +| 9557251 | Alex | Jeans | 11.09 | f | +| 9557231 | Aaron | Hat | 14.56 | f | +| 9557281 | Aaron | Hat | 12.15 | f | +| 9557291 | Will | T-Shirt | 13.1 | f | ++----------+-----------+---------------+-------------+--------+ +---- + +=== `BOOL_OR` with a true result + +We will find out if all customers have paid for their orders using the query below: + +[source,sql] +---- +SELECT BOOL_OR(paid) AS "final_result" FROM payment; +---- + +If there is at least one `TRUE` value, the overall result will be `TRUE`. 
The final output shows that some order has been paid regardless of the other unpaid orders. + +[source,sql] +---- ++--------------+ +| final_result | ++--------------+ +| t | ++--------------+ +---- + +=== `BOOL_OR` with a false result + +We will find out if Aaron has paid for his orders using the query below: + +[source,sql] +---- +SELECT BOOL_OR(paid) AS aaron_paid +FROM payment +WHERE custname ='Aaron'; +---- + +If all values are `FALSE`, then the overall result will be `FALSE`. The final output shows that Aaron hasn’t paid for all his orders. + +[source,sql] +---- ++------------+ +| aaron_paid | ++------------+ +| f | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/count.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/count.adoc new file mode 100644 index 000000000..237426aa1 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/count.adoc @@ -0,0 +1,138 @@ += COUNT +:description: The COUNT() function allows you to retrieve the number of records that match a specific condition. +:page-topic-type: reference + +The `COUNT()` function allows you to retrieve the number of records that match a specific condition. It can be used with any data type supported by Redpanda SQL, and the output will be returned as a `BIGINT`. + +[NOTE] +==== +The output will indicate the total number of rows in a table, regardless of the input types. 
+==== + +== Examples + +In this example, we will use an orders table that stores details of the purchase transactions: + +[source,sql] +---- +CREATE TABLE orders ( + orderid int, + custname text, + orderproduct text, + ordertotal real +); +INSERT INTO orders (orderid, custname, orderproduct, ordertotal) +VALUES +(9557411, 'Maya', 'Jeans', 10.5), +(9557421, 'Aaron', 'T-Shirt', 9.2), +(9557451, 'Alex', 'Hat', 10.8), +(9557311, 'Will', 'Hat', 8.5), +(9557321, 'Will', 'T-Shirt', 12.15), +(9557351, 'Maya', 'T-Shirt', 9.5), +(9557221, 'Maya', 'Jeans', 11.02), +(9557251, 'Alex', 'Jeans', 11.09), +(9557231, 'Aaron', 'Hat', 14.56), +(9557281, 'Aaron', 'Hat', 12.15), +(9557291, 'Will', 'T-Shirt', 13.1); +---- + +[source,sql] +---- +SELECT * FROM orders; +---- + +The above query will show the following table: + +[source,sql] +---- ++----------+-----------+---------------+-------------+ +| orderid | custname | orderproduct | ordertotal | ++----------+-----------+---------------+-------------+ +| 9557411 | Maya | Jeans | 10.5 | +| 9557421 | Aaron | T-Shirt | 9.2 | +| 9557451 | Alex | Hat | 10.8 | +| 9557311 | Will | Hat | 8.5 | +| 9557321 | Will | T-Shirt | 12.15 | +| 9557351 | Maya | T-Shirt | 9.5 | +| 9557221 | Maya | Jeans | 11.02 | +| 9557251 | Alex | Jeans | 11.09 | +| 9557231 | Aaron | Hat | 14.56 | +| 9557281 | Aaron | Hat | 12.15 | +| 9557291 | Will | T-Shirt | 13.1 | ++----------+-----------+---------------+-------------+ +---- + +=== `COUNT()` with a single expression + +The following example will return the number of all orders in the orders table: + +[source,sql] +---- +SELECT COUNT(*) FROM orders; +---- + +The final result will be as follows: + +[source,sql] +---- ++-------+ +| count | ++-------+ +| 11 | ++-------+ +---- + +=== `COUNT()` with a `GROUP BY` clause + +This example will combine the `COUNT()` function and the `GROUP BY` clause. + +* The `GROUP BY` clause groups the orders based on the customer’s name. 
+* The `COUNT()` function counts the orders for each customer. + +[source,sql] +---- +SELECT custname, COUNT (orderid) +FROM orders +GROUP BY custname; +---- + +It will display the output as shown below: + +[source,sql] +---- ++-----------+--------+ +| custname | count | ++-----------+--------+ +| Aaron | 3 | +| Alex | 2 | +| Will | 3 | +| Maya | 3 | ++-----------+--------+ +---- + +=== `COUNT()` with a `HAVING` clause + +In this example, we combine the `COUNT()` function and the `HAVING` clause to apply a specific condition to find customers who have made more than two orders: + +[source,sql] +---- +SELECT custname, COUNT (orderid) +FROM orders +GROUP BY custname +HAVING COUNT (orderid) > 2; +---- + +* The `GROUP BY` clause groups the orders based on the customer’s name. +* The `HAVING` clause will filter only customers with more than two order IDs. +* The `COUNT()` function counts the orders for each customer. + +[source,sql] +---- ++-----------+-------+ +| custname | count | ++-----------+-------+ +| Aaron | 3 | +| Will | 3 | +| Maya | 3 | ++-----------+-------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/distinct.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/distinct.adoc new file mode 100644 index 000000000..58ab87399 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/distinct.adoc @@ -0,0 +1,211 @@ += DISTINCT +:description: When using aggregation functions, they can contain the DISTINCT keyword. +:page-topic-type: reference + +When using aggregation functions, they can contain the `DISTINCT` keyword. It acts as a qualifier for them, to ensure that only unique values are being processed. Here’s how a sample syntax looks like: + +[source,sql] +---- +aggregation function (DISTINCT expression [clause] ...) ... 
+---- + +`DISTINCT` keyword can be combined with the following aggregate functions: + +* `AVG()` +* `COUNT()` +* `MAX()` +* `MIN()` +* `SUM()` + +All functions listed above, operate on the same input and return types, that are supported by their counterparts without any qualifiers. They can be grouped without any limitations, provided that they utilise a *single* `DISTINCT` keyword. + +== Examples + +In this section we’ll focus on a few examples, that showcase sample usage of the above mentioned concepts. They will be based on creation of the following tables: + +[source,sql] +---- +CREATE TABLE customer ( + customer_id int, + cust_name text +); +INSERT INTO customer + (customer_id, cust_name) +VALUES + (11112, 'Alex'), + (11113, 'Aaron'), + (11114, 'Alice'), + (11115, 'Nina'), + (11116, 'Rosy'), + (11117, 'Martha'), + (11118, 'John'); + +CREATE TABLE rental ( + rental_id int, + rental_date timestamp, + return_date timestamp, + car text, + customer_id int, + total_price int +); +INSERT INTO rental (rental_id, rental_date, return_date, car, customer_id, total_price) +VALUES +(8557411, '2022-04-02 09:10:19', '2022-04-10 10:15:05', 'Audi', 11112, 1400), +(8557421, '2022-04-06 07:00:30', '2022-04-19 07:10:19', 'BMW', 11115, 2000), +(8557451, '2022-04-19 08:00:20', '2022-04-24 08:05:00', 'Cadillac', 11112, 1000), +(8557311, '2022-05-11 09:15:28', '2022-05-18 09:00:18', 'Audi', 11115, 1500), +(8557321, '2022-05-20 10:12:22', '2022-05-28 10:08:48', 'Audi', 11113, 1500), +(8557351, '2022-06-10 12:18:09', '2022-06-20 18:12:23', 'Cadillac', 11114, 1200), +(8557221, '2022-06-17 14:02:02', '2022-06-20 14:17:02', 'Chevrolet', 11112, 1300), +(8557251, '2022-07-12 05:19:49', '2022-07-19 07:15:28', 'Chevrolet', 11116, 1400), +(8557231, '2022-08-09 09:29:08', '2022-08-24 09:30:58', 'Cadillac', 11114, 2000), +(8557291, '2022-08-18 15:15:20', '2022-09-01 15:30:19', 'BMW', 11117, 3000); +---- + +Here’s how the created tables will look like, respectively: + +[source,sql] +---- +SELECT 
* FROM customer; + ++-------------+-----------+ +| customer_id | cust_name | ++-------------+-----------+ +| 11112 | Alex | +| 11113 | Aaron | +| 11114 | Alice | +| 11115 | Nina | +| 11116 | Rosy | +| 11117 | Martha | +| 11118 | John | ++-------------+-----------+ + +SELECT * FROM rental; + ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| rental_id | rental_date | return_date | car | customer_id | total_price | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| 8557411 | 2022-04-02 09:10:19 | 2022-04-10 10:15:05 | Audi | 11112 | 1400 | +| 8557421 | 2022-04-06 07:00:30 | 2022-04-19 07:10:19 | BMW | 11115 | 2000 | +| 8557451 | 2022-04-19 08:00:20 | 2022-04-24 08:05:00 | Cadillac | 11112 | 1000 | +| 8557311 | 2022-05-11 09:15:28 | 2022-05-18 09:00:18 | Audi | 11115 | 1500 | +| 8557321 | 2022-05-20 10:12:22 | 2022-05-28 10:08:48 | Audi | 11113 | 1500 | +| 8557351 | 2022-06-10 12:18:09 | 2022-06-20 18:12:23 | Cadillac | 11114 | 1200 | +| 8557221 | 2022-06-17 14:02:02 | 2022-06-20 14:17:02 | Chevrolet | 11112 | 1300 | +| 8557251 | 2022-07-12 05:19:49 | 2022-07-19 07:15:28 | Chevrolet | 11116 | 1400 | +| 8557231 | 2022-08-09 09:29:08 | 2022-08-24 09:30:58 | Cadillac | 11114 | 2000 | +| 8557291 | 2022-08-18 15:15:20 | 2022-09-01 15:30:19 | BMW | 11117 | 3000 | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +---- + +=== `DISTINCT` combined with `COUNT` function + +The following example uses `DISTINCT` qualifier combined with `COUNT()` function to calculate the number of unique car brands in rentals: + +[source,sql] +---- +SELECT COUNT (DISTINCT car) AS number_of_car_brands +FROM rental; +---- + +When executing the above code, it will return the following output: + +[source,sql] +---- ++----------------------+ +| number_of_car_brands | ++----------------------+ +| 4 | ++----------------------+ +---- + +Here’s 
another example that uses the `DISTINCT` qualifier combined with the `COUNT()` function to calculate the number of rentals by each customer: + +[source,sql] +---- +SELECT c.cust_name AS customer_name, COUNT (DISTINCT r.rental_id) AS rental_count +FROM rental r +JOIN customer c ON r.customer_id = c.customer_id +GROUP BY c.cust_name; +---- + +It will calculate the `rental_count` by each `customer_name` as shown below: + +[source,sql] +---- ++----------------+--------------+ +| customer_name | rental_count | ++----------------+--------------+ +| Nina | 2 | +| Aaron | 1 | +| Alice | 2 | +| Martha | 1 | +| Alex | 3 | +| Rosy | 1 | ++----------------+--------------+ +---- + +=== `DISTINCT` combined with `MAX()` function + +The following example uses `DISTINCT` qualifier combined with `MAX()` function to find maximum single spending per each customer, dropping any repeated transactions: + +[source,sql] +---- +SELECT c.cust_name AS customer_name, + MAX (DISTINCT r.total_price) AS max_spending +FROM rental r +JOIN customer c ON r.customer_id = c.customer_id +GROUP BY c.cust_name; +---- + +The output for that code will be as follows: + +[source,sql] +---- ++---------------+--------------+ +| customer_name | max_spending | ++---------------+--------------+ +| Martha | 3000 | +| Rosy | 1400 | +| Alex | 1400 | +| Alice | 2000 | +| Nina | 2000 | +| Aaron | 1500 | ++---------------+--------------+ +---- + +=== `DISTINCT` combined with `SUM()` function + +The following example compares the sum of unique revenues versus the sum of all revenues in rental data: + +[source,sql] +---- +SELECT + SUM (DISTINCT r.total_price) AS unique_revenue, + SUM (r.total_price) AS total_revenue +FROM rental r; +---- + +Here’s the output of the above query: + +[source,sql] +---- ++----------------+---------------+ +| unique_revenue | total_revenue | ++----------------+---------------+ +| 11400 | 16300 | ++----------------+---------------+ +---- + +The result may help to understand what is the impact of 
repeating transactions on total revenue. + +== Limitations + +There is one use case we are aware of but do not support currently: + +* Aggregation functions with `DISTINCT` keyword used as an argument of an expression, e.g. + +[source,sql] +---- +SELECT 1 + COUNT(DISTINCT col) FROM table +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/for-max.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-max.adoc new file mode 100644 index 000000000..e066c716b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-max.adoc @@ -0,0 +1,116 @@ += FOR_MAX() +:description: FOR_MAX() function is used to search for a maximum in a specific column and return a value related to that maximum from another column. +:page-topic-type: reference + +`FOR_MAX()` function is used to search for a maximum in a specific column and return a value related to that maximum from another column. + +== Syntax + +[source,sql] +---- +FOR_MAX(metric, value) +---- + +== Arguments + +* `metric`: must be one of the following data types: `INT`, `LONG`, `FLOAT`, `DOUBLE`, `DATE` or `TIMESTAMP` +* `value`: can be any data type except `TEXT` + +The `FOR_MAX()` function returns `NULL` in the following situations: + +* There are no input rows +* The `metric` column contains only `NULL` values +* The `value` corresponding to the metric maximum value is `NULL` + +This function also returns `NaN` (not-a-number) if the input contains a `NaN`. 
+ +== Examples + +For the needs of this section, we will use a `payment` table that stores customer payment records, including any applied discounts: + +[source,sql] +---- +CREATE TABLE payments ( + paymentid int, + customer_name text, + price real, + discount real +); +INSERT INTO payments (paymentid, customer_name, price, discount) +VALUES +(1, 'Alex', 280.12, 0.1), +(2, NULL, 35.75, NULL), +(3, 'Alex', 45.1, 0.05), +(4, 'Alex', NULL, 0.4), +(5, 'John', NULL, 0.1), +(6, 'Bob', 50.45, 0.07), +(7, 'Bob', 120.5, 0.0); +---- + +To view the `payments` table content, run the following query: + +[source,sql] +---- +SELECT * FROM payments; +---- + +[source,sql] +---- ++-----------+---------------+--------+----------+ +| paymentid | customer_name | price | discount | ++-----------+---------------+--------+----------+ +| 2 | | 35.75 | | +| 4 | Alex | | 0.4 | +| 3 | Alex | 45.1 | 0.05 | +| 1 | Alex | 280.12 | 0.1 | +| 6 | Bob | 50.45 | 0.07 | +| 5 | John | | 0.1 | +| 7 | Bob | 120.5 | 0 | ++-----------+---------------+--------+----------+ +---- + +=== `FOR_MAX()` basic usage + +To determine the price associated with the highest discount, run the following query: + +[source,sql] +---- +SELECT FOR_MAX(discount, price) AS for_highest_discount +FROM payments; +---- + +This query returns the following output: + +[source,sql] +---- ++----------------------+ +| for_highest_discount | ++----------------------+ +| | ++----------------------+ +---- + +The result is `NULL` (displayed as an empty value) because the price corresponding to the highest discount (`0.4`) is `NULL`. + +=== `FOR_MAX()` with `GROUP BY` clause + +In this example, we will use a `GROUP BY` clause to group customers and then utilize the `FOR_MAX()` function to get a discount for the highest price paid by each customer: + +[source,sql] +---- +SELECT customer_name, FOR_MAX(price, discount) AS discount +FROM payments +GROUP BY customer_name; +---- + +This query returns the following output: + +[source,sql] +---- ++---------------+----------+ +| customer_name | discount | ++---------------+----------+ +| | | +| Bob | 0 | +| Alex | 
0.1 | +| John | | ++---------------+----------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/for-min.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-min.adoc new file mode 100644 index 000000000..41065fc9f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/for-min.adoc @@ -0,0 +1,114 @@ += FOR_MIN() +:description: The FOR_MIN() function is used to search for a minimum in a specific column and return a value related to that minimum from another column. +:page-topic-type: reference + +The `FOR_MIN()` function is used to search for a minimum in a specific column and return a value related to that minimum from another column. + +== Syntax + +[source,sql] +---- +FOR_MIN(metric, value) +---- + +== Arguments + +* `metric`: must be one of the following data types: `INT`, `LONG`, `FLOAT`, `DOUBLE`, `DATE` or `TIMESTAMP` +* `value`: can be any data type except `TEXT` + +The `FOR_MIN()` function returns `NULL` in the following situations: + +* There are no input rows +* The `metric` column contains only `NULL` values +* The `value` corresponding to the metric minimum value is `NULL` + +This function also returns `NaN` (not-a-number) if the input contains a `NaN`. 
+ +== Examples + +For the needs of this section, we will use a `payment` table that stores customer payment records, including any applied discounts: + +[source,sql] +---- +CREATE TABLE payments ( + paymentid int, + customer_name text, + price real, + discount real); + +INSERT INTO + payments (paymentid, customer_name, price, discount) +VALUES + (1, 'Alex', 280.12, 0.1), + (2, NULL, 35.75, NULL), + (3, 'Alex', 45.1, 0.05), + (4, 'Alex', NULL, 0.4), + (5, 'John', NULL, 0.1), + (6, 'Bob', 50.45, 0.07), + (7, 'Bob', 120.5, 0.0); +---- + +To view the `payments` table content, run the following query: + +[source,sql] +---- +SELECT * FROM payments; +---- + +[source,sql] +---- + paymentid | customer_name | price | discount +-----------+---------------+--------+---------- + 1 | Alex | 280.12 | 0.1 + 2 | | 35.75 | + 3 | Alex | 45.1 | 0.05 + 4 | Alex | | 0.4 + 5 | John | | 0.1 + 6 | Bob | 50.45 | 0.07 + 7 | Bob | 120.5 | 0 +(7 rows) +---- + +=== `FOR_MIN()` basic usage + +To determine the price associated with the lowest discount applied across all payments, use the following query: + +[source,sql] +---- +SELECT FOR_MIN(discount, price) AS for_lowest_discount FROM payments; +---- + +This query returns the following output: + +[source,sql] +---- + for_lowest_discount +--------------------- + 120.5 +(1 row) +---- + +=== `FOR_MIN()` with `GROUP BY` clause + +To determine the discount associated with the lowest price paid by each customer, we will use the `GROUP BY` clause with `FOR_MIN()` function: + +[source,sql] +---- +SELECT customer_name, + FOR_MIN(price, discount) AS discount +FROM payments +GROUP BY customer_name; +---- + +This query returns the following output: + +[source,sql] +---- +customer_name | discount +---------------+---------- + Bob | 0.07 + Alex | 0.05 + | + John | +(4 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/index.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/index.adoc new file mode 100644 
index 000000000..816711a9d --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/index.adoc @@ -0,0 +1,29 @@ += Overview +:description: Aggregate functions compute a single result from a set of input values. + +Aggregate functions compute a single result from a set of input values. Redpanda SQL supports the following aggregate functions: + +[width="100%",cols="36%,64%",options="header",] +|=== +|*Function Name* |*Description* +|xref:reference:sql/sql-functions/aggregate-functions/sum.adoc[SUM] |Calculates and returns the sum of all values +|xref:reference:sql/sql-functions/aggregate-functions/min.adoc[MIN] |Calculates and returns the minimum value +|xref:reference:sql/sql-functions/aggregate-functions/for-min.adoc[FOR_MIN] |Calculates and returns a value corresponding to the minimum metric in the same row from a set of values +|xref:reference:sql/sql-functions/aggregate-functions/max.adoc[MAX] |Calculates and returns the maximum value +|xref:reference:sql/sql-functions/aggregate-functions/for-max.adoc[FOR_MAX] |Calculates and returns a value corresponding to the maximum metric in the same row from a set of values +|xref:reference:sql/sql-functions/aggregate-functions/avg.adoc[AVG] |Calculates and returns the average value +|xref:reference:sql/sql-functions/aggregate-functions/count.adoc[COUNT] |Counts the number of rows +|xref:reference:sql/sql-functions/aggregate-functions/bool-and.adoc[BOOL_AND] |Calculates the logical AND of all the boolean values in the aggregated group. Returns `FALSE` if at least one of the aggregated rows is `FALSE` +|xref:reference:sql/sql-functions/aggregate-functions/bool-or.adoc[BOOL_OR] |Calculates the logical OR of all the boolean values in the aggregated group. Returns 
`TRUE` if at least one of aggregated rows is `TRUE` +|=== + +[width="100%",cols="36%,64%",options="header",] +|=== +|*Function qualifier* |*Description* +|xref:reference:sql/sql-functions/aggregate-functions/distinct.adoc[DISTINCT] |Allows aggregation functions to operate on a distinct set of values within a column +|=== + +[TIP] +==== +You can utilize the aggregate functions with the `GROUP BY` and `HAVING` clauses in the `SELECT` statement. +==== diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/max.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/max.adoc new file mode 100644 index 000000000..5aa6eb6a4 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/max.adoc @@ -0,0 +1,118 @@ += MAX +:description: MAX() is a function that returns the maximum value from a set of records. +:page-topic-type: reference + +`MAX()` is a function that returns the maximum value from a set of records. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +MAX(column_name) +---- + +This function’s output data type will always be the same as the input one, however it returns `NULL` if there are no records or input consists of `NULL` values and it also returns `NaN` if the input contains a `NaN`. + +== Examples + +For the needs of this section, we will create a movies table that stores movie details, such as movie’s title, category, and IMDb rating. 
+ +[source,sql] +---- +CREATE TABLE movies ( + movieid int, + moviename text, + moviecategory text, + imdbrating real +); +INSERT INTO movies (movieid, moviename, moviecategory, imdbrating) +VALUES +(8557411, 'The Shawshank Redemption', 'Drama', 9.4), +(8557421, 'Life Is Beautiful', 'Romance', 8.4), +(8557451, 'The Godfather', 'Crime', 9.3), +(8557311, 'Prisoners', 'Thriller', 8.5), +(8557321, 'Inception', 'Science Fiction', 9), +(8557351, 'The Dark Knight', 'Action', 9.2), +(8557221, 'Coco', 'Drama', 8.2), +(8557251, 'The Sixth Sense', 'Horror', 8.1), +(8557231, 'Kill Bill: Vol. 1', 'Action', 8.1), +(8557281, 'The Notebook', 'Romance', 7.8), +(8557291, 'Forrest Gump', 'Drama', 8); +---- + +[source,sql] +---- +SELECT * FROM movies; +---- + +By running the above query, we will get the following output: + +[source,sql] +---- ++---------+--------------------------+-----------------+-------------+ +| movieid | moviename | moviecategory | imdbrating | ++---------+--------------------------+-----------------+-------------+ +| 8557411 | The Shawshank Redemption | Drama | 9.4 | +| 8557421 | Life Is Beautiful | Romance | 8.4 | +| 8557451 | The Godfather | Crime | 9.3 | +| 8557311 | Prisoners | Thriller | 8.5 | +| 8557321 | Inception | Science Fiction | 9 | +| 8557351 | The Dark Knight | Action | 9.2 | +| 8557221 | Coco | Drama | 8.2 | +| 8557251 | The Sixth Sense | Horror | 8.1 | +| 8557231 | Kill Bill: Vol. 
1 | Action | 8.1 | +| 8557281 | The Notebook | Romance | 7.8 | +| 8557291 | Forrest Gump | Drama | 8 | ++---------+--------------------------+-----------------+-------------+ +---- + +=== `MAX()` with a single expression + +For example, you might want to know what is the highest rating among all stored movies: + +[source,sql] +---- +SELECT MAX(imdbRating) AS "Highest Rating" +FROM movies; +---- + +[source,sql] +---- ++-----------------+ +| Highest Rating | ++-----------------+ +| 9.4 | ++-----------------+ +---- + +=== `MAX()` with GROUP BY clause + +We use a `MAX()` function in this example to get the highest rating in each movie category and the results are ordered by the rating in ascending order. + +[source,sql] +---- +SELECT + movieCategory AS "Movie Category", + MAX(imdbRating) AS "Highest Rating" +FROM movies +GROUP BY movieCategory +ORDER BY MAX(imdbRating) ASC; +---- + +By running the above code, we will get the highest rating from a group of `movieCategory` as shown below: + +[source,bash] +---- + Movie Category | Highest Rating +-----------------+---------------- + Horror | 8.1 + Romance | 8.4 + Thriller | 8.5 + Science Fiction | 9 + Action | 9.2 + Crime | 9.3 + Drama | 9.4 +(7 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/min.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/min.adoc new file mode 100644 index 000000000..f4c23d3d5 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/min.adoc @@ -0,0 +1,118 @@ += MIN +:description: MIN() is a function that returns the minimum value from a set of records. +:page-topic-type: reference + +`MIN()` is a function that returns the minimum value from a set of records. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +MIN(column_name) +---- + +This function’s output data type will always be the same as the input one, however it returns `NULL` if there are no records or input consists of `NULL` values and it also returns `NaN` if the input contains a `NaN`. + +== Examples + +For the needs of this section, we will create a movies table that stores movie details, such as movie’s title, category, and IMDb rating. + +[source,sql] +---- +CREATE TABLE movies ( + movieid int, + moviename text, + moviecategory text, + imdbrating real +); +INSERT INTO movies (movieid, moviename, moviecategory, imdbrating) +VALUES +(8557411, 'The Shawshank Redemption', 'Drama', 9.4), +(8557421, 'Life Is Beautiful', 'Romance', 8.4), +(8557451, 'The Godfather', 'Crime', 9.3), +(8557311, 'Prisoners', 'Thriller', 8.5), +(8557321, 'Inception', 'Science Fiction', 9), +(8557351, 'The Dark Knight', 'Action', 9.2), +(8557221, 'Coco', 'Drama', 8.2), +(8557251, 'The Sixth Sense', 'Horror', 8.1), +(8557231, 'Kill Bill: Vol. 1', 'Action', 8.1), +(8557281, 'The Notebook', 'Romance', 7.8), +(8557291, 'Forrest Gump', 'Drama', 8); +---- + +[source,sql] +---- +SELECT * FROM movies; +---- + +By running the above query, we will get the following output: + +[source,sql] +---- ++---------+--------------------------+-----------------+-------------+ +| movieid | moviename | moviecategory | imdbrating | ++---------+--------------------------+-----------------+-------------+ +| 8557411 | The Shawshank Redemption | Drama | 9.4 | +| 8557421 | Life Is Beautiful | Romance | 8.4 | +| 8557451 | The Godfather | Crime | 9.3 | +| 8557311 | Prisoners | Thriller | 8.5 | +| 8557321 | Inception | Science Fiction | 9 | +| 8557351 | The Dark Knight | Action | 9.2 | +| 8557221 | Coco | Drama | 8.2 | +| 8557251 | The Sixth Sense | Horror | 8.1 | +| 8557231 | Kill Bill: Vol. 
1 | Action | 8.1 | +| 8557281 | The Notebook | Romance | 7.8 | +| 8557291 | Forrest Gump | Drama | 8 | ++---------+--------------------------+-----------------+-------------+ +---- + +=== `MIN()` with a single expression + +For example, you might want to know what is the lowest rating of all stored movies: + +[source,sql] +---- +SELECT MIN(imdbRating) AS "Lowest Rating" +FROM movies; +---- + +[source,sql] +---- ++----------------+ +| Lowest Rating | ++----------------+ +| 7.8 | ++----------------+ +---- + +=== `MIN()` with `GROUP BY` clause + +In this example, we will use a `GROUP BY` clause to group the movie categories, then use `MIN()` function to get the lowest rating in each movie category and arrange the results in ascending order. + +[source,sql] +---- +SELECT + movieCategory AS "Movie Category", + MIN(imdbRating) AS "Lowest Rating" +FROM movies +GROUP BY movieCategory +ORDER BY MIN(imdbRating) ASC; +---- + +By running the code above, we will get the following output: + +[source,bash] +---- + Movie Category | Lowest Rating +-----------------+--------------- + Romance | 7.8 + Drama | 8 + Horror | 8.1 + Action | 8.1 + Thriller | 8.5 + Science Fiction | 9 + Crime | 9.3 +(7 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc new file mode 100644 index 000000000..9fbdd7853 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/index.adoc @@ -0,0 +1,3 @@ += Ordered-Set Aggregate Functions +:description: Reference for ordered-set aggregate functions in Redpanda SQL. 
+:page-layout: index diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc new file mode 100644 index 000000000..54896543a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/mode.adoc @@ -0,0 +1,71 @@ += MODE() +:description: MODE() is an ordered-set aggregate function that returns the most frequently occurring value (the mode) from a set of values. +:page-topic-type: reference + +`MODE()` is an ordered-set aggregate function that returns the most frequently occurring value (the mode) from a set of values. + +== Syntax + +[source,sql] +---- +MODE() WITHIN GROUP (ORDER BY order_list) +---- + +[NOTE] +==== +Null values are ignored during the calculation. If null is the most frequent value, the function will return the second most common value. +==== + +== Parameters + +* `()`: this function takes no parameters, but the empty parentheses are required + +== Example + +For the needs of this section, we will use a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], that will contain only the `title`, `length` and `rating` columns. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below retrieves the most frequent ratings found in the film table: + +[source,sql] +---- +SELECT MODE() + WITHIN GROUP (ORDER BY rating) +FROM film; +---- + +By executing the code above we will get the following output: + +[source,sql] +---- +| mode | +|-------| +| NC-17 | +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc new file mode 100644 index 000000000..acf96f2d6 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-cont.adoc @@ -0,0 +1,98 @@ += PERCENTILE_CONT() +:description: PERCENTILE_CONT() is an ordered-set aggregate function used to compute continuous percentiles from a set of values. +:page-topic-type: reference + +`PERCENTILE_CONT()` is an ordered-set aggregate function used to compute continuous percentiles from a set of values. 
The *continuous percentile* returns an interpolated value based on the distribution of the input data, while *multiple continuous percentiles* return an array of results matching the shape of the `fractions` parameter with each non-null element replaced by the value corresponding to that percentile. + +== Syntax + +The syntax for this function is as follows: + +[tabs] +==== +Continuous Percentile:: ++ +[source,sql] +---- +PERCENTILE_CONT(fraction) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +This function is often used in conjunction with the `WITHIN GROUP` clause to specify how to order the data before calculating the percentile. +==== ++ +*Parameters* + +* `fraction`: decimal value between 0 and 1 representing the desired percentile (e.g. 0.25 for the 25th percentile) + +Multiple Continuous Percentile:: ++ +[source,sql] +---- +PERCENTILE_CONT(fractions) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +This function is often used in conjunction with the `WITHIN GROUP` clause to specify how to order the data before calculating the percentile. +==== ++ +*Parameters* + +* `fractions`: array of decimal values between 0 and 1 representing the desired percentiles (e.g. `ARRAY[0.25, 0.50, 0.75, 0.90]`) + +==== + +== Example + +For the needs of this section we will use a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], that will contain only the `title`, `length` and `rating` columns. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +This query calculates the 25th percentile of film length within each rating category. + +[source,sql] +---- +SELECT rating, PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY length) AS "25th percentile" FROM film +GROUP BY rating; +---- + +By executing the code above we will get the following output: + +[source,sql] +---- + rating | 25th percentile +--------+----------------- + PG-13 | 74 + PG | 113.5 + NC-17 | 133.5 + G | 65.5 +(4 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc new file mode 100644 index 000000000..38daae515 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/ordered-set-aggregate-functions/percentile-disc.adoc @@ -0,0 +1,98 @@ += PERCENTILE_DISC() +:description: PERCENTILE_DISC() is an ordered-set aggregate function used to compute discrete percentiles from a set of values. +:page-topic-type: reference + +`PERCENTILE_DISC()` is an ordered-set aggregate function used to compute discrete percentiles from a set of values. 
The *discrete percentile* returns the first input value, which position in the ordering equals or exceeds the specified fraction, while *multiple discrete percentiles* return an array of results matching the shape of the fractions parameter, with each non-null element being replaced by the input value corresponding to that percentile. + +== Syntax + +The syntax for this function is as follows: + +[tabs] +==== +Discrete Percentile:: ++ +[source,sql] +---- +PERCENTILE_DISC(fraction) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +If multiple values share the same rank at the specified percentile, `PERCENTILE_DISC()` will return the first one encountered in the ordering. +==== ++ +*Parameters* + +* `fraction`: decimal value between 0 and 1 representing the desired percentile (e.g. 0.25 for the 25th percentile) + +Multiple Discrete Percentile:: ++ +[source,sql] +---- +PERCENTILE_DISC(fractions) WITHIN GROUP (ORDER BY order_list) +---- ++ +[NOTE] +==== +If multiple values share the same rank at the specified percentile, `PERCENTILE_DISC` will return the first one encountered in the ordering. +==== ++ +*Parameters* + +* `fractions`: array of decimal values between 0 and 1 representing the desired percentiles (e.g. `ARRAY[0.25, 0.50, 0.75, 0.90]`) + +==== + +== Example + +For the needs of this section we will use a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], that will contain only the `title`, `length` and `rating` columns. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below calculates the first quartile, the median, and the third quartile of film lengths: + +[source,sql] +---- +SELECT rating, percentile_disc(ARRAY[0.25, 0.5, 0.75]) WITHIN GROUP (ORDER BY length) AS "quartiles" FROM film +GROUP BY rating; +---- + +By executing the code above, we will get the following output: + +[source,sql] +---- + rating | quartiles +--------+--------------- + G | {54,77,125} + PG | {106,121,137} + PG-13 | {47,83,142} + NC-17 | {131,150,176} +(4 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/corr.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/corr.adoc new file mode 100644 index 000000000..c6130cd98 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/corr.adoc @@ -0,0 +1,70 @@ += CORR() +:description: The CORR() aggregate function calculates the Pearson correlation coefficient between two sets of number pairs. +:page-topic-type: reference + +The `CORR()` aggregate function calculates the Pearson correlation coefficient between two sets of number pairs. This function measures the linear relationship between two variables, providing a value between -1 and 1. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +CORR(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we are going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `CORR()` function to calculate the correlation between film length and rating: + +[source,sql] +---- +SELECT + CORR(length, rating) AS CorrelationCoefficient +FROM film; +---- + +By running the query above we will get the following output: + +[source,sql] +---- + correlationcoefficient +------------------------ + 0.6190587870867634 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc new file mode 100644 index 000000000..1d3da4ea4 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc @@ -0,0 +1,70 @@ += COVAR_POP() 
+:description: The COVAR_POP() aggregate function calculates the population covariance between two sets of number pairs. +:page-topic-type: reference + +The `COVAR_POP()` aggregate function calculates the population covariance between two sets of number pairs. This function measures how much two variables change together, providing insight into their linear relationship. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +COVAR_POP(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `COVAR_POP()` function to calculate the covariance between film length and rating: + +[source,sql] +---- +SELECT + COVAR_POP(length, rating) AS Covariance +FROM film; +---- + +By running the query above, we will get the following output: + +[source,sql] +---- + covariance +------------------- + 36.02768166089963 +(1 row) +---- diff --git 
a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc new file mode 100644 index 000000000..1173c79b6 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc @@ -0,0 +1,71 @@ += COVAR_SAMP +:description: The COVAR_SAMP() aggregate function calculates the sample covariance between two sets of number pairs. +:page-topic-type: reference + +The `COVAR_SAMP()` aggregate function calculates the sample covariance between two sets of number pairs. This function measures how changes in one variable relate linearly to changes in another variable within a sample dataset. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +COVAR_SAMP(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `COVAR_SAMP()` function to calculate the sample covariance between film `length` and `rating` where `rating` is greater than or equal to 4: + +[source,sql] +---- +SELECT + COVAR_SAMP(length, rating) AS SampleCovariance +FROM film +WHERE rating >= 4; +---- + +By running the above query, we will get the following output: + +[source,sql] +---- + samplecovariance +-------------------- + 23.087912087912066 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/index.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/index.adoc new file mode 100644 index 000000000..164ab0376 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/index.adoc @@ -0,0 +1,27 @@ += Overview +:description: Aggregate functions for statistics are typically used for statistical analysis. + +Aggregate functions for statistics are typically used for statistical analysis. 
Redpanda SQL supports the following functions: + +[width="100%",cols="42%,58%",options="header",] +|=== +|*Functions* |*Description* +|xref:reference:sql/sql-functions/aggregate-functions/statistics/corr.adoc[CORR] |Calculates the Pearson correlation coefficient between two sets of number pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-pop.adoc[COVAR_POP] |Calculates the population covariance between two sets of number pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/covar-samp.adoc[COVAR_SAMP] |Calculates the sample covariance between two sets of number pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc[REGR_AVGX] |Calculates the average of the independent variable (sum(X)/N) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc[REGR_AVGY] |Calculates the average of the dependent variable (sum(Y)/N) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-count.adoc[REGR_COUNT] |Calculates the number of input rows in which both expressions are non-null +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc[REGR_INTERCEPT] |Calculates the y-intercept of the univariate linear regression line for a group of data points +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc[REGR_R2] |Calculates the coefficient of determination (R2) for a linear regression model +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc[REGR_SLOPE] |Calculates the slope of the least-squares-fit linear equation determined by the (X, Y) pairs +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc[REGR_SXX] |Calculates the sum(X^2^) - sum(X)^2^/N ("`sum of squares`" of the independent variable) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc[REGR_SXY] |Calculates the sum(X×Y) - sum(X)×sum(Y)/N ("`sum of products`" of independent times 
dependent variable) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc[REGR_SYY] |Calculates the sum(Y^2^) - sum(Y)^2^/N ("`sum of squares`" of the dependent variable) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev.adoc[STDDEV] |Calculates the sample standard deviation of a set of numeric values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc[STDDEV_POP] |Calculates the population standard deviation of the input values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc[STDDEV_SAMP] |Calculates the sample standard deviation of the input values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/variance.adoc[VARIANCE] |Calculates the sample variance of a set of numeric values +|xref:reference:sql/sql-functions/aggregate-functions/statistics/var-pop.adoc[VAR_POP] |Calculates the population variance of the input values (square of the population standard deviation) +|xref:reference:sql/sql-functions/aggregate-functions/statistics/var-samp.adoc[VAR_SAMP] |Calculates the sample variance of the input values (square of the sample standard deviation) +|=== diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc new file mode 100644 index 000000000..bfd122b90 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgx.adoc @@ -0,0 +1,70 @@ += REGR_AVGX() +:description: The REGR_AVGX() aggregate function calculates the average of the independent variable (x) for non-null pairs of dependent (y) and independent (x) vari +:page-topic-type: reference + +The `REGR_AVGX()` aggregate function calculates the average of the independent variable (x) for non-null pairs of dependent (y) and independent (x) variables. 
This function is commonly used in linear regression analysis to compute the mean of the independent variable where both variables are not null. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_AVGX(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_AVGX()` function to calculate the average rating for films where both `length` and `rating` are not null: + +[source,sql] +---- +SELECT + REGR_AVGX(length, rating) AS AverageRating +FROM film; +---- + +By executing the above code, we will get the following output: + +[source,sql] +---- + averagerating +------------------- + 5.294117647058823 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc new file mode 100644 
index 000000000..bbc5e8106 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-avgy.adoc @@ -0,0 +1,70 @@ += REGR_AVGY() +:description: The REGR_AVGY() aggregate function calculates the mean of the dependent variable (y) for non-null pairs of dependent (y) and independent (x) variables +:page-topic-type: reference + +The `REGR_AVGY()` aggregate function calculates the mean of the dependent variable (y) for non-null pairs of dependent (y) and independent (x) variables. This function is used in linear regression analysis to compute the average value of the dependent variable where both variables are not null. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_AVGY(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_AVGY()` function to calculate the mean of the dependent variable (`rating`) for rows where both `rating` and `length` are not null: + +[source,sql] +---- +SELECT + REGR_AVGY(rating, length) AS AverageRating +FROM film; +---- + +By running the above query, we will get the following output: + +[source,sql] +---- + averagerating +------------------- + 5.294117647058823 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-count.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-count.adoc new file mode 100644 index 000000000..8750e4df3 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-count.adoc @@ -0,0 +1,70 @@ += REGR_COUNT() +:description: The REGR_COUNT() aggregate function calculates the number of non-null value pairs for a dependent variable (y) and an independent variable (x). +:page-topic-type: reference + +The `REGR_COUNT()` aggregate function calculates the number of non-null value pairs for a dependent variable (y) and an independent variable (x). This function is used in linear regression analysis to determine the number of valid data points available for computation. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_COUNT(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_COUNT()` function to count the number of rows where both `rating` and `length` are not null: + +[source,sql] +---- +SELECT + REGR_COUNT(rating, length) AS NonNullPairsCount +FROM film; +---- + +By running the above query, we will get the following output: + +[source,sql] +---- + nonnullpairscount +------------------- + 17 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc new file mode 100644 index 000000000..59d325900 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-intercept.adoc @@ -0,0 +1,70 
@@ += REGR_INTERCEPT() +:description: The REGR_INTERCEPT() aggregate function calculates the y-intercept of the univariate linear regression line for a group of data points, where the depe +:page-topic-type: reference + +The `REGR_INTERCEPT()` aggregate function calculates the y-intercept of the univariate linear regression line for a group of data points, where the dependent variable is (y) and the independent variable is (x). The intercept is the point where the regression line crosses the y-axis when x=0. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_INTERCEPT(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +We’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_INTERCEPT()` function to calculate the y-intercept of the regression line for valid pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_INTERCEPT(rating, length) AS YIntercept +FROM film; +---- + +By running the code above we will get the following output: + +[source,sql] +---- + yintercept +-------------------- + 2.1055200882495355 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc new file mode 100644 index 000000000..5abc0abca --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-r2.adoc @@ -0,0 +1,70 @@ += REGR_R2() +:description: The REGR_R2() aggregate function calculates the coefficient of determination (R2) for a linear regression model. +:page-topic-type: reference + +The `REGR_R2()` aggregate function calculates the coefficient of determination (R2) for a linear regression model. The R2 value indicates how well the independent variable (x) explains the variability of the dependent variable (y). 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_R2(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_R2()` function to calculate the coefficient of determination (R2) for valid pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_R2(rating, length) AS coefficientOfDetermination +FROM film; +---- + +By running the above code, we’re going to get the following output: + +[source,sql] +---- + coefficientofdetermination +---------------------------- + 0.3832337818693347 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc new file mode 100644 index 000000000..c5138d0f3 --- /dev/null +++ 
b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-slope.adoc @@ -0,0 +1,70 @@ += REGR_SLOPE() +:description: The REGR_SLOPE() aggregate function calculates the slope of the regression line for a linear relationship between a dependent variable (y) and an inde +:page-topic-type: reference + +The `REGR_SLOPE()` aggregate function calculates the slope of the regression line for a linear relationship between a dependent variable (y) and an independent variable (x). The slope represents the rate of change in `y` for every unit increase in `x`. This function is used in regression analysis to quantify the strength and direction of a linear relationship. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_SLOPE(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_SLOPE()` function to calculate the slope of the regression line for valid pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_SLOPE(rating, length) AS Slope +FROM film; +---- + +By running the above code, we will get the following output: + +[source,sql] +---- + slope +---------------------- + 0.025985694391063227 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc new file mode 100644 index 000000000..390340f34 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxx.adoc @@ -0,0 +1,70 @@ += REGR_SXX() +:description: The REGR_SXX() aggregate function calculates the sum of squares of deviations for the independent variable (x) in a linear regression analysis. +:page-topic-type: reference + +The `REGR_SXX()` aggregate function calculates the sum of squares of deviations for the independent variable (x) in a linear regression analysis. This value represents the variability of the independent variable and is a key component in calculating the slope and other regression statistics. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_SXX(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_SXX()` function to calculate the sum of squares of deviations for the independent variable `length`: + +[source,sql] +---- +SELECT + REGR_SXX(rating, length) AS SumOfSquaresX +FROM film; +---- + +By running the above code, we will get the following output: + +[source,sql] +---- + sumofsquaresx +------------------ + 23569.5294117647 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc new file mode 100644 index 000000000..9e7e056ab --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-sxy.adoc @@ -0,0 +1,70 @@ += 
REGR_SXY() +:description: The REGR_SXY() aggregate function calculates the sum of products of deviations for the dependent variable (y) and the independent variable (x) in a li +:page-topic-type: reference + +The `REGR_SXY()` aggregate function calculates the sum of products of deviations for the dependent variable (y) and the independent variable (x) in a linear regression analysis. This value represents the covariance-like term used to compute the slope of the regression line. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_SXY(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_SXY()` function to calculate the sum of products of deviations for non-null pairs of `rating` and `length`: + +[source,sql] +---- +SELECT + REGR_SXY(rating, length) AS SumOfSquaresXY +FROM film; +---- + +By running the above code, we’ll get the following output: + +[source,sql] +---- + sumofsquaresxy +------------------- + 612.4705882352937 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc new file mode 100644 index 000000000..2d7a517df --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/regr-syy.adoc @@ -0,0 +1,70 @@ += REGR_SYY() +:description: The REGR_SYY() aggregate function calculates the sum of squares of deviations for the dependent variable (y) in a linear regression analysis. +:page-topic-type: reference + +The `REGR_SYY()` aggregate function calculates the sum of squares of deviations for the dependent variable (y) in a linear regression analysis. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +REGR_SYY(y, x) +---- + +== Parameters + +* `y`: variable being predicted +* `x`: variable used for prediction + +== Example + +For the needs of this section we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `REGR_SYY()` function to calculate the sum of squares of deviation for the dependent variable `rating`: + +[source,sql] +---- +SELECT + REGR_SYY(rating, length) AS SumOfSquaresY +FROM film; +---- + +By running the above code, we will get the following output: + +[source,sql] +---- + sumofsquaresy +-------------------- + 41.529411764705856 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc new file mode 100644 index 000000000..2c7da325f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-pop.adoc @@ -0,0 +1,69 @@ += 
STDDEV_POP() +:description: The STDDEV_POP() aggregate function calculates the population standard deviation of a set of numeric values. +:page-topic-type: reference + +The `STDDEV_POP()` aggregate function calculates the population standard deviation of a set of numeric values. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +STDDEV_POP(expression) +---- + +== Parameters + +* `expression`: numeric expression or column for which the population standard deviation is calculated + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `STDDEV_POP()` function to calculate the population standard deviation for the `length` column: + +[source,sql] +---- +SELECT + STDDEV_POP(length) AS LengthPopStdDev +FROM film; +---- + +By executing the above code, we will get the following output: + +[source,sql] +---- + lengthpopstddev +------------------- + 37.23496886764368 +(1 row) +---- diff --git 
a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc new file mode 100644 index 000000000..2ce5ed763 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev-samp.adoc @@ -0,0 +1,70 @@ += STDDEV_SAMP() +:description: The STDDEV_SAMP() aggregate function calculates the sample standard deviation of a set of numeric values. +:page-topic-type: reference + +The `STDDEV_SAMP()` aggregate function calculates the sample standard deviation of a set of numeric values. This function measures how much the values deviate from their mean, assuming the data is a sample of a larger population. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +STDDEV_SAMP(expression) +---- + +== Parameters + +* `expression`: numeric expression or column for which the sample standard deviation is calculated + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `STDDEV_SAMP()` function to calculate the sample standard deviation for the `length` column where `rating` is greater than or equal to 4: + +[source,sql] +---- +SELECT + STDDEV_SAMP(length) AS LengthSampleStdDev +FROM film +WHERE rating >= 4; +---- + +By running the code above we will get the following output: + +[source,sql] +---- + lengthsamplestddev +-------------------- + 34.92503746251735 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev.adoc new file mode 100644 index 000000000..68ec25824 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/stddev.adoc @@ -0,0 +1,69 @@ += STDDEV() +:description: The STDDEV() aggregate function calculates the sample standard deviation of a set of numeric values. +:page-topic-type: reference + +The `STDDEV()` aggregate function calculates the sample standard deviation of a set of numeric values. Standard deviation measures the dispersion or spread of data points around the mean. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +STDDEV(expression) +---- + +== Parameters + +* `expression`: numeric expression or column for which the sample standard deviation is calculated + +== Example + +For the needs of this section we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +This query below uses the `STDDEV()` function to calculate the sample standard deviation for the `length` column: + +[source,sql] +---- +SELECT + STDDEV(length) AS LengthStdDev +FROM film; +---- + +By running the above code we will get the following output: + +[source,sql] +---- + lengthstddev +------------------- + 38.38092740197003 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-pop.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-pop.adoc new file mode 100644 index 000000000..2387c1ded --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-pop.adoc @@ -0,0 +1,69 @@ += 
VAR_POP() +:description: The VAR_POP() aggregate function calculates the population variance of a set of numeric values. +:page-topic-type: reference + +The `VAR_POP()` aggregate function calculates the population variance of a set of numeric values. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +VAR_POP(expression) +---- + +== Parameters + +* `expression`: numeric expression or column for which the population variance is calculated + +== Example + +For the needs of this section we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `VAR_POP()` function to calculate the population variance for the `length` column: + +[source,sql] +---- +SELECT + VAR_POP(length) AS LengthPopulationVariance +FROM film; +---- + +By executing the above query, we will get the following output: + +[source,sql] +---- + lengthpopulationvariance +-------------------------- + 1386.442906574394 +(1 row) +---- diff --git 
a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-samp.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-samp.adoc new file mode 100644 index 000000000..433a88c81 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/var-samp.adoc @@ -0,0 +1,70 @@ += VAR_SAMP() +:description: The VAR_SAMP() aggregate function calculates the sample variance of a set of numeric values. +:page-topic-type: reference + +The `VAR_SAMP()` aggregate function calculates the sample variance of a set of numeric values. This function measures the spread of data points around the mean, assuming the data is a sample of a larger population. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +VAR_SAMP(expression) +---- + +== Parameters + +* `expression`: numeric expression or column for which the sample variance is calculated + +== Example + +For the needs of this section we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `VAR_SAMP()` function to calculate the sample variance for the `length` column where `rating` is greater than or equal to 4: + +[source,sql] +---- +SELECT + VAR_SAMP(length) AS LengthSampleVariance +FROM film +WHERE rating >= 4; +---- + +By running the above code, we will get the following output: + +[source,sql] +---- + lengthsamplevariance +---------------------- + 1219.7582417582407 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/variance.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/variance.adoc new file mode 100644 index 000000000..5a62ca1fc --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/statistics/variance.adoc @@ -0,0 +1,69 @@ += VARIANCE() +:description: The VARIANCE() aggregate function calculates the sample variance of a set of numeric values. +:page-topic-type: reference + +The `VARIANCE()` aggregate function calculates the sample variance of a set of numeric values. Variance measures the spread of data points around the mean, providing insight into how much the values deviate from the average. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +VARIANCE(expression) +---- + +== Parameters + +* `expression`: numeric expression or column for which the variance is calculated + +== Example + +For the needs of this section, we’re going to use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +The query below uses the `VARIANCE()` function to calculate the variance for the `length` column: + +[source,sql] +---- +SELECT + VARIANCE(length) AS LengthVariance +FROM film; +---- + +By executing the code above, we will get the following output: + +[source,sql] +---- + lengthvariance +-------------------- + 1473.0955882352937 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/aggregate-functions/sum.adoc b/modules/reference/pages/sql/sql-functions/aggregate-functions/sum.adoc new file mode 100644 index 000000000..a051a5ba9 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/aggregate-functions/sum.adoc @@ -0,0 +1,253 @@ += SUM +:description: SUM() calculates the sum of values from stored 
records. +:page-topic-type: reference + +`SUM()` calculates the sum of values from stored records. `SUM()` doesn’t consider `NULL` in the calculation, and it returns `NULL` instead of zero if the executed statement returns no rows. + +The input and return types we support can be seen in the table below. + +[cols=",",options="header",] +|=== +|Input type |Return type +|INT |LONG +|LONG |LONG +|FLOAT |DOUBLE +|DOUBLE |DOUBLE +|INTERVAL |INTERVAL +|=== + +[NOTE] +==== +If the input type is 32-bit, then the result will be 64-bit. +==== + +== Examples + +We have two sample tables here: + +*customer table* + +[source,sql] +---- +CREATE TABLE customer ( + customer_id int, + cust_name text +); +INSERT INTO customer + (customer_id, cust_name) +VALUES + (11112, 'Alex'), + (11113, 'Aaron'), + (11114, 'Alice'), + (11115, 'Nina'), + (11116, 'Rosy'), + (11117, 'Martha'), + (11118, 'John'); +---- + +[source,sql] +---- +SELECT * FROM customer; +---- + +It will create a table as shown below: + +[source,sql] +---- ++-------------+-----------+ +| customer_id | cust_name | ++-------------+-----------+ +| 11112 | Alex | +| 11113 | Aaron | +| 11114 | Alice | +| 11115 | Nina | +| 11116 | Rosy | +| 11117 | Martha | +| 11118 | John | ++-------------+-----------+ +---- + +*rental table* + +[source,sql] +---- +CREATE TABLE rental ( + rental_id int, + rental_date timestamp, + return_date timestamp, + car text, + customer_id int, + total_price int +); +INSERT INTO rental (rental_id, rental_date, return_date, car, customer_id, total_price) +VALUES +(8557411, '2022-04-02 09:10:19', '2022-04-10 10:15:05', 'Audi', 11112, 1400), +(8557421, '2022-04-06 07:00:30', '2022-04-19 07:10:19', 'BMW', 11115, 2000), +(8557451, '2022-04-19 08:00:20', '2022-04-24 08:05:00', 'Cadillac', 11112, 1000), +(8557311, '2022-05-11 09:15:28', '2022-05-18 09:00:18', 'Audi', 11115, 1500), +(8557321, '2022-05-20 10:12:22', '2022-05-28 10:08:48', 'Audi', 11113, 1500), +(8557351, '2022-06-10 12:18:09', '2022-06-20 18:12:23', 
'Cadillac', 11114, 1200), +(8557221, '2022-06-17 14:02:02', '2022-06-20 14:17:02', 'Chevrolet', 11112, 1300), +(8557251, '2022-07-12 05:19:49', '2022-07-19 07:15:28', 'Chevrolet', 11116, 1400), +(8557231, '2022-08-09 09:29:08', '2022-08-24 09:30:58', 'Cadillac', 11114, 2000), +(8557291, '2022-08-18 15:15:20', '2022-09-01 15:30:19', 'BMW', 11117, 3000); +---- + +[source,sql] +---- +SELECT * FROM rental; +---- + +Here, we have a rental table which stores the details for car rental: + +[source,sql] +---- ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| rental_id | rental_date | return_date | car | customer_id | total_price | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +| 8557411 | 2022-04-02 09:10:19 | 2022-04-10 10:15:05 | Audi | 11112 | 1400 | +| 8557421 | 2022-04-06 07:00:30 | 2022-04-19 07:10:19 | BMW | 11115 | 2000 | +| 8557451 | 2022-04-19 08:00:20 | 2022-04-24 08:05:00 | Cadillac | 11112 | 1000 | +| 8557311 | 2022-05-11 09:15:28 | 2022-05-18 09:00:18 | Audi | 11115 | 1500 | +| 8557321 | 2022-05-20 10:12:22 | 2022-05-28 10:08:48 | Audi | 11113 | 1500 | +| 8557351 | 2022-06-10 12:18:09 | 2022-06-20 18:12:23 | Cadillac | 11114 | 1200 | +| 8557221 | 2022-06-17 14:02:02 | 2022-06-20 14:17:02 | Chevrolet | 11112 | 1300 | +| 8557251 | 2022-07-12 05:19:49 | 2022-07-19 07:15:28 | Chevrolet | 11116 | 1400 | +| 8557231 | 2022-08-09 09:29:08 | 2022-08-24 09:30:58 | Cadillac | 11114 | 2000 | +| 8557291 | 2022-08-18 15:15:20 | 2022-09-01 15:30:19 | BMW | 11117 | 3000 | ++------------+---------------------+---------------------+-----------+---------------+-------------+ +---- + +=== `SUM()` in `SELECT` statement + +The following example uses the `SUM()` function to calculate the total rent price of all `rental_id`: + +[source,sql] +---- +SELECT SUM (total_price) AS total +FROM rental +---- + +It will return a sum value of the `total_price`: + +[source,sql] +---- 
++--------+ +| total | ++--------+ +| 16300 | ++--------+ +---- + +=== `SUM()` with a `NULL` result + +The following example uses the `SUM()` function to calculate the total rent price of the `customer_id = 11118.` + +[source,sql] +---- +SELECT SUM (total_price) AS total +FROM rental +WHERE customer_id = 11118; +---- + +Since no records in the *rental* table have the `customer_id = 11118`, the `SUM()` function returns a `NULL`. + +[source,sql] +---- ++--------+ +| total | ++--------+ +| null | ++--------+ +---- + +=== `SUM()` with `GROUP BY` clause + +You can use the `GROUP BY` clause to group the records in the table and apply the `SUM()` function to each group afterward. + +The following example uses the `SUM()` function and the `GROUP BY` clause to calculate the total price paid by each customer: + +[source,sql] +---- +SELECT customer_id, +SUM (total_price) AS total_spend +FROM rental +GROUP BY customer_id; +---- + +It will calculate the `total_price` from a group of `customer_id` as shown below: + +[source,sql] +---- ++--------------+--------------+ +| customer_id | total_spend | ++--------------+--------------+ +| 11115 | 3500 | +| 11117 | 3000 | +| 11116 | 1400 | +| 11113 | 1500 | +| 11112 | 3700 | +| 11114 | 3200 | ++--------------+--------------+ +---- + +=== `SUM()` with `HAVING` clause + +You can use the `SUM()` function with the `HAVING` clause to filter out the sum of groups based on a specific condition: + +[source,sql] +---- +SELECT + customer_id, + SUM (total_price) AS total_spend +FROM rental +GROUP BY customer_id +HAVING SUM(total_price) >= 3000; +---- + +It will return the customers who spent greater than or equal to 3000: + +[source,sql] +---- ++--------------+--------------+ +| customer_id | total_spend | ++--------------+--------------+ +| 11115 | 3500 | +| 11117 | 3000 | +| 11112 | 3700 | +| 11114 | 3200 | ++--------------+--------------+ +---- + +=== `SUM()` with multiple expression + +The example uses the following: + +* `SUM()` function to 
calculate total rental days. +* `JOIN` clause to combine the rental table with the customer table. +* `GROUP BY` group a result-set based on the customers’ names. + +[source,sql] +---- +SELECT s.cust_name, SUM(return_date - rental_date ) AS rental_period +FROM rental AS r +JOIN customer AS s +ON r.customer_id = s.customer_id +GROUP BY cust_name; +---- + +The final result will display the customers’ names with their total rental period. + +[source,sql] +---- ++------------+-------------------+ +| cust_name | rental_period | ++------------+-------------------+ +| Aaron | 7 days 23:56:26 | +| Martha | 14 days 00:14:59 | +| Rosy | 7 days 01:55:39 | +| Nina | 19 days 23:54:39 | +| Alex | 16 days 01:24:26 | +| Alice | 25 days 05:56:04 | ++------------+-------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/if-function.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/if-function.adoc new file mode 100644 index 000000000..7acf87bcc --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/if-function.adoc @@ -0,0 +1,143 @@ += IF Function +:description: This function returns the specified value if the condition is TRUE and another value if the condition is FALSE. +:page-topic-type: reference + +This function returns the specified value if the condition is `TRUE` and another value if the condition is `FALSE`. The syntax of the `IF()`function is shown below: + +[source,sql] +---- +IF(expression, true_result, else_result) +---- + +[WARNING] +==== +The `expression` must be a Boolean expression. +==== + +== Examples + +== `IF()` with a table + +In this example, we have the *test_result* table. 
We want to know which participants passed and which failed from the table below: + +[source,sql] +---- +CREATE TABLE test_result ( + applicant_id int, + name text, + score int +); + +INSERT INTO test_result VALUES +(78765,'Mike Aoki',677), +(78786,'Julie Grahams',650), +(78986,'Alexandra Jones',450), +(79742,'Lucas Moore',487), +(79769,'Augustine Harkness',572); +---- + +[source,sql] +---- +SELECT * FROM test_result; +---- + +The above query will display the following table: + +[source,sql] +---- ++---------------+--------------------+--------+ +| applicant_id | name | score | ++---------------+--------------------+--------+ +| 78765 | Mike Aoki | 677 | +| 78786 | Julie Grahams | 650 | +| 78986 | Alexandra Jones | 450 | +| 79742 | Lucas Moore | 487 | +| 79769 | Augustine Harkness | 572 | ++---------------+--------------------+--------+ +---- + +. IF function in the query below states that _IF the score is equal to or greater than 500, then return “PASSED“. Otherwise, if the score is smaller than 500, return “NOT PASSED”_. ++ +[source,sql] +---- +SELECT name, IF(score>=500, 'PASSED', 'NOT PASSED') FROM test_result; +---- + +. It will return the following result: ++ +[source,sql] +---- ++--------------------+-------------+ +| name | case | ++--------------------+-------------+ +| Mike Aoki | PASSED | +| Julie Grahams | PASSED | +| Alexandra Jones | NOT PASSED | +| Lucas Moore | NOT PASSED | +| Augustine Harkness | PASSED | ++--------------------+-------------+ +---- + +== IF() with expressions as return value + +In the second example, we have another table named “*deptcost*. We want to know which department exceeded the budget and which one did not from the following table. 
+ +[source,sql] +---- +CREATE TABLE deptcost ( + dept text, + budget int, + actual int, + status text +); +INSERT INTO deptcost VALUES +('Finance', 800,677,'within budget'), +('HR', 700,930,'over budget'), +('Marketing', 500,677,'over budget'), +('Project', 720,700,'within budget'), +('Sales', 910,860,'within budget'); +---- + +Run the following query to display the table: + +[source,sql] +---- +SELECT * FROM deptcost; +---- + +We have *deptcost* table as seen below: + +[source,sql] +---- ++-----------+--------+--------+---------------+ +| dept | budget | actual | status | ++-----------+--------+--------+---------------+ +| Finance | 800 | 677 | within budget | +| HR | 700 | 930 | over budget | +| Marketing | 500 | 677 | over budget | +| Project | 720 | 700 | within budget | +| Sales | 910 | 860 | within budget | ++-----------+--------+--------+---------------+ +---- + +. The following IF function states that _IF the actual is less than the budget, then return the budget difference, otherwise return 0_. ++ +[source,sql] +---- +SELECT dept, IF(actual < budget, budget - actual, 0) FROM deptcost; +---- + +. We get the following result using the `IF()` function: ++ +[source,sql] +---- ++-----------+-----+ +| dept | f | ++-----------+-----+ +| Finance | 123 | +| HR | 0 | +| Marketing | 0 | +| Project | 20 | +| Sales | 50 | ++-----------+-----+ +---- diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/index.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/index.adoc new file mode 100644 index 000000000..2f26c20eb --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/index.adoc @@ -0,0 +1,3 @@ += Boolean Functions +:description: Reference for boolean functions and operators in Redpanda SQL. 
+:page-layout: index diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc new file mode 100644 index 000000000..6fd160dfe --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/is-distinct-from-operator.adoc @@ -0,0 +1,149 @@ += IS DISTINCT FROM Operator +:description: The IS DISTINCT FROM operator compares two values, considering them distinct even when both are NULL. +:page-topic-type: reference + +The `IS DISTINCT FROM` operator compares two values, considering them distinct even when both are `NULL`. It returns `TRUE` if the two values are different and `FALSE` if they are the same, including the case where both values are `NULL`. + +== Syntax + +The syntax for the operator is as follows: + +[source,sql] +---- +value1 IS DISTINCT FROM value2 +---- + +Where: + +* `value1` is the first value for comparison. +* `value2` is the second value for comparison. 
+ +== Examples + +=== Basic usage + +Consider the following example where we compare two values: + +*Example 1* + +[source,sql] +---- +SELECT NULL IS DISTINCT FROM NULL AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + f +---- + +*Example 2* + +[source,sql] +---- +SELECT 10 IS DISTINCT FROM 20 AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + t +---- + +*Example 3* + +[source,sql] +---- +SELECT 10 IS DISTINCT FROM 10 AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + f +---- + +=== Compare NULL values + +In this example, we’ll compare `NULL` values using the `IS DISTINCT FROM` operator: + +*Example 1* + +[source,sql] +---- +SELECT NULL IS DISTINCT FROM 10 AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + t +---- + +*Example 2* + +[source,sql] +---- +SELECT 10 IS DISTINCT FROM NULL AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + t +---- + +=== Track inventory variations + +Suppose we have a table named `inventory_changes` that tracks changes in the quantities of products in a warehouse. The table has the following structure: + +[source,sql] +---- +CREATE TABLE inventory_changes ( + product_id INT, + change_date DATE, + change_quantity INT +); + +INSERT INTO inventory_changes VALUES +(101, '2023-08-01', 50), +(102, '2023-08-01', 0), +(101, '2023-08-02', -15), +(103, '2023-08-03', 30), +(102, '2023-08-04', 0); +---- + +We want to retrieve records where the change quantity is distinct from zero. In this scenario, the `IS DISTINCT FROM` operator can be used. 
+ +[source,sql] +---- +SELECT * +FROM inventory_changes +WHERE change_quantity IS DISTINCT FROM 0; +---- + +The result of the query will not include the 0 values as shown below: + +[source,sql] +---- + product_id | change_date | change_quantity +------------+-------------+----------------- + 101 | 2023-08-01 | 50 + 101 | 2023-08-02 | -15 + 103 | 2023-08-03 | 30 +---- diff --git a/modules/reference/pages/sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc b/modules/reference/pages/sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc new file mode 100644 index 000000000..5c0b49d81 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/boolean-functions/is-not-distinct-from-operator.adoc @@ -0,0 +1,151 @@ += IS NOT DISTINCT FROM Operator +:description: The IS NOT DISTINCT FROM operator is a counterpart to IS DISTINCT FROM. +:page-topic-type: reference + +== Overview + +The `IS NOT DISTINCT FROM` operator is a counterpart to `IS DISTINCT FROM`. + +It compares two values, treating them as equal even when they are both `NULL`. This operator returns `TRUE` if the two values are the same, including the case where both values are `NULL` and `FALSE` if they are different. + +== Syntax + +The syntax for the operator is as follows: + +[source,sql] +---- +value1 IS NOT DISTINCT FROM value2 +---- + +Where: + +* `value1` is the first value for comparison. +* `value2` is the second value for comparison. 
+ +== Examples + +=== Basic usage + +Consider the following example where we compare two values: + +*Example 1* + +[source,sql] +---- +SELECT 45 IS NOT DISTINCT FROM 45 AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + t +---- + +*Example 2* + +[source,sql] +---- +SELECT 60 IS NOT DISTINCT FROM 30 AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + f +---- + +*Example 3* + +[source,sql] +---- +SELECT NULL IS NOT DISTINCT FROM NULL AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + t +---- + +=== Compare NULL values + +In this example, we’ll compare NULL values using the IS NOT DISTINCT FROM operator: + +*Example 1* + +[source,sql] +---- +SELECT NULL IS NOT DISTINCT FROM 80 AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + f +---- + +*Example 2* + +[source,sql] +---- +SELECT 5 IS NOT DISTINCT FROM NULL AS "Result"; +---- + +The above query will return the following output: + +[source,sql] +---- + Result +-------- + f +---- + +=== Analyze data completeness + +Suppose we have a table named customer_contacts that stores customer contact information. + +[source,sql] +---- +CREATE TABLE customer_contacts ( + customer_id INT, + email TEXT, + phone TEXT +); + +INSERT INTO customer_contacts VALUES +(101, 'john@example.com', NULL), +(102, NULL, '+1234567890'), +(103, 'jane@example.com', '+9876543210'), +(104, NULL, NULL), +(105, 'alex@example.com', '+5555555555'); +---- + +Our objective is to retrieve records from this table where an email address or a phone number is available for contacting the customers. + +[source,sql] +---- +SELECT * +FROM customer_contacts +WHERE email IS NOT DISTINCT FROM phone; +---- + +In this query, we retrieve all rows from the `customer_contacts table` where the email and phone are NULL. 
We can conclude that the customer with `customer_id 104` has no phone number or email address.
+
+[source,sql]
+----
+ customer_id | email | phone
+-------------+-------+-------
+ 104 | |
+----
diff --git a/modules/reference/pages/sql/sql-functions/index.adoc b/modules/reference/pages/sql/sql-functions/index.adoc
new file mode 100644
index 000000000..52673ce6a
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/index.adoc
@@ -0,0 +1,19 @@
+= SQL Functions
+:description: Redpanda SQL supports the following function types for querying and transforming data.
+
+Redpanda SQL supports the following function types for querying and transforming data:
+
+
+[width="100%",cols="<42%,<58%",options="header",]
+|===
+|Function Name |Description
+|xref:reference:sql/sql-functions/boolean-functions/if-function.adoc[BOOLEAN FUNCTIONS] |Evaluate logical conditions and return `TRUE`, `FALSE`, or `NULL`
+|xref:reference:sql/sql-functions/math-functions/index.adoc[MATH FUNCTIONS] |Perform mathematical operations on numeric data, such as rounding and exponentiation
+|xref:reference:sql/sql-functions/string-functions/index.adoc[STRING FUNCTIONS] |Manipulate string data for text processing, including concatenation, substring extraction and case conversion
+|xref:reference:sql/sql-functions/timestamp-functions/index.adoc[TIMESTAMP FUNCTIONS] |Handle date and time values including extracting components, adding intervals and comparing timestamps
+|xref:reference:sql/sql-functions/trigonometric-functions/index.adoc[TRIGONOMETRIC FUNCTIONS] |Perform calculations using trigonometric ratios, such as sine, cosine and tangent
+|xref:reference:sql/sql-functions/json-functions/index.adoc[JSON FUNCTIONS] |Manipulate and query JSON data stored in the database, including extracting values and creating JSON objects
+|xref:reference:sql/sql-functions/aggregate-functions/index.adoc[AGGREGATE FUNCTIONS] |Summarize a set of values and return a single result, such as calculating sums, 
averages and counts +|xref:reference:sql/sql-functions/window-functions/index.adoc[WINDOW FUNCTIONS] |Operate over a subset of rows defined by a windowing clause, enabling ranking, aggregation and row numbering within result sets +|xref:reference:sql/sql-functions/other-functions/index.adoc[OTHER FUNCTIONS] |Includes a variety of specialized functions not categorized elsewhere +|=== diff --git a/modules/reference/pages/sql/sql-functions/json-functions/index.adoc b/modules/reference/pages/sql/sql-functions/json-functions/index.adoc new file mode 100644 index 000000000..45af2f6c2 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/index.adoc @@ -0,0 +1,24 @@ += Overview +:description: To help you query JSON data, Redpanda SQL provides some functions that will be used to operate and manipulate the JSON data. + +To help you query JSON data, Redpanda SQL provides some functions that will be used to operate and manipulate the JSON data. The functions are as follows: + +[width="100%",cols="53%,47%",options="header",] +|=== +|*Functions* |*Description* +|xref:reference:sql/sql-functions/json-functions/json-extract-path.adoc[JSON_EXTRACT_PATH()] |It extracts JSON sub-object at the specified path. +|xref:reference:sql/sql-functions/json-functions/json-extract-path-text.adoc[JSON_EXTRACT_PATH_TEXT()] |It returns text referenced by a series of path elements in a JSON string or JSON body. +|xref:reference:sql/sql-functions/json-functions/json-array-length.adoc[JSON_ARRAY_LENGTH()] |It returns the number of elements in the outer array of a JSON string or JSON body. +|xref:reference:sql/sql-functions/json-functions/json-array-extract.adoc[JSON_ARRAY_EXTRACT()] |It returns the JSON array as a set of JSON values. +|=== + +Operators are used to specify conditions when using JSON functions. 
Redpanda SQL also supports JSON operators as listed below: + +[width="100%",cols="12%,46%,42%",options="header",] +|=== +|*Operators* |*Description* |*Example* +|-> |It gets & returns the element of the JSON array. |`'[{"a":"cab"},{"b":"bac"},{"c":"abc"}]'::json -> 2` +|-> |It gets & returns the JSON object field. |`'{"a": {"b":"abc"}}'::json -> 'a'` +|->> |It gets & returns the element of the JSON array as text. |`'[11,22,33]'::json ->> 2` +|->> |It gets & returns the JSON object field as text. |`'{"a":13,"b":33}'::json ->> 'b'` +|=== diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-array-extract.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-array-extract.adoc new file mode 100644 index 000000000..029f26583 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/json-array-extract.adoc @@ -0,0 +1,84 @@ += JSON_ARRAY_EXTRACT +:description: The JSON_ARRAY_EXTRACT() function returns the JSON array as a set of JSON values. +:page-topic-type: reference + +== Overview + +The `JSON_ARRAY_EXTRACT()` function returns the JSON array as a set of JSON values. + +== Syntax + +The `JSON_ARRAY_EXTRACT()` has the basic syntax as seen below. + +[source,sql] +---- +JSON_ARRAY_EXTRACT('json_array'::JSON,id); +---- + +`JSON_ARRAY_EXTRACT()` requires the following parameters: + +* `json_array`: the array to be extracted. +* `::JSON`: argument indicating that the query is of type JSON. +* `id`: ID of the element that we want to extract. It is read in an array format that starts with 0. + +=== Another option + +`JSON_ARRAY_EXTRACT` can also be achieved with the `->` operator, as shown in the syntax below: + +[source,sql] +---- +SELECT 'from_json'::JSON -> path; +---- + +* `from_json`: the JSON value from which to extract. +* `::JSON`: a symbol that casts the string literal to a JSON type. +* `path`: key of the field that we want to extract. + +== Examples + +=== Basic JSON_ARRAY_EXTRACT() function + +. 
In the below example, we will extract a JSON array as a JSON set. ++ +[source,sql] +---- +SELECT JSON_ARRAY_EXTRACT('["Bougenvile", 2, 12, "Lily"]'::JSON,3); +---- ++ +*or* ++ +[source,sql] +---- +SELECT ('["Bougenvile", 2, 12, "Lily"]'::JSON -> 3); +---- + +. The extracted array will look like the following. ++ +[source,sql] +---- ++------------+ +| f | ++------------+ +| "Lily" | ++------------+ +---- + +=== Extract element of JSON array as text + +. In this case, we will extract the element of the JSON array as text with the `->>` operator. ++ +[source,sql] +---- +SELECT ('["Bougenvile", 2, 12, "Lily"]'::JSON ->> 1); +---- + +. You will get the final output as follows: ++ +[source,sql] +---- ++------------+ +| f | ++------------+ +| 2.000000 | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-array-length.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-array-length.adoc new file mode 100644 index 000000000..38c4e15f3 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/json-array-length.adoc @@ -0,0 +1,78 @@ += JSON_ARRAY_LENGTH +:description: The JSON_ARRAY_LENGTH() function returns the length of a specified JSON array. +:page-topic-type: reference + +The `JSON_ARRAY_LENGTH()` function returns the length of a specified JSON array. + +== Syntax + +This function has the following basic syntax. + +[source,sql] +---- +JSON_ARRAY_LENGTH(arrayval JSON) +---- + +The required argument for this function is `arrayval`. It represents the JSON array which we will count the length. 
+ +== Examples + +=== Get a JSON array length with a JSON value + +The following example returns the number of elements in the array: + +[source,sql] +---- +SELECT JSON_ARRAY_LENGTH('[4, 7, 10, 11, 14, {"vegetables":"spinach","fruits":"melon"}, {"a":"b"}]'); +---- + +The function above will return the following result: + +[source,sql] +---- ++-------+ +| f | ++-------+ +| 7 | ++-------+ +---- + +=== Get a JSON array length with a number + +The following example returns the number of elements in the array. + +[source,sql] +---- +SELECT JSON_ARRAY_LENGTH('[1, 2, [3, 4]]'); +---- + +You will get the final result as follows: + +[source,sql] +---- ++-------+ +| f | ++-------+ +| 3 | ++-------+ +---- + +=== JSON array length where the array is NULL or empty + +This example shows that an empty JSON array will return 0. + +[source,sql] +---- +SELECT JSON_ARRAY_LENGTH('[]'); +---- + +An empty array will return 0 in the final output: + +[source,sql] +---- ++-------+ +| f | ++-------+ +| 0 | ++-------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path-text.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path-text.adoc new file mode 100644 index 000000000..4c7b953e7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path-text.adoc @@ -0,0 +1,66 @@ += JSON_EXTRACT_PATH_TEXT +:description: The JSON_EXTRACT_PATH_TEXT() function extracts JSON nested value from a specified JSON value according to the defined path. +:page-topic-type: reference + +The `JSON_EXTRACT_PATH_TEXT()` function extracts JSON nested value from a specified JSON value according to the defined path. + +[NOTE] +==== +This function may be similar to the `JSON_EXTRACT_PATH()`. This function returns a value of type text instead of type JSON. 
+====
+
+== Syntax
+
+The `JSON_EXTRACT_PATH_TEXT()` syntax is shown below:
+
+[source,sql]
+----
+JSON_EXTRACT_PATH_TEXT(from_json JSON, path TEXT[])
+----
+
+The required arguments are explained below.
+
+* `from_json`: the JSON value to extract.
+* `path`: the path to extract.
+
+=== Another option
+
+Besides the syntax above, Redpanda SQL provides and supports the use of operators in queries. See the syntax below:
+
+[source,sql]
+----
+SELECT 'from_json'::JSON ->> 'path';
+----
+
+* `from_json`: the JSON value from which to extract.
+* `::JSON`: a symbol that casts the text literal to a JSON type.
+* `path`: key of the field that we want to extract.
+
+== Example
+
+. This example shows how to use the `JSON_EXTRACT_PATH_TEXT()` function to extract values from a JSON object at a specified path.
++
+Run the following query:
++
+[source,sql]
+----
+SELECT JSON_EXTRACT_PATH_TEXT('{"a": "Oxla", "b": {"x": 1.234, "y": 4.321}}', 'a') AS "result a";
+----
++
+*or*
++
+[source,sql]
+----
+SELECT '{"a": "Oxla", "b": {"x": 1.234, "y": 4.321}}'::JSON ->> 'a' AS "result a";
+----
+
+. The `JSON_EXTRACT_PATH_TEXT()` function extracts the values and returns the output below:
++
+[source,sql]
+----
++------------+
+| result a |
++------------+
+| "Oxla" |
++------------+
+----
diff --git a/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path.adoc b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path.adoc
new file mode 100644
index 000000000..643055542
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/json-functions/json-extract-path.adoc
@@ -0,0 +1,79 @@
+= JSON_EXTRACT_PATH
+:description: JSON_EXTRACT_PATH() function extracts JSON nested value from a specified path.
+:page-topic-type: reference
+
+`JSON_EXTRACT_PATH()` function extracts JSON nested value from a specified path.
+
+== Syntax
+
+The syntax of the `JSON_EXTRACT_PATH()` function can be seen below.
+ +[source,sql] +---- +JSON_EXTRACT_PATH(from_json JSON, path TEXT[]) +---- + +* `from_json`: the JSON value from which to extract. +* `path`: the path to extract. + +=== Another option + +Besides the syntax above, Redpanda SQL provides and supports the use of operators in queries. See the syntax below: + +[source,sql] +---- +SELECT 'from_json'::JSON -> 'path'; +---- + +* `from_json`: the JSON value from which to extract. +* `::JSON`: a symbol that casts the text literal to a JSON type. +* `path`: key of the field that we want to extract. + +== Examples + +These examples display how `JSON_EXTRACT_PATH()` extracts the "`oxla`" JSON sub-object from the specified path. + +. Use the below query: ++ +[source,sql] +---- +SELECT JSON_EXTRACT_PATH('{"f2":{"f3":1},"f4":{"f5":99,"f6":"oxla"}}', 'f4', 'f6'); +---- ++ +*or* ++ +[source,sql] +---- +SELECT '{"f2":{"f3":1},"f4":{"f5":99,"f6":"oxla"}}'::JSON -> 'f4' -> 'f6'; +---- ++ +The query above will return the following result. ++ +[source,sql] +---- ++---------+ +| f | ++---------+ +| "oxla" | ++---------+ +---- + +. Run the query below: ++ +[source,sql] +---- +SELECT + JSON_EXTRACT_PATH('{"a": 1, "b": {"x": "subtract", "y": "plus"}}', 'b', 'x') AS "bx", + JSON_EXTRACT_PATH('{"a": 1, "b": {"x": "multiply", "y": "divide"}}', 'b', 'y') AS "by"; +---- ++ +You will get the following output: ++ +[source,sql] +---- ++---------------+-------------+ +| bx | by | ++---------------+-------------+ +| "subtract" | "divide" | ++---------------+-------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/abs.adoc b/modules/reference/pages/sql/sql-functions/math-functions/abs.adoc new file mode 100644 index 000000000..408d70152 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/abs.adoc @@ -0,0 +1,107 @@ += ABS +:description: The ABS() function returns an absolute number, i.e., the positive value of a number. 
+:page-topic-type: reference + +The `ABS()` function returns an absolute number, i.e., the positive value of a number. The data type of the returned value will depend on the data type of the value passed to the `ABS()` function. + +== Syntax + +The syntax for the `ABS()`function is as follows: + +[source,sql] +---- +ABS(x) +---- + +The `ABS()` function requires one argument: + +* `x`: An expression that evaluates to a number. + +[NOTE] +==== +The *ABS()* function will return the negation of the negative numbers. +==== + +== Examples + +=== Absolute value of a negative number + +The following example demonstrates how the `ABS()` function can be used to obtain the absolute value of a negative number: + +[source,sql] +---- +SELECT ABS(-10.25); +---- + +It will return an absolute value of the passed argument: + +[source,sql] +---- ++--------+ +| f | ++--------+ +| 10.25 | ++--------+ +---- + +=== ABS() function with an expression + +The following example demonstrates how the `ABS()` function can be used with an expression to obtain the absolute value of the result: + +[source,sql] +---- +SELECT ABS( 100 - 250); +---- + +The result of the above statement is *-150*. However, you will get the output *150*, as 150 is the positive version of -150. + +[source,sql] +---- ++------+ +| f | ++------+ +| 150 | ++------+ +---- + +=== Use the ABS() function with a table + +The following example demonstrates how the `ABS()` function can be used with a table to obtain the absolute values of all numbers in a specific column: + +. First, create a table named absTable containing an *_initialValue_* column with some positive and negative values: ++ +[source,sql] +---- +CREATE TABLE absTable(initialValue float); + +INSERT INTO absTable(initialValue) +VALUES +(550), +(-210), +(72.12), +(-87.93), +(-0.0); +---- + +. 
Next, use the following query to find the absolute value of all numbers:
++
+[source,sql]
+----
+SELECT initialValue, ABS(initialValue) AS absoluteValue
+FROM absTable;
+----
+
+. The above query will retrieve all values in the *"`initialValue`"* column and their absolute values in the *"`absoluteValue`"* column. The output will look something like this:
++
+[source,sql]
+----
++---------------+----------------+
+| initialValue | absoluteValue |
++---------------+----------------+
+| 550 | 550 |
+| -210 | 210 |
+| 72.12 | 72.12 |
+| -87.93 | 87.93 |
+| -0 | 0 |
++---------------+----------------+
+----
diff --git a/modules/reference/pages/sql/sql-functions/math-functions/bitwise-shift-left.adoc b/modules/reference/pages/sql/sql-functions/math-functions/bitwise-shift-left.adoc
new file mode 100644
index 000000000..59fb3329c
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/math-functions/bitwise-shift-left.adoc
@@ -0,0 +1,97 @@
+= BITWISE SHIFT LEFT
+:description: Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right.
+:page-topic-type: reference
+
+Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right. These operations are fundamental in low-level data processing and optimization.
+
+The bitwise *left shift (`<<`)* operator shifts the bits of an integer to the left by the specified shift amount. For *integers*, this operation is equivalent to multiplying the integer value by 2 raised to the power of the shift amount. During this operation, high-order bits that are shifted out are permanently lost without the ability to be preserved, while zeros are shifted in from the right to fill the vacant positions. Because the left shift operation (<<) on signed integers is *arithmetic*, meaning it shifts all bits to the left and fills the vacant rightmost bits with zeros on the right, the behavior is the same as a logical shift in this case. 
However, the overall length of the bit string is preserved, with zeros padding on the right to maintain the length.
+
+== Syntax
+
+The syntax for the BITWISE SHIFT LEFT is as follows:
+
+[source,sql]
+----
+value << shift_amount
+----
+
+== Parameters
+
+* `value`: integer expression
+* `shift_amount`: a *non-negative* integer specifying how many bit positions to shift
+
+== Restrictions
+
+The shift amount for bitwise shift operators in Redpanda SQL should normally be a *non-negative* integer. However, Redpanda SQL treats negative shift counts as valid by applying modulo arithmetic based on the bit width, so shifting `1 << -3` in a 32-bit integer is equivalent to shifting `1 << 29`, producing predictable results without errors or undefined behavior.
+
+When performing bitwise left shift operations (<<) on 32-bit integer values in Redpanda SQL, the shift count is taken *modulo* 32. This means:
+
+* Shifting by a number of bits greater than or equal to 32 will wrap around.
+* For example, `1 << 35` is equivalent to `1 << 3` because `35 % 32 = 3`.
+
+[WARNING]
+====
+If you shift by a value larger than or equal to 32, the actual shift will be the remainder after dividing by 32, which may lead to unexpected results if not carefully considered.
+====
+
+== Examples
+
+For the needs of this section we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `rating` and `privileges` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website.
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title TEXT NOT NULL, + rating TEXT, + privileges INT NOT NULL +); +INSERT INTO film(title, rating, privileges) VALUES + ('ATTRACTION NEWTON', 'PG-13', 1), -- Free users + ('CHRISTMAS MOONSHINE', 'NC-17', 2), -- Premium users + ('DANGEROUS UPTOWN', 'PG', 3), -- Free + Premium users (bits 0 and 1) + ('KILL BROTHERHOOD', 'G', 4), -- Admin-only content + ('HALLOWEEN NUTS', 'PG-13', 1), + ('HOURS RAGE', 'NC-17', 2), + ('PIANIST OUTFIELD', 'NC-17', 3), + ('PICKUP DRIVING', 'G', 4), + ('INDEPENDENCE HOTEL', 'NC-17', 1), + ('PRIVATE DROP', 'PG', 2), + ('SAINTS BRIDE', 'G', 3), + ('FOREVER CANDIDATE', 'NC-17', 4), + ('MILLION ACE', 'PG-13', 1), + ('SLEEPY JAPANESE', 'PG', 2), + ('WRATH MILE', 'NC-17', 3), + ('YOUTH KICK', 'NC-17', 4), + ('CLOCKWORK PARADISE', 'PG-13', 1); +---- + +[NOTE] +==== +* Privilege 1 (binary 0001): Free users can watch. +* Privilege 2 (binary 0010): Premium users can watch. +* Privilege 3 (binary 0011): Both free and premium users can watch. +* Privilege 4 (binary 0100): Admin-only content. 
+==== +The query below uses the integer `Left shift (<<)` operation, shifting the privileges value left by 1 for the movie '`ATTRACTION NEWTON`': + +[source,sql] +---- +UPDATE film +SET privileges = privileges << 1 +WHERE title = 'ATTRACTION NEWTON'; +---- + +After running the update, you can verify the change with: + +[source,sql] +---- +SELECT title, privileges FROM film WHERE title = 'ATTRACTION NEWTON'; +---- + +Expected output: + +[source,sql] +---- + title | privileges +-------------------+------------ + ATTRACTION NEWTON | 2 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/bitwise-shift-right.adoc b/modules/reference/pages/sql/sql-functions/math-functions/bitwise-shift-right.adoc new file mode 100644 index 000000000..7629301c5 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/bitwise-shift-right.adoc @@ -0,0 +1,98 @@ += BITWISE SHIFT RIGHT +:description: Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right. +:page-topic-type: reference + +Bitwise shift operators in Redpanda SQL manipulate the bits of integer value by shifting them left or right. These operations are fundamental in low-level data processing and optimization. + +The bitwise *right shift (`>>`)* operator shifts the bits of an integer to the right by the specified number of positions. For *integers*, this operation is equivalent to dividing the integer value by 2 raised to the power of the shift amount, discarding any remainder. Unlike a logical shift, the right shift in Redpanda SQL is an *arithmetic* shift, meaning that the vacant leftmost bits are filled with the original sign bits (the most significant bit) rather than zeros. This preserves the sign of the integer after the shift, ensuring correct behavior for signed values. During the shift, low-order bits that move beyond the size limit are permanently lost. 
However, the overall length of the bit string is preserved, with copies of the sign bit padding on the left side to maintain the length.
+
+== Syntax
+
+The syntax for the BITWISE SHIFT RIGHT is as follows:
+
+[source,sql]
+----
+value >> shift_amount
+----
+
+== Parameters
+
+* `value`: integer expression
+* `shift_amount`: a *non-negative* integer specifying how many bit positions to shift
+
+== Restrictions
+
+The shift amount for bitwise shift operators in Redpanda SQL should normally be a *non-negative* integer. However, Redpanda SQL treats negative shift counts as valid by applying modulo arithmetic based on the bit width, so shifting `1 >> -3` in a 32-bit integer is equivalent to shifting `1 >> 29`, producing predictable results without errors or undefined behavior.
+
+When performing bitwise right shift operations (>>) on 32-bit integer values in Redpanda SQL, the shift count is taken *modulo* 32, just as with left shifts. This means:
+
+* Shifting by a number of bits greater than or equal to 32 will wrap around.
+* For example, `1 >> 35` is equivalent to `1 >> 3` because `35 % 32 = 3`.
+
+[WARNING]
+====
+If you shift by a value larger than or equal to 32, the actual shift will be the remainder after dividing by 32,
+which may lead to unexpected results if not carefully considered.
+====
+
+== Examples
+
+For the needs of this section we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `rating` and `privileges` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website.
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title TEXT NOT NULL, + rating TEXT, + privileges INT NOT NULL +); +INSERT INTO film(title, rating, privileges) VALUES + ('ATTRACTION NEWTON', 'PG-13', 1), -- Free users + ('CHRISTMAS MOONSHINE', 'NC-17', 2), -- Premium users + ('DANGEROUS UPTOWN', 'PG', 3), -- Free + Premium users (bits 0 and 1) + ('KILL BROTHERHOOD', 'G', 4), -- Admin-only content + ('HALLOWEEN NUTS', 'PG-13', 1), + ('HOURS RAGE', 'NC-17', 2), + ('PIANIST OUTFIELD', 'NC-17', 3), + ('PICKUP DRIVING', 'G', 4), + ('INDEPENDENCE HOTEL', 'NC-17', 1), + ('PRIVATE DROP', 'PG', 2), + ('SAINTS BRIDE', 'G', 3), + ('FOREVER CANDIDATE', 'NC-17', 4), + ('MILLION ACE', 'PG-13', 1), + ('SLEEPY JAPANESE', 'PG', 2), + ('WRATH MILE', 'NC-17', 3), + ('YOUTH KICK', 'NC-17', 4), + ('CLOCKWORK PARADISE', 'PG-13', 1); +---- + +[NOTE] +==== +* Privilege 1 (binary 0001): Free users can watch. +* Privilege 2 (binary 0010): Premium users can watch. +* Privilege 3 (binary 0011): Both free and premium users can watch. +* Privilege 4 (binary 0100): Admin-only content. 
+==== +The query below uses the integer `right shift (>>)` operation, shifting the privileges value right by 1 for the movie '`DANGEROUS UPTOWN`': + +[source,sql] +---- +UPDATE film +SET privileges = privileges >> 1 +WHERE title = 'DANGEROUS UPTOWN'; +---- + +After running the update, you can verify the change with: + +[source,sql] +---- +SELECT title, privileges FROM film WHERE title = 'DANGEROUS UPTOWN'; +---- + +Expected output: + +[source,sql] +---- + title | privileges +-------------------+------------ + DANGEROUS UPTOWN | 1 +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/cbrt.adoc b/modules/reference/pages/sql/sql-functions/math-functions/cbrt.adoc new file mode 100644 index 000000000..d4afb0b8e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/cbrt.adoc @@ -0,0 +1,137 @@ += CBRT +:description: The CBRT() function calculates and returns the cube root of a given number. +:page-topic-type: reference + +The `CBRT()` function calculates and returns the cube root of a given number. In mathematical terms, for a number _x_, its cube root _y_ is determined by the equation _y³ = x_. + +== Syntax + +The syntax for the `CBRT()` function is as follows: + +[source,sql] +---- +CBRT(number) +---- + +Where: + +* `number`: This is a required value representing the number for which you want to calculate the cube root. It can be a positive or negative whole number, a decimal, or even an expression that evaluates to a number. + +For example, you can use expressions like `SELECT CBRT(some_column) from test_table`, assuming `some_column` contains a numeric value. + +[NOTE] +==== +*Return Value:* + - It will return `NULL` if the argument is `NULL`. + - It will give an error if you input a parameter that is not a numeric type. 
+==== + +== Examples + +Below are several usage examples of the `CBRT()` function: + +=== Basic cube root calculation + +Consider the following example: + +[source,sql] +---- + SELECT CBRT(125); +---- + +The result of this query will be: + +[source,sql] +---- + cbrt +------ + 5 +---- + +=== Cube root of a negative value + +To calculate the cube root of a negative number, use the `CBRT()` function as shown: + +[source,sql] +---- + SELECT CBRT(-125); +---- + +The final result is as follows. + +[source,sql] +---- + cbrt +------ + -5 +---- + +=== Cube root of decimal result + +For calculations with decimal numbers, use the `CBRT()` function as demonstrated below: + +[source,sql] +---- +SELECT CBRT(32); +---- + +The result will be a decimal value, as shown below: + +[source,sql] +---- + cbrt +------------------- + 3.174802103936399 +---- + +=== Cube root of decimal input + +In this scenario, fractional seconds are incorporated into the argument: + +[source,sql] +---- +SELECT CBRT(0.12815); +---- + +The result will be the cube root of the provided decimal value. + +[source,sql] +---- + cbrt +------------ + 0.50416523 +---- + +=== Handle incorrect argument + +When a non-numeric argument is provided, the `CBRT()` function works as follows: + +[source,sql] +---- +SELECT CBRT('abc'); +---- + +An error will be generated, and the result will not be valid. + +[source,sql] +---- +invalid input syntax for type double precision: "abc" +---- + +=== CBRT operator (`||/(x)`) + +Here’s an example using the CBRT operator (`||/(x)`) to calculate the cube root of a given number: + +[source,sql] +---- +SELECT ||/(1728) AS cbrt_operator; +---- + +In this example, we calculate the cube root of 1728 using the CBRT operator. 
The result of this query will be: + +[source,sql] +---- + cbrt_operator +-------------------- + 12.000000000000002 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/ceil.adoc b/modules/reference/pages/sql/sql-functions/math-functions/ceil.adoc new file mode 100644 index 000000000..101f3958d --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/ceil.adoc @@ -0,0 +1,106 @@ += CEIL +:description: The CEIL() function returns the nearest positive or negative integer value greater than or equal to the provided decimal input number. +:page-topic-type: reference + +The `CEIL()` function returns the nearest positive or negative integer value greater than or equal to the provided decimal input number. + +== Syntax + +The syntax of the `CEIL()` function is as follows: + +[source,sql] +---- +CEIL(x) +---- + +The `CEIL()` function requires one argument: + +* `x`: A positive or a negative decimal number (or an expression that evaluates to a decimal number). + +== Examples + +=== Round up a positive decimal value + +The following example demonstrates how the `CEIL()`function rounds up a positive decimal value: + +[source,sql] +---- +SELECT CEIL (300.55); +---- + +As shown below, it will return 301, as it is the nearest integer value greater than 300.55. + +[source,sql] +---- ++------+ +| f | ++------+ +| 301 | ++------+ +---- + +=== Round up a negative decimal value + +The following example demonstrates how the `CEIL()`function rounds up a negative decimal value: + +[source,sql] +---- +SELECT CEIL(-89.9) AS "Ceil"; +---- + +The output of this statement will be -89, as -89 is the nearest integer value greater than or equal to -89.9, as shown below. + +[source,sql] +---- ++-------+ +| Ceil | ++-------+ +| -89 | ++-------+ +---- + +=== Use the `CEIL()` function with a table + +The following example demonstrates how the `CEIL()` function can be used with a table to round up the values in a specific column: + +. 
First, create a table called *_CeilRecords_* with the following query: ++ +[source,sql] +---- +CREATE TABLE CeilRecords (numbers float); + +INSERT INTO CeilRecords(numbers) +VALUES + (-28.85), + (-9.4), + (0.87), + (78.16), + (42.16); +---- ++ +The above statement will create a table called *"`CeilRecords`"* with a column called *"`numbers`"* and insert 5 decimal values into it. + +. The statement below can be used to retrieve and round up the value for all records in the column *numbers*: ++ +[source,sql] +---- +SELECT *, CEIL(numbers) AS CeilValue FROM CeilRecords; +---- ++ +The final result will contain the following: ++ +* A *numbers* column with initial decimal values. +* A *CeilValue* column with rounded-up integer values. ++ +[source,sql] +---- ++---------+------------+ +| numbers | CeilValue | ++---------+------------+ +| -28.85 | -28 | +| -9.4 | -9 | +| 0.87 | 1 | +| 78.16 | 79 | +| 42.16 | 43 | ++---------+------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/exp.adoc b/modules/reference/pages/sql/sql-functions/math-functions/exp.adoc new file mode 100644 index 000000000..41eaae1f4 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/exp.adoc @@ -0,0 +1,80 @@ += EXP +:description: The EXP() function returns the exponential value of a number specified in the argument. +:page-topic-type: reference + +The `EXP()` function returns the exponential value of a number specified in the argument. + +== Syntax + +The syntax for the `EXP()` is: + +[source,sql] +---- +EXP(number); +---- + +Where: + +* `number`: The number for which you want to calculate the exponential value. Equivalent to the formula `e^number`. + +== Examples + +Let’s explore examples to see how the `EXP()` function works. + +=== Basic usage + +In this case, we use the `EXP()` function with positive and negative values. 
+ +[source,sql] +---- +SELECT EXP(0) AS "EXP of 0", + EXP(1) AS "EXP of 1", + EXP(2) AS "EXP of 2", + EXP(-1) AS "EXP of -1", + EXP(-2) AS "EXP of -2"; +---- + +You will get the following result: + +[source,sql] +---- +EXP of 0 | EXP of 1 | EXP of 2 | EXP of -1 | EXP of -2 +----------+-------------------+------------------+---------------------+-------------------- + 1 | 2.718281828459045 | 7.38905609893065 | 0.36787944117144233 | 0.1353352832366127 +---- + +=== Use `EXP()` with fractions + +This case uses the `EXP()` function with a fractional argument. + +[source,sql] +---- +SELECT EXP(3.2); +---- + +Here is the result: + +[source,sql] +---- + exp +-------------------- + 24.532531366911574 +---- + +=== Use `EXP()` with expressions + +Here, we use the `EXP()` function with expressions. + +[source,sql] +---- +SELECT EXP(5 * 5); +---- + +See the result below: + +[source,sql] +---- + exp +------------------- + 72004899337.38588 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/floor.adoc b/modules/reference/pages/sql/sql-functions/math-functions/floor.adoc new file mode 100644 index 000000000..3d82dbf56 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/floor.adoc @@ -0,0 +1,96 @@ += FLOOR +:description: The FLOOR() returns a number rounded down that is less than or equal to the specified argument. +:page-topic-type: reference + +The `FLOOR()` returns a number rounded down that is less than or equal to the specified argument. + +== Syntax + +The syntax for the `FLOOR()` function in Redpanda SQL is: + +[source,sql] +---- +FLOOR(x) +---- + +The `FLOOR()` function requires one argument: + +`x`: A positive or a negative decimal number (or an expression that evaluates to a decimal number). 
+
+== Examples
+
+=== Round down a positive decimal value
+
+The following example demonstrates how the `FLOOR()` function rounds down a positive decimal value:
+
+[source,sql]
+----
+SELECT FLOOR(345.6765467);
+----
+
+It will return 345 as it is the closest value smaller than the argument.
+
+[source,sql]
+----
++------+
+| f    |
++------+
+| 345  |
++------+
+----
+
+=== Round down a negative decimal value
+
+The following example demonstrates how the `FLOOR()` function rounds down a negative decimal value:
+
+[source,sql]
+----
+SELECT FLOOR(-0.987657);
+----
+
+You will get the following result as it is the nearest integer smaller than or equal to the specified argument.
+
+[source,sql]
+----
++-------+
+| f |
++-------+
+| -1    |
++-------+
+----
+
+=== Use the FLOOR() function with a table
+
+The following example demonstrates how the `FLOOR()` function can be used with a table to round down the values in a specific column:
+
+. Create a new table called *FloorRecords* with double-precision values using the query below:
++
+[source,sql]
+----
+CREATE TABLE FloorRecords (numbers float);
+INSERT INTO FloorRecords VALUES (3.987), (4.325), (-0.76), (-22.57);
+----
+
+. Retrieve the table with its value by running the following query:
++
+[source,sql]
+----
+SELECT * ,FLOOR(numbers) AS Floorvalue FROM FloorRecords;
+----
+
+. The return table will contain the following:
++
+* *numbers*, the column with the initial double-precision values.
+* *FloorValue*, the column with the rounded-down values.
++
+[source,sql]
+----
++------------+---------------+
+| numbers | Floorvalue |
++------------+---------------+
+| 3.987 | 3 |
+| 4.325 | 4 |
+| -0.76 | -1 |
+| -22.57 | -23 |
++------------+---------------+
+----
diff --git a/modules/reference/pages/sql/sql-functions/math-functions/greatest.adoc b/modules/reference/pages/sql/sql-functions/math-functions/greatest.adoc
new file mode 100644
index 000000000..4f6ba6d92
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/math-functions/greatest.adoc
@@ -0,0 +1,166 @@
+= GREATEST
+:description: The GREATEST() function extracts the greatest or largest value from a set of values.
+:page-topic-type: reference
+
+The `GREATEST()` function extracts the greatest or largest value from a set of values. It needs at least one argument to work with, and if you mix different types, like a text and a number, it will return an error.
+
+For example, comparing the greatest value among 4, "`two`", and 9 would result in an error.
+
+== Syntax
+
+The syntax for the `GREATEST()` function is as follows:
+
+[source,sql]
+----
+GREATEST(value_1, [value_n])
+----
+
+Where:
+
+* `value_1`: Represents the first value.
+* `value_n`: Represents one or more additional values, separated by commas.
+
+[NOTE]
+====
+*Info:*
+ - `NULL` values within the expressions are ignored.
+ - The result will be `NULL` if all expressions evaluate to `NULL`.
+====
+
+== Examples
+
+Here are examples that illustrate the usage of the `GREATEST()` function:
+
+=== Basic usage
+
+Consider the following example:
+
+[source,sql]
+----
+SELECT GREATEST(3,5,8,9,10);
+----
+
+The query will return `10`, the largest value among the provided values.
+
+[source,sql]
+----
+greatest
+---------
+ 10
+----
+
+=== String comparison
+
+String comparison is also supported, as shown below:
+
+[source,sql]
+----
+SELECT GREATEST('apple', 'banana', 'cherry');
+----
+
+In this case, the result will be `'cherry'`, the greatest string according to the order.
+
+[source,sql]
+----
+greatest
+----------
+ cherry
+----
+
+=== Handle NULL values
+
+`NULL` values are ignored when determining the greatest value:
+
+[source,sql]
+----
+SELECT GREATEST (5,null,9);
+----
+
+The result will be the greatest non-NULL value, which is `9`.
+
+[source,sql]
+----
+greatest
+---------
+ 9
+----
+
+=== Positive and negative numbers
+
+Negative numbers can also be compared:
+
+[source,sql]
+----
+SELECT GREATEST (4,-4,-8,8);
+----
+
+This query will return `8`, the greatest value among the provided numbers.
+
+[source,sql]
+----
+greatest
+---------
+ 8
+----
+
+=== Use table data
+
+The `GREATEST` function can also be used to find the greatest value among column data. For example, let’s create a table named *Student* that stores students’ names and scores.
+
+[source,sql]
+----
+CREATE TABLE Student(
+  Student_name TEXT,
+  Student_Class TEXT,
+  Subject1 INT,
+  Subject2 INT,
+  Subject3 INT,
+  Subject4 INT
+);
+
+INSERT INTO
+  Student(Student_name, Student_Class, Subject1, Subject2, Subject3, Subject4)
+VALUES
+  ('Sayan', 'Junior', 81, 90, 86, 92 ),
+  ('Nitin', 'Junior', 90, 84, 88, 91 ),
+  ('Aniket', 'Senior', 81, 80, 87, 95 ),
+  ('Abdur', 'Junior', 85, 90, 80, 90 ),
+  ('Sanjoy', 'Senior', 88, 82, 84, 90 );
+----
+
+Use the `SELECT` statement to view all the records:
+
+[source,sql]
+----
+SELECT * FROM Student;
+----
+
+[source,sql]
+----
+student_name | student_class | subject1 | subject2 | subject3 | subject4
+--------------+---------------+----------+----------+----------+----------
+ Sayan | Junior | 81 | 90 | 86 | 92
+ Nitin | Junior | 90 | 84 | 88 | 91
+ Aniket | Senior | 81 | 80 | 87 | 95
+ Abdur | Junior | 85 | 90 | 80 | 90
+ Sanjoy | Senior | 88 | 82 | 84 | 90
+----
+
+Now, we will find the greatest marks for every student in all subjects.
+ +[source,sql] +---- +Select Student_name, GREATEST(Subject1, Subject2, Subject3, Subject4) AS Greatest_Mark +FROM Student; +---- + +[source,sql] +---- +student_name | greatest_mark +--------------+--------------- + Sayan | 92 + Nitin | 91 + Aniket | 95 + Abdur | 90 + Sanjoy | 90 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/index.adoc b/modules/reference/pages/sql/sql-functions/math-functions/index.adoc new file mode 100644 index 000000000..394aab6f3 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/index.adoc @@ -0,0 +1,27 @@ += Overview +:description: Mathematical functions and Operators in Redpanda SQL are designed to perform mathematical calculations and manipulate integer or floating-point numbers. + +Mathematical functions and Operators in Redpanda SQL are designed to perform mathematical calculations and manipulate integer or floating-point numbers. Redpanda SQL supports the following math functions: + +[width="100%",cols="47%,53%",options="header",] +|=== +|*Function* |*Description* +|xref:reference:sql/sql-functions/math-functions/abs.adoc[ABS()] |This function returns the absolute value of an argument, regardless of whether it is positive or negative +|xref:reference:sql/sql-functions/math-functions/cbrt.adoc[CBRT()] |This function returns the cube root of a given number +|xref:reference:sql/sql-functions/math-functions/ceil.adoc[CEIL()] |This function rounds up to the nearest positive or negative integer value greater than or equal to the argument +|xref:reference:sql/sql-functions/math-functions/exp.adoc[EXP()] |This function returns the exponential value of a number specified in the argument +|xref:reference:sql/sql-functions/math-functions/floor.adoc[FLOOR()] |This function returns a number rounded down that is less than or equal to the specified argument +|xref:reference:sql/sql-functions/math-functions/greatest.adoc[GREATEST()] |This function extracts the greatest or largest value from a set 
of values.
+|xref:reference:sql/sql-functions/math-functions/least.adoc[LEAST()] |This function returns the least or smallest value in a list of values
+|xref:reference:sql/sql-functions/math-functions/ln.adoc[LN()] |This function returns the natural logarithm of its argument
+|xref:reference:sql/sql-functions/math-functions/log.adoc[LOG()] |This function returns the base-10 logarithm or logarithm of the specified base of a given number
+|xref:reference:sql/sql-functions/math-functions/power.adoc[POWER()] |This function returns the value of a number raised to the power of another number specified in the arguments
+|xref:reference:sql/sql-functions/math-functions/random.adoc[RANDOM()] |This function returns a random number between 0 (inclusive) and 1 (exclusive)
+|xref:reference:sql/sql-functions/math-functions/round.adoc[ROUND()] |This function rounds numbers to the nearest integer or to a specified number of decimal places
+|xref:reference:sql/sql-functions/math-functions/sign.adoc[SIGN()] |This function returns -1 for negative arguments, 1 for positive arguments or 0 if the argument is 0
+|xref:reference:sql/sql-functions/math-functions/sin.adoc[SIN()] |This function returns the trigonometric sine value of a specified angle in radians
+|xref:reference:sql/sql-functions/math-functions/sqrt.adoc[SQRT()] |This function returns the square root of its argument
+|xref:reference:sql/sql-functions/math-functions/bitwise-shift-left.adoc[BITWISE SHIFT LEFT] |This operator manipulates the bits of integer values by shifting them left
+|xref:reference:sql/sql-functions/math-functions/bitwise-shift-right.adoc[BITWISE SHIFT RIGHT] |This operator manipulates the bits of integer values by shifting them right
+|xref:reference:sql/sql-functions/math-functions/to-char-from-number.adoc[TO_CHAR() from Number] |Formats a number into a string using a given format
+|===
diff --git a/modules/reference/pages/sql/sql-functions/math-functions/least.adoc
b/modules/reference/pages/sql/sql-functions/math-functions/least.adoc new file mode 100644 index 000000000..fb37a8165 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/least.adoc @@ -0,0 +1,146 @@ += LEAST +:description: The LEAST() function returns the least or smallest value in a list of values. +:page-topic-type: reference + +The `LEAST()` function returns the least or smallest value in a list of values. It needs at least one argument to work with, and if you mix different types, like a text and a number, it will return an error. + +For example, comparing the greatest value among 4, "`two`", and 9 would result in an error. + +== Syntax + +The syntax for the `LEAST()` function is as follows: + +[source,sql] +---- +LEAST(value_1, [value_n]) +---- + +Where: + +* `value_1`: Represents the first value. +* `value_n`: Represents one or more additional values, separated by commas. + +[NOTE] +==== +*Info:* + -`NULL` values in the list will be ignored. + - The result will be `NULL` if all the expressions evaluate to `NULL`. +==== + +== Examples + +Below are several examples of the `LEAST()` function: + +=== Basic usage + +Consider the following example: + +[source,sql] +---- +SELECT LEAST(3,5,8,9,10); +---- + +The query will return `3`, the smallest value among the provided values. + +[source,sql] +---- + least +------- + 3 +---- + +=== String comparison + +String comparison is also supported, as shown below: + +[source,sql] +---- +SELECT LEAST('a','b','c','aa'); +---- + +In this case, the result will be `'a'`, as it is the smallest string. + +[source,sql] +---- + least +------- + a +---- + +=== Handle NULL values + +`NULL` values are ignored when determining the smallest value: + +[source,sql] +---- +SELECT LEAST (5,null,9); +---- + +The result will be the smallest non-NULL value, which is `5`. 
+ +[source,sql] +---- + least +------- + 5 +---- + +=== Negative numbers + +Negative numbers can also be compared: + +[source,sql] +---- +SELECT LEAST (4,-4,-8,8); +---- + +This query will return `-8`, the smallest value among the provided numbers. + +[source,sql] +---- + least +------- + -8 +---- + +=== Use table data + +Suppose we have a table named `grades` containing columns `x`, `y`, and `z`. + +[source,sql] +---- +CREATE TABLE grades ( + name TEXT, + x INT, + y INT, + z INT +); + +INSERT INTO grades (name, x, y, z) +VALUES + ('Jane', 50, 0, 70), + ('Rio', 60, 30, 80), + ('John', 60, 60, 86), + ('Rose', 80, 90, 88), + ('Gary', 100, 80, 90); +---- + +To find the smallest value among these columns, you can use the following query: + +[source,sql] +---- +SELECT *, LEAST(x, y, z) AS least_grade FROM grades; +---- + +This query will add a new column named `least_grade` to the result, displaying the smallest value among columns `x`, `y`, and `z`. + +[source,sql] +---- + name | x | y | z | least_grade +------+-----+----+----+------------- + Jane | 50 | 0 | 70 | 0 + Rio | 60 | 30 | 80 | 30 + John | 60 | 60 | 86 | 60 + Rose | 80 | 90 | 88 | 80 + Gary | 100 | 80 | 90 | 80 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/ln.adoc b/modules/reference/pages/sql/sql-functions/math-functions/ln.adoc new file mode 100644 index 000000000..70b75240f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/ln.adoc @@ -0,0 +1,80 @@ += LN +:description: LN() will return the exponential value of its argument, which is recognized as the input parameter’s natural logarithm. +:page-topic-type: reference + +`LN()` will return the exponential value of its argument, which is recognized as the input parameter’s natural logarithm. + +[NOTE] +==== +*Info:* + The logarithm doesn’t take negative numbers or 0. +==== + +== Syntax + +The syntax of the `LN()` function is described as follows. 
+ +[source,sql] +---- +LN (x) +---- + +`x`:  A positive or a negative number (or an expression that evaluates to a number). + +== Examples + +=== Basic LN() function + +The example below shows that `LN()` function will return the natural logarithm of the number *7,87653*. + +[source,sql] +---- +SELECT LN(7.87653); +---- + +The final result is as follows. + +[source,sql] +---- ++-------------+ +| f | ++-------------+ +| 2.0638874 | ++-------------+ +---- + +=== Use LN() function with a table + +In the following example, we will combine `LN()` function with `CREATE TABLE` statement. Therefore we can obtain natural logarithmic values of a specific column. + +. Create a new table named *LNTable* containing the *initValue* column with an integer value. ++ +[source,sql] +---- +CREATE TABLE LNtable(initValue int); +INSERT INTO LNtable(initValue) +VALUES (75), (18), (28); +---- + +. Run the following query to get the logarithm output of the column: ++ +[source,sql] +---- +SELECT * ,LN(initValue) AS lnValue FROM LNtable; +---- + +. It will return the initial value with its natural logarithm value. ++ +* *initValue* column with the initial integer values. +* *lnValue* column with the natural logarithm values. ++ +[source,sql] +---- ++------------+---------------------------+ +| initValue | lnValue | ++------------+---------------------------+ +| 75 | 4.31748811353631 | +| 18 | 2.8903717578961645 | +| 28 | 3.332204510175204 | ++------------+---------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/log.adoc b/modules/reference/pages/sql/sql-functions/math-functions/log.adoc new file mode 100644 index 000000000..f5124041f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/log.adoc @@ -0,0 +1,176 @@ += LOG +:description: The LOG() function returns the base-10 logarithm or logarithm of the specified base of a given number. 
+:page-topic-type: reference + +The `LOG()` function returns the base-10 logarithm or logarithm of the specified base of a given number. + +== Syntax + +The following illustrates the syntax of the `LOG()` function: + +[source,sql] +---- +-- base-10 logarithm +LOG(number) + +-- logarithm of number +LOG(base, number) +---- + +Where: + +* `base`: The base number. It must be greater than 0 and not equal to 1. +* `number`: The number whose logarithm you want to obtain. It must be a positive number and greater than 0. + +== Examples + +Let’s explore some examples of the `LOG()` function. + +=== Base-10 logarithm + +==== Basic usage + +In this case, the `LOG()` function calculates the base-10 logarithm of a specified number. + +[source,sql] +---- +SELECT LOG(2), LOG(2.5); +---- + +You will get the output below: + +[source,sql] +---- + log | log +--------------------+--------- + 0.3010299956639812 | 0.39794 +---- + +==== Negative value + +In this example, the `LOG()` function is applied to negative numbers. + +[source,sql] +---- +SELECT LOG(-1); +---- + +Any input of negative values will give you a `NaN` result. + +[source,sql] +---- + log +----- + NaN +---- + +==== NULL value + +The `LOG()` function will return `NULL` if the argument is `NULL`. + +[source,sql] +---- +SELECT LOG(null); +---- + +You will get a null result when an argument passed is null. + +[source,sql] +---- + log +----- +---- + +==== Zero value + +In this example, the `LOG()` takes zero as an argument. + +[source,sql] +---- +SELECT LOG(0); +---- + +You will get the output below: + +[source,sql] +---- + log +----------- + -Infinity +---- + +=== Logarithm with custom base + +==== Basic usage + +In this case, the `LOG()` function calculates the logarithm of a specified number. 
+
+[source,sql]
+----
+SELECT LOG(4, 16),
+       LOG(0.7, 0.8),
+       LOG(0.5, 10),
+       LOG(1, null);
+----
+
+You will get the output below:
+
+[source,sql]
+----
+ log | log | log | log
+-----+------------+-----------+-----
+ 2 | 0.62562156 | -3.321928 |
+----
+
+==== Use with a table
+
+Consider a database table called *_data_* with the following records:
+
+[source,sql]
+----
+CREATE TABLE data (
+    data_column TEXT,
+    x REAL,
+    y REAL
+);
+
+INSERT INTO data (data_column, x, y) VALUES
+('Data 1', 0.5, 2),
+('Data 2', 1, 2),
+('Data 3', 5, 2),
+('Data 4', 10, 10),
+('Data 5', 50, 10);
+
+SELECT * FROM data;
+----
+
+[source,sql]
+----
+ data_column | x | y
+-------------+-----+----
+ Data 1 | 0.5 | 2
+ Data 2 | 1 | 2
+ Data 3 | 5 | 2
+ Data 4 | 10 | 10
+ Data 5 | 50 | 10
+----
+
+Use the `LOG()` function to calculate the logarithm of column *_y_* (as the base) and column *_x_* (as the number):
+
+[source,sql]
+----
+SELECT *, LOG(y, x) AS LOG_Value FROM data;
+----
+
+You will get the result as shown below:
+
+[source,sql]
+----
+ data_column | x | y | log_value
+-------------+-----+----+-----------
+ Data 1 | 0.5 | 2 | -1
+ Data 2 | 1 | 2 | 0
+ Data 3 | 5 | 2 | 2.321928
+ Data 4 | 10 | 10 | 1
+ Data 5 | 50 | 10 | 1.69897
+----
diff --git a/modules/reference/pages/sql/sql-functions/math-functions/power.adoc b/modules/reference/pages/sql/sql-functions/math-functions/power.adoc
new file mode 100644
index 000000000..a8f202664
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/math-functions/power.adoc
@@ -0,0 +1,96 @@
+= POWER
+:description: The POWER() function calculates the value of a number raised to the power of another number specified in the arguments.
+:page-topic-type: reference
+
+The `POWER()` function calculates the value of a number raised to the power of another number specified in the arguments.
+
+== Syntax
+
+The following illustrates the syntax of the `POWER()` function:
+
+[source,sql]
+----
+POWER(a,b)
+----
+
+Where:
+
+* `a`: The base number.
+* `b`: The exponent to which the base number is raised. + +== Examples + +Let’s explore some examples of the `POWER()` function. + +=== Basic usage + +In this case, the `POWER()` function calculates the result of raising one number to the power of another. + +[source,sql] +---- +SELECT POWER(3, 4) AS "Example 1", + POWER(7, 3) AS "Example 2"; +---- + +You will get the output below: + +[source,sql] +---- + Example 1 | Example 2 +-----------+----------- + 81 | 343 +---- + +=== Use `POWER()` with negative values + +In this case, the `POWER()` function is applied to negative numbers. + +[source,sql] +---- +SELECT POWER(-4, -5), POWER(-1, -2), POWER(-6, -7); +---- + +You will get the output below: + +[source,sql] +---- + power | power | power +-------+-------+------- + -1024 | 1 | 0 +---- + +=== Use `POWER()` with floating-point numbers + +In this example, the `POWER()` function is used to calculate 2.5 raised to the power of 3.0. + +[source,sql] +---- +SELECT POWER(2.5, 3.0) AS power_result; +---- + +The result, 15.625, is the value obtained by raising 2.5 to the third power. + +[source,sql] +---- + power_result +-------------- + 15.625 +---- + +=== Zero to the power of zero + +This case shows that 0 expression raised to the power of 0 returns 1. + +[source,sql] +---- +SELECT POWER(0, 0); +---- + +You will get the output below: + +[source,sql] +---- + power +------- + 1 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/random.adoc b/modules/reference/pages/sql/sql-functions/math-functions/random.adoc new file mode 100644 index 000000000..32c0471fd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/random.adoc @@ -0,0 +1,82 @@ += RANDOM +:description: The RANDOM() function in Redpanda SQL generates a random number within a defined range. +:page-topic-type: reference + +The `RANDOM()` function in Redpanda SQL generates a random number within a defined range. 
By default, the range is between 0 (inclusive) and 1 (exclusive), resulting in a value greater than or equal to 0 and less than 1. + +== Syntax + +The syntax for generating a random integer or floating-point number using the `RANDOM()` function is as follows: + +[source,sql] +---- +RANDOM() +---- + +[NOTE] +==== +There are no parameters or arguments for the `RANDOM()` function. +==== + +== Examples + +=== Generate a random number + +The RANDOM() function generates a random number greater than or equal to zero but less than one by default. The following statement can be used to retrieve a random number: + +[source,sql] +---- +SELECT RANDOM(); +---- + +As a result, you will get a random number greater than 0 and less than 1. However, it will never return the maximum value of 1. + +[source,sql] +---- ++-----------------------+ +| f | ++-----------------------+ +| 0.9122627193276355 | ++-----------------------+ +---- + +=== Generate a random decimal number within a range + +To generate a random decimal number between two values, you can use the following statement: + +[source,sql] +---- +SELECT RANDOM()*(b-a)+a; +---- + +Where: + +* *"`a`"* represents the lower bound of the range. +* *"`b`"* represents the upper bound of the range. + +The return value will be a random floating-point number greater than or equal to a and less than b. + +*Example* + +To generate a random decimal number greater than or equal to 10 and less than 25, the following statement can be used: + +[source,sql] +---- +SELECT RANDOM()*(25 - 10)+10; +---- + +Below is an example of a random number that you may retrieve: + +[source,sql] +---- ++-----------------------+ +| f | ++-----------------------+ +| 18.156098711616043 | ++-----------------------+ +---- + +[WARNING] +==== +It is important to note that the function will never return the maximum value of b. 
+==== diff --git a/modules/reference/pages/sql/sql-functions/math-functions/round.adoc b/modules/reference/pages/sql/sql-functions/math-functions/round.adoc new file mode 100644 index 000000000..e64092b89 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/round.adoc @@ -0,0 +1,93 @@ += ROUND +:description: The ROUND() function rounds numbers using round half to even method (bankers rounding). +:page-topic-type: reference + +The `ROUND()` function rounds numbers using round half to even method (bankers rounding). + +== Syntax + +The following illustrates the syntax of the `ROUND()` function: + +[source,sql] +---- +ROUND(number); +---- + +Where: + +* `number`: The number to round, it can be positive, negative, or zero, and it can be an xref:reference:sql/sql-data-types/numeric-type/numeric.adoc[Integer] or a xref:reference:sql/sql-data-types/numeric-type/numeric.adoc[Double Precision]. + +== Examples + +Let’s explore some examples to see how the `ROUND()` function works. + +=== Basic usage + +In this example, we round decimal numbers to integers: + +[source,sql] +---- +SELECT + round(28.11) AS "round(28.11)", + round(12.51) AS "round(12.51)", + round(-9.11) AS "round(-9.11)", + round(102.5) AS "round(102.5)", + round(101.5) AS "round(101.5)", + round(-40.51) AS "round(-40.51)"; +---- + +The query will return the nearest integer for all provided values. + +[source,sql] +---- + round(28.11) | round(12.51) | round(-9.11) | round(102.5) | round(101.5) | round(-40.51) +--------------+--------------+--------------+--------------+---------------+--------------- + 28 | 13 | -9 | 102 | 102 | -41 +---- + +=== Use `ROUND` with table + +Suppose you have a table named *Product* that stores product prices with multiple decimal places. You want to round the prices. 
+ +[source,sql] +---- +CREATE TABLE Product ( + ProductID INT, + ProductName TEXT, + Price DOUBLE PRECISION +); + +INSERT INTO Product (ProductID, ProductName, Price) +VALUES + (1, 'Widget A', 12.345), + (2, 'Widget B', 34.678), + (3, 'Widget C', 9.99), + (4, 'Widget D', 45.00), + (5, 'Widget E', 7.12345), + (6, 'Widget F', 19.876), + (7, 'Widget G', 3.5), + (8, 'Widget H', 29.999); +---- + +We use the `ROUND()` function to round the Price column when retrieving the data. + +[source,sql] +---- +SELECT ProductName, ROUND(Price) AS RoundedPrice FROM Product; +---- + +The result will display the product names along with their prices rounded. + +[source,sql] +---- + productname | roundedprice +-------------+-------------- + Widget A | 12 + Widget B | 35 + Widget C | 10 + Widget D | 45 + Widget E | 7 + Widget F | 20 + Widget G | 4 + Widget H | 30 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/sign.adoc b/modules/reference/pages/sql/sql-functions/math-functions/sign.adoc new file mode 100644 index 000000000..ea0fce8ef --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/sign.adoc @@ -0,0 +1,105 @@ += SIGN +:description: The SIGN() function returns a sign of an argument. +:page-topic-type: reference + +The `SIGN()` function returns a sign of an argument. The returned values are -1 if the argument is less than zero, 1 if the argument is greater than zero, 0 if the argument is equal to zero. + +== Syntax + +The syntax for the `SIGN()`function is as follows: + +[source,sql] +---- +SIGN(x) +---- + +The `SIGN()` function requires one argument: + +* `x`: an expression that evaluates to a number. 
+ +== Examples + +=== Sign of a number + +The following example demonstrates how the `SIGN()` function can be used to obtain the sign of a number: + +[source,sql] +---- +SELECT + SIGN(0.1) AS "SIGN(0.1)", + SIGN(999) AS "SIGN(999)", + SIGN(0) AS "SIGN(0)", + SIGN(-0) AS "SIGN(-0)"; +---- + +The query will return the signs of the passed arguments: + +[source,sql] +---- + SIGN(0.1) | SIGN(999) | SIGN(0) | SIGN(-0) +-----------+-----------+---------+---------- + 1 | 1 | 0 | 0 +---- + +Note: `-0` is accepted as an argument and is equal to zero + +=== SIGN() function with an expression + +The following example demonstrates how the `SIGN()` function can be used with an expression: + +[source,sql] +---- +SELECT SIGN(100 - 200); +---- + +will return the sign of the expression evaluation: + +[source,sql] +---- + sign +------ + -1 +------ +---- + +=== Use the SIGN() function with a table + +The following example demonstrates how the `SIGN()` function can be used with a table to obtain the absolute values of all numbers in a specific column: + +. Create a table signTable containing an *_value_* column with some positive, negative and equal to zero values: ++ +[source,sql] +---- +CREATE TABLE signTable(value float); + +INSERT INTO signTable(value) +VALUES +(1000), +(-200), +(0), +(0.22), +(-12.3), +(-0.0); +---- + +. Use the following query to find the sign of all inserted values: ++ +[source,sql] +---- +SELECT value, SIGN(value) AS sign +FROM signTable; +---- + +. 
The result will be as follows:: ++ +[source,sql] +---- + value | sign +-------+------ + 1000 | 1 + -200 | -1 + 0 | 0 + 0.22 | 1 + -12.3 | -1 + -0 | 0 +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/sin.adoc b/modules/reference/pages/sql/sql-functions/math-functions/sin.adoc new file mode 100644 index 000000000..dd70ddbad --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/sin.adoc @@ -0,0 +1,134 @@ += SIN +:description: SIN() is a numeric function that returns the trigonometric sine value of a specified angle in radians. +:page-topic-type: reference + +`SIN()` is a numeric function that returns the trigonometric sine value of a specified angle in radians. + +== Syntax + +The syntax of the `SIN()` function is as follows. + +[source,sql] +---- +SIN (x) +---- + +The `SIN()` function requires one argument: + +`x`:  A positive or a negative angle (or an expression that evaluates to an angle). + +== Examples + +=== Sine a positive value + +The example below will use the `SIN()` function with a positive angle as the argument. + +[source,sql] +---- +SELECT SIN(5); +---- + +It will return the sine value of 5. + +[source,sql] +---- ++-----------------------+ +| f    | ++-----------------------+ +| -0.9589242746631385  | ++-----------------------+ +---- + +=== Sine a negative value + +The following example shows the `SIN(`) function with a negative angle as the argument. + +[source,sql] +---- +SELECT SIN(-3); +---- + +The output will be as follows. + +[source,sql] +---- ++----------------------+ +| f                 | ++----------------------+ +| -0.1411200080598672 | ++----------------------+ +---- + +=== Sine a fraction value + +The following example shows the `SIN()` function with a fractional value as the argument. + +[source,sql] +---- +SELECT SIN(5.8732); +---- + +The output will be as follows. 
+
+[source,sql]
+----
++----------------------+
+| f                 |
++----------------------+
+| -0.3985959081271079 |
++----------------------+
+----
+
+=== Sine with an expression
+
+The `SIN()` function can also include an expression, as shown in the example below:
+
+[source,sql]
+----
+SELECT sin(8.5 * 2.3);
+----
+
+You will get the following output:
+
+[source,sql]
+----
++-----------------------+
+| f                 |
++-----------------------+
+| 0.6445566903363104 |
++-----------------------+
+----
+
+=== Use the `SIN()` function with a table
+
+In the following example, we will combine `SIN()` function with `CREATE TABLE` statement to obtain the sine values of a specific column.
+
+. Create a new table named *sineTable* containing the *initialValue* column. Input some values with the negative and positive angles into the column.
++
+[source,sql]
+----
+CREATE TABLE sineTable(initialValue int);
+INSERT INTO sineTable(initialValue)
+VALUES (-75),(180),(0),(-270);
+----
+
+. Run the query below to get the output of a sine value:
++
+[source,sql]
+----
+SELECT * ,SIN(initialValue) AS sinValue FROM sineTable;
+----
+
+. The final result will have the *initialValue* column with the source value and the *sinValue* column with their calculated sine values.
++ +[source,sql] +---- ++---------------+-------------------------------+ +| initialvalue  | sinValue                      | ++---------------+-------------------------------+ +| -75           | 0.38778163540943045           | +| 180           | -0.8011526357338304           | +| 0           | 0                             | +| -270     | 0.1760459464712114            | ++---------------+-------------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/math-functions/sqrt.adoc b/modules/reference/pages/sql/sql-functions/math-functions/sqrt.adoc new file mode 100644 index 000000000..23cad3ce7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/math-functions/sqrt.adoc @@ -0,0 +1,118 @@ += SQRT +:description: The SQRT() function returns the square root of a given positive number. +:page-topic-type: reference + +The `SQRT()` function returns the square root of a given positive number. + +== Syntax + +The syntax for the `SQRT()` function in Redpanda SQL is: + +[source,sql] +---- +SQRT(x) +---- + +The `SQRT()` function requires one argument: + +* `x`: A positive number or an expression that evaluates to a positive number. + +== Examples + +=== SQRT() a positive value + +The following example demonstrates how the `SQRT()` function can be used to find the square root of a positive integer: + +[source,sql] +---- +SELECT SQRT(81); +---- + +You will get the following result: + +[source,sql] +---- ++-----+ +| f | ++-----+ +| 9 | ++-----+ +---- + +=== SQRT() with an expression + +Let’s look at an example of using the `SQRT()` function to find the square root of the result of an expression. + +[source,sql] +---- +SELECT SQRT(60 + 4); +---- + +The result of the above statement is the square root of 64: + +[source,sql] +---- ++-----+ +| f | ++-----+ +| 8 | ++-----+ +---- + +=== SQRT() with double precision result + +In addition to integers, Redpanda SQL also supports calculating square roots with floating-point numbers as the outcome. 
For further details, please refer to the statement below:
+
+[source,sql]
+----
+SELECT SQRT(70);
+----
+
+The output of the statement above is 8.3666, which is the square root of 70 with double precision, as demonstrated below:
+
+[source,sql]
+----
++----------+
+| f |
++----------+
+| 8.3666 |
++----------+
+----
+
+=== SQRT() a negative number
+
+The following example demonstrates what happens when the `SQRT()` function is used with a negative value:
+
+[source,sql]
+----
+SELECT SQRT(-25);
+----
+
+As the `SQRT()` function only accepts positive numbers, you will get a *_NaN (Not a Number)_* result for the square root of -25, as shown below:
+
+[source,sql]
+----
++-------+
+| f |
++-------+
+| NaN |
++-------+
+----
+
+=== SQRT operator (`|/(x)`)
+
+Here’s an example using the SQRT operator (`|/(x)`) to calculate the square root of a given number:
+
+[source,sql]
+----
+SELECT |/(169) AS sqrt_operator;
+----
+
+In this example, we calculate the square root of 169 using the SQRT operator. The result of this query will be:
+
+[source,sql]
+----
+ sqrt_operator
+---------------
+ 13
+----
diff --git a/modules/reference/pages/sql/sql-functions/math-functions/to-char-from-number.adoc b/modules/reference/pages/sql/sql-functions/math-functions/to-char-from-number.adoc
new file mode 100644
index 000000000..fafd88abd
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/math-functions/to-char-from-number.adoc
@@ -0,0 +1,136 @@
+= TO_CHAR from Number
+:description: The TO_CHAR function formats a number into a string using a given format.
+:page-topic-type: reference
+
+The `TO_CHAR` function formats a number into a string using a given format.
+
+== Syntax
+
+The syntax for using the `TO_CHAR` function is as follows:
+
+[source,sql]
+----
+TO_CHAR(value, format_string)
+----
+
+Parameters in the syntax include:
+
+* `value`: A number that will be formatted to a string.
+* `format_string`: The template format used to convert the number to a string.
+
+== Format
+
+The format string supports the following template patterns (which can be lowercase):
+
+[width="100%",cols="16%,84%",options="header",]
+|===
+|*Pattern* |*Description*
+|`9` |Digit position (may be dropped if insignificant)
+|`0` |Digit position (never dropped)
+|`.` |Decimal point
+|`,` |Group (thousands) separator
+|`D` |Decimal point
+|`G` |Group separator
+|`S` |Plus/minus sign directly before or after a number
+|`PL` |Plus sign in the specified position (for positive numbers)
+|`MI` |Minus sign in the specified position (for negative numbers)
+|`SG` |Plus/minus sign in the specified position.
+|===
+
+=== Limitations
+
+* All text inside double quote `"\{text}"` will not be considered a pattern.
+* The quote character `"` will not appear in the result string.
+* Any text that does not match any pattern will be preserved in the result string.
+
+== Examples
+
+=== Format with leading zeros
+
+The query formats 123.456 with leading zeros using the pattern '`00000.00000`'.
+
+[source,sql]
+----
+SELECT TO_CHAR(123.456, '00000.00000');
+----
+
+The output displays the formatted number as shown below.
+
+[source,sql]
+----
+ to_char
+--------------
+ 00123.45600
+----
+
+=== Format with variable length
+
+The query formats the number 123.456 with a variable-length pattern '`99999.99999`'.
+
+[source,sql]
+----
+SELECT TO_CHAR(123.456, '99999.99999');
+----
+
+The output displays the formatted number as shown below.
+
+[source,sql]
+----
+ to_char
+--------------
+ 123.45600
+----
+
+=== Format with group
+
+The query formats the number 123456 with grouping separators using the pattern '`9,999,999,999`'.
+
+[source,sql]
+----
+SELECT TO_CHAR(123456, '9,999,999,999');
+----
+
+It will return the output below.
+
+[source,sql]
+----
+ to_char
+----------------
+ 123,456
+----
+
+=== Format with negative number
+
+The query formats the number -123 with a custom pattern including the sign.
+
+[source,sql]
+----
+SELECT TO_CHAR(-123, '"Number formatted with pattern:000S":{000S}');
+----
+
+The output shows the custom-formatted number.
+
+[source,sql]
+----
+ to_char
+-------------------------------------------
+ Number formatted with pattern:000S:{123-}
+----
+
+=== Format with sign
+
+The query formats the number -123.456 with a custom pattern including the sign and separated integer.
+
+[source,sql]
+----
+SELECT TO_CHAR(-123.456, '"Sign is: "SG" integer part is: "999", mantissa part is: ".999');
+----
+
+The output shows the customized format as shown below.
+
+[source,sql]
+----
+ to_char
+---------------------------------------------------------
+ Sign is: - integer part is: 123, mantissa part is: .456
+----
diff --git a/modules/reference/pages/sql/sql-functions/other-functions/coalesce.adoc b/modules/reference/pages/sql/sql-functions/other-functions/coalesce.adoc
new file mode 100644
index 000000000..e8e6f1655
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/other-functions/coalesce.adoc
@@ -0,0 +1,191 @@
+= coalesce()
+:description: Tables can hold null and non-null values.
+:page-topic-type: reference
+
+Tables can hold null and non-null values. Yet, often we prefer to overlook those null values and this is where `COALESCE()` steps in. It helps when we want to ignore null values while processing the data, by returning the first argument that is not null, while the remaining arguments from the first non-null argument are not evaluated.
+
+[NOTE]
+====
+If all arguments are null, the COALESCE function will return null.
+====
+
+== Syntax
+
+The syntax for the `COALESCE()` function is as follows:
+
+[source,sql]
+----
+COALESCE (argument_1, argument_2, …);
+----
+
+Key points from the syntax:
+
+* `COALESCE()` requires a minimum of two inputs.
+* It can take an unlimited number of arguments.
+* Evaluation occurs sequentially from left to right, stopping at the first non-null value.
+ +== Examples + +Here are some examples to illustrate the application of `COALESCE()`: + +=== Return the first non-null value + +In this example, we have a set of values. By using the `COALESCE()` function, we’re going to get the first non-null value from this set. + +[source,sql] +---- +SELECT COALESCE(9, 3, 8, 7, 1); +---- + +The result will be `9`, the first value without null among the provided options. + +[source,sql] +---- + coalesce +---------- + 9 +---- + +=== Handle NULL value as the last argument + +Let’s include NULL as the final argument and check the query output. + +[source,sql] +---- +Select COALESCE(3,4,5,9,10,NULL); +---- + +The function output is `3` because it returns the first non-null value. + +[source,sql] +---- + coalesce +---------- + 3 +---- + +=== Handle NULL value as the first argument + +Consider NULL as the first argument in the following example. + +[source,sql] +---- +Select COALESCE(NULL,1,5,7,9,2); +---- + +The output is `1`, as it is the first non-null value of the argument. + +[source,sql] +---- + coalesce +---------- + 1 +---- + +=== Handle multiple NULL values + +In the following query, NULL appears in the first, second, fourth, and last positions. + +[source,sql] +---- +Select COALESCE(NULL, NULL ,3, NULL, 7,9,4,5, NULL); +---- + +The `COALESCE()` function ignores the first two NULLs and returns the first non-null value, `3`. It does not process the subsequent NULL values. + +[source,sql] +---- + coalesce +---------- + 3 +---- + +=== Handle all NULL values + +Assume that the given values are entirely composed of nulls. + +[source,sql] +---- +Select COALESCE(NULL, NULL ,NULL, NULL); +---- + +In this case, the `COALESCE()` function returns an empty value (null). 
+ +[source,sql] +---- + coalesce +---------- +---- + +=== `COALESCE()` with table data + +Imagine we have the `employee_absent` table, which comprises a mix of NULL and non-null values: + +[source,sql] +---- +CREATE TABLE employee_absent ( + emp_name TEXT, + emp_dept TEXT, + absent TEXT +); + +INSERT INTO employee_absent (emp_name, emp_dept, absent) +VALUES + ('Alice', 'Finance', 'absent'), + ('Bob', 'Operations', 'absent'), + ('Carol', 'Finance', 'absent'), + ('David', 'HR', NULL), + ('Emily', 'HR', NULL); +---- + +Use the `SELECT` statement to display all the records: + +[source,sql] +---- +SELECT * FROM employee_absent; +---- + +[source,sql] +---- + emp_name | emp_dept | absent +----------+------------+-------- + Alice | Finance | absent + Bob | Operations | absent + Carol | Finance | absent + David | HR | + Emily | HR | +---- + +The query below uses the `COALESCE()` function on the `absent` column. It retrieves names and absences (with `out of office` for NULL values) for each employee. + +[source,sql] +---- +SELECT emp_name, COALESCE(absent, 'out of office') AS DisplayAbsent FROM employee_absent; +---- + +[source,sql] +---- + emp_name | displayabsent +----------+--------------- + Alice | absent + Bob | absent + Carol | absent + David | out of office + Emily | out of office +---- + +=== Error output in `COALESCE()` + +When specifying arguments with different datatypes, they should be convertible. + +[source,sql] +---- +Select Coalesce ('x',NULL,1); +---- + +If the datatypes cannot be converted, the `COALESCE()` function will generate an error, as shown below. 
+ +[source,sql] +---- +ERROR: invalid input syntax for type integer: "x" +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/col-description.adoc b/modules/reference/pages/sql/sql-functions/other-functions/col-description.adoc new file mode 100644 index 000000000..01438b4da --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/col-description.adoc @@ -0,0 +1,29 @@ += col_description() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-COMMENT[col_description()^] is a comment information function that +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-COMMENT[col_description()^] is a comment information function that retrieves the comment associated with a specified table column. + +== Syntax + +The syntax for the `col_description()` function is as follows: + +.... +col_description (table_oid, column_number) → NULL +.... + +== Parameters + +The following parameters are required to execute this function: + +* link:https://www.postgresql.org/docs/current/datatype-oid.html[table_oid^]: specifies the object identifier (OID) of the table containing the column for which you want to retrieve the comment +* link:https://www.postgresql.org/docs/current/datatype-numeric.html#DATATYPE-INT[column_number^]: indicates the ordinal position of the column within the table (starting from 1 for the first column) + +[NOTE] +==== +It is important to note that the column number must be provided as an object identifier (OID), which can be achieved by casting the table name to `regclass` +==== + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified diff --git a/modules/reference/pages/sql/sql-functions/other-functions/current-database.adoc b/modules/reference/pages/sql/sql-functions/other-functions/current-database.adoc new file mode 100644 index 000000000..45f0836a3 --- /dev/null +++ 
b/modules/reference/pages/sql/sql-functions/other-functions/current-database.adoc @@ -0,0 +1,34 @@ += current_database() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[current_database()^] is a session information function that +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[current_database()^] is a session information function that returns the current database’s name. + +== Syntax + +The syntax for`current_database()` function is as follows: + +[source,sql] +---- +SELECT current_database(); +---- + +== Example + +In the following example, we will obtain the database name to which we are currently connected: + +[source,sql] +---- +SELECT current_database(); +---- + +By executing the query above, we will get the following output: + +[source,sql] +---- ++------------+ +| f       | ++------------+ +| Oxla   | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/current-schema.adoc b/modules/reference/pages/sql/sql-functions/other-functions/current-schema.adoc new file mode 100644 index 000000000..8f0765286 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/current-schema.adoc @@ -0,0 +1,45 @@ += current_schema() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[current_schema()^] is a session information function that r +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[current_schema()^] is a session information function that returns the name of the first existing schema. 
+ +== Syntax + +There are two available syntax versions of `current_schema()` function: + +.Version 1 +[source,sql] +---- +SELECT current_schema(); +---- + +.Version 2 +[source,sql] +---- +SELECT current_schema; +---- +[NOTE] +==== +It will return `NULL` if none of the schemas from `search_path` exist +==== + +== Example + +The following example shows how to get the current schema name using this function + +[source,sql] +---- +SELECT current_schema(); +---- + +The output from the above query can be as follows: + +[source,sql] +---- ++------------+ +| f | ++------------+ +| public | ++------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/has-schema-privillege.adoc b/modules/reference/pages/sql/sql-functions/other-functions/has-schema-privillege.adoc new file mode 100644 index 000000000..4285742ed --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/has-schema-privillege.adoc @@ -0,0 +1,73 @@ += has_schema_privilege() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-ACCESS[has_schema_privilege()^]has_schema_privilege is an access pr +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-ACCESS[has_schema_privilege()^]`has_schema_privilege` is an access privilege inquiry function that checks whether the current user has specific privileges on a schema. + +== Syntax + +There are two available syntax versions of the `has_schema_privilege` function: + +.Version 1 +[source,sql] +---- +SELECT has_schema_privilege('user', 'schema', 'privilege'); +---- + +.Version 2 +[source,sql] +---- +SELECT has_schema_privilege('schema', 'privilege'); +---- +No matter what syntax version you choose, the `has_schema_privilege()` function will always return `TRUE (t)`. 
+
+== Parameters
+
+The following parameters are required to execute this function:
+
+* `user`: name of the user who has the privileges (can be any string value)
+* `schema`: name of the schema for which you want to check privileges (can be any string value or string columns from other tables)
+* `privilege`: specifies the specific privilege you want to check for in the schema (currently, the function supports `create` and `usage`)
+
+[NOTE]
+====
+The comparison for the `privilege` is case-insensitive, so you can use lowercase or uppercase notation for the privilege name
+====
+
+== Examples
+
+=== Check for `CREATE` privilege
+
+In this example, we will use the `has_schema_privilege()` function to determine if the current user has the `create` privilege on a schema named "`*public*`":
+
+[source,sql]
+----
+SELECT has_schema_privilege('public', 'create');
+----
+
+By executing the query above, we will get `TRUE`, which means that the current user has a `create` privilege on the "`public`" schema.
+
+[source,sql]
+----
+ has_schema_privilege
+----------------------
+ t
+----
+
+=== Check for `USAGE` privilege
+
+You can also use the `has_schema_privilege()` function to check for the `usage` privilege on a schema. For example, to check whether the user `cahyo` has the `usage` privilege on the "`*public*`" schema, you can execute the following code:
+
+[source,sql]
+----
+SELECT has_schema_privilege('cahyo', 'public', 'USAGE');
+----
+
+The query above will return `TRUE`, which means that the user `cahyo` has the `usage` privilege on the "`*public*`" schema.
+ +[source,sql] +---- + has_schema_privilege +---------------------- + t +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/index.adoc b/modules/reference/pages/sql/sql-functions/other-functions/index.adoc new file mode 100644 index 000000000..c74db2e3f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/index.adoc @@ -0,0 +1,30 @@ += Overview +:description: Besides math, aggregate, window, string, timestamp, JSON and trigonometric functions we also provide support for other functions. + +Besides xref:reference:sql/sql-functions/math-functions/index.adoc[math], xref:reference:sql/sql-functions/aggregate-functions/index.adoc[aggregate], xref:reference:sql/sql-functions/window-functions/index.adoc[window], xref:reference:sql/sql-functions/string-functions/index.adoc[string], xref:reference:sql/sql-functions/timestamp-functions/index.adoc[timestamp], xref:reference:sql/sql-functions/json-functions/index.adoc[JSON] and xref:reference:sql/sql-functions/trigonometric-functions/index.adoc[trigonometric] functions we also provide support for other functions. 
The list of them can be found below: + +[width="100%",cols="46%,54%",options="header",] +|=== +|*Function* |*Description* +|xref:reference:sql/sql-functions/other-functions/coalesce.adoc[coalesce()] |Returns the first argument that is not null, while the remaining arguments from the first non-null argument are not evaluated +|xref:reference:sql/sql-functions/other-functions/current-database.adoc[current_database()] |Returns the current database’s name +|xref:reference:sql/sql-functions/other-functions/current-schema.adoc[current_schema()] |Returns the schema’s name (first in the search path) +|xref:reference:sql/sql-functions/other-functions/has-schema-privillege.adoc[has_schema_privilege()] |Checks whether the current user has specific privileges on a schema +|xref:reference:sql/sql-functions/other-functions/nullif.adoc[nullif()] |Replaces a given value with null if it matches a specific criterion +|xref:reference:sql/sql-functions/other-functions/pg-get-expr.adoc[pg_get_expr()] |Retrieves the internal form of an individual expression (such as the default value for a column) +|xref:reference:sql/sql-functions/other-functions/pg-total-relation-size.adoc[pg_total_relation_size()] |Retrieves the size of a table +|xref:reference:sql/sql-functions/other-functions/pg-typeof.adoc[pg_typeof()] |Retrieves the data type of any given value +|xref:reference:sql/sql-functions/other-functions/pg-encoding-to-char.adoc[pg_encoding_to_char()] |Converts an encoding internal identifier to a human-readable name +|xref:reference:sql/sql-functions/other-functions/pg-get-indexdef.adoc[pg_get_indexdef()] |Reconstructs the PostgreSQL command used to retrieve the definition of a specified index +|xref:reference:sql/sql-functions/other-functions/pg-get-userbyid.adoc[pg_get_userbyid()] |Retrieves that name of a user (role) given its unique identifier (OID) +|xref:reference:sql/sql-functions/other-functions/pg-relation-is-publishable.adoc[pg_relation_is_publishable()] |Determines whether a 
specified relation (table) can be published in a publication +|xref:reference:sql/sql-functions/other-functions/pg-size-pretty.adoc[pg_size_pretty()] |Converts sizes in bytes into a human-readable format +|xref:reference:sql/sql-functions/other-functions/pg-table-size.adoc[pg_table_size()] |Retrieves that size of a specific table, including its associated storage components but excluding indexes +|xref:reference:sql/sql-functions/other-functions/pg-table-is-visible.adoc[pg_table_is_visible()] |Checks whether a specified table (or other database object) is visible in the current schema search path +|xref:reference:sql/sql-functions/other-functions/pg-get-constraintdef.adoc[pg_get_constraintdef()] |Retrieves the definition of a specific constraint in a human-readable format +|xref:reference:sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc[pg_get_statisticsobjdef_columns()] |Retrieves the definitions of columns associated with a specified statistics object +|xref:reference:sql/sql-functions/other-functions/obj-description.adoc[obj_description()] |Returns the comment associated with a specific database object +|xref:reference:sql/sql-functions/other-functions/col-description.adoc[col_description()] |Retrieves the comment associated with a specified table column based on its name +|xref:reference:sql/sql-functions/other-functions/shobj-description.adoc[shobj_description()] |Retrieves the comment associated with a shared database object +|xref:reference:sql/sql-functions/other-functions/pg-backend-pid.adoc[pg_backend_pid()] |Returns the process ID (PID) of the node handling the current session +|=== diff --git a/modules/reference/pages/sql/sql-functions/other-functions/nullif.adoc b/modules/reference/pages/sql/sql-functions/other-functions/nullif.adoc new file mode 100644 index 000000000..13e8fafb4 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/nullif.adoc @@ -0,0 +1,197 @@ += nullif() +:description: The NULLIF() 
function allows us to replace a given value with null if it matches a specific criterion. +:page-topic-type: reference + +The `NULLIF()` function allows us to replace a given value with null if it matches a specific criterion. + +== Syntax + +The following illustrates the syntax of the `NULLIF` function: + +[source,sql] +---- +NULLIF(argument_1,argument_2); +---- + +From the syntax above, we learn that the `NULLIF` function takes two arguments: + +* The first argument is the value we want to evaluate +* The second argument is the value we want to treat as null if the first argument matches it + +[TIP] +==== +*The Output*: + If the first argument matches the second argument, the `NULLIF()` function returns *NULL*. Otherwise, it returns the first argument as-is. +==== + +== Examples + +=== Handle equal values + +In this case, the `NULLIF` function is used to compare the values 4 and 4. + +[source,sql] +---- +SELECT NULLIF (4, 4); +---- + +The result will be `NULL` since the two values being compared are equal (4 = 4). + +[source,text] +.... + if +---- +.... + +=== Handle different values + +In this example, we want to use the `NULLIF` function to manage different values. + +[source,sql] +---- +SELECT NULLIF (9, 0); +---- + +The result will be `9` because the second value in the `NULLIF` function is 0 (The two values are not equal). + +[source,text] +---- + if + + 9 +---- + +=== String comparison + +In this case, the `NULLIF` function compares the strings '`L`' and '`O`'. + +[source,sql] +---- +SELECT NULLIF ('L', 'O'); +---- + +The result will be `L` because the two strings being compared ('`L`' and '`O`') are not equal. Therefore, the function returns the first string. + +[source,text] +---- + if + + L +---- + +=== Handle default values + +Suppose we have an `employees` table with columns for `name` and `salary`. 
This query retrieves employee names and their adjusted salaries, where a salary of 0 is replaced with NULL: + +[source,sql] +---- +CREATE TABLE employees ( + name TEXT, + salary INT +); + +INSERT INTO employees (name, salary) +VALUES + ('John', 50000), + ('Jane', 0), + ('Roy', 0), + ('NEil', 0), + ('Michael', 75000); +---- + +Display the records of the table: + +[source,sql] +---- +SELECT * FROM employees; +---- + +[source,text] +---- + name | salary +---------+-------- + John | 50000 + Jane | 0 + Roy | 0 + NEil | 0 + Michael | 75000 +---- + +This query retrieves employee names and their adjusted salaries, where a salary of 0 is replaced with NULL: + +[source,sql] +---- +SELECT name, NULLIF(salary, 0) AS adjusted_salary +FROM employees; +---- + +The `NULLIF` function checks if the `salary` value is 0. If it is, the function returns NULL - otherwise, it returns the original `salary` value. + +[source,text] +---- + name | adjusted_salary +---------+----------------- + John | 50000 + Jane | + Roy | + NEil | + Michael | 75000 +---- + +=== Avoid division by zero + +Suppose we have a `fractions` table with columns, a `numerator` and a `denominator`. + +[source,sql] +---- +CREATE TABLE fractions ( + numerator INT, + denominator INT +); + +INSERT INTO fractions (numerator, denominator) +VALUES + (10, 2), + (20, 0), + (15, 3), + (75, 0), + (15, 3); +---- + +Display the table using the `SELECT` statement: + +[source,sql] +---- +SELECT * FROM fractions; +---- + +[source,text] +---- + numerator | denominator +-----------+------------- + 10 | 2 + 20 | 0 + 15 | 3 + 75 | 0 + 15 | 3 +---- + +Here, the `NULLIF` function is applied to the `denominator` column. If the `denominator` is 0, the function returns NULL, avoiding division by zero. + +[source,sql] +---- +SELECT numerator, denominator, numerator / NULLIF(denominator, 0) AS "result" FROM fractions; +---- + +The result is shown in the result column. 
+ +[source,text] +---- + numerator | denominator | result +-----------+-------------+-------- + 10 | 2 | 5 + 20 | 0 | + 15 | 3 | 5 + 75 | 0 | + 15 | 3 | 5 +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/obj-description.adoc b/modules/reference/pages/sql/sql-functions/other-functions/obj-description.adoc new file mode 100644 index 000000000..63be5c051 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/obj-description.adoc @@ -0,0 +1,24 @@ += obj_description() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-COMMENT[obj_description()^] is a comment information function that +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-COMMENT[obj_description()^] is a comment information function that returns the comment associated with a specific database object. + +== Syntax + +The syntax for the `obj_description()` function is as follows: + +.... +obj_description (object_oid, catalog_name) → NULL +.... 
+ +== Parameters + +The following parameters are required to execute this function: + +* link:https://www.postgresql.org/docs/current/datatype-oid.html[object_oid^]: specifies the object identifier (OID) of the database object for which you want to retrieve the comment +* link:https://www.postgresql.org/docs/current/catalogs.html[catalog_name^]: specifies the name of the system catalog that contains the object + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-backend-pid.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-backend-pid.adoc new file mode 100644 index 000000000..6c5190b54 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-backend-pid.adoc @@ -0,0 +1,14 @@ += pg_backend_pid() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[pg_backend_pid()^] is a session information function that r +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-SESSION[pg_backend_pid()^] is a session information function that returns the process ID (PID) of the server process handling the current session. It is useful for identifying the backend process associated with a specific database connection, allowing for monitoring and tasks management. 
+ +== Syntax + +The syntax for the `pg_backend_pid()` function is as follows: + +[source,sql] +---- +pg_backend_pid() +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-encoding-to-char.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-encoding-to-char.adoc new file mode 100644 index 000000000..e164401b2 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-encoding-to-char.adoc @@ -0,0 +1,52 @@ += pg_encoding_to_char() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_encoding_to_char()^] is a system catalog information fun +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_encoding_to_char()^] is a system catalog information function that converts an encoding internal identifier to a human-readable name. + +== Syntax + +The syntax for the `pg_encoding_to_char()` function is as follows: + +[source,sql] +---- +pg_encoding_to_char(number) +---- + +== Parameters + +The following parameters are required to execute function: + +* `number`: specifies the integer value representing the encoding identifier + +== Examples + +[source,sql] +---- +SELECT pg_encoding_to_char(1); + + pg_encoding_to_char +--------------------- + EUC_JP +(1 row) +---- + +[source,sql] +---- +SELECT pg_encoding_to_char(0); + + pg_encoding_to_char +--------------------- + SQL_ASCII +(1 row) +---- + +[source,sql] +---- +SELECT pg_encoding_to_char(-1); + + pg_encoding_to_char +--------------------- + +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-constraintdef.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-constraintdef.adoc new file mode 100644 index 000000000..1e119ac7b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-constraintdef.adoc @@ -0,0 +1,24 @@ += pg_get_constraintdef() 
+:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_constraintdef()^] is a system catalog information fu +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_constraintdef()^] is a system catalog information function that retrieves the definition of a specific constraint in a human-readable format. + +== Syntax + +The syntax for the `pg_get_constraintdef()` function is as follows: + +.... +pg_get_constraintdef (constraint_oid [, pretty_bool]) → NULL +.... + +== Parameters + +The following parameters are required to execute this function: + +* link:https://www.postgresql.org/docs/current/catalog-pg-constraint.html[constraint_oid^]: specifies the object identifier (OID) of the constraint for which you want to retrieve the definition +* link:https://www.postgresql.org/docs/current/datatype-boolean.html[pretty_bool^]: controls whether to format the output in a human-readable way + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-expr.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-expr.adoc new file mode 100644 index 000000000..5d17e4cef --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-expr.adoc @@ -0,0 +1,76 @@ += pg_get_expr() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_expr()^] is a system catalog information function th +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_expr()^] is a system catalog information function that retrieves the internal form of an individual expression, such as the default value for a column. 
+ +== Syntax + +There are two available syntax versions of the `pg_get_expr()` function: + +.Version 1 +[source,sql] +---- +SELECT pg_get_expr('expr_text', relation_oid); +---- + +.Version 2 +[source,sql] +---- +SELECT pg_get_expr('expr_text', relation_oid, pretty_bool); +---- +Both versions of the `pg_get_expr()` function return an empty string `""`. + +== Parameters + +The following parameters are required to execute this function: + +* `expr_text`: expression for which you want to obtain the internal representation (can be any string value) +* `relation_oid`: OID (Object Identifier) of the table the expression belongs to (integer type) +* `pretty_bool`: boolean value determining whether to format the expression in a more human-readable format (`TRUE`) or not (`FALSE`) + +== Example + +For the needs of this section, first we will create a sample table named *employees* + +[source,sql] +---- +CREATE TABLE employees ( + id INT, + name TEXT, + salary TEXT +); +---- + +Then we will get the OID of the table + +[source,sql] +---- +SELECT oid FROM pg_class WHERE relname = 'employees'; +---- + +[source,sql] +---- + oid +------ + 1018 +---- + +As the last step, we will retrieve the internal form for the `salary` column using `pg_get_expr()` function + +[source,sql] +---- +-- Version 1 +SELECT pg_get_expr('salary', 1018); + +-- Version 2 +SELECT pg_get_expr('salary', 1018, TRUE); +---- + +By executing any of the queries above, we will get the following output: + +[source,sql] +---- + pg_get_expr +------------- +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-indexdef.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-indexdef.adoc new file mode 100644 index 000000000..ded3c0dac --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-indexdef.adoc @@ -0,0 +1,70 @@ += pg_get_indexdef() +:description: The 
link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_indexdef()^] is a system catalog information functio +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_indexdef()^] is a system catalog information function that reconstructs the PostgreSQL command used to retrieve the definition of a specified index. + +== Syntax + +Here are the two available syntax versions of the `pg_get_indexdef()` function: + +.Version 1 +[source,sql] +---- +pg_get_indexdef(index_oid, column_oid) +---- + +.Version 2 +[source,sql] +---- +pg_get_indexdef(index_oid, column_oid, pretty_bool) +---- + +== Parameters + +The following parameters are required to execute this function: + +* `index_oid`: specifies the object identifier (OID) of the index +* `column_oid`: indicates the column number within the index (starting from 1) +* `pretty_bool`: controls whether to format the output in a human-readable way + +== Example + +In this example we’ll start from creating a sample table and an index for it + +[source,sql] +---- +CREATE TABLE sample_table(col int); +CREATE INDEX sample_index ON sample_table(col); +---- + +Once that is done, we can get the OID of the index in a following way + +[source,sql] +---- +SELECT oid FROM pg_class WHERE relname = 'sample_index'; +---- + +[source,sql] +---- + oid +------ + 16387 +---- + +As the last step we’re going to retrieve the index definition + +[source,sql] +---- +SELECT pg_get_indexdef(16387); +---- + +Here is the reconstructed definition: + +[source,sql] +---- + pg_get_indexdef +------------------------------------------------------- + CREATE INDEX sample_index ON public.sample_table(col) +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc new file mode 100644 index 
000000000..b9890829d --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-statisticsobjdef-columns.adoc @@ -0,0 +1,17 @@ += pg_get_statisticsobjdef_columns() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_statisticsobjdef_columns()^] is a system catalog inf +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_statisticsobjdef_columns()^] is a system catalog information function that retrieves information about the columns associated with an extended statistics object. + +== Syntax + +The syntax for the `pg_get_statisticsobjdef_columns()` function is as follows: + +.... +pg_get_statisticsobjdef_columns() → NULL +.... + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-get-userbyid.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-userbyid.adoc new file mode 100644 index 000000000..05165546f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-get-userbyid.adoc @@ -0,0 +1,57 @@ += pg_get_userbyid() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_userbyid()^] is a system catalog information functio +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_get_userbyid()^] is a system catalog information function that retrieves that name of a user (role) given its unique identifier (OID). 
+ +== Syntax + +The syntax for the `pg_get_userbyid()` function is as follows: + +[source,sql] +---- +pg_get_userbyid(role_oid) +---- + +== Parameters + +The following parameters are required to execute this function: + +* `role_oid`: specifies the object identifier (OIDs) of the users + +== Example + +In this example, what we will do first is to get the OIDs of all the users + +[source,sql] +---- +SELECT id,name FROM oxla_internal.oxla_role; +---- + +Then, return the list of users with their ids (OIDs): + +[source,sql] +---- + id | name +----+--------- + 1 | oxla + 2 | other_user +(2 rows) +---- + +As the next step we will translate the OID to a role name in a following way: + +[source,sql] +---- +SELECT pg_get_userbyid(2); +---- + +By executing the code above, we will get the following result: + +[source,sql] +---- + pg_get_userbyid +----------------- + other_user +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-relation-is-publishable.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-relation-is-publishable.adoc new file mode 100644 index 000000000..3581a5be1 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-relation-is-publishable.adoc @@ -0,0 +1,40 @@ += pg_relation_is_publishable() +:description: Thepg_relation_is_publishable() function is used to determine whether a specified relation (table) can be published in a link:https://www.postgresql.o +:page-topic-type: reference + +The`pg_relation_is_publishable()` function is used to determine whether a specified relation (table) can be published in a link:https://www.postgresql.org/docs/current/logical-replication-publication.html[publication^]. + +== Syntax + +The syntax for the `pg_relation_is_publishable()` function is as follows: + +[source,sql] +---- +pg_relation_is_publishable(table_name_or_oid) +---- + +The function returns `false` for every existing table and `NULL` for any non-existing table. 
+ +== Parameters + +The following parameters are required to execute this function: + +* `table_name_or_oid`: specifies the object identifier (OID) of a table or it’s name + +== Examples + +[source,sql] +---- +SELECT pg_relation_is_publishable('existing_table'); + pg_relation_is_publishable +---------------------------- + f +---- + +[source,sql] +---- +SELECT pg_relation_is_publishable(16386); + pg_relation_is_publishable +---------------------------- + f +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-size-pretty.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-size-pretty.adoc new file mode 100644 index 000000000..16172609a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-size-pretty.adoc @@ -0,0 +1,40 @@ += pg_size_pretty() +:description: The link:https://www.postgresql.org/docs/9.4/functions-admin.html[pg_size_pretty()^] is a database object management function that converts sizes in by +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/9.4/functions-admin.html[pg_size_pretty()^] is a database object management function that converts sizes in bytes into a human-readable format. 
+ +== Syntax + +The syntax for the `pg_size_pretty()` function is as follows: + +[source,sql] +---- +pg_size_pretty(size) +---- + +== Parameters + +The following parameters are required to execute this function: + +* `size`: specifies the size in bytes that you want to convert + +== Examples + +[source,sql] +---- +SELECT pg_size_pretty(100); + pg_size_pretty +---------------- + 100 bytes +(1 row) +---- + +[source,sql] +---- +SELECT pg_size_pretty(1000000); + pg_size_pretty +---------------- + 977 kB +(1 row) +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-table-is-visible.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-is-visible.adoc new file mode 100644 index 000000000..77cbcec8b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-is-visible.adoc @@ -0,0 +1,45 @@ += pg_table_is_visible() +:description: The link:https://www.postgresql.org/docs/9.4/functions-admin.html[pg_table_is_visible()^] is a schema visibility inquiry function that checks whether a +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/9.4/functions-admin.html[pg_table_is_visible()^] is a schema visibility inquiry function that checks whether a specified table or other database object is visible in the current schema search path. 
+ +== Syntax + +The syntax for the `pg_table_is_visible()` function is as follows: + +[source,sql] +---- +pg_table_is_visible(table_or_index_oid) +---- + +== Parameters + +The following parameters are required to run this function: + +* `table_or_index_oid`: specifies the object identifier (OID) of a table or it’s name + +== Examples + +[source,sql] +---- +SELECT pg_table_is_visible(-1); + pg_table_is_visible +---------------------------- +---- + +[source,sql] +---- +SELECT pg_table_is_visible(16386); + pg_table_is_visible +---------------------------- + t +---- + +[source,sql] +---- +SELECT pg_table_is_visible(16381); + pg_table_is_visible +---------------------------- + f +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-table-size.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-size.adoc new file mode 100644 index 000000000..99aa274f9 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-table-size.adoc @@ -0,0 +1,20 @@ += pg_table_size() +:description: The link:https://www.postgresql.org/docs/9.1/functions-admin.html[pg_table_size()^] is a system administration function that retrieves the size of a sp +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/9.1/functions-admin.html[pg_table_size()^] is a system administration function that retrieves the size of a specific table, including its associated storage components but excluding indexes. 
+ +== Syntax + +The syntax for the `pg_table_size()` function is as follows: + +[source,sql] +---- +pg_table_size(regclass) +---- + +== Parameters + +The following parameters are required to execute this function: + +* `regclass`: name or object identifier (OID) of the table whose size is to be retrieved diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-total-relation-size.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-total-relation-size.adoc new file mode 100644 index 000000000..50dc99aa9 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-total-relation-size.adoc @@ -0,0 +1,59 @@ += pg_total_relation_size() +:description: The link:https://www.postgresql.org/docs/9.1/functions-admin.html[pg_total_relation_size()^] is a database object size function that retrieves the size +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/9.1/functions-admin.html[pg_total_relation_size()^] is a database object size function that retrieves the size of a table and is useful for monitoring the storage requirements. + +== Syntax + +The syntax for the `pg_total_relation_size()` function is as follows: + +[source,sql] +---- +pg_total_relation_size('relation_name'); +---- + +It returns the size of the specified table in bytes. 
+ +== Parameters + +The following parameters are required to execute this function: + +* `relation_name`: name of the table for which you want to determine the size + +== Example + +For the needs of this section, we will create a *users* table + +[source,sql] +---- +CREATE TABLE users ( + username TEXT, + email TEXT +); +INSERT INTO users (username, email) VALUES + ('john_doe', 'john.doe@example.com'), + ('jane_smith', 'jane.smith@example.com'), + ('alice_smith', 'alice.smith@example.com'), + ('bob_jones', 'bob.jones@example.com'), + ('susan_wilson', 'susan.wilson@example.com'), + ('michael_jackson', 'michael.jackson@example.com'), + ('lisa_johnson', 'lisa.johnson@example.com'), + ('david_smith', 'david.smith@example.com'); +---- + +Now we would like to use the `pg_total_relation_size()` function to determine the size of the *users* table (in bytes) + +[source,sql] +---- +SELECT pg_total_relation_size('users'); +---- + +By executing the query above, we will get the following output: + +[source,sql] +---- + pg_total_relation_size +------------------------ + 556 +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/pg-typeof.adoc b/modules/reference/pages/sql/sql-functions/other-functions/pg-typeof.adoc new file mode 100644 index 000000000..6442aefdd --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/pg-typeof.adoc @@ -0,0 +1,108 @@ += pg_typeof() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_typeof()^] is a system catalog information function that +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[pg_typeof()^] is a system catalog information function that retrieves the data type of any given value. It returns a string literal corresponding to the expression type. 
+ +== Syntax + +The syntax of the `pg_typeof()` function is as follows: + +[source,sql] +---- +SELECT pg_typeof(`any`); +---- + +== Parameters + +The following parameters are required to execute this function: + +* `any`: represents any value you want to determine the data type of + +== Examples + +=== Numeric + +This example shows the function usage with a numeric value: + +[source,sql] +---- +SELECT pg_typeof(100) as "data type"; +---- + +[source,sql] +---- + data type +----------- + integer +---- + +=== String + +In this example, we will use a string value as an input: + +[source,sql] +---- +SELECT pg_typeof('event'::TEXT) as "data type"; +---- + +[source,sql] +---- + data type +----------- + text +---- + +=== Interval + +Here we will focus on using an interval input: + +[source,sql] +---- +SELECT pg_typeof(INTERVAL '1 day') as "data type"; +---- + +[source,sql] +---- + data type +----------- + interval +---- + +=== Table + +For the needs of this section we will create a sample table and then use `pg_typeof()` to retrieve the data types of information stored in the table + +[source,sql] +---- +CREATE TABLE timestamp_example ( + id int, + event_time timestamp, + description text +); + +INSERT INTO timestamp_example (event_time, description) +VALUES + ('2023-10-20 12:30:00', 'Event 1'), + (NULL, 'Event 2'); +---- + +Now that we created the table, let’s use `pg_typeof()` function to determine the data types of the *event_time* and description columns for each row + +[source,sql] +---- +SELECT + pg_typeof(event_time) AS event_time_type, + pg_typeof(description) AS description_type +FROM timestamp_example; +---- + +By executing the query above we will get the following output + +[source,sql] +---- + event_time_type | description_type +-----------------------------+------------------ + timestamp without time zone | text + timestamp without time zone | text +---- diff --git a/modules/reference/pages/sql/sql-functions/other-functions/shobj-description.adoc 
b/modules/reference/pages/sql/sql-functions/other-functions/shobj-description.adoc new file mode 100644 index 000000000..140ab632a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/other-functions/shobj-description.adoc @@ -0,0 +1,24 @@ += shobj_description() +:description: The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[shobj_description()^] is a comment information function tha +:page-topic-type: reference + +The link:https://www.postgresql.org/docs/current/functions-info.html#FUNCTIONS-INFO-CATALOG[shobj_description()^] is a comment information function that retrieves the comment associated with a shared database object. + +== Syntax + +The syntax for the `shobj_description()` function is as follows: + +.... +shobj_description (object_oid, catalog_name) → NULL +.... + +== Parameters + +The following parameters are required to execute this function: + +* link:https://www.postgresql.org/docs/current/datatype-oid.html[object_oid^]: specifies the object identifier (OID) of the shared object for which you want to retrieve the comment +* link:https://www.postgresql.org/docs/current/catalogs.html[catalog_name^]: specifies the name of the system catalog that contains the shared object + +== Restrictions + +* This function always returns `NULL` if there are no parameters specified diff --git a/modules/reference/pages/sql/sql-functions/string-functions/concat.adoc b/modules/reference/pages/sql/sql-functions/string-functions/concat.adoc new file mode 100644 index 000000000..9805c2216 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/concat.adoc @@ -0,0 +1,118 @@ += CONCAT +:description: The CONCAT() function is used to concatenate one or more input values into a single result. +:page-topic-type: reference + +The `CONCAT()` function is used to concatenate one or more input values into a single result. 
It supports all data types in Redpanda SQL, except `TIMESTAMPTZ`, and the output will be returned as a concatenation of the input values. + +*Special cases:* Returns `NULL` if there are no input rows or `NULL` values. + +== Examples + +=== Basic `CONCAT()` function + +The below example uses the `CONCAT()` function to concatenate three values = into a single result: + +[source,sql] +---- +SELECT CONCAT ('Oxla', '.', 'com') AS "Website"; +---- + +The final result will be as follows: + +[source,sql] +---- ++------------+ +| Website | ++------------+ +| Oxla.com | ++------------+ +---- + +=== `CONCAT()` function using column + +We have an example of a *payment* table that stores customer payment data. + +[source,sql] +---- +CREATE TABLE payment ( + paymentid int, + custFirstName text, + custLastName text, + product text, + ordertotal int +); +INSERT INTO payment + (paymentid, custFirstName, custLastName, product, ordertotal) +VALUES + (9557451,'Alex','Drue','Latte',2.10), + (9557421,'Lana','Rey','Latte',2.10), + (9557411,'Tom','Hanks','Americano',1.85), + (9557351,'Maya','Taylor','Cappuccino',2.45), + (9557321,'Smith','Jay','Cappuccino',2.45), + (9557311,'Will','Ritchie','Americano',1.85); +---- + +[source,sql] +---- +SELECT * FROM payment; +---- + +The above query will display the following table: + +[source,sql] +---- ++------------+----------------+----------------+--------------+---------------+ +| paymentid | custFirstName | custLastName | product | ordertotal | ++------------+----------------+----------------+--------------+---------------+ +| 9557451 | Alex | Drue | Latte | 2.10 | +| 9557421 | Lana | Rey | Latte | 2.10 | +| 9557411 | Tom | Hanks | Americano | 1.85 | +| 9557351 | Maya | Taylor | Cappuccino | 2.45 | +| 9557321 | Smith | Jay | Cappuccino | 2.45 | +| 9557311 | Will | Ritchie | Americano | 1.85 | ++------------+----------------+----------------+--------------+---------------+ +---- + +The following query will concatenate values in the `custFirstName` 
and `custLastName` columns of the *payment* table: + +[source,sql] +---- +SELECT CONCAT (custFirstName, ' ', custLastName) AS "Customer Name" +FROM payment; +---- + +It will display an output where spaces separate the first and last names. + +[source,sql] +---- ++-----------------+ +| Customer Name | ++-----------------+ +| Tom Hanks | +| Lana Rey | +| Alex Drue | +| Will Ritchie | +| Smith Jay | +| Maya Taylor | ++-----------------+ +---- + +=== CONCAT() function with NULL + +We use the `CONCAT()` function in the following example to concatenate a string with a `NULL` value: + +[source,sql] +---- +SELECT CONCAT('Talent Source ',NULL) AS "concat"; +---- + +The result shows that the `CONCAT` function will skip the `NULL` value: + +[source,sql] +---- ++------------------+ +| concat | ++------------------+ +| Talent Source | ++------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/ends-with.adoc b/modules/reference/pages/sql/sql-functions/string-functions/ends-with.adoc new file mode 100644 index 000000000..b943596e9 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/ends-with.adoc @@ -0,0 +1,154 @@ += ENDS_WITH +:description: The ENDS_WITH() function determines whether the first argument ends with a specified string in the second argument or not. +:page-topic-type: reference + +The `ENDS_WITH()` function determines whether the first argument ends with a specified string in the second argument or not. + +[source,sql] +---- +ENDS_WITH(first_argument, 'second_argument') +---- + +* `first_argument`: the specified argument, which will be the search reference. It can be a string or a column name. +* `second_argument`: the specified argument, which will have the search keywords. + +The input type will be `STRING`, and the return type is `BOOL`, shown as `true` or `false`. + +*Special case:* + +* It will return `NULL` for the `NULL` record. 
+* It will return `true` (including the `NULL` record) if the `second_argument` is not specified. + +== Examples + +=== `ENDS_WITH()` function using column + +Let’s say we have a table named *courses*: + +[source,sql] +---- +CREATE TABLE courses ( + course_id int, + course_name text, + credits text +); +INSERT INTO courses + (course_id, course_name, credits) +VALUES + (2111,'Basics of Plant Biotechnology',2), + (2102,'Biochemistry',3), + (1241,'Statistics',3), + (4142,'Microbial Biodiversity',2), + (3262,'Introduction to Plant Pathology',3), + (3233,'Enzyme Technology',2), + (1201,'Rural Sociology',2); +---- + +[source,sql] +---- +SELECT * FROM courses; +---- + +The above query will display the following table: + +[source,sql] +---- ++------------+----------------------------------+-----------+ +| course_id | course_name | credits | ++------------+----------------------------------+-----------+ +| 2111 | Basics of Plant Biotechnology | 2 | +| 2102 | Biochemistry | 3 | +| 1241 | Statistics | 3 | +| 4142 | Microbial Biodiversity | 2 | +| 3262 | Introduction to Plant Pathology | 3 | +| 3233 | Enzyme Technology | 2 | +| 1201 | Rural Sociology | 2 | ++------------+----------------------------------+-----------+ +---- + +Using the following query, we want to confirm the values of the *course_name* column that end with "`ology`" in the table above: + +[source,sql] +---- +SELECT course_name, ENDS_WITH(course_name, 'ology') FROM courses; +---- + +It will return true to all the courses with the name ending with **ology.** Otherwise*,* `false`. 
+ +[source,sql] +---- ++----------------------------------+-------------+ +| course_name | ends_with | ++----------------------------------+-------------+ +| Basics of Plant Biotechnology | true | +| Biochemistry | false | +| Statistics | false | +| Microbial Biodiversity | false | +| Introduction to Plant Pathology | true | +| Enzyme Technology | true | +| Rural Sociology | true | ++----------------------------------+-------------+ +---- + +=== `ENDS_WITH()` function with no specified argument + +Here we have the **patients_data** table with a `NULL` value in the *allergies* column. + +[source,sql] +---- +CREATE TABLE patients_data ( + record_number int, + patient_name text, + height_in_cm int, + weight_in_kg int, + allergies text +); +INSERT INTO patients_data + (record_number, patient_name, height_in_cm, weight_in_kg, allergies) +VALUES + (2009000908,'Vivienne Desjardin',168,49,''), + (2012000876,'Elizabeth Reinhard',163,55,''), + (2015000965,'James McCarthy',188,70,'penicillin'), + (2020000109,'Jose Ramirez',170,70,'sulfonamide'), + (2020000222,'Stefani Ricci',170,70,'peniccilin'); +---- + +[source,sql] +---- +SELECT * FROM patients_data; +---- + +[source,sql] +---- ++----------------+---------------------+---------------+--------------+-------------+ +| record_number | patient_name | height_in_cm | weight_in_kg | allergies | ++----------------+---------------------+---------------+--------------+-------------+ +| 2009000908 | Vivienne Desjardin | 168 | 49 | null | +| 2012000876 | Elizabeth Reinhard | 163 | 55 | null | +| 2015000965 | James McCarthy | 188 | 70 | penicillin | +| 2020000109 | Jose Ramirez | 170 | 70 | sulfonamide | +| 2020000222 | Stefani Ricci | 170 | 70 | peniccilin | ++----------------+---------------------+---------------+--------------+-------------+ +---- + +For example, we run the `ENDS_WITH` function but with no specified `second_argument`. 
+ +[source,sql] +---- +SELECT allergies, ENDS_WITH(allergies, '') FROM patients_data; +---- + +We will have the result where the `ENDS_WITH` will return true to all records (even the `null` one). + +[source,sql] +---- ++--------------+--------------+ +| allergies | starts_with | ++--------------+--------------+ +| null | true | +| null | true | +| penicillin | true | +| sulfonamide | true | +| peniccilin | true | ++--------------+--------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/index.adoc b/modules/reference/pages/sql/sql-functions/string-functions/index.adoc new file mode 100644 index 000000000..c59763794 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/index.adoc @@ -0,0 +1,34 @@ += Overview +:description: String functions are used to analyze and manipulate string values. + +String functions are used to analyze and manipulate string values. Redpanda SQL supports the following string related functions and operators: + +== String functions + +[width="100%",cols="20%,80%",options="header",] +|=== +|*Function* |*Description* +|xref:reference:sql/sql-functions/string-functions/length.adoc[LENGTH()] |Returns the number of characters in a string +|xref:reference:sql/sql-functions/string-functions/lower.adoc[LOWER()] |Makes string lowercase +|xref:reference:sql/sql-functions/string-functions/upper.adoc[UPPER()] |Makes string upper case +|xref:reference:sql/sql-functions/string-functions/starts-with.adoc[STARTS_WITH()] |Checks if a string starts with a specified substring +|xref:reference:sql/sql-functions/string-functions/ends-with.adoc[ENDS_WITH()] |Checks if a string ends with a specified substring +|xref:reference:sql/sql-functions/string-functions/concat.adoc[CONCAT()] |Adds two or more strings together +|xref:reference:sql/sql-functions/string-functions/substr.adoc[SUBSTR()] |Extracts a substring from a string +|xref:reference:sql/sql-functions/string-functions/strpos.adoc[STRPOS()] |Finds 
the position at which the substring starts within the string +|xref:reference:sql/sql-functions/string-functions/regex/regexp-match.adoc[REGEXP_MATCH()] |Matches a POSIX regular expression pattern to a string +|xref:reference:sql/sql-functions/string-functions/regex/regexp-replace.adoc[REGEXP_REPLACE()] |Substitutes new text for substrings that match POSIX regular expression patterns +|xref:reference:sql/sql-functions/string-functions/replace.adoc[REPLACE()] |Finds and replace occurences of a substring in a string +|xref:reference:sql/sql-functions/string-functions/position.adoc[POSITION()] |Returns the position of the first occurrence of a substring in a string +|=== + +== String operators + +[width="100%",cols="51%,49%",options="header",] +|=== +|*Operator* |*Description* +|text ~ text -> boolean |Returns `true` if the first argument matches the pattern of the second argument in case sensitive match +|text ~* text -> boolean |Returns `true` if the first argument matches the pattern of the second argument in a case-insensitive match +|text !~ text -> boolean |Returns `true` if the first argument does not match the pattern of the second argument in case sensitive match. +|text !~* text -> boolean |Returns `true` if the first argument does not match the pattern of the second argument in a case-insensitive match +|=== diff --git a/modules/reference/pages/sql/sql-functions/string-functions/length.adoc b/modules/reference/pages/sql/sql-functions/string-functions/length.adoc new file mode 100644 index 000000000..379b10bbe --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/length.adoc @@ -0,0 +1,105 @@ += LENGTH +:description: The LENGTH() function is used to find the length of a string, i.e., the number of characters in a given string. +:page-topic-type: reference + +The `LENGTH()` function is used to find the length of a string, i.e., the number of characters in a given string. It accepts a string as a parameter. 
Syntax of the length function is illustrated below: + +[source,sql] +---- +LENGTH(string) +---- + +The input type is a string, and the return type is int, as it returns the number of characters. + +*Special cases:* + +* If a null value is passed in the function, i.e., `LENGTH(NULL)`, it will return `NULL`. +* If the parameter is an empty string `LENGTH(")`, it will return 0. +* If the parameter is a space character `LENGTH('')`, not empty or null, it will return 1 as it is not empty anymore. + +== Examples + +=== Basic `LENGTH()` function + +The below example uses the `LENGTH()` function to find out the length of a string text: + +[source,sql] +---- +SELECT LENGTH ('Oxla PostgreSQL Tutorial'); +---- + +The final output will be as follows: + +[source,sql] +---- ++------------+ +| length | ++------------+ +| 24 | ++------------+ +---- + +=== `LENGTH()` function using columns + +Let’s see how the `LENGTH()` function works on the *personal_details* table containing the employee’s *id*, *first_name*, *last_name*, and *gender* of a retail store as columns. 
+ +[source,sql] +---- +CREATE TABLE personal_details ( + id int, + first_name text, + last_name text, + gender text +); +INSERT INTO personal_details + (id, first_name, last_name, gender) +VALUES + (1,'Mark','Wheeler','M'), + (2,'Tom','Hanks','M'), + (3,'Jane','Hopper','F'), + (4,'Emily','Byers','F'), + (5,'Lucas','Sinclair','M'); +---- + +[source,sql] +---- +SELECT * FROM personal_details; +---- + +The above query will show the following table: + +[source,sql] +---- ++-----+-------------+-------------+----------+ +| id | first_name | last_name | gender | ++-----+-------------+-------------+----------+ +| 1 | Mark | Wheeler | M | +| 2 | Tom | Hanks | M | +| 3 | Jane | Hopper | F | +| 4 | Emily | Byers | F | +| 5 | Lucas | Sinclair | M | ++-----+-------------+-------------+----------+ +---- + +The following query returns the last name and the length of the last name from the personal_details table, where the length of the last_name is greater than 5. + +[source,sql] +---- +SELECT last_name,length(last_name) +AS "Length of Last Name" +FROM personal_details +WHERE LENGTH(last_name) > 5; +---- + +The output displays all those items in the last_name column with a length of more than 5 characters. + +[source,sql] +---- ++---------------+-----------------------+ +| last_name | Length of Last Name | ++---------------+-----------------------+ +| Wheeler | 7 | +| Hopper | 6 | +| Sinclair | 8 | ++---------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/lower.adoc b/modules/reference/pages/sql/sql-functions/string-functions/lower.adoc new file mode 100644 index 000000000..e835a4a5b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/lower.adoc @@ -0,0 +1,105 @@ += LOWER +:description: The LOWER() function returns a given string, an expression, or values in a column in all lowercase letters. 
+:page-topic-type: reference + +The LOWER() function returns a given string, an expression, or values in a column in all lowercase letters. The syntax of the function is illustrated below: + +[source,sql] +---- +LOWER(string) +---- + +It accepts input as a string and returns the text in the lowercase alphabet. + +*Special Cases:* If there are characters in the input which are not of type string, they remain unaffected by the LOWER()function. + +[NOTE] +==== +We support Unicode so that the ß is equivalent to the string ss. +==== + +== Examples + +=== Basic `LOWER()` function + +The following basic query converts the given string in all lowercase alphabets: + +[source,sql] +---- +SELECT LOWER('PostGreSQL'); +---- + +The final output will be as follows: + +[source,sql] +---- ++------------+ +| lower | ++------------+ +| postgresql | ++------------+ +---- + +=== `LOWER()` function using columns + +Let’s see how the `LOWER()` function works using an example with columns. We have a *personal_details* table containing columns *id*, *first_name*, *last_name*, and *gender* of retail store employees. 
+
+[source,sql]
+----
+CREATE TABLE personal_details (
+    id int,
+    first_name text,
+    last_name text,
+    gender text
+);
+INSERT INTO personal_details
+  (id, first_name, last_name, gender)
+VALUES
+  (1,'Mark','Wheeler','M'),
+  (2,'Tom','Hanks','M'),
+  (3,'Jane','Hopper','F'),
+  (4,'Emily','Byers','F'),
+  (5,'Lucas','Sinclair','M');
+----
+
+[source,sql]
+----
+SELECT * FROM personal_details;
+----
+
+The above query will show the following table:
+
+[source,sql]
+----
++-----+-------------+-------------+----------+
+| id  | first_name  | last_name   | gender   |
++-----+-------------+-------------+----------+
+| 1   | Mark        | Wheeler     | M        |
+| 2   | Tom         | Hanks       | M        |
+| 3   | Jane        | Hopper      | F        |
+| 4   | Emily       | Byers       | F        |
+| 5   | Lucas       | Sinclair    | M        |
++-----+-------------+-------------+----------+
+----
+
+Let’s assume that we want to convert the first and last names of employees with *id* numbers 2, 4, and 5 to all lowercase letters, which can be done using the following query:
+
+[source,sql]
+----
+SELECT first_name,last_name,LOWER(first_name),LOWER(last_name)
+FROM personal_details
+where id in (2, 4, 5);
+----
+
+The output displays the first and last names of employees with the specified ids in lowercase letters:
+
+[source,sql]
+----
++------------+-------------+----------+----------+
+| first_name | last_name   | lower    | lower    |
++------------+-------------+----------+----------+
+| Tom        | Hanks       | tom      | hanks    |
+| Emily      | Byers       | emily    | byers    |
+| Lucas      | Sinclair    | lucas    | sinclair |
++------------+-------------+----------+----------+
+---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/position.adoc b/modules/reference/pages/sql/sql-functions/string-functions/position.adoc new file mode 100644 index 000000000..811ff56e7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/position.adoc @@ -0,0 +1,90 @@ += POSITION +:description: The POSITION() function returns the position of the first occurrence of a substring in a string. 
+:page-topic-type: reference
+
+The `POSITION()` function returns the position of the first occurrence of a substring in a string. It works the same as xref:reference:sql/sql-functions/string-functions/strpos.adoc[STRPOS], but it has slightly different syntax.
+
+== Syntax
+
+The syntax for this function is as follows:
+
+[source,sql]
+----
+POSITION(substring IN string)
+----
+
+The position of the substring within the string starts from 1. If the substring is not found, it returns 0.
+
+== Examples
+
+=== Example 1
+
+This query looks for the position of the substring `world` within the string `Hello, world!`.
+
+[source,sql]
+----
+SELECT POSITION('world' IN 'Hello, world!');
+----
+
+The result would be the starting position of the substring `world`, which is 7.
+
+[source,sql]
+----
+position
+----------
+        7
+----
+
+=== Example 2
+
+The query looks for the position of the substring `123` within the string `1a2b3c`.
+
+[source,sql]
+----
+SELECT POSITION('123' IN '1a2b3c');
+----
+
+Although the characters `1`, `2`, and `3` all appear in `1a2b3c`, they do not occur as the contiguous substring `123`, so the result would be 0.
+
+[source,sql]
+----
+position
+----------
+        0
+----
+
+=== Example 3
+
+The query tries to find the position of the substring `abc` within the string `xyz`.
+
+[source,sql]
+----
+SELECT POSITION('abc' IN 'xyz');
+----
+
+`abc` is not found in `xyz`, the result would be 0.
+
+[source,sql]
+----
+position
+----------
+        0
+----
+
+=== Example 4
+
+This query searches for the position of the substring `cde` within the string `cde`.
+
+[source,sql]
+----
+SELECT POSITION('cde' IN 'cde');
+----
+
+`cde` is the entire string, the result would be 1. 
+ +[source,sql] +---- +position +---------- + 1 +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/index.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/index.adoc new file mode 100644 index 000000000..ebdccff46 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/index.adoc @@ -0,0 +1,3 @@ += Regular Expressions +:description: Reference for regular expression functions in Redpanda SQL. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc new file mode 100644 index 000000000..b4d67ba2a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/posix-regular-expressions.adoc @@ -0,0 +1,20 @@ += POSIX Regular Expressions +:description: POSIX (Portable Operating System Interface) defines a set of standard operating system interfaces based on the UNIX OS. +:page-topic-type: reference + +*POSIX* (Portable Operating System Interface) defines a set of standard operating system interfaces based on the UNIX OS. In POSIX Basic Regex Expression (BRE) syntax, most characters are treated as literals (e.g. they match only themselves). However, some characters called *metacharacters* have special meaning. + +The following table describes common POSIX BRE metacharacters: + +[width="100%",cols="38%,62%",options="header",] +|=== +|*Metacharacter* |*Description* +|`.` |Matches any single character. For example, `a.c` matches "`*abc*`", but `[a.c]` matches only "`*a*`", "`*.*`", or "`*c*`" +|`-` |Used to define a range. 
For example, `[a-c]` will match characters *a* to *c* (both inclusive)
+|`[]` |Defines a bracket expression. Matches any single character listed between the brackets. For example, `[abc]` matches only "`*a*`", "`*b*`", or "`*c*`"
+|`^` |Matches the beginning of the string. Inside a bracket expression, it negates the set; for example, `[^abc]` matches any character except "`*a*`", "`*b*`", or "`*c*`"
+|`$` |Matches the end of the string
+|`*` |Matches zero or more occurrences of the preceding element. For example, `ab*c` matches "`*ac*`", "`*abc*`", or "`*abbc*`"
+|`\{n}` |Matches exactly *n* occurrences of the preceding element
+|`{n,m}` |Matches at least *n* and at most *m* occurrences of the preceding element
+|===
diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-match.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-match.adoc
new file mode 100644
index 000000000..4a10d6202
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-match.adoc
@@ -0,0 +1,138 @@
+= REGEXP_MATCH()
+:description: The REGEXP_MATCH() function matches a POSIX regular expression pattern to a string.
+:page-topic-type: reference
+
+The `REGEXP_MATCH()` function matches a POSIX regular expression pattern to a string. It returns an array of `TEXT[]` type with substring(s) of matched groups within the first match.
+
+== Syntax
+
+The syntax for `REGEXP_MATCH()` function is as follows:
+
+[source,sql]
+----
+REGEXP_MATCH(source_string, pattern, [flags])
+----
+
+== Parameters
+
+* `source_string`: string on which you want to perform the matching
+* `pattern`: POSIX regular expression pattern to match
+* `flags`: (optional) string with flags that change the matching behavior of `REGEXP_MATCH()` function
+
+The `flags` parameter is an optional string that controls how the function operates. 
Here is a list of flags that are supported by Redpanda SQL:
+
+* `i`: use this flag for case-insensitive matching
+* `c`: `REGEXP_MATCH()` function is case sensitive by default, using the `c` flag has the same effect as having no flags at all
+
+[NOTE]
+====
+If you use multiple flags, the last one takes precedence. If you use the `ci` flags, the regex will be case-insensitive, while using the `ic` flags it will be case-sensitive
+====
+
+== Examples
+
+=== Basic usage
+
+The following example demonstrates how to find the first occurrence of an email address in the input string:
+
+[source,sql]
+----
+SELECT REGEXP_MATCH('Contact us at hello@oxla.com', '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}');
+----
+
+[source,sql]
+----
+   regexp_match
+------------------
+ {hello@oxla.com}
+(1 row)
+----
+
+=== Match multiple groups
+
+The `REGEXP_MATCH()` function can capture multiple groups within a match, which allows you to extract key parts from a string in a structured way. The example below extracts the protocol, domain and path from a given URL:
+
+[source,sql]
+----
+SELECT REGEXP_MATCH('https://www.example.com/products/item123', '(https?)://([\w.-]+)/(.+)');
+----
+
+[source,sql]
+----
+               regexp_match
+------------------------------------------
+ {https,www.example.com,products/item123}
+(1 row)
+----
+
+=== Case-insensitive matching
+
+This example shows how to match a pattern regardless of case-sensitivity:
+
+[source,sql]
+----
+SELECT REGEXP_MATCH('User.Name@Example.COM', '@([a-z0-9.-]+)$', 'i');
+----
+
+[source,sql]
+----
+ regexp_match
+---------------
+ {Example.COM}
+(1 row)
+----
+
+=== Match with patterns stored in a table
+
+In this example we will show you how to take the source string and regex pattern directly from the table. 
For the needs of this section, let’s create two sample tables: + +[source,sql] +---- +CREATE TABLE users ( + email TEXT NOT NULL +); + +CREATE TABLE patterns ( + id INT, + regex_pattern TEXT NOT NULL +); +---- + +Once that is done, let’s insert values into those tables: + +[source,sql] +---- +INSERT INTO users (email) VALUES + ('user@example.com'), + ('admin@test.org'), + ('invalid-email@wrong'); + +INSERT INTO patterns (id, regex_pattern) VALUES + (0, '^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$'); +---- + +Now, we can validate if user emails in `users` table are valid. If the regex doesn’t match, a `NULL` value is returned. + +[source,sql] +---- +SELECT users.email, + patterns.regex_pattern, + REGEXP_MATCH(users.email, patterns.regex_pattern, 'i') AS is_valid +FROM users +JOIN patterns ON patterns.id = 0; +---- + +[source,sql] +---- + email | regex_pattern | is_valid +---------------------+-----------------------------------------+-------------------- + user@example.com | ^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$ | {user@example.com} + admin@test.org | ^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$ | {admin@test.org} + invalid-email@wrong | ^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$ | +(3 rows) +---- + +=== Restrictions + +* The function returns `NULL` if it cannot match the regular expression pattern +* `i` and `c` flags shouldn’t be used with each other diff --git a/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-replace.adoc b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-replace.adoc new file mode 100644 index 000000000..5d5e76b6a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/regex/regexp-replace.adoc @@ -0,0 +1,156 @@ += REGEXP_REPLACE() +:description: The REGEXP_REPLACE() function replaces all occurrences of a regular expression pattern in a string with a specified replacement string. 
+:page-topic-type: reference + +The `REGEXP_REPLACE()` function replaces all occurrences of a regular expression pattern in a string with a specified replacement string. + +== Syntax + +The syntax for `REGEXP_REPLACE()` function is as follows: + +[source,sql] +---- +REGEXP_REPLACE(source_string, pattern, replacement, [flags]) +---- + +== Parameters + +* `source_string`: string that we want to perform the replacement on +* `pattern`: POSIX regular expression pattern to match +* `replacement`: replacement string +* `flags`: (optional) string that changes the matching behavior of `REGEXP_REPLACE()` function + +The `flags` parameter is an optional string that controls how the function operates. Here is a list of flags supported in Redpanda SQL: + +* `g`: global replacement. This flag ensures that all occurrences of the pattern are replaced +* `i`: use this flag for case-insensitive matching +* `c`: `REGEXP_REPLACE()` function is case sensitive by default, using the `c` flag has the same effect as using no flags + +== Examples + +=== Basic function usage + +In this example, we will focus on using `REGEXP_REPLACE()` function with a basic POSIX regular expression pattern: + +[source,sql] +---- +SELECT REGEXP_REPLACE('The OXLA supports various data types', 'T[^ ]*', 'We') AS "Replaced_String"; +---- + +By executing the query above, we will get the following output: + +[source,sql] +---- + Replaced_String +----------------------------------------- + We OXLA supports various data types +---- + +The pattern used was *"`T[^ ]*`"*, which matches any substring that starts with a '`T`' character, followed by any number of non-space characters. The function replaces the matched substring with the specified replacement string *"`We`"*. 
+ +=== Replace special characters + +This example demonstrates how to replace a non-alphanumeric character in a string with a tilde (~): + +[source,sql] +---- +SELECT REGEXP_REPLACE('Hello World!', '[^A-Za-z0-9 ]', '~') AS "Replaced_String"; +---- + +In the above query, the second parameter is a regular expression *“[^A-Za-z0-9 ]”* that matches any characters that are not uppercase / lowercase letters, digits or spaces. The output for executing the query above will be as follows: + +[source,sql] +---- + Replaced String +------------------- + Hello World~ +---- + +=== Flags usage + +==== Replace certain substrings with a single flag defined + +This example will focus on using the `REGEXP_REPLACE()` function with a defined flag and replacing certain substrings in a string. For the needs of this section, we will create a sample `quotes` table: + +[source,sql] +---- +CREATE TABLE quotes (quotes_text text); +INSERT INTO quotes (quotes_text) +VALUES ('Work hard and stay hungry. Lazy people get nowhere in life.'), + ('An excuse is a way for a LAZY person to feel better.'), + ('The word LUCKY is how a lazy person describes someone who works hard.'); + +SELECT quotes_text FROM quotes; +---- + +By executing the code above, we will get the following output: + +[source,bash] +---- + quotes_text +----------------------------------------------------------------------- + Work hard and stay hungry. Lazy people get nowhere in life. + An excuse is a way for a LAZY person to feel better. + The word LUCKY is how a lazy person describes someone who works hard. 
+(3 rows)
+----
+
+Now, we will use the `REGEXP_REPLACE()` function with the `i` flag specified to replace all occurrences of the word `lazy` with `active` regardless of case sensitivity:
+
+[source,sql]
+----
+SELECT quotes_text, REGEXP_REPLACE(quotes_text, 'lazy', 'active', 'i') AS "New quotes" FROM quotes;
+----
+
+In this case, all occurrences of the word `lazy` have been replaced with `active`:
+
+[source,bash]
+----
+                              quotes_text                              |                               New quotes
+-----------------------------------------------------------------------+-------------------------------------------------------------------------
+ Work hard and stay hungry. Lazy people get nowhere in life.           | Work hard and stay hungry. active people get nowhere in life.
+ An excuse is a way for a LAZY person to feel better.                  | An excuse is a way for a active person to feel better.
+ The word LUCKY is how a lazy person describes someone who works hard. | The word LUCKY is how a active person describes someone who works hard.
+(3 rows)
+----
+
+==== Specify one or more flags
+
+Without specifying the `g` flag, `REGEXP_REPLACE()` function replaces only the first occurrence of a substring:
+
+[source,sql]
+----
+SELECT REGEXP_REPLACE('ab12c', '[0-9]', 'X');
+----
+
+[source,sql]
+----
+ regexp_replace
+----------------
+ abX2c
+----
+
+In this case, as you can see only the first digit (`1`) was replaced with `X`. By adding the `g` flag, all occurrences are replaced with `X`:
+
+[source,sql]
+----
+SELECT REGEXP_REPLACE('ab12c', '[0-9]', 'X', 'g');
+----
+
+[source,sql]
+----
+ regexp_replace
+----------------
+ abXXc
+----
+
+[NOTE]
+====
+If you use multiple flags, the last one takes precedence. 
If you use the `ci` flags, the regex will be case-insensitive, while using the `ic` flags it will be case-sensitive +==== + +== Restrictions + +* The function returns `NULL` if there are no input rows or `NULL` values +* If the regular expression pattern isn’t found in the string, the `REGEXP_REPLACE()` function returns the original string +* `i` and `c` flags shouldn’t be used with each other diff --git a/modules/reference/pages/sql/sql-functions/string-functions/replace.adoc b/modules/reference/pages/sql/sql-functions/string-functions/replace.adoc new file mode 100644 index 000000000..5dc43eeb0 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/replace.adoc @@ -0,0 +1,152 @@ += REPLACE() +:description: The REPLACE() function looks for and replaces a substring with a new one in a string. +:page-topic-type: reference + +The `REPLACE()` function looks for and replaces a substring with a new one in a string. This function is often used to update the outdated or spelling mistakes in data that require an amendment. + +[NOTE] +==== +Redpanda SQL also supports the xref:reference:sql/sql-functions/string-functions/regex/regexp-replace.adoc[`REGEXP_REPLACE()`] function. 
It enables you to search and replace a substring that matches with a POSIX regular expression
+====
+
+== Syntax
+
+The syntax for `REPLACE()` function is as follows:
+
+[source,sql]
+----
+REPLACE(string, old_substring, new_substring)
+----
+
+[WARNING]
+====
+The `REPLACE()` function performs a case-sensitive replacement
+====
+
+=== Parameters
+
+The syntax requires three parameters, explained below:
+
+* `string`: string that you want to replace
+* `old_substring`: substring that you want to replace (all parts will be replaced if it appears multiple times in the string)
+* `new_substring`: new substring that will replace the old one
+
+== Examples
+
+=== Basic usage
+
+In this example we will focus on a basic usage of the `REPLACE()` function, so we can understand how it works with a real example.
+
+[source,sql]
+----
+SELECT REPLACE ('NewDatabase', 'New', 'Oxla');
+----
+
+The `REPLACE()` function will find all occurrences of the '`New`' substring in the '`NewDatabase`' string and replace it with the '`Oxla`' substring, producing the following output:
+
+[source,sql]
+----
++---------------------+
+| f                   |
++---------------------+
+| OxlaDatabase        |
++---------------------+
+----
+
+=== Replace specified values in a table
+
+This example shows how to replace the values of a specific column in a table. For the needs of this example, we will create a new table named *hobby* with *club* and *category* columns and insert the values into the respective columns. 
+ +[source,sql] +---- +CREATE TABLE hobby ( + club text, + category text +); +INSERT INTO hobby + (club, category) +VALUES + ('Bridge','group'), + ('Painting','individual'), + ('Basketball','group'), + ('Volleyball','group'); +---- + +Once that is done, we can retrieve all values from the table using the following query: + +[source,sql] +---- +SELECT * FROM hobby; +---- + +[source,sql] +---- ++------------+---------------+ +| club | category | ++------------+---------------+ +| Bridge | group | +| Painting | individual | +| Basketball | group | +| Volleyball | group | ++--------------+-------------+ +---- + +What we would do here is to replace the *'`group`'* values in the *category* column with *'`sports`'*: + +[source,sql] +---- +SELECT REPLACE(category, 'group', 'sports') from hobby; +---- + +[source,sql] +---- ++--------------+ +| f | ++--------------+ +| sports | +| individual | +| sports | +| sports | ++--------------+ +---- + +=== Remove a substring from a string + +In the following example, we will show how to remove a substring from a string using the `REPLACE()` function. 
In this case we want to find all occurrences of the '`Friends`' substring in the '`Hello Friends`' string and get rid of it:
+
+[source,sql]
+----
+SELECT REPLACE('Hello Friends', 'Friends', '');
+----
+
+[source,sql]
+----
++-----------+
+| f         |
++-----------+
+| Hello     |
++-----------+
+----
+
+=== Replace multiple patterns
+
+The following example uses the `REPLACE()` function to replace multiple patterns of the given string:
+
+[source,sql]
+----
+SELECT REPLACE(REPLACE(REPLACE(REPLACE('2*[9-5]/{4+8}', '[', '('), ']', ')'), '{', '('), '}', ')');
+----
+
+We can see that the `REPLACE()` function is called multiple times to replace the corresponding string as specified:
+
+* *`[]`* into *`()`*
+* *`{}`* into *`()`*
+
+[source,sql]
+----
++------------------+
+| f                |
++------------------+
+| 2*(9-5)/(4+8)    |
++------------------+
+---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/starts-with.adoc b/modules/reference/pages/sql/sql-functions/string-functions/starts-with.adoc new file mode 100644 index 000000000..85cc04493 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/starts-with.adoc @@ -0,0 +1,155 @@ += STARTS_WITH +:description: The STARTS_WITH() function determines whether the first argument starts with a specified string in the second argument or not. +:page-topic-type: reference +
+The `STARTS_WITH()` function determines whether the first argument starts with a specified string in the second argument or not.
+
+[source,sql]
+----
+STARTS_WITH(first_argument, 'second_argument')
+----
+
+* `first_argument`: the specified argument, which will be the search reference. It can be a string or a column name.
+* `second_argument`: the specified argument, which will have the search keywords.
+
+The input type will be `STRING`, and the return type is `BOOL`, shown as `true` or `false`.
+
+Special case:
+
+* It will return `NULL` for the `NULL` record. 
+* It will return `true` (including the `NULL` record) if the `second_argument` is not specified. + +== Examples + +=== `STARTS_WITH()` function using column + +Let’s say we have a table with the title *petsData*, as shown below. + +[source,sql] +---- +CREATE TABLE petsData ( + petid int, + petname text, + species text, + breed text, + sex text, + age int +); +INSERT INTO petsData + (petid, petname, species, breed, sex, age) +VALUES + (2021001,'Bartholomeow','cat','persian','m',2), + (2021004,'Jack','dog','boston terrier','m',1), + (2022001,'Jesse','hamster','dzungarian','m',1), + (2022010,'Bella','dog','dobberman','f',3), + (2022011,'June','cat','american shorthair','f',2); +---- + +[source,sql] +---- +SELECT * FROM petsData; +---- + +The above query will show the following table: + +[source,sql] +---- ++----------+--------------+----------+---------------------+------+-----+ +| petid | petname | species | breed | sex | age | ++----------+--------------+----------+---------------------+------+-----+ +| 2021001 | Bartholomeow | cat | persian | m | 2 | +| 2021004 | Jack | dog | boston terrier | m | 1 | +| 2022001 | Jesse | hamster | dzungarian | m | 1 | +| 2022010 | Bella | dog | dobberman | f | 3 | +| 2022011 | June | cat | american shorthair | f | 2 | ++----------+--------------+----------+---------------------+------+-----+ +---- + +From the table above, we want to retrieve the values of *petname* column that start with “J” by using the following query: + +[source,sql] +---- +SELECT petname, STARTS_WITH(petname, 'J') FROM petsData; +---- + +It will return `true` to the pet with a pet starting with the letter J. Otherwise, `false`. 
+ +[source,sql] +---- ++--------------+---------------+ +| petname | starts_with | ++---------------+--------------+ +| Bartholomeow | false | +| Jack | true | +| Jesse | true | +| Bella | false | +| June | true | ++---------------+--------------+ +---- + +=== `STARTS_WITH()` function with no specified argument + +Here we have the *petsData* table with a `NULL` value in the breed column. + +[source,sql] +---- +CREATE TABLE petsData ( + petid int, + petname text, + species text, + breed text, + sex text, + age int +); +INSERT INTO petsData + (petid, petname, species, breed, sex, age) +VALUES + (2021001,'Bartholomeow','cat','persian','m',2), + (2021004,'Jack','dog','boston terrier','m',1), + (2022001,'Jesse','hamster','dzungarian','m',1), + (2022010,'Bella','dog','dobberman','f',3), + (2022011,'June','cat','american shorthair','f',2), + (2022012,'Phoebe','gold fish','','f',1); +---- + +[source,sql] +---- +SELECT * FROM petsData; +---- + +[source,sql] +---- ++----------+--------------+------------+---------------------+------+------+ +| petid | petname | species | breed | sex | age | ++----------+--------------+------------+---------------------+------+------+ +| 2021001 | Bartholomeow | cat | persian | m | 2 | +| 2021004 | Jack | dog | boston terrier | m | 1 | +| 2022001 | Jesse | hamster | dzungarian | m | 1 | +| 2022010 | Bella | dog | dobberman | f | 3 | +| 2022011 | June | cat | american shorthair | f | 2 | +| 2022012 | Phoebe | gold fish | | f | 1 | ++----------+--------------+------------+---------------------+------+------+ +---- + +For example, we run the `STARTS_WITH` function but with no specified `second_argument:` + +[source,sql] +---- +SELECT breed, STARTS_WITH(breed, '') FROM petsData; +---- + +We will have the following result where the `STARTS_WITH` will return true to all records (even the `null` one): + +[source,sql] +---- ++---------------------+--------------+ +| breed | starts_with | ++---------------------+--------------+ +| persian | true | +| 
boston terrier | true | +| dzungarian | true | +| dobberman | true | +| american shorthair | true | +| null | true | ++---------------------+--------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/strpos.adoc b/modules/reference/pages/sql/sql-functions/string-functions/strpos.adoc new file mode 100644 index 000000000..e35a2dac1 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/strpos.adoc @@ -0,0 +1,113 @@ += STRPOS +:description: The STRPOS() is used to return the position from where the substring (the second argument) is matched with the string (the first argument). +:page-topic-type: reference + +The `STRPOS()` is used to return the position from where the substring (the second argument) is matched with the string (the first argument). + +[source,sql] +---- +STRPOS(string, substring) +---- + +The input and return must be of type `string`. + +*Special cases:* + +* Returns `NULL` if there are no input rows or `NULL` values. +* If the `substring` is not found in the string, then the `STRPOS()` function will return 0. + +== Examples + +=== Basic `STRPOS()` function + +In the example below, we will find the *ut* (substring) position in the *computer* (string): + +[source,sql] +---- +SELECT STRPOS('computer', 'ut') AS "Position of ut"; +---- + +We can see that *ut* is located at the fifth character of the *computer*: + +[source,sql] +---- ++-----------------+ +| Position of ut | ++-----------------+ +| 5 | ++-----------------+ +---- + +=== STRPOS() function using column + +We have a *listofwords* table where it stores the word data. 
+ +[source,sql] +---- +CREATE TABLE listofwords ( + words text +); +INSERT INTO listofwords + (words) +VALUES + ('corral'), + ('traditionally'), + ('real'), + ('communal'), + ('challenge'), + ('fall'), + ('wall'), + ('gallop'), + ('albatross'); +---- + +[source,sql] +---- +SELECT * FROM listofwords; +---- + +The above query will show the following table: + +[source,sql] +---- ++----------------+ +| words | ++----------------+ +| corral | +| traditionally | +| real | +| communal | +| challenge | +| fall | +| wall | +| gallop | +| albatross | ++----------------+ +---- + +The following query will display the words and a position of a specific substring = ‘*al*’ using the `STRPOS()` function: + +[source,sql] +---- +SELECT words, STRPOS(words, 'al') AS "Position of al" +FROM listofwords; +---- + +The result will display the *al* position of different words: + +[source,sql] +---- ++----------------+------------------+ +| words | Position of al | ++----------------+------------------+ +| corral | 5 | +| traditionally | 10 | +| real | 3 | +| communal | 7 | +| challenge | 3 | +| fall | 2 | +| wall | 2 | +| gallop | 2 | +| albatross | 1 | ++----------------+------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/substr.adoc b/modules/reference/pages/sql/sql-functions/string-functions/substr.adoc new file mode 100644 index 000000000..7edd86f82 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/substr.adoc @@ -0,0 +1,144 @@ += SUBSTR +:description: The SUBSTR() function extracts a specific number of characters from a string. +:page-topic-type: reference + +The `SUBSTR()` function extracts a specific number of characters from a string. 
+
+== Syntax
+
+The syntax of the function is illustrated below:
+
+*2 Arguments*
+
+[source,sql]
+----
+substr( string, start_position)
+----
+
+*3 Arguments*
+
+[source,sql]
+----
+substr( string, start_position, length )
+----
+
+[TIP]
+====
+Both syntaxes will have input and return of type `string`.
+====
+
+=== Start position
+
+The `start_position` is used as the starting position, specifying the part from where the substring is to be returned. It is written as an integer value.
+
+[width="100%",cols="36%,64%",options="header",]
+|===
+|*Input* |*Return*
+|`start_position` between 1 and the length of `string` |The `start_position` is a given character in the string. The count starts from the first character.
+|`start_position` > length of `string` |Returns an empty substring.
+|`start_position` = negative value |The count starts from the provided negative value, with subsequent characters yielded as it approaches 0. While the index is less than or equal to 0, no characters are returned; once it exceeds 0, characters from the string are yielded, starting from the first one.
+|===
+
+=== Length
+
+The `length` is used to determine the number of characters to be extracted. It can be one or more characters.
+
+[width="100%",cols="20%,80%",options="header",]
+|===
+|*Input* |*Return*
+|`length` = 0 |Returns an empty substring.
+|`length` is not set |The function will start from the specified `start_position` and end at the last character of the `string`.
+|`length` = negative value |Returns an error. 
+|=== + +== Examples + +=== `SUBSTR()` function with specified `start_position` & `length` + +In this example, we will set the `start_position` with the first six characters and have five characters extracted: + +[source,sql] +---- +SELECT substr('Watermelon',6,5) AS "Fruit"; +---- + +The updated table is shown below: + +[source,sql] +---- +Fruit +------- + melon +---- + +=== `SUBSTR()` function with `length` = 0 + +The following query will extract a string with `length` = 0: + +[source,sql] +---- +SELECT substr('Watermelon',6,0) AS "Fruit"; +---- + +It will display an empty output as there is no `length` specified: + +[source,sql] +---- +Fruit +------- +---- + +=== `SUBSTR()` function with `length` = negative value + +Here we will check if the `length` is specified with a negative value: + +[source,sql] +---- +SELECT substr('Watermelon',6,-2) AS "Fruit"; +---- + +Instead of extracting the string from the last characters, it will return an error as seen below: + +[source,sql] +---- +ERROR: Length of substring cannot be negative +---- + +=== `SUBSTR()` function with `start_position` > `string` + +We know that *Watermelon* only has ten characters, but this time, we will figure out if the specified `start_position` is larger than the string’s characters: + +[source,sql] +---- +SELECT substr('Watermelon',20,2) AS "Fruit"; +---- + +It will display an empty output as shown below: + +[source,sql] +---- +Fruit +------- +---- + +=== `SUBSTR()` function with 2 arguments + +In this example, we will set the `start_position` with the first six characters and have five characters extracted. 
+ +[source,sql] +---- +SELECT substr('database', 6) AS "Result"; +---- + +It will display the substring from position 6 output as shown below: + +[source,sql] +---- +Result +-------- + ase +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/substring.adoc b/modules/reference/pages/sql/sql-functions/string-functions/substring.adoc new file mode 100644 index 000000000..594b5dde8 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/substring.adoc @@ -0,0 +1,50 @@ += SUBSTRING +:description: SUBSTR is an alias for SUBSTRING. +:page-topic-type: reference + +[WARNING] +==== +SUBSTR is an alias for SUBSTRING. Learn more at xref:reference:sql/sql-functions/string-functions/substr.adoc[SUBSTR] documentation. +==== +The SUBSTRING() function lets you extract a part of a string and return that substring. + +== Syntax + +Here are the 2 basic syntaxes of the `SUBSTRING()` function in Redpanda SQL: + +*2 Arguments* + +[source,sql] +---- +SUBSTRING( string, start_position ) +---- + +*3 Arguments* + +[source,sql] +---- +SUBSTRING(string, start_position, length) +---- + +[TIP] +==== +Both syntaxes will have input and return of type `string`. +==== + +== Example + +The following example uses the `SUBSTRING()` function to extract the first 7 characters from the string. + +[source,sql] +---- +SELECT SUBSTRING('OxlaDocumentation', 1, 7); +---- + +It will display the substring from position 6 output as shown below: + +[source,sql] +---- +substring +----------- + OxlaDoc +---- diff --git a/modules/reference/pages/sql/sql-functions/string-functions/upper.adoc b/modules/reference/pages/sql/sql-functions/string-functions/upper.adoc new file mode 100644 index 000000000..87f4bb53e --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/string-functions/upper.adoc @@ -0,0 +1,109 @@ += UPPER +:description: The UPPER() function returns a given string, an expression, or values in a column in all uppercase letters. 
+:page-topic-type: reference + +The `UPPER()` function returns a given string, an expression, or values in a column in all uppercase letters. The syntax of the function is illustrated below: + +[source,sql] +---- +UPPER(string) +---- + +It accepts input as a string and returns text in uppercase letters. + +*Special Case:* + +* If characters in the input are not of type string, they remain unaffected by the `UPPER()` function. +* We support Unicode for the `UPPER()` function. + +== Examples + +=== Basic `UPPER()` function + +The following basic query converts the given string in all uppercase alphabets: + +[source,sql] +---- +SELECT UPPER('PostGreSQL'); +---- + +The final output will be as follows: + +[source,sql] +---- ++-------------+ +| upper | ++-------------+ +| POSTGRESQL | ++-------------+ +---- + +=== UPPER() function using columns and CONCAT() function + +Let’s see how the `UPPER()` function works using an example with columns. We have a table named *personal_details* containing employee’s *id*, *first_name*, *last_name*, and *gender* of a retail store: + +[source,sql] +---- +CREATE TABLE personal_details ( + id int, + first_name text, + last_name text, + gender text +); +INSERT INTO personal_details + (id, first_name, last_name, gender) +VALUES + (1,'Mark','Wheeler','M'), + (2,'Tom','Hanks','M'), + (3,'Jane','Hopper','F'), + (4,'Emily','Byers','F'), + (5,'Lucas','Sinclair','M'); +---- + +[source,sql] +---- +SELECT * FROM personal_details; +---- + +The above query will show the following table: + +[source,sql] +---- ++-----+-------------+-------------+----------+ +| id | first_name | last_name | gender | ++-----+-------------+-------------+----------+ +| 1 | Mark | Wheeler | M | +| 2 | Tom | Hanks | M | +| 3 | Jane | Hopper | F | +| 4 | Emily | Byers | F | +| 5 | Lucas | Sinclair | M | ++-----+-------------+-------------+----------+ +---- + +Let’s assume that: + +. 
We want to convert employees’ first and last names with *id* numbers 1, 3, and 5 to all uppercase letters. +. Then, combine them using the `CONCAT()` function into one *full_name* column in uppercase. ++ +It can be done using the following query: ++ +[source,sql] +---- +SELECT CONCAT (UPPER(first_name),' ', UPPER(last_name)) +as full_name +FROM personal_details +where id in (1, 3, 5); +---- ++ +The output displays the first and last names of employees with the specified ids in uppercase letters: ++ +[source,sql] +---- ++---------------------+ +| full_name | ++---------------------+ +| MARK WHEELER | +| JANE HOPPER | +| LUCAS SINCLAIR | ++---------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/current-timestamp.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/current-timestamp.adoc new file mode 100644 index 000000000..1356fda91 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/current-timestamp.adoc @@ -0,0 +1,37 @@ += CURRENT_TIMESTAMP +:description: The CURRENT_TIMESTAMP() returns the current timestamp value representing the date and time the query was executed. +:page-topic-type: reference + +The `CURRENT_TIMESTAMP()` returns the current timestamp value representing the date and time the query was executed. + +[NOTE] +==== +Note that the time returned by this function is the time when the query was executed. 
+==== + +== Syntax + +[source,sql] +---- +CURRENT_TIMESTAMP() // The parentheses are optional +---- + +== Examples + +The following example shows how to get the current date and time with a `CURRENT_TIMESTAMP()`function: + +[source,sql] +---- +SELECT CURRENT_TIMESTAMP AS "Current Time"; +---- + +The final result will display the current date and time in your timezone: + +[source,sql] +---- +----------------------------- + Current Time +----------------------------- + 2022-08-31 16:56:06.464016 +----------------------------- +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/date-trunc.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/date-trunc.adoc new file mode 100644 index 000000000..e8f4863f6 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/date-trunc.adoc @@ -0,0 +1,160 @@ += DATE_TRUNC +:description: The DATE_TRUNC() function truncates a timestamp, timestamp with time zone or interval value to the specified precision, effectively rounding down the +:page-topic-type: reference + +The `DATE_TRUNC()` function truncates a timestamp, timestamp with time zone or interval value to the specified precision, effectively rounding down the value to the start of the given time unit. The return type matches the input type. + +== Syntax + +The syntax for using the `DATE_TRUNC()` function is as follows: + +.Without time_zone +[source,sql] +---- +DATE_TRUNC(field, source) +---- + +.With time_zone +[source,sql] +---- +DATE_TRUNC(field, source, time_zone) +---- + +== Parameters + +* `field`: The unit of time used to truncate the `source` value. It accepts `text` inputs and is case-insensitive +* `source`: The value you want to truncate. It can be `INTERVAL`, `TIMESTAMP` or `TIMESTAMP WITH TIME ZONE` +* `time_zone` _(applicable for the second syntax option)_: The time zone for the operation. 
It accepts `text` input + +== Fields + +Below is a list of supported values to specify the fields param in `DATE_TRUNC()` syntax. + +* `microseconds` +* `milliseconds` +* `second` +* `minute` +* `hour` +* `day` +* `week` +* `month` +* `quarter` +* `year` +* `decade` +* `century` +* `millennium` + +[NOTE] +==== +Some fields like `microseconds` and `milliseconds` are supported only for interval types. +==== + +== Examples + +=== Truncate to year + +This example truncates the timestamp to the year level. + +[source,sql] +---- +select DATE_TRUNC('year', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp **“1911-12-02 19:40:00”** has been truncated to 1911, with the month and day set to January 1st. + +[source,sql] +---- + date_trunc +---------------------------- + 1911-01-01 00:00:00.000000 +---- + +=== Truncate to day + +This query truncates the timestamp *"`1911-12-02 19:40:00`"* to the day level. + +[source,sql] +---- +select DATE_TRUNC('day', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp has been truncated to the same day, year, month, and day components. + +[source,sql] +---- + date_trunc +---------------------------- + 1911-12-02 00:00:00.000000 +---- + +=== Truncate to week + +This query truncates the timestamp *"`1911-12-02 19:40:00`"* to the week level. + +[source,sql] +---- +select DATE_TRUNC('week', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp has been truncated to the start of the week containing the date, which is Monday, November 27, 1911, at 00:00:00. + +[source,sql] +---- + date_trunc +---------------------------- + 1911-11-27 00:00:00.000000 +---- + +=== Truncate to quarter + +This query truncates the timestamp *"`1911-12-02 19:40:00`"* to the quarter level. + +[source,sql] +---- +select DATE_TRUNC('quarter', '1911-12-02 19:40:00'::timestamp); +---- + +The timestamp is truncated to the start of the quarter. The month and day are set to the first month and first day of the quarter, with time components reset to zero. 
+ +[source,sql] +---- + date_trunc +---------------------------- + 1911-10-01 00:00:00.000000 +---- + +=== Truncate to hour + +This query truncates the interval *"`15 hours 10 minutes`"* to the hour precision. + +[source,sql] +---- +select DATE_TRUNC('hour', '15 hour 10 minutes'::interval); +---- + +The minutes and seconds components are set to zero, resulting in an interval of exactly 15 hours. + +[source,sql] +---- + date_trunc +----------------- + 15:00:00.000000 +---- + +=== Truncate to quarter (interval) + +This query truncates the interval *"`16 years 4 months`"* to the quarter-year level. + +[source,sql] +---- +select DATE_TRUNC('quarter', '16 years 4 months'::interval); +---- + +The interval is truncated to the nearest quarter-year unit. The months components is adjusted to the start of the quarter. Since each quarter consists of 3 months, 4 months is truncated down to 3 months, resulting in: + +[source,sql] +---- + date_trunc +----------------- + 16 years 3 mons +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/extract.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/extract.adoc new file mode 100644 index 000000000..a4f517ac3 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/extract.adoc @@ -0,0 +1,94 @@ += EXTRACT +:description: The EXTRACT() function retrieves a specified part (field) from a given date/time or interval value. +:page-topic-type: reference + +The `EXTRACT()` function retrieves a specified part (field) from a given date/time or interval value. It is commonly used to obtain components such as year, month, day, hour, etc., from timestamps or dates. 
+ +== Syntax + +[source,sql] +---- +EXTRACT (field FROM source) +---- + +== Parameters + +* `field`: string or identifier specifying the part of the date / time to extract +* `source`: date / time value from which to extract the specifed field + +The table below shows the supported input and corresponding return types for the `EXTRACT()` function: + +[width="100%",cols="24%,55%,21%",options="header",] +|=== +|Input Type: `source` |Supported `field` values |Return Type +|`TIMESTAMP` |`YEAR`, `MONTH`, `DAY`, `HOUR`, `MINUTE`, `SECOND` |`DOUBLE PRECISION` +|`TIMESTAMPTZ` |`YEAR`, `MONTH`, `DAY`, `HOUR`, `MINUTE`, `SECOND` |`DOUBLE PRECISION` +|`DATE` |`YEAR`, `MONTH`, `DAY` |`INTEGER` +|=== + +[NOTE] +==== +The SECOND field returns a fractional value as DOUBLE PRECISION to include fractional seconds, not an integer type +==== + +== Examples + +=== EXTRACT() with timestamp - year + +The below example uses the `EXTRACT()` function to extract a given timestamp’s *YEAR*: + +[source,sql] +---- +SELECT EXTRACT(YEAR FROM TIMESTAMP '2025-12-31 13:30:15.123456'); +---- + +The final output will be as follows: + +[source,sql] +---- ++----------+ +| extract | ++----------+ +| 2025 | ++----------+ +---- + +=== EXTRACT() with timestamp - month + +Here we will use the `EXTRACT()` function to extract a given timestamp’s *MONTH:* + +[source,sql] +---- +SELECT EXTRACT(MONTH FROM TIMESTAMP '2025-12-31 13:30:15.123456'); +---- + +The final output will take the month’s part of a given timestamp: + +[source,sql] +---- ++----------+ +| extract | ++----------+ +| 12 | ++----------+ +---- + +=== EXTRACT() with timestamp - seconds (including fractional seconds) + +Here we will use the `EXTRACT()` function to extract a given timestamp’s *SECONDS*: + +[source,sql] +---- +SELECT EXTRACT(SECOND FROM TIMESTAMP '2025-12-31 13:30:15.123456'); +---- + +The final output will take the seconds’ part of a given timestamp: + +[source,sql] +---- ++----------+ +| extract | ++----------+ +| 15.123456| 
++----------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/format-timestamp.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/format-timestamp.adoc new file mode 100644 index 000000000..6a3e4f115 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/format-timestamp.adoc @@ -0,0 +1,87 @@ += FORMAT_TIMESTAMP +:description: The FORMAT_TIMESTAMP() function returns a given timestamp value in a specified format. +:page-topic-type: reference + +The `FORMAT_TIMESTAMP()` function returns a given timestamp value in a specified format. Its syntax is illustrated below: + +[source,sql] +---- +FORMAT_TIMESTAMP(timestamp, format_string) +---- + +This function requires two arguments, i.e., a *timestamp* string that represents the timestamp value that needs to be converted to a specified format and a *format_string* that specifies the format to be converted into. Its return type is a timestamp value with a timezone. + +== Basic `FORMAT_TIMESTAMP()` function + +The below example uses the `FORMAT_TIMESTAMP()` function to convert a given timestamp into a timestamp format as specified in the function arguments. + +[source,sql] +---- +SELECT FORMAT_TIMESTAMP('2022-05-30 5:30:04', 'YYYY-MM-DD HH:MI:SS'); +---- + +Details of the format specified are as follows: + +* `YYYY` is the four-digit year 2022 +* `MM` is the month: 05 +* `DD` is the day: 30 +* `HH` is the hour: 5 +* `MI` is the minute: 30 +* `SS` is the second: 04 + +[NOTE] +==== +The format specified in the string can be used in any combination. 
+==== +The final output will be as follows: + +[source,sql] +---- ++-----------------------------+ +| format_timestamp | ++-----------------------------+ +| 2022-05-30 05:30:04+05 | ++-----------------------------+ +---- + +== `FORMAT_TIMESTAMP()` function using multiple spaces + +The `FORMAT_TIMESTAMP()` when given multiple spaces in the input string, omits the spaces and only returns the correct timestamp value. Let’s see how it works using the following example: + +[source,sql] +---- +SELECT FORMAT_TIMESTAMP('2008 Dec','YYYY MON'); +---- + +It will return the following output: + +[source,sql] +---- ++-----------------------------+ +| format_timestamp | ++-----------------------------+ +| 2008-12-01 00:00:00+05 | ++-----------------------------+ +---- + +== `FORMAT_TIMESTAMP()` function if the input value of the year is less than 4 digits + +`FORMAT_TIMESTAMP()` will adjust the year to the nearest year value if the input argument has less than the required number of digits i.e., less than 4. To see how it works, look at the example below: + +[source,sql] +---- +SELECT FORMAT_TIMESTAMP('07 25 09 10:40', 'MM DD YY HH:MI'); +---- + +It will return the following output: + +[source,sql] +---- ++-----------------------------+ +| format_timestamp | ++-----------------------------+ +| 2009-07-25 10:40:00+06 | ++-----------------------------+ +---- + +In this example, the two-digit year `09` has been changed to the nearest four-digit year i.e., `2009`. Similarly, `70` will become `1970`, and `10` will become `2010,` etc. diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/index.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/index.adoc new file mode 100644 index 000000000..cddc32684 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/index.adoc @@ -0,0 +1,22 @@ += Overview +:description: Timestamp functions return a date-time value based on a specified timestamp/interval. 
+ +Timestamp functions return a date-time value based on a specified timestamp/interval. Redpanda SQL supports the following timestamp functions: + +[width="100%",cols="<38%,<62%",options="header",] +|=== +|*Functions* |*Description* +|xref:reference:sql/sql-functions/timestamp-functions/current-timestamp.adoc[CURRENT_TIMESTAMP()] |Returns the current date and time as a timestamp data type. +|xref:reference:sql/sql-functions/timestamp-functions/format-timestamp.adoc[FORMAT_TIMESTAMP()] |Modifies the current timestamp into a different format. +|xref:reference:sql/sql-functions/timestamp-functions/unix-seconds.adoc[UNIX_SECONDS()] |Converts a given timestamp to a UNIX timestamp in seconds. +|xref:reference:sql/sql-functions/timestamp-functions/unix-millis.adoc[UNIX_MILLIS()] |Converts a given timestamp to a UNIX timestamp in milliseconds. +|xref:reference:sql/sql-functions/timestamp-functions/unix-macros.adoc[UNIX_MICROS()] |Converts a given timestamp to a UNIX timestamp in microseconds. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-seconds.adoc[TIMESTAMP_SECONDS()] |Converts a UNIX timestamp in seconds to a timestamp. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-millis.adoc[TIMESTAMP_MILLIS()] |Converts a UNIX timestamp in milliseconds to a timestamp. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-micros.adoc[TIMESTAMP_MICROS()] |Converts a UNIX timestamp in microseconds to a timestamp. +|xref:reference:sql/sql-functions/timestamp-functions/timestamp-trunc.adoc[TIMESTAMP_TRUNC()] |Truncates a given timestamp to the nearest time part. Supported time parts are YEAR, MONTH, DAY, HOUR, MINUTE, and SECOND +|xref:reference:sql/sql-functions/timestamp-functions/extract.adoc[EXTRACT()] |Extracts some part of a specified timestamp or interval. +|xref:reference:sql/sql-functions/timestamp-functions/to-timestamp.adoc[TO_TIMESTAMP()] |Converts a string into a timestamp based on the provided format. 
+|xref:reference:sql/sql-functions/timestamp-functions/date-trunc.adoc[DATE_TRUNC()] |Truncates intervals or timestamps/time zones to a specified field. +|xref:reference:sql/sql-functions/timestamp-functions/to-char.adoc[TO_CHAR() from Timestamp] |Formats a timestamp into a string using a given format. +|=== diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-micros.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-micros.adoc new file mode 100644 index 000000000..7aaf0555f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-micros.adoc @@ -0,0 +1,93 @@ += TIMESTAMP_MICROS +:description: The TIMESTAMP_MICROS() function converts a given UNIX timestamp value in microseconds since 1970-01-01 00:00:00 UTC into a timestamp. +:page-topic-type: reference + +The `TIMESTAMP_MICROS()` function converts a given UNIX timestamp value in microseconds since 1970-01-01 00:00:00 UTC into a timestamp. Its syntax can be seen below: + +[source,sql] +---- +SELECT TIMESTAMP_MICROS(BIGINT) +---- + +Its input type is a `BIGINT` expression representing a UNIX timestamp in microseconds and the return data type is a timestamp. 
+ +== Examples + +=== Basic `TIMESTAMP_MICROS()` function + +The below example uses the `TIMESTAMP_MICROS()` function to convert a given UNIX timestamp in microseconds into a timestamp without a timezone: + +[source,sql] +---- +SELECT TIMESTAMP_MICROS(2280419000000000) AS timestamp_microsvalues; +---- + +The final output will be as follows: + +[source,sql] +---- ++-----------------------------+ +| timestamp_microsvalues | ++-----------------------------+ +| 2042-04-06 17:43:20 | ++-----------------------------+ +---- + +=== `TIMESTAMP_MICROS()` function using columns + +Let’s suppose we have a table named **timemicro_example** with the following UNIX time values in microseconds in the *unix_timestamp* column: + +[source,sql] +---- +CREATE TABLE timemicro_example ( + unix_timestamp long +); + +INSERT INTO timemicro_example VALUES +('1350417000000000'), +('2130215000000000'), +('1110115000000000'), +('2310112000000000'); +---- + +[source,sql] +---- +SELECT * FROM timemicro_example; +---- + +The above query will show the following table: + +[source,sql] +---- ++--------------------+ +| unix_timestamp | ++--------------------+ +| 1350417000000000 | +| 2130215000000000 | +| 1110115000000000 | +| 2310112000000000 | ++--------------------+ +---- + +We want to convert all UNIX timestamp values in microseconds to timestamp values. 
To do that, we have to run the following query: + +[source,sql] +---- +SELECT unix_timestamp, TIMESTAMP_MICROS(unix_timestamp) +AS timestamp_value +FROM timemicro_example; +---- + +The output displays all the entries in the table in UNIX timestamp format (in microseconds) in the *unix_timestamp* column and in the timestamp format in the column *timestamp_value* without timezone: + +[source,sql] +---- ++-------------------------+-----------------------+ +| unix_timestamp | timestamp_value | ++-------------------------+-----------------------+ +|1350417000000000 | 2012-10-16 19:50:00 | +|2130215000000000 | 2037-07-03 06:23:20 | +|1110115000000000 | 2005-03-06 13:16:40 | +|2310112000000000 | 2043-03-16 09:46:40 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-millis.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-millis.adoc new file mode 100644 index 000000000..cc9cd14fa --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-millis.adoc @@ -0,0 +1,90 @@ += TIMESTAMP_MILLIS +:description: The TIMESTAMP_MILLIS() function converts a given UNIX timestamp value in milliseconds since 1970-01-01 00:00:00 UTC into a timestamp. +:page-topic-type: reference + +The `TIMESTAMP_MILLIS()` function converts a given UNIX timestamp value in milliseconds since 1970-01-01 00:00:00 UTC into a timestamp. Its syntax can be seen below: + +[source,sql] +---- +SELECT TIMESTAMP_MILLIS(BIGINT) +---- + +Its input type is a `BIGINT` expression which represents a UNIX timestamp in milliseconds and the return data type is a timestamp. + +== Examples + +=== Basic `TIMESTAMP_MILLIS()` function + +The below example uses the `TIMESTAMP_MILLIS()` function to convert a given UNIX timestamp in milliseconds into a timestamp without a timezone. 
+ +[source,sql] +---- +SELECT TIMESTAMP_MILLIS(1671975000000) AS timestamp_millisvalues; +---- + +The final output will be as follows: + +[source,sql] +---- ++-----------------------------+ +| timestamp_millisvalues | ++-----------------------------+ +| 2022-12-25 13:30:00 | ++-----------------------------+ +---- + +=== `TIMESTAMP_MILLIS()` function using columns + +Let’s suppose we have a table named **unix_example** with the following UNIX time values in milliseconds in the *unix_timestamp* column: + +[source,sql] +---- +CREATE TABLE unix_example ( + unix_timestamp long +); + +INSERT INTO unix_example VALUES +('171472000000'), +('1671975000000'), +('153276000000'); +---- + +[source,sql] +---- +SELECT * FROM unix_example; +---- + +The above query will show the following table: + +[source,sql] +---- ++----------------+ +| unix_timestamp | ++----------------+ +| 171472000000 | +| 1671975000000 | +| 153276000000 | ++----------------+ +---- + +We want to convert all UNIX timestamp values in milliseconds to timestamp values. To do that, we have to run the following query: + +[source,sql] +---- +SELECT unix_timestamp, TIMESTAMP_MILLIS(unix_timestamp) +AS timestamp_value +FROM unix_example; +---- + +The output displays all the entries in the table in UNIX timestamp format (in milliseconds) in the **unix_timestamp** column and in the timestamp format in the column **timestamp_value** without timezone. 
+ +[source,sql] +---- ++-------------------------+-----------------------+ +| unix_timestamp | timestamp_value | ++-------------------------+-----------------------+ +|171472000000 | 1975-06-08 15:06:40 | +|1671975000000 | 2022-12-25 13:30:00 | +|153276000000 | 1974-11-10 00:40:00 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-seconds.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-seconds.adoc new file mode 100644 index 000000000..964f150a3 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-seconds.adoc @@ -0,0 +1,90 @@ += TIMESTAMP_SECONDS +:description: The TIMESTAMP_SECONDS() function converts a given UNIX timestamp value in seconds from 1970-01-01 00:00:00 UTC into a timestamp. +:page-topic-type: reference + +The `TIMESTAMP_SECONDS()` function converts a given UNIX timestamp value in seconds from 1970-01-01 00:00:00 UTC into a timestamp. Its syntax can be seen below: + +[source,sql] +---- +SELECT TIMESTAMP_SECONDS(Int64) +---- + +Its input type is an `int64` expression representing a UNIX timestamp in seconds, and the return data type is a timestamp. 
+ +== Examples + +=== Basic `TIMESTAMP_SECONDS()` function + +The below example uses the `TIMESTAMP_SECONDS()` function to convert a given UNIX timestamp in seconds into a timestamp: + +[source,sql] +---- +SELECT TIMESTAMP_SECONDS(1671975000) AS timestamp_secondsvalue; +---- + +The final output will be as follows: + +[source,sql] +---- ++-----------------------------+ +| timestamp_secondsvalue | ++-----------------------------+ +| 2022-12-25 13:30:00 | ++-----------------------------+ +---- + +=== `TIMESTAMP_SECONDS()` function using columns + +Let’s suppose we have a table named **unix_time** with the following UNIX time values in seconds: + +[source,sql] +---- +CREATE TABLE unix_time ( + unix_time int +); + +INSERT INTO unix_time VALUES +('982384720'), +('1671975000'), +('171472000'); +---- + +[source,sql] +---- +SELECT * FROM unix_time; +---- + +The above query will show the following table: + +[source,sql] +---- ++-------------+ +| unix_time | ++-------------+ +| 982384720 | +| 1671975000 | +| 171472000 | ++-------------+ +---- + +We want to convert all UNIX timestamp values in seconds to timestamp values. To do that, we have to run the following query: + +[source,sql] +---- +SELECT unix_time, TIMESTAMP_SECONDS(unix_time) +AS timestamp_value +FROM unix_time ; +---- + +The output displays all the entries in the table in UNIX timestamp format (in seconds) in the *unix_time* column, and in the timestamp format without timezone in the column *timestamp_value*. 
+ +[source,sql] +---- ++-------------------------+-----------------------+ +| unix_time | timestamp_value | ++-------------------------+-----------------------+ +| 982384720 | 2001-02-17 04:38:40 | +| 1671975000 | 2022-12-25 13:30:00 | +| 171472000 | 1975-06-08 15:06:40 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-trunc.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-trunc.adoc new file mode 100644 index 000000000..1c7af7427 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/timestamp-trunc.adoc @@ -0,0 +1,83 @@ += TIMESTAMP_TRUNC +:description: The TIMESTAMP_TRUNC() function rounds a timestamp to a specific day_time granularity, resulting in a truncated timestamp. +:page-topic-type: reference + +The `TIMESTAMP_TRUNC()` function rounds a timestamp to a specific `day_time` granularity, resulting in a truncated timestamp. + +== Syntax + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP 'YYYY-MM-DD hour:min:sec', day_time); +---- + +`day_time` can be replaced with various time values as follows: + +* `SECOND` +* `MINUTE` +* `HOUR` +* `DAY` +* `MONTH` +* `YEAR` + +== Examples + +=== `TIMESTAMP_TRUNC()` - hour + +The following example shows how to round the hour to the closest value: + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP '2017-09-18 14:43:39.02322', HOUR) ; +---- + +The final result will display the current date and time in your timezone: + +[source,sql] +---- ++-----------------------------+ +| f | ++-----------------------------+ +| 2017-09-18 14:00:00.00000 | ++-----------------------------+ +---- + +=== `TIMESTAMP_TRUNC()` - minute + +Here we will truncate the specified timestamp into the nearest value: + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP '2005-03-18 14:13:13', MINUTE) ; +---- + +The result will return the truncated timestamp as shown below: + +[source,sql] +---- 
++-----------------------------+ +| f | ++-----------------------------+ +| 2005-03-18 14:13:00.00000 | ++-----------------------------+ +---- + +=== Basic `TIMESTAMP_TRUNC()` function - year + +Run the following query to round the date to the closest value: + +[source,sql] +---- +SELECT TIMESTAMP_TRUNC(TIMESTAMP '2023-03-04', YEAR); +---- + +The function will truncate the year and return the following result: + +[source,sql] +---- ++-----------------------------+ +| f | ++-----------------------------+ +| 2023-01-01 00:00:00.00000 | ++-----------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/to-char.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-char.adoc new file mode 100644 index 000000000..1cebca54b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-char.adoc @@ -0,0 +1,163 @@ += TO_CHAR +:description: The TO_CHAR function formats various data types, including date/time, integer, float point and numeric into a formatted string. +:page-topic-type: reference + +The `TO_CHAR` function formats various data types, including `date/time`, `integer`, `float point` and `numeric` into a formatted string. 
+ +== Syntax + +The syntax for using the `TO_CHAR` function is as follows: + +.Timestamp +[source,sql] +---- +TO_CHAR(timestamp, format_string) +---- + +.Interval +[source,sql] +---- +TO_CHAR(interval, format_string) +---- + +== Arguments + +* `timestamp`: `TIMESTAMP` or `TIMESTAMP WITH TIMEZONE` value to be formatted +* `format`: format of the output string + +== Supported formats + +The string format supports the following template patterns (case insensitive): + +[width="100%",cols="49%,51%",options="header",] +|=== +|*Pattern* |*Description* +|`YYYY` |Year (1-9999) +|`MM` |Month number (01–12) +|`DD` |Day of month (01–31) +|`HH` |Hour of day (1–12) +|`HH12` |Hour of day (1–12) +|`HH24` |Hour of day (0–23) +|`MI` |Minute (0–59) +|`SS` |Second (0–59) +|`MS` |Millisecond (0–999) +|`US` |Microsecond (0–999999) +|`AM`, `am`, `PM` or `pm` |Meridiem indicator without periods +|`A.M.`, `a.m.`, `P.M.` or `p.m.` |Meridiem indicator with periods +|=== + +=== General restrictions + +* All text inside double quote `"\{text}"` will not be considered a pattern +* The quote character (`"`) will not appear in the result string +* Any text that is not a template pattern is simply copied verbatim i.e. preserved in the result string + +=== Interval overflow restrictions + +Interval overflow occurs when an operation involving interval values exceeds the maximum limits of the interval data type, resulting in an error or unexpected behavior. This can happen when adding, subtracting or multiplying interval values that lead to a representation that goes beyond the allowable range for any of its components i.e. years, months, days, hours, minutes and seconds. 
When executing the `TO_CHAR` function for intervals, it is important to be aware of the following overflow restrictions: + +[cols="<,^,^",options="header",] +|=== +|Conversion |Source Component |Target Component +|Days to Months |Days |Months +|Hours to Days |Hours |Days +|Seconds to Days |Seconds |Days +|=== + +All in all, for intervals the date overflow doesn’t apply (units smaller than an hour can only overflow into hours, but not into days and so on), any excess units will not carry over to the next larger unit. + +== Examples + +=== Intervals + +This query converts an interval and displays it in a specified string format: + +.Month_to_Year +[source,sql] +---- +SELECT TO_CHAR('25 months'::INTERVAL,'"YEAR:" YYYY "MONTH:" MM') AS FORMATTED_INTERVAL; +---- + +.Hour_to_Day +[source,sql] +---- +SELECT TO_CHAR('13 days' + '49 hours'::INTERVAL, '"Day:" DD "Hour:" HH') AS FORMATTED_INTERVAL; +---- + +.Second_to_Minute +[source,sql] +---- +SELECT TO_CHAR('65 seconds'::INTERVAL, '"MINUTE": MI "SECOND": SS') AS FORMATTED_INTERVAL; +---- +Here are the outputs for the queries presented above: + +.Month_to_Year +[source,sql] +---- + FORMATTED_INTERVAL +--------------------------------------- +YEAR: 0002 MONTH: 01 +---- + +.Hour_to_Day +[source,sql] +---- + FORMATTED_INTERVAL +--------------------------------------- +Day: 13 Hour: 01 +---- + +.Second_to_Minute +[source,sql] +---- + FORMATTED_INTERVAL +--------------------------------------- +MINUTE: 01 SECOND: 05 +---- + +=== Timestamps + +This query retrieves the current timestamp and displays it in a specified string format: + +.Timestamp +[source,sql] +---- +SELECT TO_CHAR(CURRENT_TIMESTAMP(), '"YEAR:" YYYY "MONTH:" MM "DAY:" DD') AS FORMATTED_TIMESTAMP; +---- + +.Timestamp_with_Microseconds +[source,sql] +---- +SELECT TO_CHAR(CURRENT_TIMESTAMP(), 'YYYY-MM-DD HH24:MI:SS.US') AS FORMATTED_TIMESTAMP; +---- + +.Timestamp_with_Meridiem +[source,sql] +---- +SELECT TO_CHAR(CURRENT_TIMESTAMP(), 'YYYY-MM-DD HH12:MI:SS a.m.') AS 
FORMATTED_TIMESTAMP; +---- +Here are the outputs for the queries presented above: + +.Timestamp +[source,sql] +---- + FORMATTED_TIMESTAMP +--------------------------------------- +YEAR:2025 MONTH:01 DAY:01 +---- + +.Timestamp_with_Microseconds +[source,sql] +---- + FORMATTED_TIMESTAMP +--------------------------------------- +2025-01-01 08:08:03.001200 +---- + +.Timestamp_with_Meridiem +[source,sql] +---- + FORMATTED_TIMESTAMP +--------------------------------------- +2025-01-01 08:08:03 p.m. +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/to-timestamp.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-timestamp.adoc new file mode 100644 index 000000000..30a9a3864 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/to-timestamp.adoc @@ -0,0 +1,197 @@ += TO_TIMESTAMP +:description: The TO_TIMESTAMP() function converts a string into a timestamp based on the provided format. +:page-topic-type: reference + +The `TO_TIMESTAMP()` function converts a string into a timestamp based on the provided format. It returns a `TIMESTAMP WITH TIME ZONE` type. + +== Syntax + +The syntax for using the `TO_TIMESTAMP()` function is as follows: + +[source,sql] +---- +SELECT TO_TIMESTAMP('source', 'format'); +---- + +Let’s analyze the above syntax: + +* `source`: The date/time value to be converted. The value type is `TIMESTAMP` (`YYYY-MM-DD HH:MM:SS`). +* `format`: The format of the input string. + +== Format + +Format string supports following template patterns (can be lowercase): + +[cols="1,2,3",options="header"] +|=== +|Pattern |Description |Detail + +|`YYYY` +|Year (1–9999) +a|- The lowest possible value is 1 AD. + + 0001 is 1. + + 1 is 1. + +|`MM` +|Month number (1–12) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`DD` +|Day of month (1–31) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`HH` +|Hour of day (1–12) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. 
+ +|`HH12` +|Hour of day (1–12) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`HH24` +|Hour of day (0–23) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`MI` +|Minute (0–59) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`SS` +|Second (0–59) +a|- Up to 2 digits. + + 01 is 1. + + 1 is 1. + +|`MS` +|Millisecond (0–999) +a|- Up to 3 digits. + + 001 is 1 millisecond. + + 1 is 100 milliseconds. + +|`US` +|Microsecond (0–999999) +a|- Up to 6 digits. + + 000001 is 1 microsecond. + + 1 is 100000 microseconds. + +|`AM`, `am`, `PM` or `pm` +|Meridiem indicator +|Without periods. + +|`A.M.`, `a.m.`, `P.M.` or `p.m.` +|Meridiem indicator +|With periods. +|=== + +== Examples + +=== Timestamp into YYYY-MM-DD HH24:MI + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with the format `YYYY-MM-DD HH24:MI`. + +[source,sql] +---- +select TO_TIMESTAMP('2020-03-04 14:30', 'YYYY-MM-DD HH24:MI'); +---- + +The final output will be a timestamp with a timezone. + +[source,sql] +---- + to_timestamp +------------------------------- + 2020-03-04 14:30:00.000000+00 +---- + +=== Timestamp into MM-DD HH12:MI + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with the format `MM-DD HH12:MI`. + +[source,sql] +---- +select TO_TIMESTAMP('3-04 02:30', 'MM-DD HH12:MI'); +---- + +The final output will be a timestamp with a timezone. + +[source,sql] +---- + to_timestamp +---------------------------- + 1-03-04 02:30:00.000000+00 +---- + +=== Timestamp into YYYY-MM HH12:MI(AM/PM) + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with the format `YYYY-MM HH12:MI` with meridiem indicator (AM/PM). + +*Request 1* + +[source,sql] +---- +select TO_TIMESTAMP('2020-02 12:30AM', 'YYYY-MM HH12:MIPM'); +---- + +*Request 2* + +[source,sql] +---- +select TO_TIMESTAMP('2020-02 12:30AM', 'YYYY-MM HH:MIAM'); +---- + +The final output of both requests will have the same result. 
It changes the time into a 12-hour format, resulting in *12:30* being adjusted to *00:30*. + +[source,sql] +---- + to_timestamp +------------------------------- + 2020-02-01 00:30:00.000000+00 +---- + +=== Timestamp into YYYY-MM-DD HH24:MI:SS.MS.US + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with `YYYY-MM-DD HH24:MI:SS.MS.US` format. + +[source,sql] +---- +select TO_TIMESTAMP('1960-01-31 15:12:02.020.001230', 'YYYY-MM-DD HH24:MI:SS.MS.US'); +---- + +The final output will be a timestamp with milliseconds and microseconds. + +[source,sql] +---- + to_timestamp +------------------------------- + 1960-01-31 15:12:02.021230+00 +---- + +=== Timestamp into YYYY-MM-DD HH24:MI:SS.MS + +The `TO_TIMESTAMP()` function converts the provided string into a timestamp with `YYYY-MM-DD HH24:MI:SS.MS` format. + +[source,sql] +---- +select TO_TIMESTAMP('1960-01-31 15:12:02.02', 'YYYY-MM-DD HH24:MI:SS.MS'); +---- + +The final output will be a timestamp with milliseconds. + +[source,sql] +---- + to_timestamp +------------------------------- + 1960-01-31 15:12:02.020000+00 +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-macros.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-macros.adoc new file mode 100644 index 000000000..9c3763051 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-macros.adoc @@ -0,0 +1,90 @@ += UNIX_MICROS +:description: The UNIX_MICROS() function returns a given timestamp into a UNIX timestamp in microseconds, from 1970-01-01 00:00:00-00 (can be negative). +:page-topic-type: reference + +The `UNIX_MICROS()` function returns a given timestamp into a UNIX timestamp in microseconds, from 1970-01-01 00:00:00-00 (can be negative). Its syntax is illustrated below: + +[source,sql] +---- +SELECT UNIX_MICRO(TIMESTAMP) +---- + +Its input type is a TIMESTAMP expression, and the return data type is `BIGINT` representing time in microseconds. 
+ +== Examples + +=== Basic `UNIX_MICROS()` function + +The below example uses the `UNIX_MICROS()` function to convert a given timestamp into a UNIX timestamp in microseconds: + +[source,sql] +---- +SELECT UNIX_MICRO(TIMESTAMP "2022-12-25 13:30:00+00") AS unix_microsvalues; +---- + +The final output will be as follows: + +[source,sql] +---- ++-----------------------------+ +| unix_microsvalues | ++-----------------------------+ +| 1671975000000000.000000 | ++-----------------------------+ +---- + +=== `UNIX_MICROS()` function using columns + +Let’s suppose we have a table named *time_example* with the following timestamp values: + +[source,sql] +---- +CREATE TABLE time_example ( + time_stamp timestamp +); + +INSERT INTO time_example VALUES +('2022-12-25 13:30:00'), +('2021-10-02 06:30:00'), +('2020-09-25 07:25:00'); +---- + +[source,sql] +---- +SELECT * FROM time_example; +---- + +The above query will show the following table: + +[source,sql] +---- ++-------------------------+ +| time_example | ++-------------------------+ +| 2022-12-25 13:30:00 | +| 2021-10-02 06:30:00 | +| 2020-09-25 07:25:00 | ++-------------------------+ +---- + +We want to convert all timestamp values into UNIX timestamp values in microseconds. To do that, we have to run the following query: + +[source,sql] +---- +SELECT time_stamp, UNIX_MICROS(time_stamp) +AS time_micros +FROM time_example; +---- + +The output displays all the timestamp entries in the *time_stamp* column and the converted UNIX timestamps in microseconds in the column *time_micros*. 
+ [source,sql] +---- ++-------------------------+--------------------------+ +| time_stamp | time_micros | ++-------------------------+--------------------------+ +| 2022-12-25 13:30:00 | 1671975000000000.000000 | +| 2021-10-02 06:30:00 | 1633156200000000.000000 | +| 2020-09-25 07:25:00 | 1601018700000000.000000 | ++-------------------------+--------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-micros.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-micros.adoc new file mode 100644 index 000000000..1d092cf4f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-micros.adoc @@ -0,0 +1,90 @@ += UNIX_MICROS +:description: The UNIX_MICROS() function converts a given timestamp into a UNIX timestamp in microseconds, from 1970-01-01 00:00:00-00 (can be negative). +:page-topic-type: reference + +The `UNIX_MICROS()` function converts a given timestamp into a UNIX timestamp in microseconds, from 1970-01-01 00:00:00-00 (can be negative). Its syntax is illustrated below: + +[source,sql] +---- +SELECT UNIX_MICROS(TIMESTAMP) +---- + +Its input type is a TIMESTAMP expression, and the return data type is `BIGINT` representing time in microseconds. 
+ + == Examples + + === Basic `UNIX_MICROS()` function + + The below example uses the `UNIX_MICROS()` function to convert a given timestamp into a UNIX timestamp in microseconds: + + [source,sql] + ---- + SELECT UNIX_MICROS(TIMESTAMP "2022-12-25 13:30:00+00") AS unix_microsvalues; + ---- + + The final output will be as follows: + + [source,sql] + ---- + +-----------------------------+ + | unix_microsvalues | + +-----------------------------+ + | 1671975000000000.000000 | + +-----------------------------+ + ---- + + === `UNIX_MICROS()` function using columns + + Let’s suppose we have a table named *time_example* with the following timestamp values: + + [source,sql] + ---- + CREATE TABLE time_example ( + time_stamp timestamp + ); + + INSERT INTO time_example VALUES + ('2022-12-25 13:30:00'), + ('2021-10-02 06:30:00'), + ('2020-09-25 07:25:00'); + ---- + + [source,sql] + ---- + SELECT * FROM time_example; + ---- + + The above query will show the following table: + + [source,sql] + ---- + +-------------------------+ + | time_stamp | + +-------------------------+ + | 2022-12-25 13:30:00 | + | 2021-10-02 06:30:00 | + | 2020-09-25 07:25:00 | + +-------------------------+ + ---- + + We want to convert all timestamp values into UNIX timestamp values in microseconds. To do that, we have to run the following query: + + [source,sql] + ---- + SELECT time_stamp, UNIX_MICROS(time_stamp) + AS time_micros + FROM time_example; + ---- + + The output displays all the timestamp entries in the *time_stamp* column and the converted UNIX timestamps in microseconds in the column *time_micros*. 
+ +[source,sql] +---- ++-------------------------+--------------------------+ +| time_stamp | time_micros | ++-------------------------+--------------------------+ +| 2022-12-25 13:30:00 | 1671975000000000.000000 | +| 2021-10-02 06:30:00 | 1633156200000000.000000 | +| 2020-09-25 07:25:00 | 1601018700000000.000000 | ++-------------------------+--------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-millis.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-millis.adoc new file mode 100644 index 000000000..7d62fb652 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-millis.adoc @@ -0,0 +1,88 @@ += UNIX_MILLIS +:description: The UNIX_MILLIS() function returns a given timestamp to a UNIX timestamp in milliseconds from 1970-01-01 00:00:00-00 (can be negative). +:page-topic-type: reference + +The `UNIX_MILLIS()` function returns a given timestamp to a UNIX timestamp in milliseconds from 1970-01-01 00:00:00-00 (can be negative). Its syntax is illustrated below: + +[source,sql] +---- +SELECT UNIX_MILLIS(TIMESTAMP) +---- + +Its input type is a TIMESTAMP expression, and the return data type is `BIGINT` representing time in milliseconds. 
+ +== Examples + +=== Basic `UNIX_MILLIS()` function + +The below example uses the `UNIX_MILLIS()` function to convert a given timestamp into a UNIX timestamp in milliseconds: + +[source,sql] +---- +SELECT UNIX_MILLIS(TIMESTAMP "1996-5-02 7:15:00+00") AS unix_millisvalues; +---- + +The final output will be as follows: + +[source,sql] +---- ++-----------------------------+ +| unix_millisvalues | ++-----------------------------+ +| 831021300000.000000 | ++-----------------------------+ +---- + +=== `UNIX_MILLIS()` function using columns + +Let’s suppose we have a table named **time_example** with the following timestamp values in the *time_stamp* column: + +[source,sql] +---- +CREATE TABLE time_example ( + time_stamp timestamp +); + +INSERT INTO time_example VALUES +('2004-07-23 11:30:00+00'), +('2011-02-12 04:45:00+00'), +('1975-08-03 07:50:00+00'); +---- + +[source,sql] +---- +SELECT * FROM time_example; +---- + +The above query will show the following table: + +[source,sql] +---- ++-------------------------+ +| time_example | ++-------------------------+ +| 2004-07-23 11:30:00 | +| 2011-02-12 04:45:00 | +| 1975-08-03 07:50:00 | ++-------------------------+ +---- + +We want to convert all timestamp values into UNIX timestamp values in milliseconds. To do that, we have to run the following query: + +[source,sql] +---- +SELECT time_stamp, UNIX_MILLIS(time_stamp) AS time_millis FROM time_example; +---- + +The output displays all the timestamp entries of the table in the **time_stamp** column and the converted UNIX milliseconds timestamp entries in the column *time_millis*. 
+ +[source,sql] +---- ++-------------------------+-----------------------+ +| time_stamp | time_millis | ++-------------------------+-----------------------+ +| 2004-07-23 11:30:00 | 1090582200000.000000 | +| 2011-02-12 04:45:00 | 1297485900000.000000 | +| 1975-08-03 07:50:00 | 176284200000.000000 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-seconds.adoc b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-seconds.adoc new file mode 100644 index 000000000..838217463 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/timestamp-functions/unix-seconds.adoc @@ -0,0 +1,93 @@ += UNIX_SECONDS +:description: The UNIX_SECONDS() function returns a given timestamp to a UNIX timestamp in seconds, from 1970-01-01 00:00:00-00. +:page-topic-type: reference + +The `UNIX_SECONDS()` function returns a given timestamp to a UNIX timestamp in seconds, from 1970-01-01 00:00:00-00. Its syntax is illustrated below: + +[source,sql] +---- +SELECT UNIX_SECONDS(TIMESTAMP) +---- + +Its input type is a TIMESTAMP expression, and the return data type is `BIGINT` representing time in seconds. 
+ +== Examples + +=== Basic `UNIX_SECONDS()` function + +The below example uses the `UNIX_SECONDS()` function to convert a given timestamp into a UNIX timestamp in seconds: + +[source,sql] +---- +SELECT UNIX_SECONDS(TIMESTAMP "2008-12-25 15:30:00+00") AS unix_secondsvalues; +---- + +The final output will be as follows: + +[source,sql] +---- ++-----------------------------+ +| unix_secondsvalues | ++-----------------------------+ +| 1230219000.000000 | ++-----------------------------+ +---- + +=== `UNIX_SECONDS()` function using columns + +Let’s suppose we have a table named **time_example** with the following timestamp values in the *time_stampvalues* column: + +[source,sql] +---- +CREATE TABLE time_example ( + time_stampvalues timestamp +); + +INSERT INTO time_example VALUES +('2022-12-25 13:30:00'), +('2020-09-25 07:25:00'), +('2008-12-25 15:30:00'), +('2021-10-02 06:30:00'); +---- + +[source,sql] +---- +SELECT * FROM time_example; +---- + +The above query will return the following table: + +[source,sql] +---- ++-------------------------+ +| time_stampvalues | ++-------------------------+ +| 2022-12-25 13:30:00 | +| 2020-09-25 07:25:00 | +| 2008-12-25 15:30:00 | +| 2021-10-02 06:30:00 | ++-------------------------+ +---- + +. We want to convert all timestamp values into UNIX timestamp values in seconds. To do that, we have to run the following query: ++ +[source,sql] +---- +SELECT time_stampvalues, UNIX_SECONDS(time_stampvalues) +AS time_secondsvalues +FROM time_example; +---- + +. The output displays all the timestamp entries of the table in the *time_stampvalues* column and the converted UNIX seconds timestamp entries in the column *time_secondsvalues*. 
++ +[source,sql] +---- ++-------------------------+-----------------------+ +| time_stampvalues | time_secondsvalues | ++-------------------------+-----------------------+ +| 2022-12-25 13:30:00 | 1671975000.000000 | +| 2020-09-25 07:25:00 | 1601018700.000000 | +| 2008-12-25 15:30:00 | 1230219000.000000 | +| 2021-10-02 06:30:00 | 1633156200.000000 | ++-------------------------+-----------------------+ +---- diff --git a/modules/reference/pages/sql/sql-functions/trigonometric-functions/index.adoc b/modules/reference/pages/sql/sql-functions/trigonometric-functions/index.adoc new file mode 100644 index 000000000..b3f6c8e61 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/trigonometric-functions/index.adoc @@ -0,0 +1,25 @@ += Trigonometric Functions +:description: These trigonometric functions in Redpanda SQL take arguments and return values of type double precision and real. + +These trigonometric functions in Redpanda SQL take arguments and return values of type `double precision` and `real`. + +[width="100%",cols="6%,32%,26%,36%",options="header",] +|=== +|*Function* |*Description* |*Syntax* |*Example* +|`acos` |It calculates the inverse cosine of a given argument, where the output is expressed in radians. |`acos(argument)` |`select acos(1);` It will return: `0` +|`acosd` |It calculates the inverse cosine of a given argument, where the output is expressed in degrees. |`acosd(argument)` |`select acosd(0.5);` It will return: `60` +|`asin` |It calculates the inverse sine of a given argument, where the output is expressed in radians. |`asin(argument)` |`select asin(1);` It will return: `1.5707963267948966` +|`asind` |It calculates the inverse sine of a given argument, where the output is expressed in degrees. |`asind(argument)` |`select asind(0.5);` It will return: `30` +|`atan` |It calculates the inverse tangent of a given argument, where the output is expressed in radians. 
|`atan(argument)` |`select atan(1);` It will return: `0.7853981633974483` +|`atand` |It calculates the inverse tangent of a given argument, where the output is expressed in degrees. |`atand(argument)` |`select atand(1);` It will return: `44.99990469434657` +|`atan2` |It calculates the inverse tangent of y/x, where the output is expressed in radians. |`atan2(y_value, x_value)` `y_value` & `x_value` are in double precision type. |`select atan2(1, 0);` It will return: `1.5707963267948966` +|`atan2d` |It calculates the inverse tangent of y/x, where the output is expressed in degrees. |`atan2d(y_value, x_value)` `y_value` & `x_value` are in double precision type. |`select atan2d(1, 0);` It will return: `90` +|`cos` |It calculates the cosine of a given argument, where the argument is in radians. |`cos(argument)` |`select cos(0);` It will return: `1` +|`cosd` |It calculates the cosine of a given argument, where the argument is in degrees. |`cosd(argument)` |`select cosd(60);` It will return: `0.5000000000000001` +|`cot` |It calculates the cotangent of a given argument, where the argument is in radians. |`cot(argument)` |`select cot(0.5);` It will return: `1.8304877` +|`cotd` |It calculates the cotangent of a given argument, where the argument is in degrees. |`cotd(argument)` |`select cotd(45);` It will return: `1.0000000000000002` +|`sin` |It calculates the sine of a given argument, where the argument is in radians. |`sin(argument)` |`select sin(1);` It will return: `0.8414709848078965` +|`sind` |It calculates the sine of a given argument, where the argument is in degrees. |`sind(argument)` |`select sind(30);` It will return: `0.49999999999999994` +|`tan` |It calculates the tangent of a given argument, where the argument is in radians. |`tan(argument)` |`select tan(1);` It will return: `1.5574077246549023` +|`tand` |It calculates the tangent of a given argument, where the argument is in degrees. 
|`tand(argument)` |`select tand(45);` It will return: `0.9999999999999999` +|=== diff --git a/modules/reference/pages/sql/sql-functions/window-functions/avg.adoc b/modules/reference/pages/sql/sql-functions/window-functions/avg.adoc new file mode 100644 index 000000000..e9f65257b --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/avg.adoc @@ -0,0 +1,140 @@ += AVG() +:description: The AVG() window function calculates the average (arithmetic mean) of a set of numeric values within a window. +:page-topic-type: reference + +The `AVG()` window function calculates the average (arithmetic mean) of a set of numeric values within a window. This function allows you to compute averages over a set of rows that are related to the current row, such as rows within a partition of ordered set. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +AVG(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + [ROWS | RANGE frame_specification] +) +---- + +== Parameters + +* `expression`: column or expression that the function operates on (must be of numeric type) +* `ROWS or RANGE`: (optional) frame specification to control which rows are included in the calculation relative to the current row + +== Example + +For the needs of this section, we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating int +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 5), + ('CHRISTMAS MOONSHINE', 150, 7), + ('DANGEROUS UPTOWN', 121, 4), + ('KILL BROTHERHOOD', 54, 3), + ('HALLOWEEN NUTS', 47, 5), + ('HOURS RAGE', 122, 7), + ('PIANIST OUTFIELD', 136, 7), + ('PICKUP DRIVING', 77, 3), + ('INDEPENDENCE HOTEL', 157, 7), + ('PRIVATE DROP', 106, 4), + ('SAINTS BRIDE', 125, 3), + ('FOREVER CANDIDATE', 131, 7), + ('MILLION ACE', 142, 5), + ('SLEEPY JAPANESE', 137, 4), + ('WRATH MILE', 176, 7), + ('YOUTH KICK', 179, 7), + ('CLOCKWORK PARADISE', 143, 5); +---- + +=== Rolling average by rating + +The query below uses the `AVG()` function to calculate the rolling average of `length` as rows are ordered by `rating`: + +[source,sql] +---- +SELECT + rating, + length, + AVG(length) OVER (ORDER BY rating) AS RollingAverageLength +FROM film +WHERE length IS NOT NULL +ORDER BY rating; +---- + +By executing the query above, we will get the following output: + +[source,sql] +---- + rating | length | rollingaveragelength +--------+--------+---------------------- + 3 | 77 | 85.33333333333333 + 3 | 125 | 85.33333333333333 + 3 | 54 | 85.33333333333333 + 4 | 121 | 103.33333333333333 + 4 | 106 | 103.33333333333333 + 4 | 137 | 103.33333333333333 + 5 | 83 | 103.5 + 5 | 142 | 103.5 + 5 | 47 | 103.5 + 5 | 143 | 103.5 + 7 | 157 | 122.70588235294117 + 7 | 179 | 122.70588235294117 + 7 | 176 | 122.70588235294117 + 7 | 131 | 122.70588235294117 + 7 | 136 | 122.70588235294117 + 7 | 122 | 122.70588235294117 + 7 | 150 | 122.70588235294117 +(17 rows) +---- + +=== Time series: rolling average length over last 3 ratings + +In this example, we will demonstrate a time series-style rolling average using a window frame of the current row and the two preceding rows, ordered by rating. 
This simulates a moving average over a sliding window of 3 rows: + +[source,sql] +---- +SELECT + rating, + length, + AVG(length) OVER ( + ORDER BY rating + ROWS BETWEEN 2 PRECEDING AND CURRENT ROW + ) AS rolling_avg_length_3 +FROM film +WHERE length IS NOT NULL +ORDER BY rating; +---- + +The query above calculates the average length over the current rating and the two previous ratings (based on ordering by rating) smoothing the fluctuations by averaging over a fixed-size window: + +[source,sql] +---- + rating | length | rolling_avg_length_3 +--------+--------+---------------------- + 3 | 77 | 65.5 + 3 | 125 | 85.33333333333333 + 3 | 54 | 54 + 4 | 121 | 107.66666666666667 + 4 | 106 | 117.33333333333333 + 4 | 137 | 121.33333333333333 + 5 | 83 | 91 + 5 | 142 | 90.66666666666667 + 5 | 47 | 109 + 5 | 143 | 128.66666666666666 + 7 | 157 | 127.33333333333333 + 7 | 179 | 159.33333333333334 + 7 | 176 | 170.66666666666666 + 7 | 131 | 162 + 7 | 136 | 147.66666666666666 + 7 | 122 | 129.66666666666666 + 7 | 150 | 136 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/bool-and.adoc b/modules/reference/pages/sql/sql-functions/window-functions/bool-and.adoc new file mode 100644 index 000000000..b6b33933f --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/bool-and.adoc @@ -0,0 +1,92 @@ += BOOL_AND() +:description: The BOOL_AND() window function evaluates whether all values within a specified window of rows are TRUE. +:page-topic-type: reference + +The `BOOL_AND()` window function evaluates whether all values within a specified window of rows are `TRUE`. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +BOOL_AND (expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +== Parameters + +* `expression`: column or expression that the function operates on. 
It should evaluate to a boolean value (`TRUE` or `FALSE`) + +== Example + +For the needs of this section we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `BOOL_AND()` function to evaluate if all films in each rating category have a length greater than 100: + +[source,sql] +---- +SELECT + title, + length, + rating, + BOOL_AND(length > 100) OVER (PARTITION BY rating) as ALLlongFilmsByRating +FROM film +ORDER BY rating; +---- + +By running the above code, we will get the following output: + +[source,sql] +---- + title | length | rating | alllongfilmsbyrating +---------------------+--------+--------+---------------------- + KILL BROTHERHOOD | 54 | G | f + PICKUP DRIVING | 77 | G | f + SAINTS BRIDE | 125 | G | f + CHRISTMAS MOONSHINE | 150 | NC-17 | t + HOURS RAGE | 122 | NC-17 | t + PIANIST OUTFIELD | 136 | NC-17 | t + INDEPENDENCE HOTEL | 157 | NC-17 | t + FOREVER CANDIDATE | 131 | NC-17 | t + WRATH MILE | 176 | NC-17 | t + 
YOUTH KICK | 179 | NC-17 | t + DANGEROUS UPTOWN | 121 | PG | t + PRIVATE DROP | 106 | PG | t + SLEEPY JAPANESE | 137 | PG | t + ATTRACTION NEWTON | 83 | PG-13 | f + HALLOWEEN NUTS | 47 | PG-13 | f + MILLION ACE | 142 | PG-13 | f + CLOCKWORK PARADISE | 143 | PG-13 | f +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/bool-or.adoc b/modules/reference/pages/sql/sql-functions/window-functions/bool-or.adoc new file mode 100644 index 000000000..5dcb75a28 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/bool-or.adoc @@ -0,0 +1,92 @@ += BOOL_OR() +:description: The BOOL_OR() window function evaluates whether at least one value within a specified window of rows is TRUE. +:page-topic-type: reference + +The `BOOL_OR()` window function evaluates whether at least one value within a specified window of rows is `TRUE`. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +BOOL_OR (expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +== Parameters + +* `expression`: column or expression that the function operates on. It should evaluate to a boolean value (`TRUE` or `FALSE`) + +== Example + +For the needs of this section, we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `BOOL_OR()` function to evaluate whether at least one film in each rating category have a length greater than 150: + +[source,sql] +---- +SELECT + title, + length, + rating, + BOOL_OR(length > 150) OVER (PARTITION BY rating) as ALLleastOneLongFilmsByRating +FROM film +ORDER BY rating; +---- + +By executing the above query, we will get the following output: + +[source,sql] +---- + title | length | rating | allleastonelongfilmsbyrating +---------------------+--------+--------+------------------------------ + KILL BROTHERHOOD | 54 | G | f + PICKUP DRIVING | 77 | G | f + SAINTS BRIDE | 125 | G | f + CHRISTMAS MOONSHINE | 150 | NC-17 | t + HOURS RAGE | 122 | NC-17 | t + PIANIST OUTFIELD | 136 | NC-17 | t + INDEPENDENCE HOTEL | 157 | NC-17 | t + FOREVER CANDIDATE | 131 | NC-17 | t + WRATH MILE | 176 | NC-17 | t + YOUTH KICK | 179 | NC-17 | t + DANGEROUS UPTOWN | 121 | PG | f + PRIVATE DROP | 106 | PG | f + SLEEPY JAPANESE | 137 | PG | f + ATTRACTION NEWTON | 83 | PG-13 | f + HALLOWEEN NUTS | 47 | PG-13 | f + MILLION ACE | 142 | PG-13 | f + CLOCKWORK PARADISE | 143 | PG-13 | f +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/count.adoc 
b/modules/reference/pages/sql/sql-functions/window-functions/count.adoc new file mode 100644 index 000000000..c187b3c83 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/count.adoc @@ -0,0 +1,156 @@ += COUNT() +:description: The COUNT() window function allows you to retrieve the number of records that meet specific criteria. +:page-topic-type: reference + +The `COUNT()` window function allows you to retrieve the number of records that meet specific criteria. When using it with the `RANGE` clause, it allows you to perform counts within a defined range based on the values of the current row. This function can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +There are two available variants of that function: + +* `COUNT(*)`: counts all rows in the target table, regardless of whether they contain NULL values or not +* `COUNT(expression)`: counts the number of non-NULL values in a specific column or expression + +The syntax for this function is as follows: + +[source,sql] +---- +COUNT(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + [ROWS | RANGE BETWEEN start_value AND end_value] +) +---- + +The `COUNT()` window function always returns `BIGINT` as an output, which represents the total number of rows in a table irrespective of the input types. 
+ + == Parameters + + * `expression`: input’s column or expression that the function operates on + * `PARTITION BY`: optional clause, which divides the result set into partitions to which the function is applied + * `ROWS | RANGE BETWEEN`: range-based window frame relative to the current row + + == Examples + + For the needs of this section, we will create a `winsales` table that stores the details of some sales transactions: + + [source,sql] + ---- + CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); + INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); + ---- + + === COUNT(*) + + In this example, we will focus on executing the variant of this function that counts all rows in the target table: + + [source,sql] + ---- + SELECT salesid, qty, + COUNT(*) OVER (ORDER BY salesid rows unbounded preceding) AS count + FROM winsales + ORDER BY salesid; + ---- + + The output of the code above displays the sales ID, quantity and the count of all rows from the start of the data window: + + [source,sql] + ---- + salesid | qty | count + ---------+-----+------- + 10001 | 10 | 1 + 10005 | 30 | 2 + 10006 | 10 | 3 + 20001 | 20 | 4 + 20002 | 20 | 5 + 30001 | 10 | 6 + 30003 | 15 | 7 + 30004 | 20 | 8 + 30007 | 30 | 9 + 40001 | 40 | 10 + 40005 | 10 | 11 + (11 rows) + ---- + + === Count(expression) + + In this example, we will focus on executing the variant of this function that counts the number of non-NULL values in a specific expression: + + [source,sql] + ---- + SELECT salesid, qty, qty_shipped, + COUNT(qty_shipped) OVER (ORDER BY 
salesid rows unbounded preceding) AS count +FROM winsales +ORDER BY salesid; +---- + +Here is the output for the query presented above: + +[source,sql] +---- + salesid | qty | qty_shipped | count +---------+-----+-------------+------- + 10001 | 10 | 10 | 1 + 10005 | 30 | | 1 + 10006 | 10 | | 1 + 20001 | 20 | 20 | 2 + 20002 | 20 | 20 | 3 + 30001 | 10 | 10 | 4 + 30003 | 15 | | 4 + 30004 | 20 | | 4 + 30007 | 30 | | 4 + 40001 | 40 | | 4 + 40005 | 10 | 10 | 5 +(11 rows) +---- + +=== Time series: COUNT(*) with RANGE for last 90 days + +In this example, we will demonstrate counting the number of sales within a 90-day window prior to each sale, based on `dateid`: + +[source,sql] +---- +SELECT salesid, dateid, qty, + COUNT(*) OVER ( + ORDER BY dateid + RANGE BETWEEN INTERVAL '90 days' PRECEDING AND CURRENT ROW + ) AS sales_count_90d +FROM winsales +ORDER BY dateid; +---- + +This query above counts the number of sales transactions within a 90-day window before each `dateid`, including the current sale: + +[source,sql] +---- + salesid | dateid | qty | sales_count_90d +---------+------------+-----+----------------- + 30001 | 2003-08-02 | 10 | 1 + 10001 | 2003-12-24 | 10 | 2 + 10005 | 2003-12-24 | 30 | 2 + 40001 | 2004-01-09 | 40 | 3 + 10006 | 2004-01-18 | 10 | 4 + 20001 | 2004-02-12 | 20 | 6 + 40005 | 2004-02-12 | 10 | 6 + 20002 | 2004-02-16 | 20 | 7 + 30003 | 2004-04-18 | 15 | 6 + 30004 | 2004-04-18 | 20 | 6 + 30007 | 2004-09-07 | 30 | 1 +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/cume-dist.adoc b/modules/reference/pages/sql/sql-functions/window-functions/cume-dist.adoc new file mode 100644 index 000000000..469e44dc3 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/cume-dist.adoc @@ -0,0 +1,90 @@ += CUME_DIST() +:description: The CUME_DIST() function is a window function used to calculate the cumulative distribution of a value within a set of values. 
+:page-topic-type: reference + +The `CUME_DIST()` function is a window function used to calculate the cumulative distribution of a value within a set of values. This function returns a value between 0 and 1, representing a relative position of a row within a partition or result set. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +CUME_DIST() OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... +---- + +== Parameters + +* (): this function takes no arguments but parentheses is required + +== Example + +For the needs of this section we will use a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], containing only the `title`, `length` and `rating` columns. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); + +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `CUME_DIST()` function to calculate the cumulative distribution of film lengths: + +[source,sql] +---- +SELECT + title, + length, + CUME_DIST() OVER (ORDER BY length) AS cume_dist +FROM film; +---- + +When executing the code above, we will get the following output: + +[source,sql] +---- + title | length | cume_dist 
+---------------------+--------+---------------------- + HALLOWEEN NUTS | 47 | 0.058823529411764705 + KILL BROTHERHOOD | 54 | 0.11764705882352941 + PICKUP DRIVING | 77 | 0.17647058823529413 + ATTRACTION NEWTON | 83 | 0.23529411764705882 + PRIVATE DROP | 106 | 0.29411764705882354 + DANGEROUS UPTOWN | 121 | 0.35294117647058826 + HOURS RAGE | 122 | 0.4117647058823529 + SAINTS BRIDE | 125 | 0.47058823529411764 + FOREVER CANDIDATE | 131 | 0.5294117647058824 + PIANIST OUTFIELD | 136 | 0.5882352941176471 + SLEEPY JAPANESE | 137 | 0.6470588235294118 + MILLION ACE | 142 | 0.7058823529411765 + CLOCKWORK PARADISE | 143 | 0.7647058823529411 + CHRISTMAS MOONSHINE | 150 | 0.8235294117647058 + INDEPENDENCE HOTEL | 157 | 0.8823529411764706 + WRATH MILE | 176 | 0.9411764705882353 + YOUTH KICK | 179 | 1 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/dense-rank.adoc b/modules/reference/pages/sql/sql-functions/window-functions/dense-rank.adoc new file mode 100644 index 000000000..e3bad0662 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/dense-rank.adoc @@ -0,0 +1,122 @@ += DENSE_RANK() +:description: The DENSE_RANK() window function assigns a rank for each value within a specified group, based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +The `DENSE_RANK()` window function assigns a rank for each value within a specified group, based on the `ORDER BY` expression in the `OVER` clause. Unlike the `RANK()` function, which can leave gaps in the ranking sequence when there are ties, `DENSE_RANK()` provides consecutive rank values without any gaps. This function can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +DENSE_RANK() OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +The output type for this function is a `BIGINT` and it indicates the rank of values in a table, regardless of the input types. If the `ORDER BY` expression is omitted, all ranks will default to 1. In case an optional `PARTITION BY` expression is included, the rankings are reset for each group of rows. The rows with equal values for the ranking criteria receive the same rank. + +[NOTE] +==== +Unlike `RANK()` function, there is no gap in the sequence of ranked values (if two rows are ranked 1, the next rank will be 2) +==== + +== Parameters + +* `()`: this function takes no parameters, but empty parentheses is required +* `PARTITION BY`: optional clause, which is used to divide the result set into partitions to which the `DENSE_RANK()` function is applied (if omitted, the entire result set is treated as a single partition) +* `ORDER BY`: order of rows in each partition to which the function is applied + +== Examples + +For the needs of this section, we will create a `winsales` table that stores information about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== DENSE_RANK() with ORDER BY + +In this example we will focus on executing the `DENSE_RANK()` function with `ORDER BY` keyword and 
calculate the descending dense rank of all rows based on the quantity sold: + +[source,sql] +---- +SELECT salesid, qty + Dense_RANK() OVER (ORDER BY qty DESC) AS d_rnk + RANK() OVER (ORDER BY qty DESC) AS rnk +FROM winsales +ORDER BY 2,1; +---- + +Here is the output for the query presented above that includes the sales ID along with the quantity sold and both dense and regular ranks: + +[source,sql] +---- + salesid | qty | d_rnk | rnk +---------+-----+-------+----- + 10001 | 10 | 5 | 8 + 10006 | 10 | 5 | 8 + 30001 | 10 | 5 | 8 + 40005 | 10 | 5 | 8 + 30003 | 15 | 4 | 7 + 20001 | 20 | 3 | 4 + 20002 | 20 | 3 | 4 + 30004 | 20 | 3 | 4 + 10005 | 30 | 2 | 2 + 30007 | 30 | 2 | 2 + 40001 | 40 | 1 | 1 +(11 rows) +---- + +=== DENSE_RANK() with ORDER BY and PARTITION_BY + +In this example we will focus on executing the `DENSE_RANK()` function with `ORDER BY` keyword and `PARTITION BY` clause and partition the table by seller ID, then order each partition by the quantity and assign a dense rank to each row: + +[source,sql] +---- +SELECT salesid, sellerid, qty + DENSE_RANK() OVER (PARTITION BY sellerid ORDER BY qty DESC) AS d_rnk +FROM winsales +ORDER BY 2,3,1; +---- + +Here is the output for the query presented above: + +[source,sql] +---- + salesid | sellerid | qty | d_rnk +---------+----------+-----+------- + 10001 | 1 | 10 | 2 + 10006 | 1 | 10 | 2 + 10005 | 1 | 30 | 1 + 20001 | 2 | 20 | 1 + 20002 | 2 | 20 | 1 + 30001 | 3 | 10 | 4 + 30003 | 3 | 15 | 3 + 30004 | 3 | 20 | 2 + 30007 | 3 | 30 | 1 + 40005 | 4 | 10 | 2 + 40001 | 4 | 40 | 1 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/first-value.adoc b/modules/reference/pages/sql/sql-functions/window-functions/first-value.adoc new file mode 100644 index 000000000..00886f656 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/first-value.adoc @@ -0,0 +1,99 @@ += FIRST_VALUE() +:description: The FIRST_VALUE() is a window function that retrieves the first value in 
an ordered set of values within a specified partition. +:page-topic-type: reference + +The `FIRST_VALUE()` is a window function that retrieves the first value in an ordered set of values within a specified partition. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +FIRST_VALUE(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `expression`: target column or expression that the function operates on +* `PARTITION BY`: optional clause, which divides the result set into partitions to which the `FIRST_VALUE()` function is applied (if omitted, the entire result set is treated as a single partition) +* `ORDER BY`: order of rows in each partition to which the function is applied +* `RANGE BETWEEN`: range-based window frame relative to the current row + +== Example + +For the needs of this section, we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `FIRST_VALUE()` function to retrieve the title of the film with the shortest duration, partitioning results by rating and ordering by length. + +[source,sql] +---- +SELECT + title, + length, + rating, + FIRST_VALUE(title) OVER ( + PARTITION BY rating + ORDER BY + length ASC ROWS BETWEEN UNBOUNDED PRECEDING + AND UNBOUNDED FOLLOWING + ) AS shortest_film_in_rating +FROM film; +---- + +By executing the code above, we will get the following output: + +[source,bash] +---- +| title | length | rating | shortest_film_in_rating | +|---------------------|------------|------------|-----------------------------| +| KILL BROTHERHOOD | 54 | G | KILL BROTHERHOOD | +| PICKUP DRIVING | 77 | G | KILL BROTHERHOOD | +| SAINTS BRIDE | 125 | G | KILL BROTHERHOOD | +| HOURS RAGE | 122 | NC-17 | HOURS RAGE | +| FOREVER CANDIDATE | 131 | NC-17 | HOURS RAGE | +| PIANIST OUTFIELD | 136 | NC-17 | HOURS RAGE | +| CHRISTMAS MOONSHINE | 150 | NC-17 | HOURS RAGE | +| INDEPENDENCE HOTEL | 157 | NC-17 | HOURS RAGE | +| WRATH MILE | 176 | NC-17 | HOURS RAGE | +| YOUTH KICK | 179 | NC-17 | HOURS RAGE | +| PRIVATE DROP | 106 | PG | PRIVATE DROP | +| DANGEROUS UPTOWN | 121 | PG | PRIVATE DROP | +| SLEEPY JAPANESE | 
137 | PG | PRIVATE DROP | +| HALLOWEEN NUTS | 47 | PG-13 | HALLOWEEN NUTS | +| ATTRACTION NEWTON | 83 | PG-13 | HALLOWEEN NUTS | +| MILLION ACE | 142 | PG-13 | HALLOWEEN NUTS | +| CLOCKWORK PARADISE | 143 | PG-13 | HALLOWEEN NUTS | +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/index.adoc b/modules/reference/pages/sql/sql-functions/window-functions/index.adoc new file mode 100644 index 000000000..eb6e98879 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/index.adoc @@ -0,0 +1,67 @@ += Overview +:description: Window functions is a group of SQL functions, that operate on a partition or "window" of a result set, returning values for every row within that wind + +Window functions is a group of SQL functions, that operate on a partition or "`window`" of a result set, returning values for every row within that window. Redpanda SQL supports the following window functions and clauses: + +== Window functions + +[width="100%",cols="40%,60%",options="header",] +|=== +|Function Name |Description +|xref:reference:sql/sql-functions/window-functions/count.adoc[COUNT] |Counts all the rows or those specified by the given expression +|xref:reference:sql/sql-functions/window-functions/avg.adoc[AVG] |Calculates the average (arithmetic mean) of a set of numeric values within a window +|xref:reference:sql/sql-functions/window-functions/sum.adoc[SUM] |Calculates and returns the sum of values from the input column or expression values +|xref:reference:sql/sql-functions/window-functions/min.adoc[MIN] |Computes the minimum value of an expression across a set of rows +|xref:reference:sql/sql-functions/window-functions/max.adoc[MAX] |Computes the maximum value of an expression across a set of rows +|xref:reference:sql/sql-functions/window-functions/bool-and.adoc[BOOL_AND] |Evaluates whether all values within a specified window of rows are true +|xref:reference:sql/sql-functions/window-functions/bool-or.adoc[BOOL_OR] |Evaluates 
whether at least one value within a specified window of rows is true
+|===
+
+== Ranking functions
+
+[width="100%",cols="<40%,<60%",options="header",]
+|===
+|*Function Name* |*Description*
+|xref:reference:sql/sql-functions/window-functions/row-number.adoc[ROW_NUMBER] |Returns the current row index within its partition (beginning with 1)
+|xref:reference:sql/sql-functions/window-functions/rank.adoc[RANK] |Calculates and returns the rank of a value within a specified group of values
+|xref:reference:sql/sql-functions/window-functions/dense-rank.adoc[DENSE_RANK] |Assigns a rank to each value within a specified group, with consecutive rank values and no gaps for ties
+|xref:reference:sql/sql-functions/window-functions/ntile.adoc[NTILE] |Divides an ordered data set into a specified number of approximately equal groups
+|===
+
+== Distribution functions
+
+[width="100%",cols="<40%,<60%",options="header",]
+|===
+|*Function Name* |*Description*
+|xref:reference:sql/sql-functions/window-functions/cume-dist.adoc[CUME_DIST] |Calculates the cumulative distribution of a value within a set of values
+|xref:reference:sql/sql-functions/window-functions/percent-rank.adoc[PERCENT_RANK] |Calculates and returns the percent rank of a value within a specified group of values
+|===
+
+== Value functions
+
+[width="100%",cols="<40%,<60%",options="header",]
+|===
+|*Function Name* |*Description*
+|xref:reference:sql/sql-functions/window-functions/first-value.adoc[FIRST_VALUE] |Returns the first value in an ordered set of values within a specified partition
+|xref:reference:sql/sql-functions/window-functions/last-value.adoc[LAST_VALUE] |Returns the last value in an ordered set of values within a specified partition
+|xref:reference:sql/sql-functions/window-functions/nth-value.adoc[NTH_VALUE] |Returns a value from the nth row in an ordered partition of a result set
+|xref:reference:sql/sql-functions/window-functions/lag.adoc[LAG] |Returns the values for a row located at a defined offset, either above or below the 
current row within the partition +|xref:reference:sql/sql-functions/window-functions/lead.adoc[LEAD] |Returns the values for a row located at a defined offset, either above or below the current row within the partition +|=== + +== Window clause + +[width="100%",cols="<35%,<65%",options="header",] +|=== +|*Clause Name* |*Description* +|xref:reference:sql/sql-clauses/over-window.adoc[OVER] |Defines the window specification and is mandatory for window functions +|xref:reference:sql/sql-clauses/over-window.adoc[WINDOW] |Optional clause that defines one or more named window specifications +|=== + +== Important notes + +There are a few essential things to remember when using window functions in Redpanda SQL: + +* Verify that you can effectively use window functions alongside the `PARTITION BY`, `ORDER BY` and `FRAME` clauses as part of your window specification +* Ensure the window specification chaining is supported by executing the following command: `SELECT SUM(i0) OVER w2 FROM tb1 WINDOW w1 AS (PARTITION BY i1), w2 AS (w1 ROWS CURRENT ROW)` +* The `FRAME` clause of the window specification is restricted to the `ROWS` clause and does not include frame exclusion diff --git a/modules/reference/pages/sql/sql-functions/window-functions/lag.adoc b/modules/reference/pages/sql/sql-functions/window-functions/lag.adoc new file mode 100644 index 000000000..8860cbe9a --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/lag.adoc @@ -0,0 +1,143 @@ += LAG() +:description: The LAG() window function returns the values from specific rows based on the offset argument (previous to the current row in the partition). +:page-topic-type: reference + +The `LAG()` window function returns the values from specific rows based on the offset argument (previous to the current row in the partition). 
It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL] + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +LAG (expression, offset, default) +OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... +) +---- + +The output’s data type for this function is the same as the input’s one. If there is no row that meets the offset criteria, it returns a default value (that must be of a type compatible with the expression value) + +== Parameters + +* `expression`: column, which will be referenced +* `offset`: numeric indicator of the previous row to access, that is relative to the current row (optional, if not specified 1 will be returned) +* `default`: value that wil be returned if the `offset` is out of range (optional, if not specified `NULL` will be returned) + +== Examples + +For the needs of this section, we will create the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== LAG(expression, offset) + +In this example, we will focus on executing the `LAG()` function with expression and offset parameters’ values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty + LAG(qty,1) OVER (ORDER BY buyerid, dateid) AS prev_qty +FROM winsales WHERE buyerid = 'c' +ORDER BY buyerid, dateid; +---- + +When executing 
the query above, it returns the buyer ID, date ID, quantity and previous quantity for all rows with buyer ID equal to `c`: + +[source,sql] +---- + buyerid | dateid | qty | prev_qty +---------+------------+-----+---------- + c | 2003-12-24 | 10 | + c | 2004-01-18 | 10 | 10 + c | 2004-02-16 | 20 | 10 + c | 2004-09-07 | 30 | 20 +(4 rows) +---- + +=== LAG(expression, offset, default) + +In this example, we will focus on executing the `LAG()` function with expression, offset and default parameters’ values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty + LAG(buyerid,1,'unknown') OVER (ORDER BY dateid) AS prev_buyerid +FROM winsales +ORDER BY dateid; +---- + +The query above returns the buyer ID, date ID, quantity and previous buyer ID for all rows: + +[source,sql] +---- + buyerid | dateid | qty | prev_buyerid +---------+------------+-----+-------------- + b | 2003-08-02 | 10 | unknown + c | 2003-12-24 | 10 | b + a | 2003-12-24 | 30 | c + a | 2004-01-09 | 40 | a + c | 2004-01-18 | 10 | a + b | 2004-02-12 | 20 | c + a | 2004-02-12 | 10 | b + c | 2004-02-16 | 20 | a + b | 2004-04-18 | 15 | c + b | 2004-04-18 | 20 | b + c | 2004-09-07 | 30 | b +(11 rows) +---- + +=== Time series: LAG() to compare daily sales quantities + +In this example, we will use LAG() to compare each day’s sales quantity (`qty`) with the previous day’s quantity, ordered by `dateid`: + +[source,sql] +---- +SELECT dateid, qty, + LAG(qty) OVER (ORDER BY dateid) AS prev_day_qty, + qty - LAG(qty) OVER (ORDER BY dateid) AS qty_change +FROM winsales +ORDER BY dateid; +---- + +By executing the query above, we will get the following output: + +[source,sql] +---- + dateid | qty | prev_day_qty | qty_change +------------+-----+--------------+------------ + 2003-08-02 | 10 | | + 2003-12-24 | 10 | 10 | 0 + 2003-12-24 | 30 | 10 | 20 + 2004-01-09 | 40 | 30 | 10 + 2004-01-18 | 10 | 40 | -30 + 2004-02-12 | 20 | 10 | 10 + 2004-02-12 | 10 | 20 | -10 + 2004-02-16 | 20 | 10 | 10 + 2004-04-18 | 15 | 20 | -5 + 
2004-04-18 |  20 |           15 |          5
+ 2004-09-07 |  30 |           20 |         10
+(11 rows)
+----
diff --git a/modules/reference/pages/sql/sql-functions/window-functions/last-value.adoc b/modules/reference/pages/sql/sql-functions/window-functions/last-value.adoc
new file mode 100644
index 000000000..4e7b487fc
--- /dev/null
+++ b/modules/reference/pages/sql/sql-functions/window-functions/last-value.adoc
@@ -0,0 +1,97 @@
+= LAST_VALUE()
+:description: The LAST_VALUE() is a window function that retrieves the last value in an ordered set of values within a specified partition.
+:page-topic-type: reference
+
+The `LAST_VALUE()` is a window function that retrieves the last value in an ordered set of values within a specified partition.
+
+== Syntax
+
+The syntax for this function is as follows:
+
+[source,sql]
+----
+LAST_VALUE(expression) OVER (
+  [PARTITION BY partition_expression]
+  ORDER BY sort_expression
+  RANGE BETWEEN start_value AND end_value
+)
+----
+
+== Parameters
+
+* `expression`: input’s column or expression values that returns a single value. It represents the value you want to retrieve from the last row of the sorted partition
+* `PARTITION BY`: optional clause, which divides the result set into partitions to which the `LAST_VALUE()` function is applied (if omitted, the entire result set is treated as a single partition)
+* `ORDER BY`: order of rows in each partition to which the function is applied
+* `RANGE BETWEEN`: range-based window frame relative to the current row
+
+== Example
+
+For the needs of this section, we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] website. 
+
+[source,sql]
+----
+DROP TABLE IF EXISTS film;
+CREATE TABLE film (
+  title text NOT NULL,
+  length int,
+  rating text
+);
+INSERT INTO film(title, length, rating) VALUES
+  ('ATTRACTION NEWTON', 83, 'PG-13'),
+  ('CHRISTMAS MOONSHINE', 150, 'NC-17'),
+  ('DANGEROUS UPTOWN', 121, 'PG'),
+  ('KILL BROTHERHOOD', 54, 'G'),
+  ('HALLOWEEN NUTS', 47, 'PG-13'),
+  ('HOURS RAGE', 122, 'NC-17'),
+  ('PIANIST OUTFIELD', 136, 'NC-17'),
+  ('PICKUP DRIVING', 77, 'G'),
+  ('INDEPENDENCE HOTEL', 157, 'NC-17'),
+  ('PRIVATE DROP', 106, 'PG'),
+  ('SAINTS BRIDE', 125, 'G'),
+  ('FOREVER CANDIDATE', 131, 'NC-17'),
+  ('MILLION ACE', 142, 'PG-13'),
+  ('SLEEPY JAPANESE', 137, 'PG'),
+  ('WRATH MILE', 176, 'NC-17'),
+  ('YOUTH KICK', 179, 'NC-17'),
+  ('CLOCKWORK PARADISE', 143, 'PG-13');
+----
+
+The query below uses the `LAST_VALUE()` function to retrieve the title of the film with the longest duration, partitioning results by rating and ordering by length.
+
+[source,sql]
+----
+SELECT
+  title,
+  length,
+  rating,
+  LAST_VALUE(title) OVER (
+    PARTITION BY rating
+    ORDER BY
+      length ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+  ) AS longest_film_in_rating
+FROM film;
+----
+
+By running the code above, we will get the following output:
+
+[source,bash]
+----
+| title               | length | rating | longest_film_in_rating |
+|---------------------|--------|--------|------------------------|
+| KILL BROTHERHOOD    | 54     | G      | SAINTS BRIDE           |
+| PICKUP DRIVING      | 77     | G      | SAINTS BRIDE           |
+| SAINTS BRIDE        | 125    | G      | SAINTS BRIDE           |
+| HOURS RAGE          | 122    | NC-17  | YOUTH KICK             |
+| FOREVER CANDIDATE   | 131    | NC-17  | YOUTH KICK             |
+| PIANIST OUTFIELD    | 136    | NC-17  | YOUTH KICK             |
+| CHRISTMAS MOONSHINE | 150    | NC-17  | YOUTH KICK             |
+| INDEPENDENCE HOTEL  | 157    | NC-17  | YOUTH KICK             |
+| WRATH MILE          | 176    | NC-17  | YOUTH KICK             |
+| YOUTH KICK          | 179    | NC-17  | YOUTH KICK             |
+| DANGEROUS UPTOWN    | 121    | PG     | SLEEPY JAPANESE        |
+| SLEEPY JAPANESE     | 137    | PG     | SLEEPY JAPANESE        |
+| HALLOWEEN NUTS      | 47     | PG-13  | CLOCKWORK 
PARADISE | +| ATTRACTION NEWTON | 83 | PG-13 | CLOCKWORK PARADISE | +| MILLION ACE | 142 | PG-13 | CLOCKWORK PARADISE | +| CLOCKWORK PARADISE | 143 | PG-13 | CLOCKWORK PARADISE | +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/lead.adoc b/modules/reference/pages/sql/sql-functions/window-functions/lead.adoc new file mode 100644 index 000000000..6cd7f4820 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/lead.adoc @@ -0,0 +1,143 @@ += LEAD() +:description: The LEAD() window function takes a column and an integer offset as arguments and returns the value of the cell in that column that is located at the s +:page-topic-type: reference + +The `LEAD()` window function takes a column and an integer offset as arguments and returns the value of the cell in that column that is located at the specified number of rows after the current row. It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL] + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +LEAD (expression, offset, default) +OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... +) +---- + +The output’s type for this function is the same as the input’s one. If there is no row and value that meets the offset criteria, it returns the specified default value, which must be of a type compatible with the input value. 
+ +== Parameters + +* `expression`: column, which will be referenced +* `offset`: numeric indicator of the row that is relative to the current one (optional, if not specified 1 will be returned) +* `default`: value that wil be returned if the `offset` is out of range (optional, if not specified `NULL` will be returned) + +== Examples + +In this example, we will use the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== Lead(expression, offset) + +In this example, we will focus on executing the `LEAD()` function with expression and offset parameters’ values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty + LEAD(qty,1) OVER (ORDER BY buyerid, dateid) AS next_qty +FROM winsales WHERE buyerid = 'c' +ORDER BY buyerid, dateid; +---- + +The following query returns the buyer ID, date ID, quantity and previous quantity for all rows with buyer ID equal to `c`: + +[source,sql] +---- + buyerid | dateid | qty | next_qty +---------+------------+-----+---------- + c | 2003-12-24 | 10 | 10 + c | 2004-01-18 | 10 | 20 + c | 2004-02-16 | 20 | 30 + c | 2004-09-07 | 30 | +(4 rows) +---- + +=== Expression, offset and default specified + +In this example, we will focus on executing the `LEAD()` function with expression, offset and default parameters’ values specified: + +[source,sql] +---- +SELECT buyerid, dateid, qty + 
LEAD(buyerid,1,'unknown') OVER (ORDER BY dateid) AS next_buyerid +FROM winsales +ORDER BY dateid; +---- + +The above query returns the buyer ID, date ID, quantity and following buyer ID for all rows: + +[source,sql] +---- + buyerid | dateid | qty | next_buyerid +---------+------------+-----+-------------- + b | 2003-08-02 | 10 | c + c | 2003-12-24 | 10 | a + a | 2003-12-24 | 30 | a + a | 2004-01-09 | 40 | c + c | 2004-01-18 | 10 | b + b | 2004-02-12 | 20 | a + a | 2004-02-12 | 10 | c + c | 2004-02-16 | 20 | b + b | 2004-04-18 | 15 | b + b | 2004-04-18 | 20 | c + c | 2004-09-07 | 30 | unknown +(11 rows) +---- + +=== Time series: LEAD() to compare next day’s sales quantity + +In this example, we will use LEAD() to compare each day’s sales quantity (`qty`) with the next day’s quantity, ordered by `dateid`: + +[source,sql] +---- +SELECT dateid, qty, + LEAD(qty) OVER (ORDER BY dateid) AS next_day_qty, + LEAD(qty) OVER (ORDER BY dateid) - qty AS qty_change +FROM winsales +ORDER BY dateid; +---- + +By executing the query above, we will get the following output: + +[source,sql] +---- + dateid | qty | next_day_qty | qty_change +------------+-----+--------------+------------ + 2003-08-02 | 10 | 10 | 0 + 2003-12-24 | 10 | 30 | 20 + 2003-12-24 | 30 | 40 | 10 + 2004-01-09 | 40 | 10 | -30 + 2004-01-18 | 10 | 20 | 10 + 2004-02-12 | 20 | 10 | -10 + 2004-02-12 | 10 | 20 | 10 + 2004-02-16 | 20 | 15 | -5 + 2004-04-18 | 15 | 20 | 5 + 2004-04-18 | 20 | 30 | 10 + 2004-09-07 | 30 | | +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/max.adoc b/modules/reference/pages/sql/sql-functions/window-functions/max.adoc new file mode 100644 index 000000000..a7b998daf --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/max.adoc @@ -0,0 +1,96 @@ += MAX() +:description: The MAX() window function is used to compute the maximum value of an expression across a set of rows defined by a window specification. 
+:page-topic-type: reference + +The `MAX()` window function is used to compute the maximum value of an expression across a set of rows defined by a window specification. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +MAX ([ALL] expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `ALL`: retains all duplicate values from the expression + +== Example + +For the needs of this section, we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `MAX()` to find the maximum length of films for each rating category and also calculate a running maximum length as you move through the films ordered by length. The `RunningMaxLength` column will be updated as it encounters longer films. 
+ +[source,sql] +---- +SELECT + title, + length, + rating, + MAX(length) OVER ( PARTITION BY rating ) AS MaxLengthByRating, + MAX(length) OVER ( +ORDER BY + length ROWS BETWEEN unbounded preceding AND CURRENT ROW ) AS RunningMaxLength +FROM film +ORDER BY length; +---- + +By running the above code, we will get the following output: + +[source,sql] +---- + title | length | rating | maxlengthbyrating | runningmaxlength +---------------------+--------+--------+-------------------+------------------ + HALLOWEEN NUTS | 47 | PG-13 | 143 | 47 + KILL BROTHERHOOD | 54 | G | 125 | 54 + PICKUP DRIVING | 77 | G | 125 | 77 + ATTRACTION NEWTON | 83 | PG-13 | 143 | 83 + PRIVATE DROP | 106 | PG | 137 | 106 + DANGEROUS UPTOWN | 121 | PG | 137 | 121 + HOURS RAGE | 122 | NC-17 | 179 | 122 + SAINTS BRIDE | 125 | G | 125 | 125 + FOREVER CANDIDATE | 131 | NC-17 | 179 | 131 + PIANIST OUTFIELD | 136 | NC-17 | 179 | 136 + SLEEPY JAPANESE | 137 | PG | 137 | 137 + MILLION ACE | 142 | PG-13 | 143 | 142 + CLOCKWORK PARADISE | 143 | PG-13 | 143 | 143 + CHRISTMAS MOONSHINE | 150 | NC-17 | 179 | 150 + INDEPENDENCE HOTEL | 157 | NC-17 | 179 | 157 + WRATH MILE | 176 | NC-17 | 179 | 176 + YOUTH KICK | 179 | NC-17 | 179 | 179 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/min.adoc b/modules/reference/pages/sql/sql-functions/window-functions/min.adoc new file mode 100644 index 000000000..f3d385235 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/min.adoc @@ -0,0 +1,96 @@ += MIN() +:description: The MIN() window function is used to compute the minimum value of an expression across a set of rows defined by a window specification. +:page-topic-type: reference + +The `MIN()` window function is used to compute the minimum value of an expression across a set of rows defined by a window specification. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +MIN ([ALL] expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `ALL`: retains all duplicate values from the expression + +== Example + +For the needs of this section we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. + +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `MIN()` to find the minimum length of films for each rating category and also calculates a running minimum length of films ordered by their length. 
+ +[source,sql] +---- +SELECT + title, + length, + rating, + MIN(length) OVER ( PARTITION BY rating ) AS MinLengthByRating, + MIN(length) OVER ( +ORDER BY + length ROWS BETWEEN unbounded preceding AND CURRENT ROW ) AS RunningMinLength +FROM film +ORDER BY length; +---- + +By running the above code, we will get the following output: + +[source,sql] +---- + title | length | rating | minlengthbyrating | runningminlength +---------------------+--------+--------+-------------------+------------------ + HALLOWEEN NUTS | 47 | PG-13 | 47 | 47 + KILL BROTHERHOOD | 54 | G | 54 | 47 + PICKUP DRIVING | 77 | G | 54 | 47 + ATTRACTION NEWTON | 83 | PG-13 | 47 | 47 + PRIVATE DROP | 106 | PG | 106 | 47 + DANGEROUS UPTOWN | 121 | PG | 106 | 47 + HOURS RAGE | 122 | NC-17 | 122 | 47 + SAINTS BRIDE | 125 | G | 54 | 47 + FOREVER CANDIDATE | 131 | NC-17 | 122 | 47 + PIANIST OUTFIELD | 136 | NC-17 | 122 | 47 + SLEEPY JAPANESE | 137 | PG | 106 | 47 + MILLION ACE | 142 | PG-13 | 47 | 47 + CLOCKWORK PARADISE | 143 | PG-13 | 47 | 47 + CHRISTMAS MOONSHINE | 150 | NC-17 | 122 | 47 + INDEPENDENCE HOTEL | 157 | NC-17 | 122 | 47 + WRATH MILE | 176 | NC-17 | 122 | 47 + YOUTH KICK | 179 | NC-17 | 122 | 47 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/nth-value.adoc b/modules/reference/pages/sql/sql-functions/window-functions/nth-value.adoc new file mode 100644 index 000000000..0f28d8b48 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/nth-value.adoc @@ -0,0 +1,100 @@ += NTH_VALUE() +:description: The NTH_VALUE() is a window function that allows you to access the value from the nth row within a specified window frame. +:page-topic-type: reference + +The `NTH_VALUE()` is a window function that allows you to access the value from the nth row within a specified window frame. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +NTH_VALUE (value, n) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + RANGE BETWEEN start_value AND end_value +) +---- + +== Parameters + +* `value`: column or expression for which you want to retrieve the value +* `n`: positive integer (greater than zero) that determines the row number within the window frame from which to retrieve the value +* `PARTITION BY`: optional clause, which divides the result set into partitions to which the `NTH_VALUE()` function is applied (if omitted, the entire result set is treated as a single partition) +* `ORDER BY`: order of rows in each partition to which the function is applied +* `RANGE BETWEEN`: range-based window frame relative to the current row + +== Example + +For the needs of this section we will use a simplified version of the `film` table from the Pagila database, containing only the `title`, `length` and `rating` columns. The complete schema for the `film` table can be found on the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila^] database website. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); + +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `NTH_VALUE()` function to retrieve the title of the film with the second shortest duration, partitioning results by rating and ordering by length: + +[source,sql] +---- +SELECT + title, + length, + rating, + NTH_VALUE(title, 2) OVER ( + PARTITION BY rating + ORDER BY + length ASC + ) AS second_shortest_film_in_rating +FROM film; +---- + +The above query will show the following table: + +[source,bash] +---- +| title |length |rating | second_shortest_film_in_rating | +|---------------------|--------|--------|----------------------------------| +| KILL BROTHERHOOD | 54 | G | NULL | +| PICKUP DRIVING | 77 | G | PICKUP DRIVING | +| SAINTS BRIDE | 125 | G | PICKUP DRIVING | +| HOURS RAGE | 122 | NC-17 | NULL | +| FOREVER CANDIDATE | 131 | NC-17 | FOREVER CANDIDATE | +| PIANIST OUTFIELD | 136 | NC-17 | FOREVER CANDIDATE | +| CHRISTMAS MOONSHINE | 150 | NC-17 | FOREVER CANDIDATE | +| INDEPENDENCE HOTEL | 157 | NC-17 | FOREVER CANDIDATE | +| WRATH MILE | 176 | NC-17 | FOREVER CANDIDATE | +| YOUTH KICK | 179 | NC-17 | FOREVER CANDIDATE | +| PRIVATE DROP | 106 | PG | NULL | +| DANGEROUS UPTOWN | 121 | PG | DANGEROUS UPTOWN | +| SLEEPY JAPANESE | 137 | PG | DANGEROUS UPTOWN | +| HALLOWEEN 
NUTS          |   47   | PG-13  | NULL                             | +| ATTRACTION NEWTON   |   83   | PG-13  | ATTRACTION NEWTON                | +| MILLION ACE         |  142   | PG-13  | ATTRACTION NEWTON                | +| CLOCKWORK PARADISE  |  143   | PG-13  | ATTRACTION NEWTON                | +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/ntile.adoc b/modules/reference/pages/sql/sql-functions/window-functions/ntile.adoc new file mode 100644 index 000000000..e510641ac --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/ntile.adoc @@ -0,0 +1,95 @@ += NTILE() +:description: The NTILE() function is a window function used to divide an ordered data set into a specified number of approximately equal groups or buckets. +:page-topic-type: reference + +The `NTILE()` function is a window function used to divide an ordered data set into a specified number of approximately equal groups or buckets. This function assigns each group a bucket number starting from one. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +NTILE(buckets) OVER ( +    [PARTITION BY partition_expression, ... ] +    [ORDER BY sort_expression [ASC | DESC], ...] +) +---- + +== Parameters + +* `buckets`: positive integer or an expression that evaluates to a positive integer for each partition. It specifies the number of groups into which the data should be divided. + +== Restrictions + +* `buckets`: its value must be a positive integer. If it is a non-integer constant, it will be truncated to an integer. + +== Example + +For the needs of this section, we will use a simplified version of the `film` table from the link:https://www.postgresql.org/ftp/projects/pgFoundry/dbsamples/pagila/pagila/[Pagila database^], which will contain only the `title`, `length` and `rating` columns. 
+ +[source,sql] +---- +DROP TABLE IF EXISTS film; +CREATE TABLE film ( + title text NOT NULL, + length int, + rating text +); + +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +The query below uses the `NTILE()` function to divide the films into four quartiles based on their length: + +[source,sql] +---- +SELECT + title, + length, + NTILE(4) OVER (ORDER BY length) AS quartile +FROM film; +---- + +By running the code above, we will get the following output: + +[source,sql] +---- + title | length | quartile +---------------------+--------+---------- + HALLOWEEN NUTS | 47 | 1 + KILL BROTHERHOOD | 54 | 1 + PICKUP DRIVING | 77 | 1 + ATTRACTION NEWTON | 83 | 1 + PRIVATE DROP | 106 | 1 + DANGEROUS UPTOWN | 121 | 2 + HOURS RAGE | 122 | 2 + SAINTS BRIDE | 125 | 2 + FOREVER CANDIDATE | 131 | 2 + PIANIST OUTFIELD | 136 | 3 + SLEEPY JAPANESE | 137 | 3 + MILLION ACE | 142 | 3 + CLOCKWORK PARADISE | 143 | 3 + CHRISTMAS MOONSHINE | 150 | 4 + INDEPENDENCE HOTEL | 157 | 4 + WRATH MILE | 176 | 4 + YOUTH KICK | 179 | 4 +(17 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/percent-rank.adoc b/modules/reference/pages/sql/sql-functions/window-functions/percent-rank.adoc new file mode 100644 index 000000000..e60e216f7 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/percent-rank.adoc @@ -0,0 +1,128 @@ += PERCENT_RANK() +:description: 
PERCENT_RANK() window function determines the relative rank of a value in a group of values, based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +`PERCENT_RANK()` window function determines the relative rank of a value in a group of values, based on the `ORDER BY` expression in the `OVER` clause. It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +PERCENT_RANK() OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression +) +---- + +The `PERCENT_RANK()` is calculated as: + +[source,bash] +---- +(r - 1) / (n - 1) +---- + +Where `r` is the rank of the current row and `n` is the total number of rows in the window or partition. + +Rows with equal values for the ranking criteria receive the same relative rank. The output data type for this function is `DOUBLE PRECISION`. The output will indicate the rank of values in a table, regardless of the input types. 
+ +* If the optional `PARTITION BY` expression is present, the rankings are reset for each group of rows +* If the `ORDER BY` expression is omitted, then all relative ranks are equal to 0 + +== Parameters + +* `()`: this function takes no arguments but parentheses are required +* `PARTITION BY`: optional clause used to divide the result set into partitions (`PERCENT_RANK()` function is applied to each partition independently) +* `ORDER BY`: order of rows in each partition to which the function is applied + +== Examples + +For the needs of this section, we will create the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( +  salesid int, +  dateid date, +  sellerid int, +  buyerid text, +  qty int, +  qty_shipped int); +INSERT INTO winsales VALUES +  (30001, '8/2/2003', 3, 'b', 10, 10), +  (10001, '12/24/2003', 1, 'c', 10, 10), +  (10005, '12/24/2003', 1, 'a', 30, null), +  (40001, '1/9/2004', 4, 'a', 40, null), +  (10006, '1/18/2004', 1, 'c', 10, null), +  (20001, '2/12/2004', 2, 'b', 20, 20), +  (40005, '2/12/2004', 4, 'a', 10, 10), +  (20002, '2/16/2004', 2, 'c', 20, 20), +  (30003, '4/18/2004', 3, 'b', 15, null), +  (30004, '4/18/2004', 3, 'b', 20, null), +  (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== PERCENT_RANK() with ORDER BY + +In this example, we will focus on executing the `PERCENT_RANK()` function with `ORDER BY` keyword and calculate the descending percent rank of all rows based on the quantity sold: + +[source,sql] +---- +SELECT salesid, qty, +  PERCENT_RANK() OVER (ORDER BY qty DESC) AS p_rnk, +  RANK() OVER (ORDER BY qty DESC) AS rnk +FROM winsales +ORDER BY 2,1; +---- + +Here is the output for the query presented above that includes the sales ID along with the quantity sold and both percent and regular ranks: + +[source,sql] +---- + salesid | qty | p_rnk | rnk +---------+-----+-------+----- +   10001 |  10 |   0.7 |   8 +   10006 |  10 |   0.7 |   8 +   30001 |  10 |   0.7 |   8 +   40005 |  10 |   0.7 |   8 +   30003 |  15 |   0.6 | 
7 + 20001 | 20 | 0.3 | 4 + 20002 | 20 | 0.3 | 4 + 30004 | 20 | 0.3 | 4 + 10005 | 30 | 0.1 | 2 + 30007 | 30 | 0.1 | 2 + 40001 | 40 | 0 | 1 +---- + +=== PERCENT_RANK() with ORDER BY and PARTITION BY + +In this example, we will focus on executing the `PERCENT_RANK()` function with `ORDER BY` keyword and `PARTITION BY` clause, partition the table by seller ID, order each partition by the quantity and assign a percent rank to each row: + +[source,sql] +---- +SELECT salesid, sellerid, qty + PERCENT_RANK() OVER (PARTITION BY sellerid ORDER BY qty DESC) AS p_rnk +FROM winsales +ORDER BY 2,3,1; +---- + +Here is the output for the query presented above: + +[source,sql] +---- + salesid | sellerid | qty | p_rnk +---------+----------+-----+-------------------- + 10001 | 1 | 10 | 0.5 + 10006 | 1 | 10 | 0.5 + 10005 | 1 | 30 | 0 + 20001 | 2 | 20 | 0 + 20002 | 2 | 20 | 0 + 30001 | 3 | 10 | 1 + 30003 | 3 | 15 | 0.6666666666666666 + 30004 | 3 | 20 | 0.3333333333333333 + 30007 | 3 | 30 | 0 + 40005 | 4 | 10 | 1 + 40001 | 4 | 40 | 0 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/rank.adoc b/modules/reference/pages/sql/sql-functions/window-functions/rank.adoc new file mode 100644 index 000000000..ac37e9a25 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/rank.adoc @@ -0,0 +1,117 @@ += RANK() +:description: The RANK() window function determines the rank of a value in a group of values, based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +The `RANK()` window function determines the rank of a value in a group of values, based on the `ORDER BY` expression in the `OVER` clause. It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types supported by Redpanda SQL]. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +RANK() OVER ( + [PARTITION BY partition_expression, ... ] + ORDER BY sort_expression [ASC | DESC], ... 
+) +---- + +Rows with equal values for the ranking criteria receive the same rank. The output type for this function is `BIGINT` and it indicates the rank of values in a table, regardless of the input types. + +* If the optional `PARTITION BY` expression is present, the rankings are reset for each group of rows +* If the `ORDER BY` expression is omitted, then all ranks are equal to 1 + +== Parameters + +* `()`: this function takes no arguments but parentheses are required + +== Examples + +For the needs of this section, we will create the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( +  salesid int, +  dateid date, +  sellerid int, +  buyerid text, +  qty int, +  qty_shipped int); +INSERT INTO winsales VALUES +  (30001, '8/2/2003', 3, 'b', 10, 10), +  (10001, '12/24/2003', 1, 'c', 10, 10), +  (10005, '12/24/2003', 1, 'a', 30, null), +  (40001, '1/9/2004', 4, 'a', 40, null), +  (10006, '1/18/2004', 1, 'c', 10, null), +  (20001, '2/12/2004', 2, 'b', 20, 20), +  (40005, '2/12/2004', 4, 'a', 10, 10), +  (20002, '2/16/2004', 2, 'c', 20, 20), +  (30003, '4/18/2004', 3, 'b', 15, null), +  (30004, '4/18/2004', 3, 'b', 20, null), +  (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== RANK() with ORDER BY + +In this example, we will focus on executing the `RANK()` function with `ORDER BY` keyword and calculate the rank of all rows based on the quantity sold: + +[source,sql] +---- +SELECT salesid, qty, +  RANK() OVER (ORDER BY qty) +FROM winsales +ORDER BY 2,1; +---- + +Here is the output for the query presented above that includes the sales ID along with the quantity sold and regular ranks: + +[source,sql] +---- + salesid | qty | rank +---------+-----+------ +   10001 |  10 |    1 +   10006 |  10 |    1 +   30001 |  10 |    1 +   40005 |  10 |    1 +   30003 |  15 |    5 +   20001 |  20 |    6 +   20002 |  20 |    6 +   30004 |  20 |    6 +   10005 |  30 |    9 +   30007 |  30 |    9 +   40001 |  40 |   11 +(11 rows) +---- + +=== RANK() with ORDER BY and PARTITION BY + +In this example, we 
will focus on executing the `RANK()` function with `ORDER BY` keyword and `PARTITION BY` clause, partition the table by seller ID, order each partition by the quantity and assign a rank to each row: + +[source,sql] +---- +SELECT salesid, sellerid, qty, +  RANK() OVER (PARTITION BY sellerid ORDER BY qty) +FROM winsales +ORDER BY 2,3,1; +---- + +Here is the output for the query presented above: + +[source,sql] +---- + salesid | sellerid | qty | rank +---------+----------+-----+------ +   10001 |        1 |  10 |    1 +   10006 |        1 |  10 |    1 +   10005 |        1 |  30 |    3 +   20001 |        2 |  20 |    1 +   20002 |        2 |  20 |    1 +   30001 |        3 |  10 |    1 +   30003 |        3 |  15 |    2 +   30004 |        3 |  20 |    3 +   30007 |        3 |  30 |    4 +   40005 |        4 |  10 |    1 +   40001 |        4 |  40 |    2 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/row-number.adoc b/modules/reference/pages/sql/sql-functions/window-functions/row-number.adoc new file mode 100644 index 000000000..218d1bab2 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/row-number.adoc @@ -0,0 +1,146 @@ += ROW_NUMBER() +:description: The ROW_NUMBER() window function returns the number of the current row within its partition (counting from 1), based on the ORDER BY expression in the OVER clause. +:page-topic-type: reference + +The `ROW_NUMBER()` window function returns the number of the current row within its partition (counting from 1), based on the `ORDER BY` expression in the `OVER` clause. It can be used with all xref:reference:sql/sql-data-types/index.adoc[data types] supported by Redpanda SQL. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +ROW_NUMBER() OVER ( +    [PARTITION BY partition_expression, ... ] +    ORDER BY sort_expression [ASC | DESC], ... +) +---- + +The output’s type for this function is `BIGINT`. Rows with equal values for the `ORDER BY` expression receive different row numbers nondeterministically. 
+ +== Parameters + +* `()`: this function takes no arguments but parentheses is required + +== Examples + +For the needs of this section, we will create the `winsales` table that stores details about some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== ROW_NUMBER() with ORDER BY + +In this example, we will focus on executing the `ROW_NUMBER()` function with `ORDER BY` keyword, assign a row number to each row and order the table by the row number (the results will be sorted after the window function results are applied): + +[source,sql] +---- +SELECT salesid, qty + ROW_NUMBER() OVER (ORDER BY salesid) +FROM winsales +ORDER BY 3; +---- + +Here is the output for the code above: + +[source,sql] +---- + salesid | qty | row_number +---------+-----+------------ + 10001 | 10 | 1 + 10005 | 30 | 2 + 10006 | 10 | 3 + 20001 | 20 | 4 + 20002 | 20 | 5 + 30001 | 10 | 6 + 30003 | 15 | 7 + 30004 | 20 | 8 + 30007 | 30 | 9 + 40001 | 40 | 10 + 40005 | 10 | 11 +(11 rows) +---- + +=== ROW_NUMBER() with ORDER BY and PARTITION BY + +In this example, we will focus on executing the `ROW_NUMBER()` function with `ORDER BY` keyword and `PARTITION BY` clause, partition the table by seller ID, assign a row number to each row and order the table by the sales ID and row number (the results will be sorted after the window function results are applied): + +[source,sql] +---- +SELECT salesid, sellerid, qty + 
ROW_NUMBER() OVER (PARTITION BY sellerid ORDER BY salesid) +FROM winsales +ORDER BY 1; +---- + +The output of the code above will be as follows: + +[source,sql] +---- + salesid | sellerid | qty | row_number +---------+----------+-----+------------ + 10001 | 1 | 10 | 1 + 10005 | 1 | 30 | 2 + 10006 | 1 | 10 | 3 + 20001 | 2 | 20 | 1 + 20002 | 2 | 20 | 2 + 30001 | 3 | 10 | 1 + 30003 | 3 | 15 | 2 + 30004 | 3 | 20 | 3 + 30007 | 3 | 30 | 4 + 40001 | 4 | 40 | 1 + 40005 | 4 | 10 | 2 +(11 rows) +---- + +=== Time series: assigning sequential row numbers by date + +In this example, we will assign a sequential row number to each sale ordered by `dateid`: + +[source,sql] +---- +SELECT dateid, salesid, qty, + ROW_NUMBER() OVER (ORDER BY dateid, salesid) AS time_series_position +FROM winsales +ORDER BY dateid, salesid; +---- + +By executing the query above, we will get the following output: + +[source,sql] +---- + dateid | salesid | qty | time_series_position +------------+---------+-----+---------------------- + 2003-08-02 | 30001 | 10 | 1 + 2003-12-24 | 10001 | 10 | 2 + 2003-12-24 | 10005 | 30 | 3 + 2004-01-09 | 40001 | 40 | 4 + 2004-01-18 | 10006 | 10 | 5 + 2004-02-12 | 20001 | 20 | 6 + 2004-02-12 | 40005 | 10 | 7 + 2004-02-16 | 20002 | 20 | 8 + 2004-04-18 | 30003 | 15 | 9 + 2004-04-18 | 30004 | 20 | 10 + 2004-09-07 | 30007 | 30 | 11 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-functions/window-functions/sum.adoc b/modules/reference/pages/sql/sql-functions/window-functions/sum.adoc new file mode 100644 index 000000000..4f258a828 --- /dev/null +++ b/modules/reference/pages/sql/sql-functions/window-functions/sum.adoc @@ -0,0 +1,191 @@ += SUM() +:description: The SUM() window function returns the sum of the input column or expression values. +:page-topic-type: reference + +The `SUM()` window function returns the sum of the input column or expression values. 
It can be used with a `RANGE` clause, that allows you to define a logical frame of rows based on the values of the current row, rather than a fixed number of rows. + +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +SUM(expression) OVER ( + [PARTITION BY partition_expression] + ORDER BY sort_expression + [ROWS | RANGE BETWEEN start_value AND end_value] +) +---- + +The expression’s argument types supported by the `SUM` window function are `INTEGER`, `BIGINT`, `REAL` and `DOUBLE PRECISION`. The return types of the `SUM` function are: `BIGINT` for integer and `DOUBLE PRECISION` for floating-point arguments. + +[NOTE] +==== +The `SUM()` window function works with numeric values and ignores NULL ones +==== + +== Parameters + +* `expression`: input’s column or expression values to be summed +* `PARTITION BY`: optional clause, which divides the result set into partitions to which the function is applied +* `ROWS | RANGE BETWEEN`: range-based window frame relative to the current row + +== Examples + +For the needs of this section, we will create the `winsales` table that stores details of some sales transactions: + +[source,sql] +---- +CREATE TABLE winsales( + salesid int, + dateid date, + sellerid int, + buyerid text, + qty int, + qty_shipped int); +INSERT INTO winsales VALUES + (30001, '8/2/2003', 3, 'b', 10, 10), + (10001, '12/24/2003', 1, 'c', 10, 10), + (10005, '12/24/2003', 1, 'a', 30, null), + (40001, '1/9/2004', 4, 'a', 40, null), + (10006, '1/18/2004', 1, 'c', 10, null), + (20001, '2/12/2004', 2, 'b', 20, 20), + (40005, '2/12/2004', 4, 'a', 10, 10), + (20002, '2/16/2004', 2, 'c', 20, 20), + (30003, '4/18/2004', 3, 'b', 15, null), + (30004, '4/18/2004', 3, 'b', 20, null), + (30007, '9/7/2004', 3, 'c', 30, null); +---- + +=== SUM() with ORDER BY + +In this example, we will focus on executing the `SUM()` window function with `ORDER BY` keyword: + +[source,sql] +---- +SELECT salesid, dateid, sellerid, qty + SUM(qty) OVER (ORDER BY 
dateid, salesid ROWS UNBOUNDED PRECEDING) +FROM winsales +ORDER BY 2,1; +---- + +The output from the above query includes the sales ID, date ID, seller ID, quantity and quantity sum: + +[source,sql] +---- + salesid | dateid | sellerid | qty | sum +---------+------------+----------+-----+----- + 30001 | 2003-08-02 | 3 | 10 | 10 + 10001 | 2003-12-24 | 1 | 10 | 20 + 10005 | 2003-12-24 | 1 | 30 | 50 + 40001 | 2004-01-09 | 4 | 40 | 90 + 10006 | 2004-01-18 | 1 | 10 | 100 + 20001 | 2004-02-12 | 2 | 20 | 120 + 40005 | 2004-02-12 | 4 | 10 | 130 + 20002 | 2004-02-16 | 2 | 20 | 150 + 30003 | 2004-04-18 | 3 | 15 | 165 + 30004 | 2004-04-18 | 3 | 20 | 185 + 30007 | 2004-09-07 | 3 | 30 | 215 +(11 rows) +---- + +=== SUM() with ORDER BY and ROWS frame + +In this example we will calculate the running total of `qty` ordered by dateid and salesid using a `ROWS UNBOUNDED PRECEDING` frame, which sums all rows from the start up to the current row: + +[source,sql] +---- +SELECT salesid, dateid, sellerid, qty, + SUM(qty) OVER (ORDER BY dateid, salesid ROWS UNBOUNDED PRECEDING) AS running_qty_sum +FROM winsales +ORDER BY dateid, salesid; +---- + +After executing the query above, we get the following output: + +[source,sql] +---- + salesid | dateid | qty | running_qty_sum +---------+------------+-----+----------------- + 30001 | 2003-08-02 | 10 | 10 + 10001 | 2003-12-24 | 10 | 20 + 10005 | 2003-12-24 | 30 | 50 + 40001 | 2004-01-09 | 40 | 90 + 10006 | 2004-01-18 | 10 | 100 + 20001 | 2004-02-12 | 20 | 120 + 40005 | 2004-02-12 | 10 | 130 + 20002 | 2004-02-16 | 20 | 150 + 30003 | 2004-04-18 | 15 | 165 + 30004 | 2004-04-18 | 20 | 185 + 30007 | 2004-09-07 | 30 | 215 +(11 rows) +---- + +The `running_qty_sum` column shows the cumulative sum of `qty` ordered by `dateid` and `salesid`. For each row, it sums all `qty` values from the first row up to the current row in that order. 
+ +=== SUM() with ORDER BY and PARTITION BY + +In this example we will focus on executing the `SUM()` function with `ORDER BY` keyword and `PARTITION BY` clause: + +[source,sql] +---- +SELECT salesid, dateid, sellerid, qty + SUM(qty) OVER (PARTITION BY sellerid ORDER BY dateid, sellerid ROWS UNBOUNDED PRECEDING) +FROM winsales +ORDER BY 3,2,1; +---- + +After executing the query above, we get the following output: + +[source,sql] +---- + salesid | dateid | sellerid | qty | sum +---------+------------+----------+-----+----- + 10001 | 2003-12-24 | 1 | 10 | 10 + 10005 | 2003-12-24 | 1 | 30 | 40 + 10006 | 2004-01-18 | 1 | 10 | 50 + 20001 | 2004-02-12 | 2 | 20 | 20 + 20002 | 2004-02-16 | 2 | 20 | 40 + 30001 | 2003-08-02 | 3 | 10 | 10 + 30003 | 2004-04-18 | 3 | 15 | 25 + 30004 | 2004-04-18 | 3 | 20 | 45 + 30007 | 2004-09-07 | 3 | 30 | 75 + 40001 | 2004-01-09 | 4 | 40 | 40 + 40005 | 2004-02-12 | 4 | 10 | 50 +(11 rows) +---- + +=== Time series: SUM() with RANGE BETWEEN for last 30 days + +In this example, we will demonstrate a common time series use case. 
Calculating the rolling sum of sales quantity over the last 30 days for each row, using the RANGE BETWEEN INTERVAL '`30 days`' PRECEDING AND CURRENT ROW frame: + +[source,sql] +---- +SELECT salesid, dateid, qty, + SUM(qty) OVER ( + ORDER BY dateid + RANGE BETWEEN INTERVAL '30 days' PRECEDING AND CURRENT ROW + ) AS rolling_30d_qty_sum +FROM winsales +ORDER BY dateid; +---- + +The output from the above query sums the `qty` of all sales within the 30-day window ending at the current row’s `dateid`: + +[source,sql] +---- + salesid | dateid | qty | rolling_30d_qty_sum +---------+------------+-----+--------------------- + 30001 | 2003-08-02 | 10 | 10 + 10001 | 2003-12-24 | 10 | 40 + 10005 | 2003-12-24 | 30 | 40 + 40001 | 2004-01-09 | 40 | 80 + 10006 | 2004-01-18 | 10 | 90 + 20001 | 2004-02-12 | 20 | 40 + 40005 | 2004-02-12 | 10 | 40 + 20002 | 2004-02-16 | 20 | 60 + 30003 | 2004-04-18 | 15 | 35 + 30004 | 2004-04-18 | 20 | 35 + 30007 | 2004-09-07 | 30 | 30 +(11 rows) +---- diff --git a/modules/reference/pages/sql/sql-statements/copy-to/copy-to-csv.adoc b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-csv.adoc new file mode 100644 index 000000000..00c259178 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-csv.adoc @@ -0,0 +1,168 @@ += Export Data to a CSV file +:description: Exporting data from Redpanda SQL to a CSV file can be accomplished using the COPY TO command. +:page-topic-type: reference + +Exporting data from Redpanda SQL to a CSV file can be accomplished using the `COPY TO` command. This guide outlines the methods for exporting data, including specifying delimiters, handling headers and controlling null value representation. 
+ +== Syntax + +The syntax for this function is as follows: + +[source,sql] +---- +COPY table_name (column_name) TO {'file_path' | STDOUT} WITH (option, ...); +---- + +== Parameters + +* `table_name`: existing table from which the data will be exported +* `column_name`: *optional* list of columns to be exported. If omitted, all columns are exported +* `TO`: destination for the exported data +** `file_path`: path to the CSV file where the data will be written +** `STDOUT`: exports the data to the standard output stream +* `option`: available options below +** *FORMAT*: output format (currently only CSV is supported) +** *DELIMITER*: delimiter character represented as single quote string literal (By default, this function uses commas as the delimiter) +** *NULL*: string to use for representing NULL values (e.g. NULL 'unknown') +** *HEADER*: boolean value indicating whether to include a header row with column names (values can be `TRUE` or `FALSE`, with default set to `FALSE`) +** *ENDPOINT*: provide object-based storage credentials + +== Examples + +=== Export data to a file + +This example demonstrates how to export data from the `film` table to a CSV file named `film_export.csv`, including a header row and using a semicolon as the delimiter. + +*1. Creating a table* + +Ensure that the table exists in your Redpanda SQL instance. If the table does not exist, create one using the following command: + +[source,sql] +---- +CREATE TABLE film ( +    title text NOT NULL, +    length int, +    rating text +  ); +---- + +*2. 
Inserting Data* + +[source,sql] +---- +INSERT INTO film(title, length, rating) VALUES + ('ATTRACTION NEWTON', 83, 'PG-13'), + ('CHRISTMAS MOONSHINE', 150, 'NC-17'), + ('DANGEROUS UPTOWN', 121, 'PG'), + ('KILL BROTHERHOOD', 54, 'G'), + ('HALLOWEEN NUTS', 47, 'PG-13'), + ('HOURS RAGE', 122, 'NC-17'), + ('PIANIST OUTFIELD', 136, 'NC-17'), + ('PICKUP DRIVING', 77, 'G'), + ('INDEPENDENCE HOTEL', 157, 'NC-17'), + ('PRIVATE DROP', 106, 'PG'), + ('SAINTS BRIDE', 125, 'G'), + ('FOREVER CANDIDATE', 131, 'NC-17'), + ('MILLION ACE', 142, 'PG-13'), + ('SLEEPY JAPANESE', 137, 'PG'), + ('WRATH MILE', 176, 'NC-17'), + ('YOUTH KICK', 179, 'NC-17'), + ('CLOCKWORK PARADISE', 143, 'PG-13'); +---- + +*3. Executing the Export Command* + +[source,sql] +---- +COPY table_name TO 'file_path' WITH (FORMAT CSV, HEADER TRUE, DELIMITER ';'); +---- + +[NOTE] +==== +* Replace `table_name` with your target table (e.g. film) +* Replace `file_path` with the full path where the data will be written +* Use `HEADER` to include a header row with column names +==== + +*4. Verifying Export* + +Now let's check the contents of `film_export.csv` to ensure the data has been successfully exported + +[source,text] +---- +public.film.title;public.film.length;public.film.rating +"ATTRACTION NEWTON";83;PG-13 +"CHRISTMAS MOONSHINE";150;NC-17 +"DANGEROUS UPTOWN";121;PG +"KILL BROTHERHOOD";54;G +"HALLOWEEN NUTS";47;PG-13 +"HOURS RAGE";122;NC-17 +"PIANIST OUTFIELD";136;NC-17 +"PICKUP DRIVING";77;G +"INDEPENDENCE HOTEL";157;NC-17 +"PRIVATE DROP";106;PG +"SAINTS BRIDE";125;G +"FOREVER CANDIDATE";131;NC-17 +"MILLION ACE";142;PG-13 +"SLEEPY JAPANESE";137;PG +"WRATH MILE";176;NC-17 +"YOUTH KICK";179;NC-17 +"CLOCKWORK PARADISE";143;PG-13 +---- + +=== Export data to cloud storage + +To export data to an object storage from a Redpanda SQL table, you can use the `COPY TO` command with object storage credentials. 
This command allows you to transfer data to cloud storage services like AWS S3, Google Cloud Storage or Azure Blob Storage. + +[source,sql] +---- +COPY table_name TO 'cloud_storage_file_path' (object_storage(object_storage_credentials)); +---- + +* `object storage`: `AWS_CRED`, `AZURE_CRED` or `GCS_CRED` (depending on your provider) +* `object_storage_credentials`: for accessing your cloud storage + +You need to provide *Provider-Specific credentials* to authenticate access to your files. Use the following authentication parameters to access your cloud storage. + +==== AWS s3 bucket + +* `aws_region`: AWS region associated with the storage service +* `key_id`: key identifier for authentication +* `access_key`: access key for authentication +* `endpoint_url`: URL endpoint for the storage service + +[source,sql] +---- +COPY table_name TO 's3://your-bucket/file_name' WITH (AWS_CRED(AWS_REGION 'us-west-1', AWS_KEY_ID 'key_id', AWS_PRIVATE_KEY 'access_key', ENDPOINT 's3.us-west-1.amazonaws.com'), FORMAT CSV, HEADER ON, NULL 'unknown'); +---- + +[TIP] +==== +In the exported file, `NULL` values will be represented as 'unknown' as specified in the `NULL` option. +==== + +==== Google Cloud Storage + +* ``: path to JSON credentials file +* ``: contents of the GCS's credentials file + +[source,sql] +---- +COPY table_name TO 'gs://your-bucket/file_name' (GCS_CRED('/path/to/credentials.json')); +---- + +[TIP] +==== +For Google Cloud Storage, it's recommended to use HMAC keys for authentication. You can find more details about that on the link:https://cloud.google.com/storage/docs/authentication/hmackeys[HMAC keys - Cloud Storage^] page. +==== + +==== Azure Blob Storage + +* `tenant_id`: tenant identifier representing your organization's identity in Azure +* `client_id`: client identifier used for authentication +* `client_secret`: secret identifier acting as a password for authentication. 
+ +[source,sql] +---- +COPY table_name TO 'wasbs://container-name/your_blob' (AZURE_CRED(TENANT_ID 'your_tenant_id', CLIENT_ID 'your_client_id', CLIENT_SECRET 'your_client_secret')); +---- diff --git a/modules/reference/pages/sql/sql-statements/copy-to/copy-to-stdout.adoc b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-stdout.adoc new file mode 100644 index 000000000..897b7fb93 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-stdout.adoc @@ -0,0 +1,73 @@ += COPY TO STDOUT +:description: The COPY TO STDOUT command is used to export data directly from a table to the client. +:page-topic-type: reference + +The `COPY TO STDOUT` command is used to export data directly from a table to the client. This approach allows for data transfer by sending the data directly to the client, eliminating the need for server-side file operations. + +== Syntax + +The basic syntax for using `COPY TO STDOUT` is: + +[source,sql] +---- +COPY table_name TO STDOUT; +---- + +Parameters: + +* `table_name`: The table from which the data will be exported. +* `stdout`: Indicates that the data will be sent to the standard output (client application). + +[NOTE] +==== +- *Format*: Only .csv is supported +- *Delimiter*: For CSV format, the default delimiter is a comma (,) +==== + +== Examples + +=== Create the table + +. Create the table and insert some data into it. ++ +[source,sql] +---- +CREATE TABLE book_inventory ( + title TEXT, + copies_available INT +); +INSERT INTO book_inventory (title, copies_available) VALUES +('To Kill a Mockingbird', 5), +('1984', 8), +('The Great Gatsby', 3), +('Moby Dick', 2), +('War and Peace', 4); +---- + +. Upon successful creation, you should see the output below: ++ +[source,sql] +---- +CREATE +INSERT 0 5 +---- + +=== Start the export operation + +. Run the `COPY TO STDOUT` command to export the data from the `book_inventory` table: ++ +[source,sql] +---- +COPY book_inventory TO STDOUT; +---- + +. 
You will get the output with the table values, which you can use to create or copy into a CSV file: ++ +[source,sql] +---- +"To Kill a Mockingbird",5 +1984,8 +"The Great Gatsby",3 +"Moby Dick",2 +"War and Peace",4 +---- diff --git a/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-delimiter.adoc b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-delimiter.adoc new file mode 100644 index 000000000..09f5be76f --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-delimiter.adoc @@ -0,0 +1,127 @@ += COPY TO with Delimiter +:description: A delimiter is a character that separates text strings. +:page-topic-type: reference + +A delimiter is a character that separates text strings. Common delimiters include: + +* Commas (,) +* Semicolon (;) +* Quotes ( “, ’ ) +* Dash (-) +* Pipes (|) +* Slashes ( /  ). + +== Syntax + +The syntax for `COPY TO` with a delimiter is as follows: + +[source,sql] +---- +COPY table_name TO 'file_path' (DELIMITER 'delimiter'); +---- + +Parameters in the syntax include: + +* `table_name`: The table containing the data to be exported. +* `file_path`: The CSV file location where the data will be saved. +* `DELIMITER ‘delimiter'`: The Delimiter used in the exported CSV file. + +[NOTE] +==== +*Default delimiter is a comma (*`,`*).* +==== + +== Example + +=== Create a table + +. Before creating a table, check for duplicate tables using the following statement: ++ +[source,sql] +---- +DESCRIBE DATABASE +---- + +. You will receive a list of existing tables in Redpanda SQL: ++ +[source,sql] +---- + namespace_name | name +----------------+---------------- + public | client + public | distance_table + public | weight + public | product + public | salary +---- + +[WARNING] +==== +Ensure you are not creating duplicate tables. +==== +. Create a "`*customer*`" table. 
++ +[source,sql] +---- +CREATE TABLE customer ( + cust_id int, + cust_name text +); +INSERT INTO customer + (cust_id, cust_name) +VALUES + (11001, 'Maya'), + (11003, 'Ricky'), + (11009, 'Sean'), + (11008, 'Chris'), + (11002, 'Emily'), + (11005, 'Rue'), + (11007, 'Tom'), + (11006, 'Casey'); +---- + +. The table and data were created successfully. ++ +[source,sql] +---- +COMPLETE +INSERT 0 8 +---- + +=== Export data to a CSV file using delimiter + +[WARNING] +==== +*Important Notes:* + - By default, the `COPY TO` command overwrites the CSV file if it already exists. + - Please ensure that the directory where you save the file has write permissions. +==== +In the example below, we are using a comma ( `,` ). + +[source,sql] +---- +COPY customer TO '/home/acer/Documents/customerexport.csv' (DELIMITER ','); +---- + +You will get the successful output below. + +[source,sql] +---- +-- +(0 rows) +---- + +Using the comma ( `,` ) as the delimiter for the `customer` table, the expected output would be: + +[source,sql] +---- +cust_id,cust_name +11001,Maya +11003,Ricky +11009,Sean +11008,Chris +11002,Emily +11005,Rue +11007,Tom +11006,Casey +---- diff --git a/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-endpoint.adoc b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-endpoint.adoc new file mode 100644 index 000000000..7c7a92464 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-endpoint.adoc @@ -0,0 +1,92 @@ += COPY TO with Endpoint +:description: When running COPY TO queries, you should have the option to include the endpoint URL. +:page-topic-type: reference + +When running xref:reference:sql/sql-statements/copy-to/copy-to.adoc[COPY TO] queries, you should have the option to include the *endpoint URL*. This feature is especially useful for scenarios where you need to provide credentials and specific endpoints. 
+ +== Syntax + +The syntax for using `COPY TO` statement is as follows: + +[source,sql] +---- +COPY table_name TO 'file_path' (AWS_CRED(AWS_REGION 'aws_region', AWS_KEY_ID + 'key_id', AWS_PRIVATE_KEY 'access_key', ENDPOINT 'endpoint_url')); +---- + +[NOTE] +==== +Replace `AWS_CRED` with `AZURE_CRED` or `GCS_CRED` when copying to the Azure Blob Storage or Google Cloud Storage. +==== +Here’s the breakdown of parameters syntax: + +* *Shared parameters*: +** `table_name`: table containing the data to be exported +** `file_path`: CSV file location accessible from the server +* *Parameters in `AWS_CRED`*: +** `aws_region`: AWS region associated with the storage service (e.g. '`region1`') +** `key_id`: key identifier used for authentication +** `access_key`: access key used for authentication +** `endpoint_url`: URL endpoint for the storage service +* *Parameters in `GCS_CRED`*: +** ``: path to JSON credentials file. +** ``: contents of the GCS’s credentials file +* *Parameters in `AZURE_CRED`*: +** `tenant_id`: tenant identifier representing your organization’s identity in Azure +** `client_id`: client identifier used for authentication. +** `client_secret`: secret identifier acting as a password when authenticating + +== Examples + +=== COPY TO with AWS S3 bucket + +In this example, we use the `COPY TO` statement to export data from the `students` table to a CSV file named `students_file`. + +[source,sql] +---- +COPY students TO 's3://oxla-testdata/cayo/students_file' (AWS_CRED(AWS_REGION 'region1', AWS_KEY_ID + 'key_id', AWS_PRIVATE_KEY 'access_key', ENDPOINT 's3.us-east-2.amazonaws.com')); +---- + +*Expected Output*: `student` table data is copied to the `students_file` on AWS S3 + +=== COPY TO with Google Cloud Storage + +This example shows how to use the `COPY TO` statement to export data, but this time, the data is stored on Google Cloud Storage. 
+ +[source,sql] +---- +COPY project TO 'gs://your-bucket/project_file' (GCS_CRED('/path/to/credentials.json')); +---- + +If for any reason you cannot use a path to the `credentials.json` file, you can also pass its contents as a string in the following way: + +[source,sql] +---- +COPY project TO 'gs://your-bucket/project_file' (GCS_CRED('')); +---- + +[NOTE] +==== +Make sure that it is in JSON format. +==== +You can also copy the data using the `AWS_CRED` like below: + +[source,sql] +---- +COPY project TO 'gs://your-bucket/project_file' (AWS_CRED(AWS_REGION 'region1', AWS_KEY_ID + 'key_id', AWS_PRIVATE_KEY 'access_key', ENDPOINT 'https://storage.googleapis.com')); +---- + +*Expected Output*: Data from the `project` table is copied to the `project_file` on Google Cloud Storage + +=== COPY TO with Azure Blob Storage + +It’s a similar story for storing data in Azure Blob Storage. + +[source,sql] +---- +COPY taxi_data TO 'wasbs://container-name/your_blob' (AZURE_CRED(TENANT_ID 'your_tenant_id', CLIENT_ID 'your_client_id', CLIENT_SECRET 'your_client_secret')); +---- + +*Expected Output*: Data from the `taxi_data` table is copied to `your_blob` on Azure Blob Storage diff --git a/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-header.adoc b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-header.adoc new file mode 100644 index 000000000..8103a042c --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-header.adoc @@ -0,0 +1,148 @@ += COPY TO with HEADER +:description: When you export data from a table to a CSV file using the COPY TO command, you can include or skip the header. +:page-topic-type: reference + +== Overview + +When you export data from a table to a CSV file using the `COPY TO` command, you can include or skip the header. Redpanda SQL provides two options for handling headers: `HEADER OFF` and `HEADER ON`. 
+ +== Syntax + +The syntax for `COPY TO` with `HEADER` is as follows: + +[source,sql] +---- +COPY table_name TO 'file_path' (Header_Syntax); +---- + +Parameters in the syntax include: + +* `table_name`: The table containing the data to be exported. +* `file_path`: The CSV file location where the data will be saved. +* `Header_Syntax`: The specified header options. + +== Header options + +* *HEADER OFF* + +This option excludes the header row from the exported CSV file, so only data rows are written. The available syntax is: + +[source,none] +---- +HEADER OFF +HEADER FALSE +HEADER 0 +---- + +[NOTE] +==== +This option is the default behavior if `HEADER` is not provided. +==== +* *HEADER ON* + +This option includes a header row with the column names in the exported CSV file. The available syntax is: + +[source,none] +---- +HEADER ON +HEADER TRUE +HEADER 1 +---- + +== Examples + +First, create a *"`personal_details`"* table. + +[source,sql] +---- +CREATE TABLE personal_details ( + id int, + first_name text, + last_name text, + gender text +); +INSERT INTO personal_details + (id, first_name, last_name, gender) +VALUES + (1,'Mark','Wheeler','M'), + (2,'Tom','Hanks','M'), + (3,'Jane','Hopper','F'), + (4,'Emily','Byers','F'), + (5,'Lucas','Sinclair','M'); +---- + +The table and data were created successfully. + +[source,sql] +---- +COMPLETE +INSERT 0 5 +---- + +Now, let’s explore some cases of `COPY TO` with different header options: + +=== HEADER OFF + +[NOTE] +==== +Please ensure that the directory where you save the file has write permissions. +==== +. Run the query below to export the table. ++ +[source,sql] +---- +COPY personal_details TO '/home/acer/Documents/personalinfo.csv'; +---- + +. You will get the following output, indicating that the table has successfully exported to the CSV file. ++ +[source,sql] +---- +-- +(0 rows) +---- + +. The data in the table is copied directly to the `personalinfo` file without a header row. 
++ +[source,sql] +---- +1,'Mark','Wheeler','M' +2,'Tom','Hanks','M' +3,'Jane','Hopper','F' +4,'Emily','Byers','F' +5,'Lucas','Sinclair','M' +---- + +[TIP] +==== +To include headers, use the `HEADER ON` option. +==== + +=== HEADER ON + +. Run the query below to export the table. ++ +[source,sql] +---- +COPY personal_details TO '/home/acer/Documents/personalinfo.csv' (HEADER ON); +---- + +. You will get a successful output below. ++ +[source,sql] +---- +-- +(0 rows) +---- + +. In this case, the header from the table will be included in the CSV file. ++ +[source,none] +---- +id,first_name,last_name,gender +1,'Mark','Wheeler','M' +2,'Tom','Hanks','M' +3,'Jane','Hopper','F' +4,'Emily','Byers','F' +5,'Lucas','Sinclair','M' +---- diff --git a/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-null.adoc b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-null.adoc new file mode 100644 index 000000000..89d273792 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/copy-to-with-null.adoc @@ -0,0 +1,61 @@ += COPY TO with NULL +:description: A NULL value indicates that the value does not exist in the database. +:page-topic-type: reference + +A `NULL` value indicates that the value does not exist in the database. In Redpanda SQL, you can use the `NULL` option in the `COPY TO` statement to specify a string that will replace `NULL` values when copying data from the table to a CSV file. + +== Syntax + +The syntax for using the `NULL` option in the `COPY TO` is as follows: + +[source,sql] +---- +COPY table_name TO 'file_path' (NULL 'replacement_string'); +---- + +Parameters in the syntax include: + +* `table_name`: The table containing the data to be exported. +* `file_path`: A CSV file location where the data will be saved. +* `NULL 'replacement_string'`: The specified string that will replace NULL values in the exported CSV file. The default value is `' '`. + +== Example + +. Create a table with a `NULL` value. 
++ +[source,sql] +---- +CREATE TABLE example_table ( + id serial, + name varchar(50), + age int, + city varchar(50) +); + +INSERT INTO example_table (name, age, city) VALUES + ('John', 25, 'New York'), + ('Alice', NULL, 'Chicago'), + ('Bob', 30, NULL); +---- + +. Now, let’s use `COPY TO` with an empty string: ++ +[source,sql] +---- +COPY example_table TO '/path/to/exampleexport.csv' (NULL ''); +---- + +. The `NULL` values in the table are replaced with the empty string in the CSV file. ++ +// TODO: Confirm result +[source,text] +---- +1,John,25,"New York" +2,Alice,"","Chicago" +3,Bob,30,"" +---- + +[TIP] +==== +You can specify another string to replace the null value, such as blank, empty, invalid, etc. +==== diff --git a/modules/reference/pages/sql/sql-statements/copy-to/copy-to.adoc b/modules/reference/pages/sql/sql-statements/copy-to/copy-to.adoc new file mode 100644 index 000000000..6545cb193 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/copy-to.adoc @@ -0,0 +1,145 @@ += COPY TO +:description: The COPY TO statement is used to export tables, specific columns, or results of select queries into .csv files. +:page-topic-type: reference + +The `COPY TO` statement is used to export tables, specific columns, or results of select queries into .csv files. It allows you to copy data from a table or query result and save it to a specified file. + +== Syntax + +The syntax for `COPY TO` is as follows: + +[source,sql] +---- +COPY { table_name [ ( column_name [, ...] ) ] | ( query ) } TO 'filename' [( option [, ...] ) ]; +---- + +Parameters in the syntax include: + +* `table_name`: Table with the data to export. +* `column_name`: Optional. Specify columns for export. +* `query`: A `SELECT` statement for exporting specific results. +* `filename`: File name for saving the exported data. +* `option`: Optional parameters for customization. + +== Example + +=== Create a table + +. 
Before creating The table, check for duplicate tables using the following statement: ++ +[source,sql] +---- +DESCRIBE DATABASE +---- + +. You will receive a list of existing tables in Redpanda SQL: ++ +[source,sql] +---- + namespace_name | name +----------------+---------------- + public | client + public | distance_table + public | weight + public | product +---- + +[WARNING] +==== +Ensure you are not creating duplicate tables. +==== +. Now, let’s create a table for exporting data to a CSV file. Here, we’ll create a "`*salary*`" table: ++ +[source,sql] +---- +CREATE TABLE salary ( + empid int, + empname text, + empdept text, + empaddress text, + empsalary int +); +INSERT INTO salary + (empid, empname, empdept, empaddress, empsalary) +VALUES + (2001,'Paul','HR', 'California', null ), + (2002,'Brandon','Product', 'Norway', 15000), + (2003,'Bradley','Marketing', 'Texas', null), + (2004,'Lisa','Marketing', 'Houston', 10000), + (2005,'Emily','Marketing', 'Texas', 20000), + (2006,'Bobby','Finance', 'Seattle', 20000), + (2007,'Parker','Project', 'Texas', 45000); +---- + +. The table and data were created successfully. ++ +[source,sql] +---- +COMPLETE +INSERT 0 7 +---- + +=== Copy the table into the CSV file + +[WARNING] +==== +*Important Notes:* + - By default, the `COPY TO` command overwrites the CSV file if it already exists. + - Please ensure that the directory where you save the file has the necessary write permissions. +==== +*Option 1: Exporting all columns from a table* + +Copy all columns in the table to the specified CSV file: + +[source,sql] +---- +COPY salary TO '/path/to/exportsalary.csv'; +---- + +You will get the following successful result: + +[source,sql] +---- +-- +(0 rows) +---- + +The data from the table will be exported to the CSV file. 
+ +*Option 2: Exporting specific columns from a table* + +Copy only specific columns by specifying the column names in the query: + +[source,sql] +---- +COPY salary (empid, empname, empsalary) TO 'exportsalary.csv'; +---- + +You will get the following successful result: + +[source,sql] +---- +-- +(0 rows) +---- + +The data from the specified columns will be exported to the CSV file. + +*Option 3: Exporting results of a SELECT statement* + +In the example below, copy data only from the *Marketing department* using the `SELECT` statement and `WHERE` clause: + +[source,sql] +---- +COPY (SELECT * FROM salary WHERE empdept = 'Marketing') TO 'exportsalary.csv'; +---- + +You will get the following successful result: + +[source,sql] +---- +-- +(0 rows) +---- + +Data exported to CSV file is only from the Marketing department. diff --git a/modules/reference/pages/sql/sql-statements/copy-to/index.adoc b/modules/reference/pages/sql/sql-statements/copy-to/index.adoc new file mode 100644 index 000000000..7580ac2e2 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/copy-to/index.adoc @@ -0,0 +1,3 @@ += COPY TO +:description: Export query results to external storage using the COPY TO statement. +:page-layout: index diff --git a/modules/reference/pages/sql/sql-statements/describe.adoc b/modules/reference/pages/sql/sql-statements/describe.adoc new file mode 100644 index 000000000..50bc8f943 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/describe.adoc @@ -0,0 +1,96 @@ += DESCRIBE +:description: The DESCRIBE statement is used to show columns within a table as well as tables within a database. +:page-topic-type: reference + +The `DESCRIBE` statement is used to show columns within a table as well as tables within a database. 
+ +[TIP] +==== +It is recommended to be used before creating a new table to avoid tables duplication +==== + +== Syntax + +Below you can find the basic syntax for describing tables within a database as well as columns within tables: + +[source,sql] +---- +DESCRIBE DATABASE; +---- + +[source,sql] +---- +DESCRIBE TABLE table_name; +---- + +where: + +`table_name`: name of the table that you want to show + +[NOTE] +==== +This statement is available to all users with the `USAGE` privilege on the schema, where the table is located +==== + +== Examples + +To get a better understanding of the `DESCRIBE` statement, take a look at some examples below: + +=== DESCRIBE table + +In this example, we will figure out the columns of the *part* table. In order to do so, you need to run the query below: + +[source,sql] +---- +DESCRIBE TABLE part; +---- + +As a result, you will get a list of column names, column types, and nullable options from the *part* table: + +[source,sql] +---- ++----------------+------------+-------------+-------+----------+ +| database_name | table_name | name | type | nullable | ++----------------+------------+-------------+-------+----------+ +| public | part | p_partkey | INT | f | +| public | part | p_name | TEXT | f | +| public | part | p_mfgr | TEXT | f | +| public | part | p_category | TEXT | f | +| public | part | p_brand | TEXT | f | +| public | part | p_color | TEXT | f | +| public | part | p_type | TEXT | f | +| public | part | p_size | INT | f | +| public | part | p_container | TEXT | f | ++----------------+------------+-------------+-------+----------+ +---- + +[TIP] +==== +The example above shows that the tables reside in the `public` schema (the default schema in Redpanda SQL). 
You can also display tables from other schemas, by following the doc xref:reference:sql/schema.adoc[here] +==== + +=== DESCRIBE database + +In order to describe the database, you need to execute the following query: + +[source,sql] +---- +DESCRIBE DATABASE; +---- + +The output for the above code consists of all existing tables from the specified database, as presented below: + +[source,sql] +---- ++-----------------------------+ +| name | ++-----------------------------+ +| supplier_scale_1_no_index | +| features | +| orders | +| features2 | +| featurestable | +| featurestable1 | +| featurestable10 | ++-----------------------------+ +---- diff --git a/modules/reference/pages/sql/sql-statements/index.adoc b/modules/reference/pages/sql/sql-statements/index.adoc new file mode 100644 index 000000000..20e262c34 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/index.adoc @@ -0,0 +1,17 @@ += SQL STATEMENTS +:description: SQL statements are the commands used to interact with Redpanda SQL database. + +SQL statements are the commands used to interact with Redpanda SQL database. These statements enable you to create, modify, query and manage database objects and data efficiently. 
+ +The following table summarizes the statements supported by Redpanda SQL: + +[width="100%",cols="<48%,<52%",options="header",] +|=== +|Statement Name |Description +|xref:reference:sql/sql-statements/select.adoc[SELECT] |Retrieves data from table +|xref:reference:sql/sql-statements/copy-to/copy-to.adoc[COPY TO] |Exports table data or specific columns to CSV files +|xref:reference:sql/sql-statements/set-show.adoc[SET/SHOW] |Configures or displays session-level settings such as path +|xref:reference:sql/sql-statements/show-tables.adoc[SHOW TABLES] |Lists all tables within the current schema or database +|xref:reference:sql/sql-statements/show-nodes.adoc[SHOW NODES] |Displays the current state of nodes in a distributed cluster +|xref:reference:sql/sql-statements/describe.adoc[DESCRIBE] |Shows detailed information about columns in a table or tables within a database +|=== diff --git a/modules/reference/pages/sql/sql-statements/keywords.adoc b/modules/reference/pages/sql/sql-statements/keywords.adoc new file mode 100644 index 000000000..361e77942 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/keywords.adoc @@ -0,0 +1,471 @@ += Keywords +:description: In Redpanda SQL, reserved and non-reserved keywords play an important role in SQL syntax and usage. +:page-topic-type: reference + +In Redpanda SQL, *reserved* and *non-reserved* keywords play an important role in SQL syntax and usage. Reserved keywords are strictly defined by the SQL standard and cannot be used as identifiers, such as table or column names, unless explicitly quoted. These keywords have predefined meanings and are always interpreted as part of the SQL syntax, for example, `SELECT`, `INSERT` and `UPDATE`. + +On the other hand, non-reserved keywords have special meanings only in specific context and can be used as identifiers in other situations. For example, the keyword `DB` is non-reserved, meaning you can use it directly to name a database. 
+ +The table below lists all available keywords that you can use in statements: + +[cols=",<",options="header",] +|=== +|Keyword |Status +|ABSOLUTE |non-reserved +|ACTION |non-reserved +|ADD |non-reserved +|AFTER |non-reserved +|AGGREGATE |non-reserved +|ALL |reserved +|ALLOCATE |reserved +|ALTER |non-reserved +|ANALYSE |reserved +|ANALYZE |reserved +|AND |reserved +|ANY |reserved +|ANY_VALUE |non-reserved +|ARE |reserved +|ARRAY |reserved, requires AS +|ARRAY_MAX_CARDINALITY |non-reserved +|AS |reserved, requires AS +|ASC |reserved +|ASENSITIVE |non-reserved +|ASSERTION |non-reserved +|ASSIGNMENT |non-reserved +|ASYMMETRIC |reserved +|AT |non-reserved +|ATOMIC |non-reserved +|AUTHORIZATION |reserved (can be function or type) +|AVG |non-reserved +|BEFORE |non-reserved +|BEGIN |non-reserved +|BEGIN_FRAME |non-reserved +|BEGIN_PARTITION |non-reserved +|BETWEEN |non-reserved (cannot be function or type) +|BIGINT |non-reserved (cannot be function or type) +|BIT |non-reserved (cannot be function or type) +|BIT_LENGTH |reserved +|BLOB |non-reserved +|BOOL |non-reserved +|BOOLEAN |non-reserved (cannot be function or type) +|BOTH |reserved +|BY |non-reserved +|CACHE |non-reserved +|CALL |reserved +|CALLED |reserved +|CARDINALITY |non-reserved +|CASCADE |reserved +|CASCADED |reserved +|CASE |reserved +|CAST |reserved +|CATALOG |non-reserved +|CEILING |non-reserved +|CHAR |non-reserved +|CHAR_LENGTH |non-reserved +|CHARACTER |non-reserved +|CHARACTER_LENGTH |non-reserved +|CHECK |reserved +|CLASSIFIER |non-reserved +|CLOB |non-reserved +|CLOSE |reserved +|COALESCE |reserved +|COLLATE |reserved +|COLLATION |reserved +|COLLECT |non-reserved +|COLUMN |reserved +|COLUMNS |non-reserved +|COMMIT |reserved +|CONDITION |reserved +|CONNECT |reserved +|CONNECTION |reserved +|CONSTRAINT |reserved +|CONSTRAINTS |non-reserved +|CONTAINS |non-reserved +|CONTINUE |reserved +|CONTROL |non-reserved +|CONVERT |non-reserved +|COPY |non-reserved +|CORR |non-reserved +|CORRESPONDING |reserved 
+|COVAR_POP |non-reserved +|COVAR_SAMP |non-reserved +|CREATE |reserved +|CROSS |reserved +|CUBE |reserved +|CUME_DIST |non-reserved +|CURRENT |reserved +|CURRENT_USER |reserved +|CURRENT_ROLE |reserved +|CURSOR |reserved +|CYCLE |reserved +|DATABASE |non-reserved +|DATABASES |non-reserved +|DATALINK |non-reserved +|DATE |non-reserved +|DATETIME |non-reserved +|DAY |non-reserved +|DEALLOCATE |reserved +|DEC |non-reserved +|DECFLOAT |non-reserved +|DECIMAL |non-reserved +|DECLARE |reserved +|DEFAULT |reserved +|DEFERRABLE |reserved +|DEFERRED |reserved +|DEFINE |non-reserved +|DELETE |reserved +|DELTA |non-reserved +|DENSE_RANK |non-reserved +|DEREF |non-reserved +|DESC |reserved +|DESCRIBE |reserved +|DESCRIPTOR |reserved +|DETERMINISTIC |reserved +|DIAGNOSTICS |reserved +|DIRECT |non-reserved +|DISCONNECT |reserved +|DISTINCT |reserved +|DLNEWCOPY |non-reserved +|DLPREVIOUSCOPY |non-reserved +|DLURLCOMPLETE |non-reserved +|DLURLCOMPLETEONLY |non-reserved +|DLURLCOMPLETEWRITE |non-reserved +|DLURLPATH |non-reserved +|DLURLPATHONLY |non-reserved +|DLURLPATHWRITE |non-reserved +|DLURLSCHEME |non-reserved +|DLURLSERVER |non-reserved +|DLVALUE |non-reserved +|DO |reserved +|DOMAIN |non-reserved +|DOUBLE |non-reserved +|DROP |reserved +|DYNAMIC |non-reserved +|EACH |reserved +|ELEMENT |non-reserved +|ELSE |reserved +|EMPTY |non-reserved +|END |reserved +|END_FRAME |non-reserved +|END_PARTITION |non-reserved +|EQUALS |non-reserved +|ESCAPE |reserved +|EVERY |reserved +|EXCEPT |reserved +|EXCEPTION |reserved +|EXEC |reserved +|EXECUTE |reserved +|EXISTS |reserved +|EXP |non-reserved +|EXPLAIN |reserved +|EXTERNAL |reserved +|EXTRACT |reserved +|FALSE |reserved +|FETCH |reserved +|FILE |non-reserved +|FILTER |reserved +|FIRST |reserved +|FIRST_VALUE |non-reserved +|FLOAT |non-reserved +|FLOOR |non-reserved +|FOR |reserved +|FOREIGN |reserved +|FORMAT |non-reserved +|FOUND |non-reserved +|FRAME_ROW |non-reserved +|FREE |non-reserved +|FROM |reserved +|FULL |reserved 
+|FUNCTION |reserved +|FUSION |non-reserved +|GET |non-reserved +|GLOBAL |reserved +|GO |non-reserved +|GOTO |non-reserved +|GRANT |reserved +|GROUP |reserved +|GROUPING |reserved +|GROUPS |non-reserved +|HASH |non-reserved +|HAVING |reserved +|HINT |non-reserved +|HOLD |non-reserved +|HOUR |non-reserved +|IDENTITY |reserved +|IF |reserved +|ILIKE |non-reserved +|IMMEDIATE |reserved +|IMPORT |non-reserved +|IN |reserved +|INDEX |reserved +|INDICATOR |reserved +|INITIAL |reserved +|INITIALLY |reserved +|INNER |reserved +|INOUT |reserved +|INPUT |reserved +|INSENSITIVE |reserved +|INSERT |reserved +|INT |non-reserved +|INTEGER |non-reserved +|INTERSECT |reserved +|INTERSECTION |non-reserved +|INTERVAL |reserved +|INTO |reserved +|IS |reserved +|ISNULL |non-reserved +|ISOLATION |reserved +|JOIN |reserved +|JSON |non-reserved +|JSON_ARRAY |non-reserved +|JSON_ARRAYAGG |non-reserved +|JSON_EXISTS |non-reserved +|JSON_OBJECT |non-reserved +|JSON_OBJECTAGG |non-reserved +|JSON_QUERY |non-reserved +|JSON_TABLE |non-reserved +|JSON_TABLE_PRIMITIVE |non-reserved +|JSON_VALUE |non-reserved +|JSONB |non-reserved +|KEY |non-reserved +|LAG |non-reserved +|LANGUAGE |reserved +|LARGE |non-reserved +|LAST |reserved +|LAST_VALUE |non-reserved +|LATERAL |reserved +|LEAD |non-reserved +|LEADING |reserved +|LEFT |reserved +|LEVEL |non-reserved +|LIKE |reserved +|LIKE_REGEX |non-reserved +|LIMIT |reserved +|LISTAGG |non-reserved +|LN |non-reserved +|LOAD |non-reserved +|LOCAL |reserved +|LOCALTIME |reserved +|LOCALTIMESTAMP |reserved +|LONG |non-reserved +|MEASURES |non-reserved +|MEMBER |non-reserved +|MERGE |reserved +|METHOD |non-reserved +|MINUS |reserved +|MINUTE |non-reserved +|MODIFIES |reserved +|MODULE |non-reserved +|MONTH |non-reserved +|MULTISET |non-reserved +|NAMES |non-reserved +|NATIONAL |non-reserved +|NATURAL |reserved +|NCHAR |non-reserved +|NCLOB |non-reserved +|NEW |reserved +|NEXT |non-reserved +|NO |reserved +|NONE |non-reserved +|NOT |reserved +|NTILE 
|non-reserved +|NULL |reserved +|NULLIF |reserved +|NULLS |reserved +|NVARCHAR |non-reserved +|OCCURRENCES_REGEX |non-reserved +|OCTET_LENGTH |non-reserved +|OF |reserved +|OFF |non-reserved +|OFFSET |reserved +|OLD |reserved +|OMIT |non-reserved +|ON |reserved +|ONE |non-reserved +|ONLY |reserved +|OPEN |reserved +|OPTION |reserved +|OR |reserved +|ORDER |reserved +|OUT |reserved +|OUTER |reserved +|OUTPUT |reserved +|OVER |reserved +|OVERLAPS |reserved +|OVERLAY |non-reserved +|PAD |non-reserved +|PARAMETER |reserved +|PARAMETERS |non-reserved +|PARTIAL |reserved +|PARTITION |reserved +|PATTERN |non-reserved +|PER |non-reserved +|PERCENT |non-reserved +|PERCENT_RANK |non-reserved +|PERCENTILE_CONT |non-reserved +|PERCENTILE_DISC |non-reserved +|PERIOD |reserved +|PERMUTE |non-reserved +|PLACING |non-reserved +|PLAN |non-reserved +|PORTION |non-reserved +|PRECEDES |non-reserved +|PRECISION |reserved +|PREPARE |reserved +|PRESERVE |reserved +|PRIMARY |reserved +|PRIOR |reserved +|PRIVILEGES |non-reserved +|PROCEDURE |reserved +|PTF |non-reserved +|PUBLIC |reserved +|RANGE |reserved +|READ |reserved +|READS |reserved +|REAL |non-reserved +|RECURSIVE |reserved +|REF |reserved +|REFERENCES |reserved +|REFERENCING |reserved +|REGR_AVGX |non-reserved +|REGR_AVGY |non-reserved +|REGR_COUNT |non-reserved +|REGR_INTERCEPT |non-reserved +|REGR_R2 |non-reserved +|REGR_SLOPE |non-reserved +|REGR_SXX |non-reserved +|REGR_SXY |non-reserved +|REGR_SYY |non-reserved +|RELATIVE |non-reserved +|RELEASE |reserved +|RENAME |reserved +|RESTRICT |reserved +|RESULT |reserved +|RETURN |reserved +|RETURNS |reserved +|REVOKE |reserved +|RIGHT |reserved +|ROLLBACK |reserved +|ROLLUP |reserved +|ROW |reserved +|ROW_NUMBER |non-reserved +|ROWS |reserved +|RUNNING |non-reserved +|SAVEPOINT |reserved +|SCHEMA |reserved +|SCHEMAS |non-reserved +|SCOPE |reserved +|SCROLL |reserved +|SEARCH |non-reserved +|SECOND |non-reserved +|SECTION |non-reserved +|SEEK |non-reserved +|SELECT |reserved 
+|SENSITIVE |reserved +|SESSION |reserved +|SESSION_USER |reserved +|SET |reserved +|SHOW |non-reserved +|SIMILAR |non-reserved +|SIZE |non-reserved +|SKIP |non-reserved +|SMALLINT |non-reserved +|SOME |reserved +|SORTED |non-reserved +|SPACE |non-reserved +|SPATIAL |non-reserved +|SPECIFIC |reserved +|SPECIFICTYPE |non-reserved +|SQL |reserved +|SQLCODE |non-reserved +|SQLERROR |non-reserved +|SQLEXCEPTION |non-reserved +|SQLSTATE |non-reserved +|SQLWARNING |non-reserved +|START |reserved +|STATIC |reserved +|STDDEV_POP |non-reserved +|STDDEV_SAMP |non-reserved +|STRING |non-reserved +|SUBMULTISET |non-reserved +|SUBSET |non-reserved +|SUCCEEDS |non-reserved +|SYMMETRIC |reserved +|SYSTEM |reserved +|SYSTEM_TIME |non-reserved +|SYSTEM_USER |reserved +|TABLE |reserved +|TABLES |non-reserved +|TABLESAMPLE |reserved +|TEMPORARY |reserved +|TEXT |non-reserved +|THEN |reserved +|TIME |non-reserved +|TIMESTAMP |non-reserved +|TIMESTAMP_TRUNC |non-reserved +|TO |reserved +|TOP |non-reserved +|TRAILING |reserved +|TRANSACTION |reserved +|TRANSLATE |reserved +|TRANSLATE_REGEX |non-reserved +|TRANSLATION |non-reserved +|TREAT |reserved +|TRIGGER |reserved +|TRUE |reserved +|TRUNCATE |reserved +|UESCAPE |reserved +|UNION |reserved +|UNIQUE |reserved +|UNKNOWN |reserved +|UNLOAD |non-reserved +|UNMATCHED |non-reserved +|UNNEST |non-reserved +|UPDATE |reserved +|UPPER |non-reserved +|USAGE |reserved +|USER |non-reserved +|USING |reserved +|VALUES |reserved +|VAR_POP |non-reserved +|VAR_SAMP |non-reserved +|VARBINARY |non-reserved +|VARCHAR |non-reserved +|VARIADIC |reserved +|VARYING |reserved +|VERSIONING |non-reserved +|VIEW |reserved +|VIRTUAL |non-reserved +|WHEN |reserved +|WHENEVER |reserved +|WHERE |reserved +|WIDTH_BUCKET |non-reserved +|WINDOW |reserved +|WITH |reserved +|WITHIN |reserved +|WITHOUT |reserved +|WORK |reserved +|WRITE |non-reserved +|XML |non-reserved +|XMLAGG |non-reserved +|XMLATTRIBUTES |non-reserved +|XMLBINARY |non-reserved +|XMLCAST |non-reserved 
+|XMLCOMMENT |non-reserved +|XMLCONCAT |non-reserved +|XMLDOCUMENT |non-reserved +|XMLELEMENT |non-reserved +|XMLEXISTS |non-reserved +|XMLFOREST |non-reserved +|XMLITERATE |non-reserved +|XMLNAMESPACES |non-reserved +|XMLPARSE |non-reserved +|XMLPI |non-reserved +|XMLQUERY |non-reserved +|XMLSERIALIZE |non-reserved +|XMLTABLE |non-reserved +|XMLTEXT |non-reserved +|XMLVALIDATE |non-reserved +|YEAR |non-reserved +|ZONE |non-reserved +|=== diff --git a/modules/reference/pages/sql/sql-statements/select.adoc b/modules/reference/pages/sql/sql-statements/select.adoc new file mode 100644 index 000000000..6826b7d97 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/select.adoc @@ -0,0 +1,151 @@ += SELECT +:description: The SELECT statement helps you obtain the data you need from one or more tables. +:page-topic-type: reference + +The `SELECT` statement helps you obtain the data you need from one or more tables. + +This statement is useful in the following cases: + +* Evaluating data from only particular fields in a table. +* Reviewing data from several tables at the same time. +* Retrieving the data based on specific criteria. + +== Syntax + +To request data from a table using the `SELECT` statement, you can use the following syntax: + +[source,sql] +---- +SELECT * FROM table_name; +---- + +You can also select specific columns from the table. Use the following syntax: + +[source,sql] +---- +SELECT column1, column2, ... +FROM table_name; +---- + +The elements of this syntax are defined as follows: + +* `SELECT` specifies the data to retrieve from the database or a table. +* `*`, referred to as *_select star_* or *_asterisk_*, represents *_all_*. It specifies that the query should return all columns of the queried tables. +* `FROM` clause indicates the table(s) to retrieve data from. +* `table_name` represents the name of the table(s). +* `column1, column2, ...` specifies the columns from which to retrieve the data. 
+ +[NOTE] +==== +The `SELECT` statement is case-insensitive, which means `select` and `SELECT` produce the same result. +==== + +== Examples + +We have a table named *student_data* that stores the ID, name, and domicile of each student. + +[source,sql] +---- +CREATE TABLE student_data ( + id int, + name text, + domicile text +); +INSERT INTO student_data + (id, name, domicile) +VALUES + (119291,'Jordan','Los Angeles'), + (119292,'Mike','Melbourne'), + (119293,'Will','Sydney'); +---- + +[TIP] +==== +All the examples below are executed in the `public` schema. You can also display tables from another schema. +See xref:reference:sql/schema.adoc[] for more information. +==== + +=== Query data from all columns + +. First, we want to display all the data from the *student_data* table. Use the following syntax: ++ +[source,sql] +---- +SELECT * FROM table_name; +---- + +. Use the `SELECT` statement with the table name to get all the data: ++ +[source,sql] +---- +SELECT * FROM student_data; +---- + +. If you have successfully run the query, you will get all the data from the *student_data* table. ++ +[source,sql] +---- ++--------+----------+----------------+ +| id | name | domicile | ++--------+----------+----------------+ +| 119291 | Jordan | Los Angeles | +| 119292 | Mike | Melbourne | +| 119293 | Will | Sydney | ++--------+----------+----------------+ +---- + +=== Query data from specific columns + +. We want to get the list of students’ names along with their IDs. Use the following syntax: ++ +[source,sql] +---- +SELECT column_1, column_2 FROM table_name; +---- + +. Run the following query: ++ +[source,sql] +---- +SELECT id, name FROM student_data; +---- + +. If you have successfully run the query, you will get a list of students’ IDs and names from the *student_data* table. 
++ +[source,sql] +---- ++--------+----------+ +| id | name | ++--------+----------+ +| 119291 | Jordan | +| 119292 | Mike | +| 119293 | Will | ++--------+----------+ +---- + +=== Query data from a specific column with a condition + +. When a table contains a large amount of data, skimming for the desired data takes a long time. You can apply conditions to the `SELECT` statement instead. Use the following syntax: ++ +[source,sql] +---- +SELECT column_1 FROM table_name WHERE condition; +---- + +. Suppose we want to know the name of the student who lives in Sydney. Run the following query: ++ +[source,sql] +---- +SELECT name FROM student_data WHERE domicile='Sydney'; +---- + +. If you have successfully run the query, we now know that Will lives in Sydney. ++ +[source,sql] +---- ++----------+ +| name | ++----------+ +| Will | ++----------+ +---- diff --git a/modules/reference/pages/sql/sql-statements/set-show.adoc b/modules/reference/pages/sql/sql-statements/set-show.adoc new file mode 100644 index 000000000..543132965 --- /dev/null +++ b/modules/reference/pages/sql/sql-statements/set-show.adoc @@ -0,0 +1,168 @@ += SET/SHOW +:description: The SET statement lets you set specific options while the SHOW statement helps you see the current values in Redpanda SQL. +:page-topic-type: reference + +The `SET` statement lets you set specific options, while the `SHOW` statement helps you see the current values in Redpanda SQL. + +== Syntax + +The syntax for these statements is as follows: + +[tabs] +==== +SET:: ++ +[source,sql] +---- +SET