Skip to content

Commit

Permalink
Replace ext/tokenizer/tokenizer_data_gen.php with CMake-based script
Browse files Browse the repository at this point in the history
As ext/tokenizer/tokenizer_data_gen.php is a very simple PHP
command-line script, it can also be implemented directly in CMake. This
simplifies building when PHP is not installed. There are simply too many
dependencies and issues around these files - Zend's language parser
files, PHP stubs, the tokenizer's generated data source file, and the PHP
binary or an installed PHP.
  • Loading branch information
petk committed Feb 26, 2025
1 parent 5943555 commit 737f636
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 7 deletions.
13 changes: 6 additions & 7 deletions cmake/ext/tokenizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ project(

include(CMakeDependentOption)
include(FeatureSummary)
include(PHP/AddCustomCommand)

option(PHP_EXT_TOKENIZER "Enable the tokenizer extension" ON)

Expand Down Expand Up @@ -71,17 +70,17 @@ target_sources(
add_dependencies(php_ext_tokenizer Zend::Zend)

# Generate tokenizer data source files.
if(PHP_SOURCE_DIR)
php_add_custom_command(
php_ext_tokenizer_generate_data
if(EXISTS ${PHP_SOURCE_DIR}/Zend/zend_language_parser.y)
add_custom_command(
OUTPUT
${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_data.stub.php
${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_data.c
DEPENDS
${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_data_gen.php
${PHP_SOURCE_DIR}/Zend/zend_language_parser.y
PHP_COMMAND
${CMAKE_CURRENT_SOURCE_DIR}/tokenizer_data_gen.php
COMMAND
${CMAKE_COMMAND}
-D PHP_SOURCE_DIR=${PHP_SOURCE_DIR}
-P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/GenerateTokenizerData.cmake
COMMENT
"[ext/tokenizer] Regenerating tokenizer_data.c and tokenizer_data.stub.php"
VERBATIM
Expand Down
124 changes: 124 additions & 0 deletions cmake/ext/tokenizer/cmake/GenerateTokenizerData.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# This is a CMake-based alternative to ext/tokenizer/tokenizer_data_gen.php.
#
# It reads the %token declarations from Zend/zend_language_parser.y and writes
# ext/tokenizer/tokenizer_data.stub.php and ext/tokenizer/tokenizer_data.c
# inside the php-src tree given by PHP_SOURCE_DIR.
#
# Run as:
#   cmake -D PHP_SOURCE_DIR=<php-src> -P GenerateTokenizerData.cmake

cmake_minimum_required(VERSION 3.25...3.31)

# CMAKE_SCRIPT_MODE_FILE is only set under `cmake -P`; refuse to run when this
# file is pulled into a project via include().
if(NOT CMAKE_SCRIPT_MODE_FILE)
  message(FATAL_ERROR "This is a command-line script.")
endif()

if(NOT PHP_SOURCE_DIR)
  message(FATAL_ERROR "PHP_SOURCE_DIR variable is required.")
endif()

# Fail early with a clear message when PHP_SOURCE_DIR does not point at a
# php-src tree, instead of relying on a generic read error later on. The path
# is made absolute first because if(EXISTS) is only well-defined for full
# paths; in script mode the base directory is the current working directory.
set(php_parser_file "${PHP_SOURCE_DIR}/Zend/zend_language_parser.y")
cmake_path(ABSOLUTE_PATH php_parser_file NORMALIZE)
if(NOT EXISTS "${php_parser_file}")
  message(
    FATAL_ERROR
    "PHP_SOURCE_DIR does not contain Zend/zend_language_parser.y "
    "(looked for ${php_parser_file})."
  )
endif()
unset(php_parser_file)

# Pattern for a "%token" declaration line. Everything between "%token " and
# the first "T" is skipped; capture group 1 is the T_* token name, which ends
# at the first space or newline.
set(token_pattern "^%token [^T]*(T_[^ \n]+)")

# Read only those grammar lines that declare a T_* token.
file(
  STRINGS
  "${PHP_SOURCE_DIR}/Zend/zend_language_parser.y"
  declarations
  REGEX "${token_pattern}"
)

# Square brackets interfere with CMake list handling, so they are replaced by
# placeholder words before the lines are iterated as a list:
# https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#cmake-language-lists
string(REPLACE "]" "RIGHT_BRACKET" declarations "${declarations}")
string(REPLACE "[" "LEFT_BRACKET" declarations "${declarations}")

# Collect the token names, in grammar order, into the 'tokens' list.
set(tokens "")
foreach(declaration IN LISTS declarations)
  # Ignore any list element that is not a token declaration.
  if(NOT declaration MATCHES "${token_pattern}")
    continue()
  endif()

  set(name "${CMAKE_MATCH_1}")

  # T_NOELSE and T_ERROR are deliberately left out of the generated data.
  if(name STREQUAL "T_NOELSE" OR name STREQUAL "T_ERROR")
    continue()
  endif()

  list(APPEND tokens "${name}")
endforeach()

# Generate ext/tokenizer/tokenizer_data.stub.php: one documented constant per
# collected token.
set(content "")
foreach(token IN LISTS tokens)
  string(
    APPEND
    content
    "/**\n * @var int\n * @cvalue ${token}\n */\n"
    "const ${token} = UNKNOWN;\n"
  )
endforeach()
# Drop the trailing newline; the template below supplies the line break that
# follows @content@.
string(STRIP "${content}" content)

# The template is a bracket argument, so its text is taken literally and must
# start in column 0. T_DOUBLE_COLON is appended as an additional constant whose
# @cvalue is T_PAAMAYIM_NEKUDOTAYIM; its docblock uses the same " * " layout as
# the per-token docblocks generated above.
file(
  CONFIGURE
  OUTPUT "${PHP_SOURCE_DIR}/ext/tokenizer/tokenizer_data.stub.php"
  CONTENT [[
<?php

/** @generate-class-entries */

@content@
/**
 * @var int
 * @cvalue T_PAAMAYIM_NEKUDOTAYIM
 */
const T_DOUBLE_COLON = UNKNOWN;
]] @ONLY)

# Generate ext/tokenizer/tokenizer_data.c: a get_token_type_name() function
# with one switch case per collected token, returning the token name as a
# string.
set(content "")
foreach(token IN LISTS tokens)
  if(token STREQUAL "T_PAAMAYIM_NEKUDOTAYIM")
    # This token is reported under the name "T_DOUBLE_COLON".
    string(
      APPEND
      content
      "\t\tcase T_PAAMAYIM_NEKUDOTAYIM: return \"T_DOUBLE_COLON\";\n"
    )
  else()
    string(APPEND content "\t\tcase ${token}: return \"${token}\";\n")
  endif()
endforeach()

# Escape sequences are not processed inside a bracket argument, so the leading
# tab characters of the generated C code are injected through @tab@.
set(tab "\t")

# The template is a bracket argument, so its text is taken literally and must
# start in column 0. With @ONLY only @name@ references are substituted; the
# "@" in each e-mail address is not followed by a closing "@" on the same
# word, so the addresses are written out unchanged.
# NOTE(review): the "generated using tokenizer_data_gen.php" notice is kept
# as-is, presumably so the output matches what the PHP generator writes -
# confirm before changing it.
file(
  CONFIGURE
  OUTPUT "${PHP_SOURCE_DIR}/ext/tokenizer/tokenizer_data.c"
  CONTENT [[
/*
   +----------------------------------------------------------------------+
   | Copyright (c) The PHP Group                                          |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | https://www.php.net/license/3_01.txt                                 |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Author: Johannes Schlueter <johannes@php.net>                        |
   +----------------------------------------------------------------------+
*/

/*
   DO NOT EDIT THIS FILE!
   This file is generated using tokenizer_data_gen.php
*/

#include "php.h"
#include "zend.h"
#include <zend_language_parser.h>

char *get_token_type_name(int token_type)
{
@tab@switch (token_type) {

@content@
@tab@}
@tab@return NULL;
}

]] @ONLY)

0 comments on commit 737f636

Please sign in to comment.