diff --git a/.changeset/README.md b/.changeset/README.md new file mode 100644 index 0000000..82e351f --- /dev/null +++ b/.changeset/README.md @@ -0,0 +1,24 @@ +# Meta-Notation Changesets + +This directory contains [changesets](https://github.com/changesets/changesets) which are used to manage versions and changelogs. + +## How to add a changeset + +When making changes that should trigger a version bump, run: + +```bash +npx changeset +``` + +Or manually trigger a release via GitHub Actions: +1. Go to Actions tab +2. Select "Manual Release" workflow +3. Click "Run workflow" +4. Choose version bump type (patch/minor/major) +5. Add optional description + +## Changeset types + +- **patch**: Bug fixes and minor changes +- **minor**: New features (backward compatible) +- **major**: Breaking changes diff --git a/.changeset/config.json b/.changeset/config.json new file mode 100644 index 0000000..fce1c26 --- /dev/null +++ b/.changeset/config.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://unpkg.com/@changesets/config@3.0.0/schema.json", + "changelog": "@changesets/cli/changelog", + "commit": false, + "fixed": [], + "linked": [], + "access": "public", + "baseBranch": "main", + "updateInternalDependencies": "patch", + "ignore": [] +} diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..81b5319 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,85 @@ +# Contributing to Meta-Notation + +Thank you for your interest in contributing to meta-notation! 
+ +## Development Setup + +### JavaScript/TypeScript + +```bash +cd js +npm install +npm run build +npm test +``` + +### Rust + +```bash +cd rust +cargo build +cargo test +``` + +## Running Tests + +### JavaScript +```bash +cd js +npm test # Run all tests +npm run build # Build the project +``` + +### Rust +```bash +cd rust +cargo test # Run all tests +cargo test --doc # Run doc tests +cargo fmt # Format code +cargo clippy # Lint code +``` + +## Continuous Integration + +We use GitHub Actions for CI/CD: + +- **CI/CD**: Runs linting and tests on multiple platforms +- **Manual Release**: Trigger releases via GitHub Actions +- **Auto-publish**: Automatic publishing to npm and crates.io on release + +## Making Changes + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Ensure tests pass: `npm test` (JS) and `cargo test` (Rust) +5. Run formatters: `cargo fmt` (Rust) +6. Submit a pull request + +## Releasing + +We use [changesets](https://github.com/changesets/changesets) for version management. + +### Manual Release +1. Go to GitHub Actions +2. Select "Manual Release" workflow +3. Choose version bump type (patch/minor/major) +4. Review and merge the created PR +5. The release will be automated + +## Code Style + +### JavaScript/TypeScript +- Use test-anywhere for tests +- Follow existing code patterns +- Keep tests in `tests/` folder + +### Rust +- Follow standard Rust conventions +- Run `cargo fmt` before committing +- Run `cargo clippy` to catch common mistakes +- Add tests for new functionality + +## License + +By contributing, you agree to license your contribution under the Unlicense (public domain). 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..a6e4175 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,157 @@ +name: CI/CD + +on: + push: + branches: [main] + pull_request: + types: [opened, synchronize, reopened] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + # Lint and format check for JavaScript/TypeScript + lint-js: + name: Lint JavaScript/TypeScript + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20.x' + cache: 'npm' + cache-dependency-path: js/package-lock.json + + - name: Install dependencies + working-directory: js + run: npm ci + + - name: Build + working-directory: js + run: npm run build + + # Lint and format check for Rust + lint-rust: + name: Lint Rust + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + components: rustfmt, clippy + + - name: Format check + working-directory: rust + run: cargo fmt --check + + - name: Clippy + working-directory: rust + run: cargo clippy -- -D warnings + + # Test JavaScript/TypeScript implementation + test-js: + name: Test JS (${{ matrix.os }}, Node ${{ matrix.node }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + node: ['18.x', '20.x', '22.x'] + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node }} + cache: 'npm' + cache-dependency-path: js/package-lock.json + + - name: Install dependencies + working-directory: js + run: npm ci + + - name: Build + working-directory: js + run: npm run build + + - name: Run tests + working-directory: js + run: npm test + + # Test Rust implementation + test-rust: + name: Test Rust (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + 
strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + + - name: Build + working-directory: rust + run: cargo build --verbose + + - name: Run tests + working-directory: rust + run: cargo test --verbose + + - name: Run doc tests + working-directory: rust + run: cargo test --doc + + # Release workflow (only on main branch after successful tests) + release: + name: Release + needs: [lint-js, lint-rust, test-js, test-rust] + if: github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20.x' + cache: 'npm' + cache-dependency-path: js/package-lock.json + + - name: Setup Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + + - name: Install JS dependencies + working-directory: js + run: npm ci + + - name: Build JS + working-directory: js + run: npm run build + + - name: Build Rust + working-directory: rust + run: cargo build --release + + - name: Create Release PR + id: changesets + uses: changesets/action@v1 + with: + title: "🚀 Release new version" + commit: "🤖 Version bump" + createGithubReleases: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/manual-release.yml b/.github/workflows/manual-release.yml new file mode 100644 index 0000000..48d199c --- /dev/null +++ b/.github/workflows/manual-release.yml @@ -0,0 +1,67 @@ +name: Manual Release + +on: + workflow_dispatch: + inputs: + bump_type: + description: 'Version bump type' + required: true + type: choice + options: + - patch + - minor + - major + description: + description: 'Release description (optional)' + required: false + type: string + +permissions: + contents: write + pull-requests: write + +jobs: + create-changeset: + name: 
Create Release Changeset + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20.x' + + - name: Create changeset file + # Inputs reach the script through env vars instead of expression interpolation into the shell source, so a description containing quotes, backticks, $(...) or an "EOF" line cannot run commands or truncate the file. + env: + BUMP_TYPE: ${{ github.event.inputs.bump_type }} + DESCRIPTION: ${{ github.event.inputs.description || 'Manual release triggered' }} + run: | + mkdir -p .changeset + { + printf '%s\n' '---' "\"meta-notation\": ${BUMP_TYPE}" '---' '' "${DESCRIPTION}" + } > ".changeset/manual-release-${{ github.run_id }}.md" + + - name: Create Pull Request + uses: peter-evans/create-pull-request@v6 + with: + commit-message: "chore: prepare ${{ github.event.inputs.bump_type }} release" + branch: changeset-release/${{ github.run_id }} + title: "🚀 Release: ${{ github.event.inputs.bump_type }} version bump" + body: | + ## Manual Release Request + + **Triggered by:** @${{ github.actor }} + **Release type:** ${{ github.event.inputs.bump_type }} + **Description:** ${{ github.event.inputs.description || 'No description provided' }} + + This PR was automatically created to prepare a new release. + + ### Next Steps + 1. Review the changeset file + 2. Merge this PR to main + 3. 
The automated release workflow will handle versioning and publishing + labels: release diff --git a/.github/workflows/publish-npm.yml b/.github/workflows/publish-npm.yml new file mode 100644 index 0000000..cfe1bf2 --- /dev/null +++ b/.github/workflows/publish-npm.yml @@ -0,0 +1,35 @@ +name: Publish to npm + +on: + release: + types: [published] + +jobs: + publish-npm: + name: Publish to npm + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20.x' + registry-url: 'https://registry.npmjs.org' + cache: 'npm' + cache-dependency-path: js/package-lock.json + + - name: Install dependencies + working-directory: js + run: npm ci + + - name: Build + working-directory: js + run: npm run build + + - name: Publish to npm + working-directory: js + run: npm publish + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + continue-on-error: true diff --git a/.github/workflows/publish-rust.yml b/.github/workflows/publish-rust.yml new file mode 100644 index 0000000..3f3e454 --- /dev/null +++ b/.github/workflows/publish-rust.yml @@ -0,0 +1,20 @@ +name: Publish to crates.io + +on: + release: + types: [published] + +jobs: + publish-rust: + name: Publish Rust crate + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + + - name: Publish to crates.io + working-directory: rust + run: cargo publish --token ${{ secrets.CARGO_TOKEN }} + continue-on-error: true diff --git a/.gitignore b/.gitignore index 9a5aced..fd90349 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,19 @@ +# Build outputs +js/dist/ +js/src/**/*.js +js/src/**/*.js.map +js/src/**/*.d.ts +js/src/**/*.d.ts.map +!js/src/meta.grammar.d.ts +!js/src/**/*.test.ts +!js/src/**/*.spec.ts + +# Rust build outputs +rust/target/ +rust/Cargo.lock +target/ +Cargo.lock + # Logs logs *.log diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ab15ea1 
--- /dev/null +++ b/Cargo.toml @@ -0,0 +1,3 @@ +[workspace] +resolver = "2" +members = ["rust"] diff --git a/README.md b/README.md index 3368ee3..16bd83a 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,214 @@ # meta-notation -A notation for largest possible set of languages. It should focus on parsing ( ), { }, [ ], ' ', \` \`, " " and so on. -It should be simplier version of links-notation. +[![CI/CD](https://github.com/link-foundation/meta-notation/actions/workflows/ci.yml/badge.svg)](https://github.com/link-foundation/meta-notation/actions/workflows/ci.yml) +[![npm version](https://badge.fury.io/js/meta-notation.svg)](https://www.npmjs.com/package/meta-notation) +[![crates.io](https://img.shields.io/crates/v/meta-notation.svg)](https://crates.io/crates/meta-notation) +[![License: Unlicense](https://img.shields.io/badge/license-Unlicense-blue.svg)](https://github.com/link-foundation/meta-notation/blob/main/LICENSE) -So it should support plain sequences of references and also nested, but without ability to use or parse `:` as way to define a link's self reference. +A notation for the largest possible set of languages. It focuses on parsing common delimiters: `()`, `{}`, `[]`, `''`, `` ` ` ``, `""` and so on. -That way it will be compatible with much larger set of languages. +## Vision -It can be similar to my old idea https://github.com/konard/metalanguage. +Meta-notation is a simpler version of [links-notation](https://github.com/link-foundation/links-notation). -But we can now have all tools in https://github.com/link-foundation/links-notation to do it right and efficient. +It supports plain sequences of references and nested structures, but **without** the ability to use or parse `:` as a way to define a link's self reference. This makes it compatible with a much larger set of programming languages. 
+ +The implementation is similar to the concepts in [metalanguage](https://github.com/konard/metalanguage), but leverages all the tools from links-notation to do it right and efficiently. + +## Implementations + +Meta-notation is available in multiple languages: + +- **[JavaScript/TypeScript](./js)** - Full-featured implementation with PEG.js grammar +- **[Rust](./rust)** - High-performance implementation with serde support + +## Features + +- **Universal Delimiter Parsing**: Parses `()`, `{}`, `[]`, `''`, `""`, `` ` ` `` +- **Language Agnostic**: Works with 25+ programming languages and all natural languages +- **Nested Structures**: Supports arbitrary nesting of delimiters +- **Round-trip Serialization**: Parse and serialize back to original text +- **Multiple Language Implementations**: JavaScript/TypeScript and Rust +- **Simple Grammar**: Clean, efficient parsing +- **Comprehensive Tests**: 81+ test cases for programming and natural languages + +## Installation + +### JavaScript/TypeScript + +```bash +npm install meta-notation +``` + +### Rust + +```toml +[dependencies] +meta-notation = "0.1" +``` + +## Quick Start + +### JavaScript/TypeScript + +```typescript +import { parse, serialize } from 'meta-notation'; + +const code = 'function test() { return "hello"; }'; +const parsed = parse(code); +const serialized = serialize(parsed); +console.log(serialized === code); // true +``` + +### Rust + +```rust +use meta_notation::{parse, serialize}; + +let code = r#"function test() { return "hello"; }"#; +let parsed = parse(code); +let serialized = serialize(&parsed); +assert_eq!(serialized, code); +``` + +## API + +### `parse(input: string): Sequence` + +Parses text into a sequence of blocks. 
+ +```typescript +const result = parse('hello (world) {test}'); +// Returns: +// [ +// { type: 'text', content: 'hello ' }, +// { type: 'paren', content: [{ type: 'text', content: 'world' }] }, +// { type: 'text', content: ' ' }, +// { type: 'curly', content: [{ type: 'text', content: 'test' }] } +// ] +``` + +### `serialize(sequence: Sequence): string` + +Converts a sequence of blocks back to text. + +```typescript +const blocks = [ + { type: 'text', content: 'hello ' }, + { type: 'paren', content: [{ type: 'text', content: 'world' }] } +]; +const text = serialize(blocks); +// Returns: "hello (world)" +``` + +## Types + +```typescript +type DelimiterType = 'paren' | 'curly' | 'square' | 'singleQuote' | 'doubleQuote' | 'backtick' | 'text'; + +interface Block { + type: DelimiterType; + content: Block[] | string; +} + +type Sequence = Block[]; +``` + +## Language Support + +Meta-notation works seamlessly with both programming languages and natural languages. + +### Programming Languages (Tested) + +- **JavaScript/TypeScript** - Functions, arrow functions, template literals +- **Python** - Dictionaries, lists, function definitions +- **Go** - Functions, print statements +- **Rust** - Vectors, macros, format strings +- **C++** - Streams, functions, return statements +- **Java** - Classes, methods, arrays +- **C#** - LINQ, collections, generics +- **Ruby** - Methods, string interpolation +- **PHP** - Functions, arrays, associative arrays +- **Swift** - Functions, string interpolation +- **Kotlin** - Functions, lists +- **Scala** - Functions, type annotations +- **Perl** - Subroutines, arrays +- **Haskell** - Pure functions +- **Lisp/Scheme** - S-expressions +- **Clojure** - Vectors, strings +- **Lua** - Functions, string concatenation +- **Elixir** - Functions, string interpolation +- **R** - Functions, paste +- **MATLAB** - Functions +- **SQL** - SELECT statements with WHERE clauses +- **JSON** - Objects and arrays +- **YAML** - Arrays (with bracket syntax) +- 
**Bash/Shell** - Echo, variables, pipes +- **Markdown** - Code blocks with backticks + +### Natural Languages (Tested) + +Meta-notation parses natural language text including: + +- **Direct speech** with quotes: `She said, "Hello!"` +- **Parenthetical remarks**: `The conference (next week) is online.` +- **Citations and references**: `According to [Smith, 2020]...` +- **Academic writing** with nested structures +- **Legal text** with section references +- **Technical documentation** mixing code and prose +- **Multiple languages**: English, Spanish, French, German, Italian, Portuguese, and more + +Works with any language that uses these common delimiters for structure. + +## Examples + +See the [examples](./js/src/examples) directory for more detailed usage examples. + +## Building + +### JavaScript/TypeScript + +```bash +cd js +npm install +npm run build +``` + +### Rust + +```bash +cd rust +cargo build --release +``` + +## Testing + +### JavaScript/TypeScript + +```bash +cd js +npm test +``` + +### Rust + +```bash +cd rust +cargo test +``` + +## Comparison with Links-Notation + +| Feature | meta-notation | links-notation | +|---------|---------------|----------------| +| Delimiter parsing | ✅ | ✅ | +| Nested structures | ✅ | ✅ | +| Self-reference (`:`) | ❌ | ✅ | +| Language compatibility | Very high | High | +| Complexity | Low | Medium | + +By removing the `:` self-reference syntax, meta-notation can parse a wider variety of languages without conflicts. + +## License + +[Unlicense](https://github.com/link-foundation/meta-notation/blob/main/LICENSE) (Public Domain) diff --git a/js/README.md b/js/README.md new file mode 100644 index 0000000..44ac5bf --- /dev/null +++ b/js/README.md @@ -0,0 +1,57 @@ +# Meta-Notation (JavaScript/TypeScript) + +JavaScript/TypeScript implementation of meta-notation parser. 
+ +## Installation + +```bash +npm install meta-notation +``` + +## Usage + +```typescript +import { parse, serialize } from 'meta-notation'; + +// Parse code with delimiters +const code = 'function test() { return "hello"; }'; +const parsed = parse(code); + +// Serialize back to string +const serialized = serialize(parsed); +console.log(serialized === code); // true +``` + +## API + +### `parse(input: string): Sequence` + +Parses text into a sequence of blocks. + +### `serialize(sequence: Sequence): string` + +Converts a sequence of blocks back to text. + +## Building + +```bash +npm install +npm run build +``` + +## Testing + +```bash +npm test +``` + +## Features + +- **Universal Delimiter Parsing**: Parses `()`, `{}`, `[]`, `''`, `""`, `` ` ` `` +- **Language Agnostic**: Works with 25+ programming languages and all natural languages +- **Nested Structures**: Supports arbitrary nesting of delimiters +- **Round-trip Serialization**: Parse and serialize back to original text +- **TypeScript Support**: Fully typed API +- **81 Test Cases**: Comprehensive test coverage + +See the [main README](../README.md) for more information. 
diff --git a/js/package-lock.json b/js/package-lock.json new file mode 100644 index 0000000..4c2d61c --- /dev/null +++ b/js/package-lock.json @@ -0,0 +1,125 @@ +{ + "name": "meta-notation", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "meta-notation", + "version": "0.1.0", + "license": "Unlicense", + "dependencies": { + "test-anywhere": "^0.6.0" + }, + "devDependencies": { + "@types/node": "^20.10.0", + "peggy": "^4.0.0", + "typescript": "^5.3.0" + } + }, + "node_modules/@peggyjs/from-mem": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/@peggyjs/from-mem/-/from-mem-1.3.5.tgz", + "integrity": "sha512-oRyzXE7nirAn+5yYjCdWQHg3EG2XXcYRoYNOK8Quqnmm+9FyK/2YWVunwudlYl++M3xY+gIAdf0vAYS+p0nKfQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "semver": "7.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@types/node": { + "version": "20.19.25", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz", + "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/commander": { + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", + "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/peggy": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/peggy/-/peggy-4.2.0.tgz", + "integrity": "sha512-ZjzyJYY8NqW8JOZr2PbS/J0UH/hnfGALxSDsBUVQg5Y/I+ZaPuGeBJ7EclUX2RvWjhlsi4pnuL1C/K/3u+cDeg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@peggyjs/from-mem": "1.3.5", + "commander": "^12.1.0", + "source-map-generator": "0.8.0" + }, + "bin": { + "peggy": "bin/peggy.js" + }, + "engines": { + 
"node": ">=18" + } + }, + "node_modules/semver": { + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/source-map-generator": { + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/source-map-generator/-/source-map-generator-0.8.0.tgz", + "integrity": "sha512-psgxdGMwl5MZM9S3FWee4EgsEaIjahYV5AzGnwUvPhWeITz/j6rKpysQHlQ4USdxvINlb8lKfWGIXwfkrgtqkA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">= 10" + } + }, + "node_modules/test-anywhere": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/test-anywhere/-/test-anywhere-0.6.0.tgz", + "integrity": "sha512-BTWYYmHXPbP4pnHpWB5N3frx3GrWStIzjur2icRjVfrIkAPM9nBu1yUAqp+YPRXn1d95Bw9twxRy/O3EBsKEyA==", + "license": "Unlicense", + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + } + } +} diff --git a/js/package.json b/js/package.json new file mode 100644 index 0000000..3ba6074 --- /dev/null +++ b/js/package.json @@ -0,0 +1,37 @@ +{ + "name": "meta-notation", + "version": "0.1.0", + "description": "A notation for the largest 
possible set of languages, focusing on parsing common delimiters", + "type": "module", + "main": "dist/src/index.js", + "types": "dist/src/index.d.ts", + "scripts": { + "build:grammar": "peggy --format es src/meta.grammar.pegjs", + "build:ts": "tsc", + "build:copy": "cp src/meta.grammar.js dist/src/meta.grammar.js", + "build": "npm run build:grammar && npm run build:ts && npm run build:copy", + "test": "node --test dist/tests/languages.test.js dist/tests/natural-languages.test.js dist/tests/parser.test.js dist/tests/serializer.test.js", + "prepare": "npm run build" + }, + "keywords": [ + "parser", + "notation", + "meta", + "delimiters", + "metalanguage" + ], + "author": "", + "license": "Unlicense", + "devDependencies": { + "@types/node": "^20.10.0", + "peggy": "^4.0.0", + "typescript": "^5.3.0" + }, + "files": [ + "dist", + "src" + ], + "dependencies": { + "test-anywhere": "^0.6.0" + } +} diff --git a/js/src/examples/README.md b/js/src/examples/README.md new file mode 100644 index 0000000..25d8f1f --- /dev/null +++ b/js/src/examples/README.md @@ -0,0 +1,47 @@ +# Meta-Notation Examples + +This directory contains examples demonstrating the use of meta-notation. + +## Running Examples + +First, build the project: + +```bash +npm install +npm run build +``` + +Then run the examples: + +```bash +node dist/src/examples/basic.js +``` + +## What is Meta-Notation? + +Meta-notation is a simple, language-agnostic notation system that parses common delimiters found in most programming languages: + +- Parentheses: `()` +- Curly braces: `{}` +- Square brackets: `[]` +- Single quotes: `''` +- Double quotes: `""` +- Backticks: `` ` ` `` + +Unlike more complex notation systems, meta-notation intentionally avoids language-specific features (like the `:` self-reference in links-notation), making it compatible with a much larger set of programming languages. + +## Use Cases + +1. **Code Analysis**: Parse code structure without full language parsing +2. 
**Syntax Highlighting**: Quick delimiter matching +3. **Code Transformation**: Preserve structure while modifying content +4. **Multi-language Tools**: Build tools that work across many languages +5. **Educational**: Understand code structure at a basic level + +## Design Philosophy + +Meta-notation is inspired by: +- [links-notation](https://github.com/link-foundation/links-notation) - A more complex notation system with self-references +- [metalanguage](https://github.com/konard/metalanguage) - An earlier meta programming language concept + +The key difference is simplicity and universality. By focusing only on common delimiters and avoiding language-specific syntax, meta-notation can parse code from JavaScript, Python, C++, Java, Go, Rust, and many other languages with the same simple grammar. diff --git a/js/src/examples/basic.ts b/js/src/examples/basic.ts new file mode 100644 index 0000000..58c3fce --- /dev/null +++ b/js/src/examples/basic.ts @@ -0,0 +1,70 @@ +/** + * Basic examples of using meta-notation + */ + +import { parse, serialize, type Block } from '../index.js'; + +// Example 1: Parse simple text with delimiters +console.log('Example 1: Basic parsing'); +console.log('========================'); +const text1 = 'hello (world)'; +const parsed1 = parse(text1); +console.log('Input:', text1); +console.log('Parsed:', JSON.stringify(parsed1, null, 2)); +console.log(); + +// Example 2: Parse nested structures +console.log('Example 2: Nested structures'); +console.log('============================'); +const text2 = '{a [b (c) d] e}'; +const parsed2 = parse(text2); +console.log('Input:', text2); +console.log('Parsed:', JSON.stringify(parsed2, null, 2)); +console.log(); + +// Example 3: Parse different quote types +console.log('Example 3: Different quote types'); +console.log('================================'); +const text3 = `'single' "double" \`backtick\``; +const parsed3 = parse(text3); +console.log('Input:', text3); +console.log('Parsed:', 
JSON.stringify(parsed3, null, 2)); +console.log(); + +// Example 4: Round-trip (parse then serialize) +console.log('Example 4: Round-trip'); +console.log('====================='); +const original = 'function test() { return "hello"; }'; +const parsed = parse(original); +const serialized = serialize(parsed); +console.log('Original: ', original); +console.log('Serialized:', serialized); +console.log('Match:', original === serialized); +console.log(); + +// Example 5: Analyze JavaScript code structure +console.log('Example 5: JavaScript analysis'); +console.log('=============================='); +const jsCode = 'const x = [1, 2, 3];'; +const jsParsed = parse(jsCode); +console.log('Code:', jsCode); +console.log('Structure:'); +jsParsed.forEach((block: Block, i: number) => { + console.log(` [${i}] ${block.type}:`, + Array.isArray(block.content) ? `${block.content.length} items` : `"${block.content}"` + ); +}); +console.log(); + +// Example 6: Analyze Python code structure +console.log('Example 6: Python analysis'); +console.log('=========================='); +const pyCode = 'def func(a, b): return {"key": "value"}'; +const pyParsed = parse(pyCode); +console.log('Code:', pyCode); +console.log('Structure:'); +pyParsed.forEach((block: Block, i: number) => { + console.log(` [${i}] ${block.type}:`, + Array.isArray(block.content) ? `${block.content.length} items` : `"${block.content}"` + ); +}); diff --git a/js/src/index.ts b/js/src/index.ts new file mode 100644 index 0000000..1274527 --- /dev/null +++ b/js/src/index.ts @@ -0,0 +1,26 @@ +/** + * Meta-Notation + * + * A notation for the largest possible set of languages. + * Focuses on parsing common delimiters: (), {}, [], '', "", `` + * + * This is a simpler version of links-notation without the : self-reference syntax, + * making it compatible with a much larger set of languages. 
+ * + * @module meta-notation + */ + +export { parse, MetaNotationParser } from './parser.js'; +export { serialize, MetaNotationSerializer } from './serializer.js'; +export type { Block, Sequence, DelimiterType, Parser, Serializer } from './types.js'; + +/** + * Main API for convenience + */ +import { parse } from './parser.js'; +import { serialize } from './serializer.js'; + +export default { + parse, + serialize +}; diff --git a/js/src/meta.grammar.d.ts b/js/src/meta.grammar.d.ts new file mode 100644 index 0000000..4c982d9 --- /dev/null +++ b/js/src/meta.grammar.d.ts @@ -0,0 +1,11 @@ +/** + * Type declarations for PEG.js generated parser + */ + +import type { Sequence } from './types.js'; + +/** + * Parse meta-notation text into a sequence of blocks + * Generated by PEG.js from meta.grammar.pegjs + */ +export function parse(input: string): Sequence; diff --git a/js/src/meta.grammar.pegjs b/js/src/meta.grammar.pegjs new file mode 100644 index 0000000..d4b7fea --- /dev/null +++ b/js/src/meta.grammar.pegjs @@ -0,0 +1,27 @@ +{{ + // Meta-Notation Grammar + // A simple notation for parsing delimiters across multiple languages + // Supports: (), {}, [], '', "", `` + // Does NOT support : for self-reference (unlike links-notation) +}} + +text = s:sequence { return s; } + +sequence = blocks:block* { return blocks; } + +block + = "(" s:sequence ")" { return { type: 'paren', content: s }; } + / "{" s:sequence "}" { return { type: 'curly', content: s }; } + / "[" s:sequence "]" { return { type: 'square', content: s }; } + / "'" s:singleQuoteContent "'" { return { type: 'singleQuote', content: s }; } + / '"' s:doubleQuoteContent '"' { return { type: 'doubleQuote', content: s }; } + / "`" s:backtickContent "`" { return { type: 'backtick', content: s }; } + / c:plainText { return { type: 'text', content: c }; } + +// Content inside quotes - different rules since they don't nest the same way +singleQuoteContent = c:[^']* { return c.join(''); } +doubleQuoteContent = c:[^"]* { 
return c.join(''); } +backtickContent = c:[^`]* { return c.join(''); } + +// Plain text outside delimiters +plainText = c:[^(){}[\]'"`]+ { return c.join(''); } diff --git a/js/src/parser.ts b/js/src/parser.ts new file mode 100644 index 0000000..185de9a --- /dev/null +++ b/js/src/parser.ts @@ -0,0 +1,40 @@ +/** + * Meta-Notation Parser + * + * Parses text using the meta-notation grammar. + */ + +import { parse as grammarParse } from './meta.grammar.js'; +import type { Parser, Sequence } from './types.js'; + +/** + * Parse meta-notation text into a sequence of blocks + * + * @param input - The text to parse + * @returns A sequence of blocks representing the parsed structure + * + * @example + * ```typescript + * const result = parse('hello (world) {test}'); + * // Returns: [ + * // { type: 'text', content: 'hello ' }, + * // { type: 'paren', content: [{ type: 'text', content: 'world' }] }, + * // { type: 'text', content: ' ' }, + * // { type: 'curly', content: [{ type: 'text', content: 'test' }] } + * // ] + * ``` + */ +export function parse(input: string): Sequence { + return grammarParse(input) as Sequence; +} + +/** + * Parser class implementing the Parser interface + */ +export class MetaNotationParser implements Parser { + parse(input: string): Sequence { + return parse(input); + } +} + +export default { parse, MetaNotationParser }; diff --git a/js/src/serializer.ts b/js/src/serializer.ts new file mode 100644 index 0000000..5175b08 --- /dev/null +++ b/js/src/serializer.ts @@ -0,0 +1,89 @@ +/** + * Meta-Notation Serializer + * + * Converts meta-notation blocks back to text. 
+ */
+
+import type { Block, Sequence, Serializer as ISerializer } from './types.js';
+
+/**
+ * Get the opening delimiter for a block type ('' for 'text' or any unrecognized type)
+ */
+function getOpenDelimiter(type: string): string {
+  switch (type) {
+    case 'paren': return '(';
+    case 'curly': return '{';
+    case 'square': return '[';
+    case 'singleQuote': return "'";
+    case 'doubleQuote': return '"';
+    case 'backtick': return '`';
+    default: return '';
+  }
+}
+
+/**
+ * Get the closing delimiter for a block type ('' for 'text' or any unrecognized type)
+ */
+function getCloseDelimiter(type: string): string {
+  switch (type) {
+    case 'paren': return ')';
+    case 'curly': return '}';
+    case 'square': return ']';
+    case 'singleQuote': return "'";
+    case 'doubleQuote': return '"';
+    case 'backtick': return '`';
+    default: return '';
+  }
+}
+
+/**
+ * Serialize a single block back to text (text blocks are returned as-is, without delimiters)
+ */
+function serializeBlock(block: Block): string {
+  if (block.type === 'text') {
+    return block.content as string;
+  }
+
+  const open = getOpenDelimiter(block.type);
+  const close = getCloseDelimiter(block.type);
+
+  if (Array.isArray(block.content)) {
+    // Nested sequence: serialize the child blocks recursively, then wrap them
+    const inner = serialize(block.content);
+    return `${open}${inner}${close}`;
+  } else {
+    // String content (for quotes and backticks): wrapped verbatim, nothing is escaped
+    return `${open}${block.content}${close}`;
+  }
+}
+
+/**
+ * Serialize a sequence of blocks back to text by concatenating each serialized block
+ *
+ * @param sequence - The sequence to serialize
+ * @returns The serialized text
+ *
+ * @example
+ * ```typescript
+ * const blocks = [
+ *   { type: 'text', content: 'hello ' },
+ *   { type: 'paren', content: [{ type: 'text', content: 'world' }] }
+ * ];
+ * const text = serialize(blocks);
+ * // Returns: "hello (world)"
+ * ```
+ */
+export function serialize(sequence: Sequence): string {
+  return sequence.map(serializeBlock).join('');
+}
+
+/**
+ * Serializer class implementing the Serializer interface by delegating to serialize()
+ */
+export class MetaNotationSerializer implements ISerializer {
+  serialize(sequence: Sequence): string {
+    return serialize(sequence);
+  }
+}
+
+export default { serialize, MetaNotationSerializer }; diff --git a/js/src/types.ts b/js/src/types.ts new file mode 100644 index 0000000..a3bc644 --- /dev/null +++ b/js/src/types.ts @@ -0,0 +1,45 @@ +/** + * Meta-Notation Types + * + * A notation for the largest possible set of languages. + * Focuses on parsing common delimiters without language-specific syntax. + */ + +/** + * Delimiter types supported by meta-notation + */ +export type DelimiterType = + | 'paren' // () + | 'curly' // {} + | 'square' // [] + | 'singleQuote' // '' + | 'doubleQuote' // "" + | 'backtick' // `` + | 'text'; // plain text + +/** + * A block is either a delimited structure or plain text + */ +export interface Block { + type: DelimiterType; + content: Block[] | string; +} + +/** + * A sequence is an array of blocks + */ +export type Sequence = Block[]; + +/** + * Parser interface + */ +export interface Parser { + parse(input: string): Sequence; +} + +/** + * Serializer interface + */ +export interface Serializer { + serialize(sequence: Sequence): string; +} diff --git a/js/tests/languages.test.ts b/js/tests/languages.test.ts new file mode 100644 index 0000000..c0945a2 --- /dev/null +++ b/js/tests/languages.test.ts @@ -0,0 +1,271 @@ +/** + * Tests for meta-notation with various programming languages + */ + +import { test, assert } from 'test-anywhere'; +import { parse } from '../src/parser.js'; +import { serialize } from '../src/serializer.js'; +import type { Block, Sequence, DelimiterType } from '../src/types.js'; + +// Helper function to check if a delimiter type exists anywhere in the parsed result +function hasDelimiterType(sequence: Sequence, type: DelimiterType): boolean { + for (const block of sequence) { + if (block.type === type) { + return true; + } + if (Array.isArray(block.content)) { + if (hasDelimiterType(block.content, type)) { + return true; + } + } + } + return false; +} + +// JavaScript +test('parse JavaScript code', () => { + const code = 'const greet = (name) => { 
return `Hello, ${name}!`; };'; + const result = parse(code); + assert.ok(result.length > 0); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'backtick')); + // Round-trip test + assert.equal(serialize(result), code); +}); + +// Python +test('parse Python code', () => { + const code = 'def calculate(x, y): return {"sum": x + y, "list": [x, y]}'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Go +test('parse Go code', () => { + const code = 'func main() { fmt.Println("Hello, World!") }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Rust +test('parse Rust code', () => { + const code = 'fn main() { let x = vec![1, 2, 3]; println!("{:?}", x); }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// C++ +test('parse C++ code', () => { + const code = 'int main() { std::cout << "Hello" << std::endl; return 0; }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Java +test('parse Java code', () => { + const code = 'public class Main { public static void main(String[] args) { System.out.println("Hello"); } }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 
'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// C# +test('parse C# code', () => { + const code = 'public void Test() { var list = new List {1, 2, 3}; Console.WriteLine("Done"); }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Ruby +test('parse Ruby code', () => { + const code = 'def greet(name); puts "Hello, #{name}!"; end'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// PHP +test('parse PHP code', () => { + const code = 'function test($x) { return ["key" => "value"]; }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Swift +test('parse Swift code', () => { + const code = 'func greet(name: String) -> String { return "Hello, \\(name)!" 
}'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Kotlin +test('parse Kotlin code', () => { + const code = 'fun main() { val list = listOf(1, 2, 3); println("Hello") }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// TypeScript +test('parse TypeScript code', () => { + const code = 'const add = (a: number, b: number): number => { return a + b; };'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.equal(serialize(result), code); +}); + +// Scala +test('parse Scala code', () => { + const code = 'def add(x: Int, y: Int): Int = { x + y }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.equal(serialize(result), code); +}); + +// Perl +test('parse Perl code', () => { + const code = 'sub greet { my ($name) = @_; print "Hello, $name!\\n"; }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Haskell +test('parse Haskell code', () => { + const code = 'main = putStrLn "Hello, World!"'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Lisp/Scheme +test('parse Lisp code', () => { + const code = '(define (factorial n) (if (= n 0) 1 (* n (factorial (- n 1)))))'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), code); 
+}); + +// Clojure +test('parse Clojure code', () => { + const code = '(defn greet [name] (str "Hello, " name "!"))'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Lua +test('parse Lua code', () => { + const code = 'function greet(name) return "Hello, " .. name end'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Elixir +test('parse Elixir code', () => { + const code = 'def greet(name), do: "Hello, #{name}!"'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// R +test('parse R code', () => { + const code = 'greet <- function(name) { paste("Hello,", name) }'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// MATLAB +test('parse MATLAB code', () => { + const code = 'function y = square(x); y = x .^ 2; end'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), code); +}); + +// SQL +test('parse SQL code', () => { + const code = 'SELECT name, age FROM users WHERE status = "active" ORDER BY created_at;'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// JSON +test('parse JSON', () => { + const code = '{"name": "John", "age": 30, "tags": ["developer", "designer"]}'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'curly')); + assert.ok(hasDelimiterType(result, 'square')); + 
assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// YAML-like (with brackets) +test('parse YAML with brackets', () => { + const code = 'dependencies: ["react", "typescript"]'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Shell/Bash +test('parse Bash code', () => { + const code = 'echo "Hello, ${USER}!" | grep "Hello"'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), code); +}); + +// Markdown code blocks +test('parse Markdown with code blocks', () => { + const code = 'Here is code: `const x = [1, 2, 3];` in backticks.'; + const result = parse(code); + assert.ok(hasDelimiterType(result, 'backtick')); + assert.equal(serialize(result), code); +}); diff --git a/js/tests/natural-languages.test.ts b/js/tests/natural-languages.test.ts new file mode 100644 index 0000000..4becda2 --- /dev/null +++ b/js/tests/natural-languages.test.ts @@ -0,0 +1,276 @@ +/** + * Tests for meta-notation with natural language text + * + * Meta-notation should work seamlessly with natural languages since they + * use the same delimiters: quotes for speech, parentheses for asides, + * brackets for references, etc. 
+ */ + +import { test, assert } from 'test-anywhere'; +import { parse } from '../src/parser.js'; +import { serialize } from '../src/serializer.js'; +import type { Block, Sequence, DelimiterType } from '../src/types.js'; + +// Helper function to check if a delimiter type exists anywhere in the parsed result +function hasDelimiterType(sequence: Sequence, type: DelimiterType): boolean { + for (const block of sequence) { + if (block.type === type) { + return true; + } + if (Array.isArray(block.content)) { + if (hasDelimiterType(block.content, type)) { + return true; + } + } + } + return false; +} + +// English +test('parse English text with quotes', () => { + const text = 'She said, "Hello, world!" and smiled.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +test('parse English text with parentheses', () => { + const text = 'The conference (scheduled for next week) will be online.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +test('parse English text with brackets', () => { + const text = 'According to the report [see page 42], the results were positive.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'square')); + assert.equal(serialize(result), text); +}); + +test('parse English dialogue', () => { + const text = `"How are you?" she asked. "I'm fine," he replied.`; + const result = parse(text); + assert.ok(result.filter(b => b.type === 'doubleQuote').length === 2); + assert.equal(serialize(result), text); +}); + +test('parse English nested structure', () => { + const text = 'He said, "I heard her say \'hello\' yesterday."'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +// Spanish +test('parse Spanish text with quotes', () => { + const text = 'Ella dijo, "¡Hola, mundo!" 
y sonrió.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +test('parse Spanish text with parentheses', () => { + const text = 'La conferencia (programada para la próxima semana) será en línea.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// French +test('parse French text with quotes', () => { + const text = 'Elle a dit, "Bonjour, le monde!" et a souri.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +test('parse French text with parentheses', () => { + const text = 'La conférence (prévue pour la semaine prochaine) sera en ligne.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// German +test('parse German text with quotes', () => { + const text = 'Sie sagte, "Hallo, Welt!" und lächelte.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +test('parse German text with parentheses', () => { + const text = 'Die Konferenz (geplant für nächste Woche) wird online sein.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// Italian +test('parse Italian text with quotes', () => { + const text = 'Lei disse, "Ciao, mondo!" e sorrise.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +// Portuguese +test('parse Portuguese text with quotes', () => { + const text = 'Ela disse, "Olá, mundo!" 
e sorriu.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +// Russian (with Latin transliteration) +test('parse Russian text (transliterated) with quotes', () => { + const text = 'Ona skazala, "Privet, mir!" i ulyblas.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +// Japanese (romanized) +test('parse Japanese text (romanized) with quotes', () => { + const text = 'Kanojo wa "Konnichiwa, sekai!" to itta.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +// Chinese (Pinyin) +test('parse Chinese text (Pinyin) with quotes', () => { + const text = 'Ta shuo, "Nǐ hǎo, shìjiè!" ránhòu xiàole.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); + +// Academic citations +test('parse academic text with citations', () => { + const text = 'The study [Smith et al., 2020] found that performance (measured in ms) improved.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// Mathematical expressions +test('parse mathematical text', () => { + const text = 'The formula is f(x) = [a + b] * {c - d}.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.equal(serialize(result), text); +}); + +// Literature with nested quotes +test('parse literature with nested quotes', () => { + const text = `He thought, "Did she really say that?" 
and she said, 'Yes, I did.'`; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.ok(hasDelimiterType(result, 'singleQuote')); + assert.equal(serialize(result), text); +}); + +// Poetry with parenthetical notes +test('parse poetry with annotations', () => { + const text = 'Roses are red (traditionally), violets are blue.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// Legal text with references +test('parse legal text with section references', () => { + const text = 'According to Section 5(a) [see amendment], the party must comply.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'square')); + assert.equal(serialize(result), text); +}); + +// News article +test('parse news article text', () => { + const text = 'The CEO said, "Our growth exceeded expectations." The increase (50% year-over-year) was significant.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// Email format +test('parse email-like text', () => { + const text = 'Hi [Name], I wanted to follow up on our meeting (yesterday). Thanks!'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// Social media post +test('parse social media post', () => { + const text = 'Just learned something cool! 
[link] Check it out!'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'square')); + assert.equal(serialize(result), text); +}); + +// Recipe format +test('parse recipe text', () => { + const text = 'Add ingredients [see list] and mix (gently) for 2 minutes.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// Technical documentation +test('parse technical documentation', () => { + const text = 'The function `process` accepts parameters [x, y] and returns an object {status, data}.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'backtick')); + assert.ok(hasDelimiterType(result, 'square')); + assert.ok(hasDelimiterType(result, 'curly')); + assert.equal(serialize(result), text); +}); + +// Mixed language and code +test('parse mixed natural language and code', () => { + const text = 'To debug, run `npm test` (in terminal) and check [output].'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'backtick')); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'square')); + assert.equal(serialize(result), text); +}); + +// Lyrics with annotations +test('parse song lyrics with notes', () => { + const text = `"Imagine" (by John Lennon) says "You may say I'm a dreamer."`; + const result = parse(text); + assert.ok(result.filter(b => b.type === 'doubleQuote').length === 2); + assert.ok(hasDelimiterType(result, 'paren')); + assert.equal(serialize(result), text); +}); + +// Complex academic text +test('parse complex academic text', () => { + const text = 'The hypothesis (H₁) states that "performance improves" [p < 0.05].'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.ok(hasDelimiterType(result, 'square')); + assert.equal(serialize(result), text); +}); + +// Empty 
delimiters in text +test('parse text with empty delimiters', () => { + const text = 'The list () was empty, the quote "" was blank.'; + const result = parse(text); + assert.ok(hasDelimiterType(result, 'paren')); + assert.ok(hasDelimiterType(result, 'doubleQuote')); + assert.equal(serialize(result), text); +}); diff --git a/js/tests/parser.test.ts b/js/tests/parser.test.ts new file mode 100644 index 0000000..a035872 --- /dev/null +++ b/js/tests/parser.test.ts @@ -0,0 +1,138 @@ +/** + * Tests for the meta-notation parser + */ + +import { test, assert } from 'test-anywhere'; +import { parse } from '../src/parser.js'; + +test('parse plain text', () => { + const result = parse('hello world'); + assert.deepEqual(result, [ + { type: 'text', content: 'hello world' } + ]); +}); + +test('parse parentheses', () => { + const result = parse('(hello)'); + assert.deepEqual(result, [ + { type: 'paren', content: [{ type: 'text', content: 'hello' }] } + ]); +}); + +test('parse curly braces', () => { + const result = parse('{world}'); + assert.deepEqual(result, [ + { type: 'curly', content: [{ type: 'text', content: 'world' }] } + ]); +}); + +test('parse square brackets', () => { + const result = parse('[test]'); + assert.deepEqual(result, [ + { type: 'square', content: [{ type: 'text', content: 'test' }] } + ]); +}); + +test('parse single quotes', () => { + const result = parse("'hello'"); + assert.deepEqual(result, [ + { type: 'singleQuote', content: 'hello' } + ]); +}); + +test('parse double quotes', () => { + const result = parse('"world"'); + assert.deepEqual(result, [ + { type: 'doubleQuote', content: 'world' } + ]); +}); + +test('parse backticks', () => { + const result = parse('`code`'); + assert.deepEqual(result, [ + { type: 'backtick', content: 'code' } + ]); +}); + +test('parse mixed delimiters', () => { + const result = parse('hello (world) {test}'); + assert.deepEqual(result, [ + { type: 'text', content: 'hello ' }, + { type: 'paren', content: [{ type: 'text', content: 
'world' }] }, + { type: 'text', content: ' ' }, + { type: 'curly', content: [{ type: 'text', content: 'test' }] } + ]); +}); + +test('parse nested structures', () => { + const result = parse('(a (b) c)'); + assert.deepEqual(result, [ + { + type: 'paren', + content: [ + { type: 'text', content: 'a ' }, + { type: 'paren', content: [{ type: 'text', content: 'b' }] }, + { type: 'text', content: ' c' } + ] + } + ]); +}); + +test('parse complex nested structures', () => { + const result = parse('{a [b (c) d] e}'); + assert.deepEqual(result, [ + { + type: 'curly', + content: [ + { type: 'text', content: 'a ' }, + { + type: 'square', + content: [ + { type: 'text', content: 'b ' }, + { type: 'paren', content: [{ type: 'text', content: 'c' }] }, + { type: 'text', content: ' d' } + ] + }, + { type: 'text', content: ' e' } + ] + } + ]); +}); + +test('parse empty delimiters', () => { + const result = parse('(){}[]'); + assert.deepEqual(result, [ + { type: 'paren', content: [] }, + { type: 'curly', content: [] }, + { type: 'square', content: [] } + ]); +}); + +test('parse quotes with special chars', () => { + const result = parse('"hello {world}"'); + assert.deepEqual(result, [ + { type: 'doubleQuote', content: 'hello {world}' } + ]); +}); + +test('parse JavaScript-like code', () => { + const result = parse('function test() { return "hello"; }'); + assert.equal(result.length, 4); + assert.equal(result[0].type, 'text'); + assert.equal(result[1].type, 'paren'); + assert.equal(result[3].type, 'curly'); +}); + +test('parse Python-like code', () => { + const result = parse('def test(): return [1, 2, 3]'); + const types = result.map(b => b.type); + assert.ok(types.includes('paren')); + assert.ok(types.includes('square')); + assert.ok(types.includes('text')); +}); + +test('parse JSON-like structure', () => { + const result = parse('{"key": "value", "array": [1, 2, 3]}'); + assert.equal(result[0].type, 'curly'); + assert.ok(Array.isArray(result[0].content)); +}); diff --git 
a/js/tests/serializer.test.ts b/js/tests/serializer.test.ts new file mode 100644 index 0000000..abb904d --- /dev/null +++ b/js/tests/serializer.test.ts @@ -0,0 +1,83 @@ +/** + * Tests for the meta-notation serializer + */ + +import { test, assert } from 'test-anywhere'; +import { parse } from '../src/parser.js'; +import { serialize } from '../src/serializer.js'; + +test('serialize plain text', () => { + const blocks = [{ type: 'text' as const, content: 'hello world' }]; + const result = serialize(blocks); + assert.equal(result, 'hello world'); +}); + +test('serialize parentheses', () => { + const blocks = [ + { type: 'paren' as const, content: [{ type: 'text' as const, content: 'hello' }] } + ]; + const result = serialize(blocks); + assert.equal(result, '(hello)'); +}); + +test('serialize curly braces', () => { + const blocks = [ + { type: 'curly' as const, content: [{ type: 'text' as const, content: 'world' }] } + ]; + const result = serialize(blocks); + assert.equal(result, '{world}'); +}); + +test('serialize square brackets', () => { + const blocks = [ + { type: 'square' as const, content: [{ type: 'text' as const, content: 'test' }] } + ]; + const result = serialize(blocks); + assert.equal(result, '[test]'); +}); + +test('serialize quotes', () => { + const blocks = [ + { type: 'singleQuote' as const, content: 'hello' }, + { type: 'text' as const, content: ' ' }, + { type: 'doubleQuote' as const, content: 'world' } + ]; + const result = serialize(blocks); + assert.equal(result, `'hello' "world"`); +}); + +test('serialize backticks', () => { + const blocks = [ + { type: 'backtick' as const, content: 'code' } + ]; + const result = serialize(blocks); + assert.equal(result, '`code`'); +}); + +test('round-trip: parse then serialize', () => { + const original = 'hello (world) {test} [array] "string" `code`'; + const parsed = parse(original); + const serialized = serialize(parsed); + assert.equal(serialized, original); +}); + +test('round-trip: nested structures', () 
=> { + const original = '{a [b (c) d] e}'; + const parsed = parse(original); + const serialized = serialize(parsed); + assert.equal(serialized, original); +}); + +test('round-trip: empty delimiters', () => { + const original = '(){}[]'; + const parsed = parse(original); + const serialized = serialize(parsed); + assert.equal(serialized, original); +}); + +test('round-trip: complex code', () => { + const original = 'function test() { return "hello"; }'; + const parsed = parse(original); + const serialized = serialize(parsed); + assert.equal(serialized, original); +}); diff --git a/js/tsconfig.json b/js/tsconfig.json new file mode 100644 index 0000000..0219c1b --- /dev/null +++ b/js/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "ES2020", + "moduleResolution": "node", + "lib": ["ES2020"], + "outDir": "./dist", + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true + }, + "include": ["src/**/*", "tests/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..57f4dfe --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "meta-notation" +version = "0.1.0" +edition = "2021" +description = "Rust implementation of the Meta-Notation parser" +license = "Unlicense" +repository = "https://github.com/link-foundation/meta-notation" +keywords = ["meta", "parser", "notation", "delimiters"] +categories = ["parsing"] + +[lib] +name = "meta_notation" +path = "src/lib.rs" + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +[dev-dependencies] diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 0000000..9bbcf8e --- /dev/null +++ b/rust/README.md @@ -0,0 +1,75 @@ +# Meta-Notation (Rust) + +Rust implementation of meta-notation parser. 
+
+## Installation
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+meta-notation = "0.1"
+```
+
+## Usage
+
+```rust
+use meta_notation::{parse, serialize};
+
+fn main() {
+    // Parse code with delimiters
+    let code = r#"function test() { return "hello"; }"#;
+    let parsed = parse(code);
+
+    // Serialize back to string
+    let serialized = serialize(&parsed);
+    assert_eq!(serialized, code);
+}
+```
+
+## API
+
+### `parse(input: &str) -> Vec<Block>`
+
+Parses text into a sequence of blocks.
+
+### `serialize(blocks: &[Block]) -> String`
+
+Converts a sequence of blocks back to text.
+
+## Types
+
+```rust
+pub enum Block {
+    Paren(Vec<Block>),
+    Curly(Vec<Block>),
+    Square(Vec<Block>),
+    SingleQuote(String),
+    DoubleQuote(String),
+    Backtick(String),
+    Text(String),
+}
+```
+
+## Building
+
+```bash
+cargo build --release
+```
+
+## Testing
+
+```bash
+cargo test
+```
+
+## Features
+
+- **Universal Delimiter Parsing**: Parses `()`, `{}`, `[]`, `''`, `""`, `` ` ` ``
+- **Language Agnostic**: Works with programming and natural languages
+- **Nested Structures**: Supports arbitrary nesting of delimiters
+- **Round-trip Serialization**: Parse and serialize back to original text
+- **Serde Support**: Serialize/deserialize to JSON
+- **Minimal Dependencies**: Only uses `serde` and `serde_json` for serialization
+
+See the [main README](../README.md) for more information.
diff --git a/rust/rustfmt.toml b/rust/rustfmt.toml
new file mode 100644
index 0000000..c2a6547
--- /dev/null
+++ b/rust/rustfmt.toml
@@ -0,0 +1,23 @@
+edition = "2021"
+
+# Formatting settings
+max_width = 100
+hard_tabs = false
+tab_spaces = 4
+newline_style = "Auto"
+
+# Import settings
+reorder_imports = true
+reorder_modules = true
+
+# Match style
+match_block_trailing_comma = false
+
+# Use field init shorthand
+use_field_init_shorthand = true
+
+# Chain width
+chain_width = 60
+
+# Function call width
+fn_call_width = 80
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
new file mode 100644
index 0000000..4ada669
--- /dev/null
+++ b/rust/src/lib.rs
@@ -0,0 +1,432 @@
+//! # Meta-Notation
+//!
+//! A notation for the largest possible set of languages.
+//! Focuses on parsing common delimiters: (), {}, [], '', "", ``
+//!
+//! This is a simpler version of links-notation without the : self-reference syntax,
+//! making it compatible with a much larger set of programming languages.
+//!
+//! ## Round-trip guarantee
+//!
+//! `serialize(&parse(s)) == s` holds for every input `s`. An opening delimiter or
+//! quote that is never closed, and a closing delimiter that does not match the
+//! innermost open one, are kept as single-character [`Block::Text`] blocks.
+
+use serde::{Deserialize, Serialize};
+use std::fmt;
+
+/// Delimiter types supported by meta-notation
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub enum DelimiterType {
+    Paren,
+    Curly,
+    Square,
+    SingleQuote,
+    DoubleQuote,
+    Backtick,
+    Text,
+}
+
+/// A block is either a delimited structure or plain text
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "type", content = "content", rename_all = "camelCase")]
+pub enum Block {
+    /// Blocks found between `(` and `)`
+    Paren(Vec<Block>),
+    /// Blocks found between `{` and `}`
+    Curly(Vec<Block>),
+    /// Blocks found between `[` and `]`
+    Square(Vec<Block>),
+    /// Raw text found between two `'` characters
+    SingleQuote(String),
+    /// Raw text found between two `"` characters
+    DoubleQuote(String),
+    /// Raw text found between two backticks
+    Backtick(String),
+    /// Text outside of any delimited structure
+    Text(String),
+}
+
+impl Block {
+    /// Get the delimiter type of this block
+    pub fn delimiter_type(&self) -> DelimiterType {
+        match self {
+            Block::Paren(_) => DelimiterType::Paren,
+            Block::Curly(_) => DelimiterType::Curly,
+            Block::Square(_) => DelimiterType::Square,
+            Block::SingleQuote(_) => DelimiterType::SingleQuote,
+            Block::DoubleQuote(_) => DelimiterType::DoubleQuote,
+            Block::Backtick(_) => DelimiterType::Backtick,
+            Block::Text(_) => DelimiterType::Text,
+        }
+    }
+
+    /// Check if this block contains a specific delimiter type (recursively)
+    ///
+    /// Only bracket blocks have children; quote and text blocks match on their own type.
+    pub fn has_delimiter_type(&self, dtype: &DelimiterType) -> bool {
+        if self.delimiter_type() == *dtype {
+            return true;
+        }
+
+        match self {
+            Block::Paren(blocks) | Block::Curly(blocks) | Block::Square(blocks) => {
+                blocks.iter().any(|b| b.has_delimiter_type(dtype))
+            }
+            _ => false,
+        }
+    }
+}
+
+/// Bracket kinds that can contain nested blocks
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum Bracket {
+    Paren,
+    Curly,
+    Square,
+}
+
+impl Bracket {
+    /// Bracket kind opened by `c`, if `c` is an opening bracket
+    fn from_open(c: char) -> Option<Self> {
+        match c {
+            '(' => Some(Bracket::Paren),
+            '{' => Some(Bracket::Curly),
+            '[' => Some(Bracket::Square),
+            _ => None,
+        }
+    }
+
+    /// Bracket kind closed by `c`, if `c` is a closing bracket
+    fn from_close(c: char) -> Option<Self> {
+        match c {
+            ')' => Some(Bracket::Paren),
+            '}' => Some(Bracket::Curly),
+            ']' => Some(Bracket::Square),
+            _ => None,
+        }
+    }
+
+    /// Opening character of this bracket kind
+    fn open_char(self) -> char {
+        match self {
+            Bracket::Paren => '(',
+            Bracket::Curly => '{',
+            Bracket::Square => '[',
+        }
+    }
+
+    /// Wrap `children` in the block variant of this bracket kind
+    fn wrap(self, children: Vec<Block>) -> Block {
+        match self {
+            Bracket::Paren => Block::Paren(children),
+            Bracket::Curly => Block::Curly(children),
+            Bracket::Square => Block::Square(children),
+        }
+    }
+}
+
+/// Whether `c` opens or closes any delimited structure
+fn is_delimiter(c: char) -> bool {
+    matches!(c, '(' | ')' | '{' | '}' | '[' | ']' | '\'' | '"' | '`')
+}
+
+/// Parser for meta-notation
+pub struct Parser<'a> {
+    input: &'a str,
+    pos: usize,
+}
+
+impl<'a> Parser<'a> {
+    /// Create a new parser for the given input
+    pub fn new(input: &'a str) -> Self {
+        Parser { input, pos: 0 }
+    }
+
+    /// Get the current character without consuming it
+    fn peek(&self) -> Option<char> {
+        self.input[self.pos..].chars().next()
+    }
+
+    /// Parse a sequence of blocks
+    ///
+    /// Consumes the rest of the input. Open brackets are tracked on an explicit
+    /// stack rather than through recursion, so nesting depth does not grow the
+    /// call stack while parsing.
+    pub fn parse_sequence(&mut self) -> Vec<Block> {
+        // One entry per bracket that is still open: its kind and the blocks the
+        // enclosing level had collected before it opened.
+        let mut open_frames: Vec<(Bracket, Vec<Block>)> = Vec::new();
+        // Blocks of the innermost open bracket, or of the top level.
+        let mut current: Vec<Block> = Vec::new();
+
+        while let Some(c) = self.peek() {
+            if let Some(bracket) = Bracket::from_open(c) {
+                self.pos += c.len_utf8();
+                open_frames.push((bracket, std::mem::take(&mut current)));
+            } else if let Some(bracket) = Bracket::from_close(c) {
+                self.pos += c.len_utf8();
+                match open_frames.pop() {
+                    Some((open, parent)) if open == bracket => {
+                        let children = std::mem::replace(&mut current, parent);
+                        current.push(bracket.wrap(children));
+                    }
+                    unmatched => {
+                        // Not the closer of the innermost open bracket: put the
+                        // frame back and keep the character as text.
+                        open_frames.extend(unmatched);
+                        current.push(Block::Text(c.to_string()));
+                    }
+                }
+            } else {
+                let block = match c {
+                    '\'' => self.parse_quote(c, Block::SingleQuote),
+                    '"' => self.parse_quote(c, Block::DoubleQuote),
+                    '`' => self.parse_quote(c, Block::Backtick),
+                    _ => self.parse_text(),
+                };
+                current.push(block);
+            }
+        }
+
+        if open_frames.is_empty() {
+            return current;
+        }
+
+        // Brackets still open at end of input were never closed. Emitting them as
+        // bracket blocks would make `serialize` add closers that are not in the
+        // input, so each becomes a text block followed by what it had collected.
+        let mut flat = Vec::new();
+        for (bracket, mut before) in open_frames {
+            flat.append(&mut before);
+            flat.push(Block::Text(bracket.open_char().to_string()));
+        }
+        flat.append(&mut current);
+        flat
+    }
+
+    /// Parse a quoted string; the cursor must be on the opening `quote`
+    ///
+    /// The content is taken verbatim up to the next `quote`. Without a closing
+    /// quote only the quote character is consumed and returned as text, so the
+    /// rest of the input is still parsed for structure.
+    fn parse_quote<F>(&mut self, quote: char, constructor: F) -> Block
+    where
+        F: FnOnce(String) -> Block,
+    {
+        let input = self.input;
+        let content_start = self.pos + quote.len_utf8();
+        let rest = &input[content_start..];
+
+        match rest.find(quote) {
+            Some(content_len) => {
+                self.pos = content_start + content_len + quote.len_utf8();
+                constructor(rest[..content_len].to_string())
+            }
+            None => {
+                self.pos = content_start;
+                Block::Text(quote.to_string())
+            }
+        }
+    }
+
+    /// Parse plain text up to the next delimiter character or the end of input
+    fn parse_text(&mut self) -> Block {
+        let input = self.input;
+        let rest = &input[self.pos..];
+        let len = rest.find(is_delimiter).unwrap_or(rest.len());
+        self.pos += len;
+        Block::Text(rest[..len].to_string())
+    }
+}
+
+/// Parse meta-notation text into a sequence of blocks
+///
+/// Never fails: characters that do not form a complete delimited structure are
+/// returned as [`Block::Text`], so [`serialize`] reproduces `input` exactly.
+///
+/// # Examples
+///
+/// ```
+/// use meta_notation::{parse, serialize};
+///
+/// let result = parse("hello (world) {test}");
+/// assert_eq!(result.len(), 4);
+///
+/// // An unclosed delimiter stays plain text
+/// assert_eq!(serialize(&parse("f(x")), "f(x");
+/// ```
+pub fn parse(input: &str) -> Vec<Block> {
+    Parser::new(input).parse_sequence()
+}
+
+/// Serialize a sequence of blocks back to text
+///
+/// # Examples
+///
+/// ```
+/// use meta_notation::{parse, serialize};
+///
+/// let text = "hello (world)";
+/// let blocks = parse(text);
+/// let serialized = serialize(&blocks);
+/// assert_eq!(serialized, text);
+/// ```
+pub fn serialize(blocks: &[Block]) -> String {
+    let mut out = String::new();
+    for block in blocks {
+        write_block(&mut out, block);
+    }
+    out
+}
+
+/// Serialize a single block
+fn serialize_block(block: &Block) -> String {
+    let mut out = String::new();
+    write_block(&mut out, block);
+    out
+}
+
+/// Append the textual form of `block` to `out`
+///
+/// Everything is written into one buffer, so nested blocks do not allocate a
+/// temporary string per level.
+fn write_block(out: &mut String, block: &Block) {
+    match block {
+        Block::Text(text) => out.push_str(text),
+        Block::Paren(children) => write_delimited(out, '(', children, ')'),
+        Block::Curly(children) => write_delimited(out, '{', children, '}'),
+        Block::Square(children) => write_delimited(out, '[', children, ']'),
+        Block::SingleQuote(text) => write_quoted(out, '\'', text),
+        Block::DoubleQuote(text) => write_quoted(out, '"', text),
+        Block::Backtick(text) => write_quoted(out, '`', text),
+    }
+}
+
+/// Append `children` surrounded by the `open` and `close` characters
+fn write_delimited(out: &mut String, open: char, children: &[Block], close: char) {
+    out.push(open);
+    for child in children {
+        write_block(out, child);
+    }
+    out.push(close);
+}
+
+/// Append `text` surrounded by `quote` on both sides
+fn write_quoted(out: &mut String, quote: char, text: &str) {
+    out.push(quote);
+    out.push_str(text);
+    out.push(quote);
+}
+
+impl fmt::Display for Block {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&serialize_block(self))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_plain_text() {
+        let result = parse("hello world");
+        assert_eq!(result.len(), 1);
+        assert!(matches!(result[0], Block::Text(_)));
+    }
+
+    #[test]
+    fn test_parse_parentheses() {
+        let result = parse("(hello)");
+        assert_eq!(result.len(), 1);
+        assert!(matches!(result[0], Block::Paren(_)));
+    }
+
+    #[test]
+    fn test_parse_mixed_delimiters() {
+        let result = parse("hello (world) {test}");
+        assert_eq!(result.len(), 4);
+    }
+
+    #[test]
+    fn test_round_trip() {
+        let original = "hello (world) {test}";
+        let parsed = parse(original);
+        let serialized = serialize(&parsed);
+        assert_eq!(serialized, original);
+    }
+
+    #[test]
+    fn test_nested_structures() {
+        let original = "{a [b (c) d] e}";
+        let parsed = parse(original);
+        let serialized = serialize(&parsed);
+        assert_eq!(serialized, original);
+    }
+
+    #[test]
+    fn test_quotes() {
+        let original = r#"'single' "double" `backtick`"#;
+        let parsed = parse(original);
+        assert_eq!(parsed.len(), 5); // 3 quotes + 2 spaces
+        let serialized = serialize(&parsed);
+        assert_eq!(serialized, original);
+    }
+
+    #[test]
+    fn test_has_delimiter_type() {
+        let parsed = parse("hello (world) {test}");
+        assert!(parsed[1].has_delimiter_type(&DelimiterType::Paren));
+        assert!(parsed[3].has_delimiter_type(&DelimiterType::Curly));
+    }
+
+    #[test]
+    fn test_javascript_code() {
+        let code = "const greet = (name) => { return `Hello, ${name}!`; };";
+        let parsed = parse(code);
+        let serialized = serialize(&parsed);
+        assert_eq!(serialized, code);
+    }
+
+    #[test]
+    fn test_natural_language() {
+        let text = "She said, \"Hello, world!\" and smiled.";
+        let parsed = parse(text);
+        let serialized = serialize(&parsed);
+        assert_eq!(serialized, text);
+    }
+
+    #[test]
+    fn test_round_trip_unbalanced_input() {
+        let inputs = ["(abc", "{a [b", "(a]", "(a [b) c", "a) b", "it's", "say \"hi", "a ` b"];
+        for text in inputs.iter() {
+            assert_eq!(serialize(&parse(text)), *text);
+        }
+    }
+
+    #[test]
+    fn test_unclosed_bracket_becomes_text() {
+        let parsed = parse("(a (b) c");
+        let expected = vec![
+            Block::Text("(".to_string()),
+            Block::Text("a ".to_string()),
+            Block::Paren(vec![Block::Text("b".to_string())]),
+            Block::Text(" c".to_string()),
+        ];
+        assert_eq!(parsed, expected);
+    }
+
+    #[test]
+    fn test_unclosed_quote_becomes_text() {
+        let parsed = parse("it's (ok)");
+        let expected = vec![
+            Block::Text("it".to_string()),
+            Block::Text("'".to_string()),
+            Block::Text("s ".to_string()),
+            Block::Paren(vec![Block::Text("ok".to_string())]),
+        ];
+        assert_eq!(parsed, expected);
+    }
+}
diff --git a/rust/tests/languages_test.rs b/rust/tests/languages_test.rs
new file mode 100644
index 0000000..57d9d33
--- 
/dev/null
+++ b/rust/tests/languages_test.rs
@@ -0,0 +1,174 @@
+//! Tests for meta-notation with various programming languages
+
+use meta_notation::DelimiterType::{Backtick, Curly, DoubleQuote, Paren, Square};
+use meta_notation::{parse, serialize, Block, DelimiterType};
+
+// Parses `code`, asserts that each delimiter type in `expected` occurs somewhere in
+// the result (at any nesting depth) and that serializing reproduces `code`.
+#[track_caller]
+fn assert_parses_with(code: &str, expected: &[DelimiterType]) -> Vec<Block> {
+    let blocks = parse(code);
+    for dtype in expected {
+        assert!(blocks.iter().any(|b| b.has_delimiter_type(dtype)));
+    }
+    assert_eq!(serialize(&blocks), code);
+    blocks
+}
+
+#[test]
+fn test_parse_javascript_code() {
+    let code = "const greet = (name) => { return `Hello, ${name}!`; };";
+    let result = assert_parses_with(code, &[Paren, Curly, Backtick]);
+    assert!(!result.is_empty());
+}
+
+#[test]
+fn test_parse_python_code() {
+    let code = r#"def calculate(x, y): return {"sum": x + y, "list": [x, y]}"#;
+    assert_parses_with(code, &[Paren, Curly, Square, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_go_code() {
+    let code = r#"func main() { fmt.Println("Hello, World!") }"#;
+    assert_parses_with(code, &[Paren, Curly, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_rust_code() {
+    let code = r#"fn main() { let x = vec![1, 2, 3]; println!("{:?}", x); }"#;
+    assert_parses_with(code, &[Paren, Curly, Square, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_cpp_code() {
+    let code = r#"int main() { std::cout << "Hello" << std::endl; return 0; }"#;
+    assert_parses_with(code, &[Paren, Curly, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_java_code() {
+    let code = r#"public class Main { public static void main(String[] args) { System.out.println("Hello"); } }"#;
+    assert_parses_with(code, &[Paren, Curly, Square, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_csharp_code() {
+    let code =
+        r#"public void Test() { var list = new List {1, 2, 3}; Console.WriteLine("Done"); }"#;
+    assert_parses_with(code, &[Paren, Curly, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_ruby_code() {
+    let code = r#"def greet(name); puts "Hello, #{name}!"; end"#;
+    assert_parses_with(code, &[Paren, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_php_code() {
+    let code = r#"function test($x) { return ["key" => "value"]; }"#;
+    assert_parses_with(code, &[Paren, Curly, Square, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_swift_code() {
+    let code = r#"func greet(name: String) -> String { return "Hello, \(name)!" }"#;
+    assert_parses_with(code, &[Paren, Curly, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_kotlin_code() {
+    let code = r#"fun main() { val list = listOf(1, 2, 3); println("Hello") }"#;
+    assert_parses_with(code, &[Paren, Curly, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_typescript_code() {
+    let code = "const add = (a: number, b: number): number => { return a + b; };";
+    assert_parses_with(code, &[Paren, Curly]);
+}
+
+#[test]
+fn test_parse_scala_code() {
+    let code = "def add(x: Int, y: Int): Int = { x + y }";
+    assert_parses_with(code, &[Paren, Curly]);
+}
+
+#[test]
+fn test_parse_perl_code() {
+    let code = r#"sub greet { my ($name) = @_; print "Hello, $name!\n"; }"#;
+    assert_parses_with(code, &[Paren, Curly, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_haskell_code() {
+    let code = r#"main = putStrLn "Hello, World!""#;
+    assert_parses_with(code, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_lisp_code() {
+    let code = "(define (factorial n) (if (= n 0) 1 (* n (factorial (- n 1)))))";
+    assert_parses_with(code, &[Paren]);
+}
+
+#[test]
+fn test_parse_clojure_code() {
+    let code = r#"(defn greet [name] (str "Hello, " name "!"))"#;
+    assert_parses_with(code, &[Paren, Square, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_lua_code() {
+    let code = r#"function greet(name) return "Hello, " .. name end"#;
+    assert_parses_with(code, &[Paren, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_elixir_code() {
+    let code = r#"def greet(name), do: "Hello, #{name}!""#;
+    assert_parses_with(code, &[Paren, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_r_code() {
+    let code = r#"greet <- function(name) { paste("Hello,", name) }"#;
+    assert_parses_with(code, &[Paren, Curly, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_matlab_code() {
+    let code = "function y = square(x); y = x .^ 2; end";
+    assert_parses_with(code, &[Paren]);
+}
+
+#[test]
+fn test_parse_sql_code() {
+    let code = r#"SELECT name, age FROM users WHERE status = "active" ORDER BY created_at;"#;
+    assert_parses_with(code, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_json() {
+    let code = r#"{"name": "John", "age": 30, "tags": ["developer", "designer"]}"#;
+    assert_parses_with(code, &[Curly, Square, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_yaml_with_brackets() {
+    let code = r#"dependencies: ["react", "typescript"]"#;
+    assert_parses_with(code, &[Square, DoubleQuote]);
+}
+
+#[test]
+fn test_parse_bash_code() {
+    let code = r#"echo "Hello, ${USER}!" | grep "Hello""#;
+    assert_parses_with(code, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_markdown_with_code_blocks() {
+    let code = "Here is code: `const x = [1, 2, 3];` in backticks.";
+    assert_parses_with(code, &[Backtick]);
+}
diff --git a/rust/tests/natural_languages_test.rs b/rust/tests/natural_languages_test.rs
new file mode 100644
index 0000000..cab3f60
--- /dev/null
+++ b/rust/tests/natural_languages_test.rs
@@ -0,0 +1,208 @@
+//! Tests for meta-notation with natural language text
+//!
+//! Meta-notation should work seamlessly with natural languages since they
+//! use the same delimiters: quotes for speech, parentheses for asides,
+//! brackets for references, etc.
+
+use meta_notation::DelimiterType::{Backtick, Curly, DoubleQuote, Paren, SingleQuote, Square};
+use meta_notation::{parse, serialize, Block, DelimiterType};
+
+// Parses `text`, asserts that each delimiter type in `expected` occurs somewhere in
+// the result (at any nesting depth) and that serializing reproduces `text`.
+#[track_caller]
+fn assert_parses_with(text: &str, expected: &[DelimiterType]) -> Vec<Block> {
+    let blocks = parse(text);
+    for dtype in expected {
+        assert!(blocks.iter().any(|b| b.has_delimiter_type(dtype)));
+    }
+    assert_eq!(serialize(&blocks), text);
+    blocks
+}
+
+// Counts the top-level blocks of the given delimiter type.
+fn count_top_level(blocks: &[Block], dtype: DelimiterType) -> usize {
+    blocks.iter().filter(|b| b.delimiter_type() == dtype).count()
+}
+
+#[test]
+fn test_parse_english_text_with_quotes() {
+    let text = r#"She said, "Hello, world!" and smiled."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_english_text_with_parentheses() {
+    let text = "The conference (scheduled for next week) will be online.";
+    assert_parses_with(text, &[Paren]);
+}
+
+#[test]
+fn test_parse_english_text_with_brackets() {
+    let text = "According to the report [see page 42], the results were positive.";
+    assert_parses_with(text, &[Square]);
+}
+
+#[test]
+fn test_parse_english_dialogue() {
+    let text = r#""How are you?" she asked. "I'm fine," he replied."#;
+    let result = assert_parses_with(text, &[]);
+    assert_eq!(count_top_level(&result, DoubleQuote), 2);
+}
+
+#[test]
+fn test_parse_english_nested_structure() {
+    let text = r#"He said, "I heard her say 'hello' yesterday.""#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_spanish_text_with_quotes() {
+    let text = r#"Ella dijo, "¡Hola, mundo!" y sonrió."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_spanish_text_with_parentheses() {
+    let text = "La conferencia (programada para la próxima semana) será en línea.";
+    assert_parses_with(text, &[Paren]);
+}
+
+#[test]
+fn test_parse_french_text_with_quotes() {
+    let text = r#"Elle a dit, "Bonjour, le monde!" et a souri."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_french_text_with_parentheses() {
+    let text = "La conférence (prévue pour la semaine prochaine) sera en ligne.";
+    assert_parses_with(text, &[Paren]);
+}
+
+#[test]
+fn test_parse_german_text_with_quotes() {
+    let text = r#"Sie sagte, "Hallo, Welt!" und lächelte."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_german_text_with_parentheses() {
+    let text = "Die Konferenz (geplant für nächste Woche) wird online sein.";
+    assert_parses_with(text, &[Paren]);
+}
+
+#[test]
+fn test_parse_italian_text_with_quotes() {
+    let text = r#"Lei disse, "Ciao, mondo!" e sorrise."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_portuguese_text_with_quotes() {
+    let text = r#"Ela disse, "Olá, mundo!" e sorriu."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_russian_text_transliterated_with_quotes() {
+    let text = r#"Ona skazala, "Privet, mir!" i ulyblas."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_japanese_text_romanized_with_quotes() {
+    let text = r#"Kanojo wa "Konnichiwa, sekai!" to itta."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_chinese_text_pinyin_with_quotes() {
+    let text = r#"Ta shuo, "Nǐ hǎo, shìjiè!" ránhòu xiàole."#;
+    assert_parses_with(text, &[DoubleQuote]);
+}
+
+#[test]
+fn test_parse_academic_text_with_citations() {
+    let text = "The study [Smith et al., 2020] found that performance (measured in ms) improved.";
+    assert_parses_with(text, &[Square, Paren]);
+}
+
+#[test]
+fn test_parse_mathematical_text() {
+    let text = "The formula is f(x) = [a + b] * {c - d}.";
+    assert_parses_with(text, &[Paren, Square, Curly]);
+}
+
+#[test]
+fn test_parse_literature_with_nested_quotes() {
+    let text = r#"He thought, "Did she really say that?" and she said, 'Yes, I did.'"#;
+    assert_parses_with(text, &[DoubleQuote, SingleQuote]);
+}
+
+#[test]
+fn test_parse_poetry_with_annotations() {
+    let text = "Roses are red (traditionally), violets are blue.";
+    assert_parses_with(text, &[Paren]);
+}
+
+#[test]
+fn test_parse_legal_text_with_section_references() {
+    let text = "According to Section 5(a) [see amendment], the party must comply.";
+    assert_parses_with(text, &[Paren, Square]);
+}
+
+#[test]
+fn test_parse_news_article_text() {
+    let text = r#"The CEO said, "Our growth exceeded expectations." The increase (50% year-over-year) was significant."#;
+    assert_parses_with(text, &[DoubleQuote, Paren]);
+}
+
+#[test]
+fn test_parse_email_like_text() {
+    let text = "Hi [Name], I wanted to follow up on our meeting (yesterday). Thanks!";
+    assert_parses_with(text, &[Square, Paren]);
+}
+
+#[test]
+fn test_parse_social_media_post() {
+    let text = "Just learned something cool! [link] Check it out!";
+    assert_parses_with(text, &[Square]);
+}
+
+#[test]
+fn test_parse_recipe_text() {
+    let text = "Add ingredients [see list] and mix (gently) for 2 minutes.";
+    assert_parses_with(text, &[Square, Paren]);
+}
+
+#[test]
+fn test_parse_technical_documentation() {
+    let text =
+        "The function `process` accepts parameters [x, y] and returns an object {status, data}.";
+    assert_parses_with(text, &[Backtick, Square, Curly]);
+}
+
+#[test]
+fn test_parse_mixed_natural_language_and_code() {
+    let text = "To debug, run `npm test` (in terminal) and check [output].";
+    assert_parses_with(text, &[Backtick, Paren, Square]);
+}
+
+#[test]
+fn test_parse_song_lyrics_with_notes() {
+    let text = r#""Imagine" (by John Lennon) says "You may say I'm a dreamer.""#;
+    let result = assert_parses_with(text, &[Paren]);
+    assert_eq!(count_top_level(&result, DoubleQuote), 2);
+}
+
+#[test]
+fn test_parse_complex_academic_text() {
+    let text = r#"The hypothesis (H₁) states that "performance improves" [p < 0.05]."#;
+    assert_parses_with(text, &[Paren, DoubleQuote, Square]);
+}
+
+#[test]
+fn test_parse_text_with_empty_delimiters() {
+    let text = r#"The list () was empty, the quote "" was blank."#;
+    assert_parses_with(text, &[Paren, DoubleQuote]);
+}