diff --git a/go.mod b/go.mod index 3bc2af423..b382dc304 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,6 @@ require ( github.com/andybalholm/brotli v1.2.1 github.com/apache/thrift v0.22.0 github.com/cespare/xxhash/v2 v2.3.0 - github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 github.com/goccy/go-json v0.10.6 github.com/google/flatbuffers v25.12.19+incompatible github.com/google/uuid v1.6.0 diff --git a/go.sum b/go.sum index 5e7c17249..343b5b071 100644 --- a/go.sum +++ b/go.sum @@ -40,8 +40,6 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 h1:bWDMxwH3px2JBh6AyO7hdCn/PkvCZXii8TGj7sbtEbQ= -github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= diff --git a/parquet/cmd/parquet_reader/main.go b/parquet/cmd/parquet_reader/main.go index cafc41b34..522781c96 100644 --- a/parquet/cmd/parquet_reader/main.go +++ b/parquet/cmd/parquet_reader/main.go @@ -18,6 +18,8 @@ package main import ( "bufio" + "errors" + "flag" "fmt" "io" "log" @@ -30,8 +32,6 @@ import ( "github.com/apache/arrow-go/v18/parquet/file" "github.com/apache/arrow-go/v18/parquet/metadata" "github.com/apache/arrow-go/v18/parquet/schema" - - "github.com/docopt/docopt-go" ) var version = "" @@ -47,33 +47,68 @@ Commands: Options: -h --help Show this screen. - --print-key-value-metadata Print out the key-value metadata. [default: false] - --only-metadata Stop after printing metadata, no values. - --no-metadata Do not print metadata. - --output=FILE Specify output file for data. [default: -] - --no-memory-map Disable memory mapping the file. - --int96-timestamp Parse INT96 as TIMESTAMP for legacy support. - --json Format output as JSON instead of text. - --csv Format output as CSV instead of text. - --columns=COLUMNS Specify a subset of columns to print, comma delimited indexes.` +` + +func printUsage(fs *flag.FlagSet) { + fmt.Fprint(fs.Output(), usage) + fs.VisitAll(func(f *flag.Flag) { + name, flagUsage := flag.UnquoteUsage(f) + flagName := "--" + f.Name + if name != "" { + flagName += "=" + name + } + fmt.Fprintf(fs.Output(), " %-30s%s\n", flagName, flagUsage) + }) +} func main() { - opts, _ := docopt.ParseDoc(usage) var config struct { - ColumnIndexes bool `docopt:"column-indexes"` + ColumnIndexes bool PrintKeyValueMetadata bool OnlyMetadata bool NoMetadata bool Output string NoMemoryMap bool - JSON bool `docopt:"--json"` - CSV bool `docopt:"--csv"` - ParseInt96AsTimestamp bool `docopt:"--int96-timestamp"` + JSON bool + CSV bool + ParseInt96AsTimestamp bool Columns string File string } - opts.Bind(&config) + args := os.Args[1:] + if len(args) > 0 && args[0] == "column-indexes" { + config.ColumnIndexes = true + args = args[1:] + } + + fs := flag.NewFlagSet("parquet_reader", flag.ContinueOnError) + fs.SetOutput(os.Stderr) + fs.BoolVar(&config.OnlyMetadata, "only-metadata", false, "Stop after printing metadata, no values.") + fs.BoolVar(&config.NoMetadata, "no-metadata", false, "Do not print metadata.") + fs.BoolVar(&config.NoMemoryMap, "no-memory-map", false, "Disable memory mapping the file.") + fs.BoolVar(&config.JSON, "json", false, "Format output as JSON instead of text.") + fs.BoolVar(&config.CSV, "csv", false, "Format output as CSV instead of text.") + fs.StringVar(&config.Output, "output", "-", "Specify output `FILE` for data.") + fs.BoolVar(&config.PrintKeyValueMetadata, "print-key-value-metadata", false, "Print out the key-value metadata.") + fs.BoolVar(&config.ParseInt96AsTimestamp, "int96-timestamp", false, "Parse INT96 as TIMESTAMP for legacy support.") + fs.StringVar(&config.Columns, "columns", "", "Specify a subset of `COLUMNS` to print, comma delimited indexes.") + fs.Usage = func() { + printUsage(fs) + } + + if err := fs.Parse(args); err != nil { + if errors.Is(err, flag.ErrHelp) { + os.Exit(0) + } + os.Exit(1) + } + if fs.NArg() != 1 { + fs.Usage() + fmt.Fprintln(os.Stderr, "expected exactly one parquet file") + os.Exit(1) + } + config.File = fs.Arg(0) parseInt96AsTimestamp = config.ParseInt96AsTimestamp var dataOut io.Writer diff --git a/parquet/cmd/parquet_schema/main.go b/parquet/cmd/parquet_schema/main.go index 6f61f09fa..fde48b9c0 100644 --- a/parquet/cmd/parquet_schema/main.go +++ b/parquet/cmd/parquet_schema/main.go @@ -22,23 +22,39 @@ import ( "github.com/apache/arrow-go/v18/parquet/file" "github.com/apache/arrow-go/v18/parquet/schema" - "github.com/docopt/docopt-go" ) const usage = `Parquet Schema Dumper. + Usage: parquet_schema -h | --help parquet_schema + Options: - -h --help Show this screen.` + -h --help Show this screen. +` func main() { - args, _ := docopt.ParseDoc(usage) - rdr, err := file.OpenParquetFile(args[""].(string), false) - if err != nil { - fmt.Fprintln(os.Stderr, "Error opening parquet file: ", err) + args := os.Args[1:] + + switch len(args) { + case 1: + switch args[0] { + case "-h", "--help": + fmt.Fprint(os.Stderr, usage) + os.Exit(0) + } + + rdr, err := file.OpenParquetFile(args[0], false) + if err != nil { + fmt.Fprintln(os.Stderr, "Error opening parquet file:", err) + os.Exit(1) + } + + schema.PrintSchema(rdr.MetaData().Schema.Root(), os.Stdout, 2) + default: + fmt.Fprint(os.Stderr, usage) + fmt.Fprintln(os.Stderr, "expected exactly one parquet file") os.Exit(1) } - - schema.PrintSchema(rdr.MetaData().Schema.Root(), os.Stdout, 2) }