mirror of
https://github.com/therootcompany/golib.git
synced 2025-12-23 22:08:46 +00:00
feat(gsheet2csv): parse URLs and CSVs with comments
This commit is contained in:
parent
dc951ce388
commit
24ec3f021d
7
io/transform/gsheet2csv/LICENSE
Normal file
7
io/transform/gsheet2csv/LICENSE
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
Authored in 2025 by AJ ONeal <aj@therootcompany.com>
|
||||||
|
To the extent possible under law, the author(s) have dedicated all copyright
|
||||||
|
and related and neighboring rights to this software to the public domain
|
||||||
|
worldwide. This software is distributed without any warranty.
|
||||||
|
|
||||||
|
You should have received a copy of the CC0 Public Domain Dedication along with
|
||||||
|
this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
|
||||||
125
io/transform/gsheet2csv/README.md
Normal file
125
io/transform/gsheet2csv/README.md
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
# gsheet2csv
|
||||||
|
|
||||||
|
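[![Go Reference](https://pkg.go.dev/badge/github.com/therootcompany/golib/io/transform/gsheet2csv.svg)](https://pkg.go.dev/github.com/therootcompany/golib/io/transform/gsheet2csv)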
|
||||||
|
|
||||||
|
A simple wrapper around `encoding/csv` to read Google Sheet CSVs from URL, or a given Reader.
|
||||||
|
|
||||||
|
This does surprisingly little - you should probably just handle the boilerplate yourself. However, these are the problems it solves for us:
|
||||||
|
|
||||||
|
- works with Google Sheet URLs, regardless of URL format
|
||||||
|
- Edit URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?gid=0000000000#gid=0000000000>
|
||||||
|
- Share URL (Sheet 1): <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/edit?usp=sharing>
|
||||||
|
- CSV Export URL: <https://docs.google.com/spreadsheets/d/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX/export?format=csv&usp=sharing&gid=0000000000>
|
||||||
|
- anything with a path like `/spreadsheets/d/{docid}/` and (optionally) a hash or query param like `gid={gid}`
|
||||||
|
- can write out for import to gsheet (comments containing quotes or commas are quoted), \
|
||||||
|
or in RFC form (comments are never quoted, but values beginning with a comment character are)
|
||||||
|
- swaps `\r\n` (Windows) for `\n` (Unix) and ensures trailing newline (a la `encoding/csv`)
|
||||||
|
|
||||||
|
Note:
|
||||||
|
|
||||||
|
- The Google Sheet must be shared to **Anyone with the link**.
|
||||||
|
- Read and write in 'gsheet' style for reciprocity of comment handling
|
||||||
|
- Be careful about single-column CSVs \
|
||||||
|
(all comment-like lines are comments, same as with `encoding/csv` and empty lines)
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
Same as `encoding/csv` (embedded), but with two extra options:
|
||||||
|
|
||||||
|
```go
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
switch len(os.Args) {
|
||||||
|
case 2:
|
||||||
|
break
|
||||||
|
case 1:
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
urlOrPath := os.Args[1]
|
||||||
|
|
||||||
|
gsr := gsheet2csv.NewReaderFrom(urlOrPath)
|
||||||
|
records, err := gsr.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
csvw := gsheet2csv.NewWriter(os.Stdout)
|
||||||
|
csvw.Comment = gsr.Comment
|
||||||
|
if err := csvw.WriteAll(records); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
# CLI
|
||||||
|
|
||||||
|
There are a few convenience utilities:
|
||||||
|
|
||||||
|
- `gsheet2csv` (also `gsheet2tsv`)
|
||||||
|
- `gsheet2env`
|
||||||
|
|
||||||
|
## gsheet2csv
|
||||||
|
|
||||||
|
They're only slightly different from a direct export of a Google CSV in that they reformat comments and newlines.
|
||||||
|
|
||||||
|
The alterable behavior is almost exclusively for testing.
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
```sh
|
||||||
|
go get github.com/therootcompany/golib/io/transform/gsheet2csv
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
```sh
|
||||||
|
gsheet2csv --raw -o ./gsheet.csv 'https://docs.google.com/spreadsheets/...'
|
||||||
|
|
||||||
|
gsheet2csv -d '\t' --write-style 'gsheet' ./gsheet.csv > ./gsheet.tsv
|
||||||
|
|
||||||
|
gsheet2csv --strip-comments ./gsheet.csv > ./sheet.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
```text
|
||||||
|
--raw download without processing
|
||||||
|
--print-ids print ids to stdout without download
|
||||||
|
--print-url print url to stdout without downloading
|
||||||
|
-o <filepath> write records to file (default: stdout)
|
||||||
|
-d field delimiter (for output)
|
||||||
|
--read-delimiter input field delimiter (for testing reciprocity)
|
||||||
|
--crlf write using CRLF (\r\n) as the record separator
|
||||||
|
--comment '#' treat lines starting with # as comments
|
||||||
|
--strip-comments ignore single-field data beginning with a comment character
|
||||||
|
--read-style 'gsheet' (preserves comments as single-field records)
|
||||||
|
or 'rfc' (ignore lines starting with comment character)
|
||||||
|
--write-style 'gsheet' (quote single-field comments containing quotes or commas)
|
||||||
|
or 'rfc' (only quote values starting with a comment character)
|
||||||
|
```
|
||||||
|
|
||||||
|
### ASCII Delimiters
|
||||||
|
|
||||||
|
```
|
||||||
|
, comma
|
||||||
|
\t tab (or a normal tab)
|
||||||
|
space (just a normal space)
|
||||||
|
: colon
|
||||||
|
; semicolon
|
||||||
|
| pipe
|
||||||
|
^_ unit separator
|
||||||
|
^^ record separator
|
||||||
|
^] group separator
|
||||||
|
^\ file separator
|
||||||
|
\f form feed (also ^L)
|
||||||
|
\v vertical tab (also ^K)
|
||||||
|
```
|
||||||
220
io/transform/gsheet2csv/cmd/gsheet2csv/main.go
Normal file
220
io/transform/gsheet2csv/cmd/gsheet2csv/main.go
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CSVReader is the reader behavior this command needs. main assigns either a
// *csv.Reader (--read-style=rfc) or a *gsheet2csv.Reader to it.
type CSVReader interface {
	// Read returns the next record, or an error (io.EOF at end of input).
	Read() (record []string, err error)
	// ReadAll returns every remaining record.
	ReadAll() (records [][]string, err error)
}
|
||||||
|
|
||||||
|
// CSVWriter is the writer behavior this command needs; it mirrors the method
// set of encoding/csv's Writer that main calls (Write, Flush, Error) plus
// WriteAll.
type CSVWriter interface {
	// Write emits a single record.
	Write(record []string) error
	// WriteAll emits every record.
	WriteAll(records [][]string) error
	// Flush pushes buffered output to the underlying writer.
	Flush()
	// Error reports an error from a previous Write or Flush.
	Error() error
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var commentArg string
|
||||||
|
format := "CSV"
|
||||||
|
delim := ','
|
||||||
|
if strings.Contains(os.Args[0], "tsv") {
|
||||||
|
delim = '\t'
|
||||||
|
format = "TSV"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse command-line flags
|
||||||
|
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)")
|
||||||
|
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
||||||
|
readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||||
|
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||||
|
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
||||||
|
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
||||||
|
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
||||||
|
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
||||||
|
noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)")
|
||||||
|
readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV")
|
||||||
|
writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read")
|
||||||
|
flag.Usage = func() {
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
||||||
|
fmt.Fprintf(os.Stderr, "Flags:\n")
|
||||||
|
flag.PrintDefaults()
|
||||||
|
fmt.Fprintf(os.Stderr, "\nExample:\n")
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
|
||||||
|
}
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Check for URL argument
|
||||||
|
if len(flag.Args()) != 1 {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
|
||||||
|
flag.Usage()
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
url := flag.Args()[0]
|
||||||
|
|
||||||
|
// Prepare output writer
|
||||||
|
var out *os.File
|
||||||
|
if *outputFile != "" {
|
||||||
|
var err error
|
||||||
|
out, err = os.Create(*outputFile)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = out.Close() }()
|
||||||
|
} else {
|
||||||
|
out = os.Stdout
|
||||||
|
}
|
||||||
|
|
||||||
|
inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
delim, err = gsheet2csv.DecodeDelimiter(*delimString)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
var rc io.ReadCloser
|
||||||
|
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
||||||
|
docid, gid := gsheet2csv.ParseIDs(url)
|
||||||
|
if *parseOnly {
|
||||||
|
fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
}
|
||||||
|
|
||||||
|
sheetURL := gsheet2csv.ToCSVURL(docid, gid)
|
||||||
|
if *urlOnly {
|
||||||
|
fmt.Printf("%s\n", sheetURL)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !*urlOnly {
|
||||||
|
resp, err := gsheet2csv.GetSheet(docid, gid)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
rc = resp.Body
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
url = strings.TrimPrefix(url, "file://")
|
||||||
|
fmt.Fprintf(os.Stderr, "opening %s\n", url)
|
||||||
|
f, err := os.Open(url)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
rc = f
|
||||||
|
}
|
||||||
|
|
||||||
|
if out == os.Stdout {
|
||||||
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if *urlOnly || *parseOnly {
|
||||||
|
os.Exit(0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if *rawOnly {
|
||||||
|
if _, err := io.Copy(out, rc); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var comment rune
|
||||||
|
if commentArg == "0" {
|
||||||
|
comment = 0
|
||||||
|
} else {
|
||||||
|
comment, _ = utf8.DecodeRuneInString(commentArg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a reader for the Google Sheet
|
||||||
|
var csvr CSVReader
|
||||||
|
if *readStyle == "rfc" {
|
||||||
|
rfcr := csv.NewReader(rc)
|
||||||
|
rfcr.Comma = inputDelim
|
||||||
|
rfcr.Comment = comment
|
||||||
|
rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
|
||||||
|
csvr = rfcr
|
||||||
|
} else {
|
||||||
|
gsr := gsheet2csv.NewReader(rc)
|
||||||
|
gsr.Comma = inputDelim
|
||||||
|
if *noReadComments {
|
||||||
|
gsr.Comment = comment
|
||||||
|
} else {
|
||||||
|
gsr.Comment = 0
|
||||||
|
}
|
||||||
|
gsr.ReuseRecord = true
|
||||||
|
csvr = gsr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create CSV writer
|
||||||
|
var csvw CSVWriter
|
||||||
|
// if *writeStyle == "gsheet"
|
||||||
|
{
|
||||||
|
gsw := gsheet2csv.NewWriter(out)
|
||||||
|
gsw.QuoteAmbiguousComments = *writeStyle == "gsheet"
|
||||||
|
gsw.Comment = comment
|
||||||
|
gsw.Comma = delim // Set delimiter to tab for TSV
|
||||||
|
gsw.UseCRLF = *useCRLF
|
||||||
|
csvw = gsw
|
||||||
|
}
|
||||||
|
// else {
|
||||||
|
// rfcw := csv.NewWriter(out)
|
||||||
|
// rfcw.Comma = delim
|
||||||
|
// rfcw.UseCRLF = *useCRLF
|
||||||
|
// csvw = rfcw
|
||||||
|
// }
|
||||||
|
|
||||||
|
for {
|
||||||
|
// Convert each record
|
||||||
|
record, err := csvr.Read()
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := csvw.Write(record); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
csvw.Flush()
|
||||||
|
if err := csvw.Error(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if out != os.Stdout {
|
||||||
|
fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
|
||||||
|
}
|
||||||
|
}
|
||||||
220
io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
Normal file
220
io/transform/gsheet2csv/cmd/gsheet2tsv/main.go
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CSVReader is the reader behavior this command needs. main assigns either a
// *csv.Reader (--read-style=rfc) or a *gsheet2csv.Reader to it.
type CSVReader interface {
	// Read returns the next record, or an error (io.EOF at end of input).
	Read() (record []string, err error)
	// ReadAll returns every remaining record.
	ReadAll() (records [][]string, err error)
}
|
||||||
|
|
||||||
|
// CSVWriter is the writer behavior this command needs; it mirrors the method
// set of encoding/csv's Writer that main calls (Write, Flush, Error) plus
// WriteAll.
type CSVWriter interface {
	// Write emits a single record.
	Write(record []string) error
	// WriteAll emits every record.
	WriteAll(records [][]string) error
	// Flush pushes buffered output to the underlying writer.
	Flush()
	// Error reports an error from a previous Write or Flush.
	Error() error
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var commentArg string
|
||||||
|
format := "CSV"
|
||||||
|
delim := ','
|
||||||
|
if strings.Contains(os.Args[0], "tsv") {
|
||||||
|
delim = '\t'
|
||||||
|
format = "TSV"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse command-line flags
|
||||||
|
flag.StringVar(&commentArg, "comment", "#", "treat lines beginning with this rune as comments, 0 to disable (which may cause read errors)")
|
||||||
|
outputFile := flag.String("o", "", "Output "+format+" file (default: stdout)")
|
||||||
|
readDelimString := flag.String("read-delimiter", ",", "field delimiter to use for input file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||||
|
delimString := flag.String("d", string(delim), "field delimiter to use for output file ('\\t' for tab, '^_' for Unit Separator, etc)")
|
||||||
|
useCRLF := flag.Bool("crlf", false, "use CRLF (\\r\\n) as record separator")
|
||||||
|
urlOnly := flag.Bool("print-url", false, "don't download, just print the Google Sheet URL")
|
||||||
|
parseOnly := flag.Bool("print-ids", false, "don't download, just print the Doc ID and Sheet ID (gid)")
|
||||||
|
rawOnly := flag.Bool("raw", false, "don't parse, just download")
|
||||||
|
noReadComments := flag.Bool("strip-comments", false, "strip comments when reading (gsheet-only, control rfc behavior with --comment)")
|
||||||
|
readStyle := flag.String("read-style", "gsheet", "'gsheet' or 'rfc' to read either as a gsheet or rfc CSV")
|
||||||
|
writeStyle := flag.String("write-style", "rfc", "'gsheet' or 'rfc' to write either for gsheet import or rfc CSV read")
|
||||||
|
flag.Usage = func() {
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s [flags] <google-sheet-url-or-file-path>\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, "Converts a Google Sheet to %s format.\n\n", format)
|
||||||
|
fmt.Fprintf(os.Stderr, "Flags:\n")
|
||||||
|
flag.PrintDefaults()
|
||||||
|
fmt.Fprintf(os.Stderr, "\nExample:\n")
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv 'file://gsheet.csv'\n", os.Args[0])
|
||||||
|
fmt.Fprintf(os.Stderr, " %s -o output.tsv './gsheet.csv'\n", os.Args[0])
|
||||||
|
}
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Check for URL argument
|
||||||
|
if len(flag.Args()) != 1 {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error: exactly one Google Sheet URL is required\n")
|
||||||
|
flag.Usage()
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
url := flag.Args()[0]
|
||||||
|
|
||||||
|
// Prepare output writer
|
||||||
|
var out *os.File
|
||||||
|
if *outputFile != "" {
|
||||||
|
var err error
|
||||||
|
out, err = os.Create(*outputFile)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error creating output file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = out.Close() }()
|
||||||
|
} else {
|
||||||
|
out = os.Stdout
|
||||||
|
}
|
||||||
|
|
||||||
|
inputDelim, err := gsheet2csv.DecodeDelimiter(*readDelimString)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error decoding input delimiter: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
delim, err = gsheet2csv.DecodeDelimiter(*delimString)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error decoding output delimiter: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
var rc io.ReadCloser
|
||||||
|
if strings.HasPrefix(url, "https://") || strings.HasPrefix(url, "http://") {
|
||||||
|
docid, gid := gsheet2csv.ParseIDs(url)
|
||||||
|
if *parseOnly {
|
||||||
|
fmt.Printf("docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "docid=%s\ngid=%s\n", docid, gid)
|
||||||
|
}
|
||||||
|
|
||||||
|
sheetURL := gsheet2csv.ToCSVURL(docid, gid)
|
||||||
|
if *urlOnly {
|
||||||
|
fmt.Printf("%s\n", sheetURL)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "downloading %s\n", sheetURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !*urlOnly {
|
||||||
|
resp, err := gsheet2csv.GetSheet(docid, gid)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer func() { _ = resp.Body.Close() }()
|
||||||
|
rc = resp.Body
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
url = strings.TrimPrefix(url, "file://")
|
||||||
|
fmt.Fprintf(os.Stderr, "opening %s\n", url)
|
||||||
|
f, err := os.Open(url)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
rc = f
|
||||||
|
}
|
||||||
|
|
||||||
|
if out == os.Stdout {
|
||||||
|
fmt.Fprintf(os.Stderr, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if *urlOnly || *parseOnly {
|
||||||
|
os.Exit(0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if *rawOnly {
|
||||||
|
if _, err := io.Copy(out, rc); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error getting url body: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var comment rune
|
||||||
|
if commentArg == "0" {
|
||||||
|
comment = 0
|
||||||
|
} else {
|
||||||
|
comment, _ = utf8.DecodeRuneInString(commentArg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a reader for the Google Sheet
|
||||||
|
var csvr CSVReader
|
||||||
|
if *readStyle == "rfc" {
|
||||||
|
rfcr := csv.NewReader(rc)
|
||||||
|
rfcr.Comma = inputDelim
|
||||||
|
rfcr.Comment = comment
|
||||||
|
rfcr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
|
||||||
|
csvr = rfcr
|
||||||
|
} else {
|
||||||
|
gsr := gsheet2csv.NewReader(rc)
|
||||||
|
gsr.Comma = inputDelim
|
||||||
|
if *noReadComments {
|
||||||
|
gsr.Comment = comment
|
||||||
|
} else {
|
||||||
|
gsr.Comment = 0
|
||||||
|
}
|
||||||
|
gsr.ReuseRecord = true
|
||||||
|
csvr = gsr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create CSV writer
|
||||||
|
var csvw CSVWriter
|
||||||
|
// if *writeStyle == "gsheet"
|
||||||
|
{
|
||||||
|
gsw := gsheet2csv.NewWriter(out)
|
||||||
|
gsw.QuoteAmbiguousComments = *writeStyle == "gsheet"
|
||||||
|
gsw.Comment = comment
|
||||||
|
gsw.Comma = delim // Set delimiter to tab for TSV
|
||||||
|
gsw.UseCRLF = *useCRLF
|
||||||
|
csvw = gsw
|
||||||
|
}
|
||||||
|
// else {
|
||||||
|
// rfcw := csv.NewWriter(out)
|
||||||
|
// rfcw.Comma = delim
|
||||||
|
// rfcw.UseCRLF = *useCRLF
|
||||||
|
// csvw = rfcw
|
||||||
|
// }
|
||||||
|
|
||||||
|
for {
|
||||||
|
// Convert each record
|
||||||
|
record, err := csvr.Read()
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := csvw.Write(record); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing "+format+": %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
csvw.Flush()
|
||||||
|
if err := csvw.Error(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error flushing "+format+" writer: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if out != os.Stdout {
|
||||||
|
fmt.Fprintf(os.Stderr, "wrote %s\n", *outputFile)
|
||||||
|
}
|
||||||
|
}
|
||||||
33
io/transform/gsheet2csv/fixtures/example.go
Normal file
33
io/transform/gsheet2csv/fixtures/example.go
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/therootcompany/golib/io/transform/gsheet2csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
switch len(os.Args) {
|
||||||
|
case 2:
|
||||||
|
break
|
||||||
|
case 1:
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: %s <url>\n", os.Args[0])
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
urlOrPath := os.Args[1]
|
||||||
|
|
||||||
|
gsr := gsheet2csv.NewReaderFrom(urlOrPath)
|
||||||
|
records, err := gsr.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error reading from %s: %v\n", gsr.URL, err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
csvw := gsheet2csv.NewWriter(os.Stdout)
|
||||||
|
csvw.Comment = gsr.Comment
|
||||||
|
if err := csvw.WriteAll(records); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Error writing csv %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
18
io/transform/gsheet2csv/fixtures/gsheet-raw.csv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-raw.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# this is a comment,,
|
||||||
|
"# this is, well, a quoted comment",,
|
||||||
|
"# this is a ""super""-quoted comment",,
|
||||||
|
Key,Value,
|
||||||
|
Name,55,
|
||||||
|
Girlfriend's Age,55,
|
||||||
|
,,
|
||||||
|
My IQ,55,
|
||||||
|
,55,
|
||||||
|
"Key,with,Comma",,
|
||||||
|
,"Value,with,Comma",
|
||||||
|
"Quoted ""Key""",Normal Value,
|
||||||
|
Normal Key,"Quoted ""Value""",
|
||||||
|
"Quoted ""Key""",,
|
||||||
|
,"Quoted ""Value""",
|
||||||
|
x,y,z
|
||||||
|
"# comment with trailing comma,",,
|
||||||
|
#1,2,#3
|
||||||
|
14
io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
Normal file
14
io/transform/gsheet2csv/fixtures/gsheet-stripped.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
Key,Value,
|
||||||
|
Name,55,
|
||||||
|
Girlfriend's Age,55,
|
||||||
|
,,
|
||||||
|
My IQ,55,
|
||||||
|
,55,
|
||||||
|
"Key,with,Comma",,
|
||||||
|
,"Value,with,Comma",
|
||||||
|
"Quoted ""Key""",Normal Value,
|
||||||
|
Normal Key,"Quoted ""Value""",
|
||||||
|
"Quoted ""Key""",,
|
||||||
|
,"Quoted ""Value""",
|
||||||
|
x,y,z
|
||||||
|
"#1",2,#3
|
||||||
|
18
io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-to-gsheet.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# this is a comment
|
||||||
|
"# this is, well, a quoted comment"
|
||||||
|
"# this is a ""super""-quoted comment"
|
||||||
|
Key,Value,
|
||||||
|
Name,55,
|
||||||
|
Girlfriend's Age,55,
|
||||||
|
,,
|
||||||
|
My IQ,55,
|
||||||
|
,55,
|
||||||
|
"Key,with,Comma",,
|
||||||
|
,"Value,with,Comma",
|
||||||
|
"Quoted ""Key""",Normal Value,
|
||||||
|
Normal Key,"Quoted ""Value""",
|
||||||
|
"Quoted ""Key""",,
|
||||||
|
,"Quoted ""Value""",
|
||||||
|
x,y,z
|
||||||
|
"# comment with trailing comma,"
|
||||||
|
"#1",2,#3
|
||||||
|
Can't render this file because it contains an unexpected character in line 10 and column 16.
|
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# this is a comment
|
||||||
|
# this is, well, a quoted comment
|
||||||
|
# this is a "super"-quoted comment
|
||||||
|
Key,Value,
|
||||||
|
Name,55,
|
||||||
|
Girlfriend's Age,55,
|
||||||
|
,,
|
||||||
|
My IQ,55,
|
||||||
|
,55,
|
||||||
|
"Key,with,Comma",,
|
||||||
|
,"Value,with,Comma",
|
||||||
|
"Quoted ""Key""",Normal Value,
|
||||||
|
Normal Key,"Quoted ""Value""",
|
||||||
|
"Quoted ""Key""",,
|
||||||
|
,"Quoted ""Value""",
|
||||||
|
x,y,z
|
||||||
|
# comment with trailing comma,
|
||||||
|
"#1",2,#3
|
||||||
|
Can't render this file because it contains an unexpected character in line 3 and column 13.
|
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
Normal file
18
io/transform/gsheet2csv/fixtures/gsheet-to-rfc.tsv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# this is a comment
|
||||||
|
# this is, well, a quoted comment
|
||||||
|
# this is a "super"-quoted comment
|
||||||
|
Key Value
|
||||||
|
Name 55
|
||||||
|
Girlfriend's Age 55
|
||||||
|
|
||||||
|
My IQ 55
|
||||||
|
55
|
||||||
|
Key,with,Comma
|
||||||
|
Value,with,Comma
|
||||||
|
"Quoted ""Key""" Normal Value
|
||||||
|
Normal Key "Quoted ""Value"""
|
||||||
|
"Quoted ""Key"""
|
||||||
|
"Quoted ""Value"""
|
||||||
|
x y z
|
||||||
|
# comment with trailing comma,
|
||||||
|
"#1" 2 #3
|
||||||
|
Can't render this file because it contains an unexpected character in line 3 and column 13.
|
3
io/transform/gsheet2csv/go.mod
Normal file
3
io/transform/gsheet2csv/go.mod
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
module github.com/therootcompany/golib/io/transform/gsheet2csv
|
||||||
|
|
||||||
|
go 1.24.6
|
||||||
309
io/transform/gsheet2csv/gsheet2csv.go
Normal file
309
io/transform/gsheet2csv/gsheet2csv.go
Normal file
@ -0,0 +1,309 @@
|
|||||||
|
// Authored in 2025 by AJ ONeal <aj@therootcompany.com> (https://therootcompany.com)
|
||||||
|
//
|
||||||
|
// To the extent possible under law, the author(s) have dedicated all copyright
|
||||||
|
// and related and neighboring rights to this software to the public domain
|
||||||
|
// worldwide. This software is distributed without any warranty.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the CC0 Public Domain Dedication along with
|
||||||
|
// this software. If not, see <https://creativecommons.org/publicdomain/zero/1.0/>.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: CC0-1.0
|
||||||
|
|
||||||
|
package gsheet2csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ASCII information separators, usable as field delimiters. They occupy the
// consecutive code points 0x1c through 0x1f and remain untyped rune constants.
const (
	fileSeparator   = '\x1c' + iota // FS, 0x1c
	groupSeparator                  // GS, 0x1d
	recordSeparator                 // RS, 0x1e
	unitSeparator                   // US, 0x1f
)
|
||||||
|
|
||||||
|
// ErrHTTPGet is returned by GetSheet when the download responds with a status
// other than 200 OK.
var ErrHTTPGet = errors.New("did not get 200 OK when downloading from URL")
|
||||||
|
|
||||||
|
// httpGet is a seam for tests: it defaults to http.Get and can be swapped for a mock.
|
||||||
|
var httpGet = http.Get
|
||||||
|
|
||||||
|
// Reader wraps an embedded *csv.Reader and adds Google-Sheet-aware comment
// handling on top of it (see Read).
type Reader struct {
	*csv.Reader

	// DocID and GID are the document and sheet IDs when the reader was
	// created from IDs; URL is the CSV export URL or the local file path.
	DocID, GID, URL string

	// Comment shadows the embedded csv.Reader's Comment field. Read skips
	// records whose first field starts with this rune and whose remaining
	// fields are all empty; 0 disables that filtering.
	Comment rune

	r     io.Reader      // the reader handed to NewReader
	resp  *http.Response // response whose Body Read closes when close is set
	close bool           // whether Read closes resp.Body on the first error
	err   error          // construction error, returned by Read
}
|
||||||
|
|
||||||
|
func NewReaderFrom(urlOrPath string) *Reader {
|
||||||
|
if strings.HasPrefix(urlOrPath, "https://") || strings.HasPrefix(urlOrPath, "http://") {
|
||||||
|
return NewReaderFromURL(urlOrPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
urlOrPath = strings.TrimPrefix(urlOrPath, "file://")
|
||||||
|
f, err := os.Open(urlOrPath)
|
||||||
|
r := NewReader(f)
|
||||||
|
r.URL = urlOrPath
|
||||||
|
if err != nil {
|
||||||
|
r.err = err
|
||||||
|
}
|
||||||
|
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReaderFromURL(url string) *Reader {
|
||||||
|
docid, gid := ParseIDs(url)
|
||||||
|
|
||||||
|
return NewReaderFromIDs(docid, gid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReaderFromIDs(docid, gid string) *Reader {
|
||||||
|
resp, err := GetSheet(docid, gid)
|
||||||
|
if err != nil {
|
||||||
|
r := NewReader(nil)
|
||||||
|
r.err = err
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
r := NewReader(resp.Body)
|
||||||
|
r.URL = ToCSVURL(docid, gid)
|
||||||
|
r.DocID = docid
|
||||||
|
r.GID = gid
|
||||||
|
r.resp = resp
|
||||||
|
r.close = true
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToCSVURL builds the Google Sheets CSV export URL for the given document ID
// and sheet ID (gid). The IDs are inserted verbatim, without escaping.
func ToCSVURL(docid, gid string) string {
	const (
		base  = "https://docs.google.com/spreadsheets/d/"
		query = "/export?format=csv&usp=sharing&gid="
	)
	return base + docid + query + gid
}
|
||||||
|
|
||||||
|
func GetSheet(docid, gid string) (*http.Response, error) {
|
||||||
|
downloadURL := ToCSVURL(docid, gid)
|
||||||
|
|
||||||
|
resp, err := httpGet(downloadURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
_ = resp.Body.Close()
|
||||||
|
return nil, ErrHTTPGet
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReader(r io.Reader) *Reader {
|
||||||
|
csvr := csv.NewReader(r)
|
||||||
|
csvr.Comma = ','
|
||||||
|
csvr.Comment = 0 // to allow distinguishing between quoted comments and fields
|
||||||
|
csvr.FieldsPerRecord = -1 // Google Sheets is consistent, but our commented files are not
|
||||||
|
csvr.LazyQuotes = false // fields that need quotes use them correctly
|
||||||
|
csvr.TrimLeadingSpace = false
|
||||||
|
csvr.ReuseRecord = false
|
||||||
|
return &Reader{
|
||||||
|
Reader: csvr,
|
||||||
|
Comment: '#',
|
||||||
|
r: r,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func DecodeDelimiter(delimString string) (rune, error) {
|
||||||
|
switch delimString {
|
||||||
|
case "^_", "\\x1f":
|
||||||
|
delimString = string(unitSeparator)
|
||||||
|
case "^^", "\\x1e":
|
||||||
|
delimString = string(recordSeparator)
|
||||||
|
case "^]", "\\x1d":
|
||||||
|
delimString = string(groupSeparator)
|
||||||
|
case "^\\", "\\x1c":
|
||||||
|
delimString = string(fileSeparator)
|
||||||
|
case "^L", "\\f":
|
||||||
|
delimString = "\f"
|
||||||
|
case "^K", "\\v":
|
||||||
|
delimString = "\v"
|
||||||
|
case "^I", "\\t":
|
||||||
|
delimString = " "
|
||||||
|
}
|
||||||
|
delim, _ := utf8.DecodeRuneInString(delimString)
|
||||||
|
return delim, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reader) Read() ([]string, error) {
|
||||||
|
if r.err != nil {
|
||||||
|
return nil, r.err
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
record, err := r.Reader.Read()
|
||||||
|
if err != nil {
|
||||||
|
if r.close {
|
||||||
|
_ = r.resp.Body.Close()
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Comment > 0 {
|
||||||
|
if rv, _ := utf8.DecodeRuneInString(record[0]); rv == r.Comment {
|
||||||
|
last := len(record) - 1
|
||||||
|
for len(record[last]) == 0 {
|
||||||
|
last -= 1
|
||||||
|
}
|
||||||
|
if last == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return record, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reader) ReadAll() ([][]string, error) {
|
||||||
|
var records [][]string
|
||||||
|
|
||||||
|
for {
|
||||||
|
record, err := r.Read()
|
||||||
|
if nil != err {
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
return records, nil
|
||||||
|
}
|
||||||
|
return records, err
|
||||||
|
}
|
||||||
|
records = append(records, record)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseIDs extracts the document id and sheet gid from a Google Sheets URL.
//
// The document id is the text following "/spreadsheets/d/" up to the next
// '/', '?' or '#', or to the end of the string. The gid is the text following
// the first "gid=" up to the next '#', '&', '?' or '/', or to the end of the
// string.
//
// If the URL has no "/spreadsheets/d/" segment or the document id is empty,
// both results are empty. If a document id is found but no gid is present
// (or it is empty), gid is "0".
func ParseIDs(urlStr string) (docid string, gid string) {
	const prefix = "/spreadsheets/d/"

	_, rest, found := strings.Cut(urlStr, prefix)
	if !found {
		return "", ""
	}

	// The id may be followed directly by a query or fragment when the URL has
	// no trailing path segment, so '/' is not the only terminator.
	docid = rest
	if end := strings.IndexAny(rest, "/?#"); end != -1 {
		docid = rest[:end]
	}
	if docid == "" {
		return "", ""
	}

	gid = "0"
	if _, after, ok := strings.Cut(urlStr, "gid="); ok {
		if end := strings.IndexAny(after, "#&?/"); end != -1 {
			after = after[:end]
		}
		if after != "" {
			gid = after
		}
	}

	return docid, gid
}
|
||||||
|
|
||||||
|
// Writer is a csv.Writer that can additionally emit comment lines.
type Writer struct {
	*csv.Writer

	// Comment is the rune that marks a comment line. Zero disables comment
	// handling, in which case Write behaves exactly like csv.Writer.Write.
	Comment rune

	// QuoteAmbiguousComments controls how single-field comments are written.
	// When false they are written verbatim. When true they are CSV-quoted if
	// they contain a quote, the delimiter, or a line break.
	QuoteAmbiguousComments bool

	// w is the destination, kept so comment lines can bypass the csv.Writer.
	w io.Writer
}

// NewWriter returns a Writer that writes to w and uses '#' as the comment
// character.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		Writer:  csv.NewWriter(w),
		Comment: '#',
		w:       w,
	}
}

// Write writes one record.
//
// Records whose first field does not begin with the comment character (and
// all records when Comment is 0) are passed to the embedded csv.Writer
// unchanged. Otherwise the record is written directly to the destination,
// after flushing the csv.Writer so that output order is preserved:
//
//   - If only the first field is non-empty, the record is a true comment.
//     Trailing empty fields are dropped and the comment is written verbatim,
//     or, with QuoteAmbiguousComments, quoted only when it contains a quote,
//     the delimiter, or a line break.
//   - If other fields are non-empty, the record is data that merely starts
//     with the comment character. The first field is always quoted so it
//     cannot be mistaken for a comment; other fields are quoted when they
//     contain a quote, the delimiter, or a line break.
//
// The caller's record slice is never modified. The returned error is the
// first error encountered while flushing or writing.
func (w *Writer) Write(record []string) error {
	// Not handling comments? Move along.
	if w.Comment == 0 || len(record) == 0 {
		return w.Writer.Write(record)
	}

	// First char not a comment char? Move along.
	if lead, _ := utf8.DecodeRuneInString(record[0]); lead != w.Comment {
		return w.Writer.Write(record)
	}

	// Is this a true comment? Or data that should be quoted that begins with
	// the comment char? The lower bound keeps the scan inside the slice.
	lastNonEmpty := len(record) - 1
	for lastNonEmpty > 0 && len(record[lastNonEmpty]) == 0 {
		lastNonEmpty--
	}

	// We will be doing custom writes ahead, so anything buffered must go out
	// first; a failure here must not be masked by a later successful write.
	w.Flush()
	if err := w.Error(); err != nil {
		return err
	}

	newline := "\n"
	if w.UseCRLF {
		newline = "\r\n"
	}

	fields := record
	quoteFirst := true
	if lastNonEmpty == 0 {
		// Write true comments out plain.
		if !w.QuoteAmbiguousComments {
			_, err := io.WriteString(w.w, record[0]+newline)
			return err
		}
		// Quote the comment iff it needs it, not universally.
		fields = record[:1]
		quoteFirst = false
	}

	comma := string(w.Comma)
	// Build the output in a fresh slice so the caller's record is untouched.
	out := make([]string, len(fields))
	for i, f := range fields {
		needsQuotes := (i == 0 && quoteFirst) ||
			strings.ContainsAny(f, "\"\r\n") ||
			strings.Contains(f, comma)
		if needsQuotes {
			f = `"` + strings.ReplaceAll(f, `"`, `""`) + `"`
		}
		out[i] = f
	}

	_, err := io.WriteString(w.w, strings.Join(out, comma)+newline)
	return err
}

// WriteAll writes every record with Write, flushes, and returns the first
// error encountered.
func (w *Writer) WriteAll(records [][]string) error {
	for _, r := range records {
		if err := w.Write(r); err != nil {
			return err
		}
	}
	w.Flush()
	return w.Error()
}
|
||||||
249
io/transform/gsheet2csv/gsheet2csv_test.go
Normal file
249
io/transform/gsheet2csv/gsheet2csv_test.go
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
package gsheet2csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"slices"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mockHTTPClient is a test double for an HTTP GET function: it hands back a
// canned response and error regardless of the URL requested.
type mockHTTPClient struct {
	resp *http.Response // response returned by every Get call
	err  error          // error returned by every Get call
}

// Get ignores url and returns the configured response and error.
func (c *mockHTTPClient) Get(url string) (*http.Response, error) {
	return c.resp, c.err
}
|
||||||
|
|
||||||
|
// sampleCSV mimics the structure of ai-models.csv from the project README:
// an unquoted comment, a quoted comment containing a comma and escaped
// quotes, a header row, data rows, and one blank line between data rows.
const sampleCSV = `# Generated by ollama list
"# Sample Quoted Comment, with ""quotes"" itself"
"NAME","ID","SIZE","MODIFIED"
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
"gpt-oss:20b","aa4295ac10c3","13 GB","8 days ago"

"gpt-oss:latest","aa4295ac10c3","13 GB","7 weeks ago"
`
|
||||||
|
|
||||||
|
// malformedCSV is a fixture for error handling: the last header field is
// missing its closing quote.
const malformedCSV = `# Comment
"NAME","ID","SIZE","MODIFIED
"qwen3-coder:30b","06c1097efce0","18 GB","8 days ago"
`
|
||||||
|
|
||||||
|
// TestParseIDs verifies the ParseIDs function for various URL formats.
|
||||||
|
func TestParseIDs(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
url string
|
||||||
|
wantDoc string
|
||||||
|
wantGid string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Google Sheets Edit / Share URL with gid",
|
||||||
|
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238#gid=559037238",
|
||||||
|
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||||
|
wantGid: "559037238",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Google Sheets CSV URL with gid",
|
||||||
|
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/export?format=csv&usp=sharing&gid=559037238",
|
||||||
|
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||||
|
wantGid: "559037238",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "URL without gid",
|
||||||
|
url: "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit",
|
||||||
|
wantDoc: "1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34",
|
||||||
|
wantGid: "0",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Invalid URL",
|
||||||
|
url: "https://example.com/invalid",
|
||||||
|
wantDoc: "",
|
||||||
|
wantGid: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
gotDoc, gotGid := ParseIDs(tt.url)
|
||||||
|
if gotDoc != tt.wantDoc {
|
||||||
|
t.Errorf("ParseIDs() docid = %q, want %q", gotDoc, tt.wantDoc)
|
||||||
|
}
|
||||||
|
if gotGid != tt.wantGid {
|
||||||
|
t.Errorf("ParseIDs() gid = %q, want %q", gotGid, tt.wantGid)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNewReaderFromURL tests initializing a Reader from a Google Sheets URL.
|
||||||
|
func TestNewReaderFromURL(t *testing.T) {
|
||||||
|
originalGet := httpGet
|
||||||
|
defer func() { httpGet = originalGet }()
|
||||||
|
|
||||||
|
url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
|
||||||
|
|
||||||
|
// Test successful HTTP response
|
||||||
|
mockResp := &http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: io.NopCloser(strings.NewReader(sampleCSV)),
|
||||||
|
}
|
||||||
|
client := &mockHTTPClient{resp: mockResp}
|
||||||
|
httpGet = client.Get
|
||||||
|
|
||||||
|
reader := NewReaderFromURL(url)
|
||||||
|
if reader.err != nil {
|
||||||
|
t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
|
||||||
|
}
|
||||||
|
if reader.resp != mockResp {
|
||||||
|
t.Error("NewReaderFromURL() did not set response correctly")
|
||||||
|
}
|
||||||
|
if !reader.close {
|
||||||
|
t.Error("NewReaderFromURL() did not set close flag")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test HTTP failure
|
||||||
|
client = &mockHTTPClient{resp: mockResp}
|
||||||
|
client.err = errors.New("network error")
|
||||||
|
httpGet = client.Get
|
||||||
|
|
||||||
|
reader = NewReaderFromURL(url)
|
||||||
|
if reader.err == nil {
|
||||||
|
t.Error("NewReaderFromURL() expected error, got nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test non-200 status
|
||||||
|
client = &mockHTTPClient{resp: &http.Response{
|
||||||
|
StatusCode: http.StatusNotFound,
|
||||||
|
Body: io.NopCloser(strings.NewReader("these aren't the droids you're looking for")),
|
||||||
|
}}
|
||||||
|
httpGet = client.Get
|
||||||
|
|
||||||
|
reader = NewReaderFromURL(url)
|
||||||
|
if reader.err == nil {
|
||||||
|
t.Error("NewReaderFromURL() expected error for non-200 status, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRead tests the Read method for comment handling.
|
||||||
|
func TestRead(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
preserveComments bool
|
||||||
|
expected [][]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Skip comments",
|
||||||
|
expected: [][]string{
|
||||||
|
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||||
|
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Don't skip comments",
|
||||||
|
preserveComments: true,
|
||||||
|
expected: [][]string{
|
||||||
|
{"# Generated by ollama list"},
|
||||||
|
{"# Sample Quoted Comment, with \"quotes\" itself"},
|
||||||
|
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||||
|
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
reader := NewReader(strings.NewReader(sampleCSV))
|
||||||
|
if tt.preserveComments {
|
||||||
|
reader.Comment = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, want := range tt.expected {
|
||||||
|
got, err := reader.Read()
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Read() error at record %d: %v", i, err)
|
||||||
|
}
|
||||||
|
if !slices.Equal(got, want) {
|
||||||
|
t.Errorf("Read() record %d = %v, want %v", i, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify EOF
|
||||||
|
_, err := reader.Read()
|
||||||
|
if !errors.Is(err, io.EOF) {
|
||||||
|
t.Errorf("Read() expected EOF, got %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestReadAll tests the ReadAll method for different configurations.
|
||||||
|
func TestReadAll(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
expected [][]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Skip comments",
|
||||||
|
expected: [][]string{
|
||||||
|
{"NAME", "ID", "SIZE", "MODIFIED"},
|
||||||
|
{"qwen3-coder:30b", "06c1097efce0", "18 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:20b", "aa4295ac10c3", "13 GB", "8 days ago"},
|
||||||
|
{"gpt-oss:latest", "aa4295ac10c3", "13 GB", "7 weeks ago"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
reader := NewReader(strings.NewReader(sampleCSV))
|
||||||
|
|
||||||
|
got, err := reader.ReadAll()
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("ReadAll() error: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != len(tt.expected) {
|
||||||
|
t.Errorf("ReadAll() returned %d records, want %d", len(got), len(tt.expected))
|
||||||
|
}
|
||||||
|
for i, want := range tt.expected {
|
||||||
|
if !slices.Equal(got[i], want) {
|
||||||
|
t.Errorf("ReadAll() record %d = %v, want %v", i, got[i], want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestNewReaderFromURLWithMalformedCSV tests NewReaderFromURL with malformed CSV.
|
||||||
|
func TestNewReaderFromURLWithMalformedCSV(t *testing.T) {
|
||||||
|
mockResp := &http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: io.NopCloser(strings.NewReader(malformedCSV)),
|
||||||
|
}
|
||||||
|
client := &mockHTTPClient{resp: mockResp}
|
||||||
|
originalGet := httpGet
|
||||||
|
httpGet = client.Get
|
||||||
|
defer func() { httpGet = originalGet }()
|
||||||
|
|
||||||
|
url := "https://docs.google.com/spreadsheets/d/1KdNsc63pk0QRerWDPcIL9cMnGQlG-9Ue9Jlf0PAAA34/edit?gid=559037238"
|
||||||
|
reader := NewReaderFromURL(url)
|
||||||
|
if reader.err != nil {
|
||||||
|
t.Errorf("NewReaderFromURL() unexpected error: %v", reader.err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reading should fail due to malformed CSV
|
||||||
|
_, err := reader.Read()
|
||||||
|
if err == nil {
|
||||||
|
t.Error("Read() expected error for malformed CSV, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user