#------------------------------------------------------------------
# csvio.awk --- facilities for handling CSV data (posix awk version)
#
# Author:  Manuel Collado, http://mcollado.z15.es
# License: Public domain - see UNLICENSE for details
# Version: 0.0 - Apr 2023
#
# Public functions:
#
#   csvimport([comma [, quote]])    (single chars)
#       Import a CSV record as a regular awk record delimited by OFS.
#       comma: Optional: The input CSV field delimiter (default comma ',')
#       quote: Optional: The input CSV quote char (default '"')
#
#   csvrecord(af [, comma [, quote]])
#       Returns a CSV formatted record from an array of fields.
#       af:    Array of field values indexed 1..N
#              (to convert $0 instead, call csvrecord0)
#       comma: Optional: The output CSV field delimiter (default comma ',')
#       quote: Optional: The output CSV quote char (default '"')
#
#   csvrecord0([comma [, quote]])
#       Returns a CSV formatted record from $0.
#       comma: Optional: The output CSV field delimiter (default comma ',')
#       quote: Optional: The output CSV quote char (default '"')
#
# Internal global variables:
#   _csv_fpat[]: cached FPAT-style regexes to parse CSV fields,
#                indexed by the concatenation (delimiter quote)
#------------------------------------------------------------------

#------------------------------------------------------------------
# Import a CSV record. First line already read.
#
# Parses the CSV text currently held in $0 and replaces $0 with the
# decoded field values joined by OFS. When $0 holds an odd number of
# quote chars (an unterminated quoted field), further input lines are
# appended until the quotes balance or the input is exhausted.
#
#   comma: input field delimiter, a single char (default ',')
#   quote: input quote char, a single char (default '"')
#
# Returns nothing; the result is delivered through $0.
# The remaining parameters are local variables.
#------------------------------------------------------------------
function csvimport(comma, quote,    cm, qt, more, k, fk, ofs) {
    # Process arguments: an omitted or empty argument selects the default
    cm = comma ? comma : ","
    qt = quote ? quote : "\""

    if (!((cm qt) in _csv_fpat)) {
        # Build and cache the field regex for this delimiter/quote pair.
        # With the defaults it reads: "([^,]*)|(\"([^\"]|\"\")+\"[^,]*)"
        # i.e. either a run of non-delimiters, or a quoted section (with
        # doubled quotes inside) optionally followed by non-delimiters.
        # The parser depends on match() picking the longest alternative.
        _csv_fpat[cm qt] = "([^" cm "]*)|(" qt "([^" qt "]|" qt qt ")+" qt "[^" cm "]*)"
    }

    # Collect multi-line data, if it is the case.
    # gsub() replaces every quote by itself, so it only counts them.
    # While the count is odd, append the next input line to $0.
    while (gsub(qt, qt, $0) % 2 == 1 && (getline more) > 0) {
        $0 = $0 "\n" more
        # getline advanced the counters; undo that so the joined lines
        # count as a single record
        NR--
        FNR--
    }

    # Strip unwanted CRs (every CR in the record is removed)
    gsub("\r", "", $0)

    # Decode the CSV fields and rebuild $0
    more = $0
    $0 = ""
    if ((k = length(more)) <= 0) return

    # k is the number of chars left once the current field AND the
    # delimiter after it are consumed: it reaches 0 when a trailing
    # delimiter leaves one last empty field to emit, and -1 when the
    # record ended without a trailing delimiter.
    ofs = ""    # No delimiter before the first field
    while (k >= 0 && match(more, _csv_fpat[cm qt])) {
        fk = substr(more, RSTART, RLENGTH)
        k = k - RSTART - RLENGTH
        more = substr(more, RSTART + RLENGTH + 1)   # also skips the delimiter
        if (fk ~ ("^" qt ".*" qt "$")) {
            # Fully quoted field: drop the enclosing quotes, then
            # collapse each doubled quote into a single one
            fk = substr(fk, 2, length(fk) - 2)
            gsub(qt qt, qt, fk)
        }
        $0 = $0 ofs fk    # Concatenate fields, delimited by OFS
        ofs = OFS
    }
}

#------------------------------------------------------------------
# Generate a CSV record from a collection of fields
#------------------------------------------------------------------

# Compose a CSV record from an array of fields.
#
#   af:    array of field values; elements 1, 2, 3... are used up to
#          (not including) the first missing index
#   comma: output field delimiter (default ',')
#   quote: output quote char (default '"')
#
# A field is enclosed in quotes, with its embedded quotes doubled, when
# it contains the delimiter, the quote char, a newline or a carriage
# return. A bare CR must be quoted too, since CSV readers may otherwise
# take it for a record terminator.
#
# Returns the composed record, without a trailing newline.
# The remaining parameters are local variables.
function csvrecord(af, comma, quote,    record, k, sep, field) {
    comma = comma ? comma : ","
    quote = quote ? quote : "\""
    record = ""
    sep = ""    # No delimiter before the first field
    for (k = 1; k in af; k++) {
        field = af[k]
        if (index(field, comma) || index(field, quote) ||
            index(field, "\n") || index(field, "\r")) {
            gsub(quote, quote quote, field)
            field = quote field quote
        }
        record = record sep field
        sep = comma
    }
    return record
}

# Compose a CSV record from $0.
#
# $0 is split into fields with split() using the current FS, and the
# result is formatted by csvrecord().
#
#   comma: output field delimiter (default ',')
#   quote: output quote char (default '"')
#
# Returns the composed record. af is a local variable.
function csvrecord0(comma, quote,    af) {
    split($0, af)
    return csvrecord(af, comma, quote)
}