diff --git a/src/lib/core/cli.ts b/src/lib/core/cli.ts
index 3ce34b10..2a983f53 100644
--- a/src/lib/core/cli.ts
+++ b/src/lib/core/cli.ts
@@ -29,6 +29,150 @@ export interface Config {
   outputColumnName: string;
 }
 
+export type DatasetFormat = 'csv' | 'json';
+
+const DATASET_FILENAMES: Record<DatasetFormat, string> = {
+  csv: 'dataset.csv',
+  json: 'dataset.json',
+};
+
+export function detectDatasetFormat(datasetPath: string): DatasetFormat {
+  const ext = path.extname(datasetPath).toLowerCase();
+  if (ext === '.csv') {
+    return 'csv';
+  }
+  if (ext === '.json') {
+    return 'json';
+  }
+  throw new Error(`Unsupported dataset format: ${datasetPath}`);
+}
+
+// Split one CSV line into fields, honoring double-quoted fields and
+// RFC 4180-style escaped quotes ("" inside a quoted field).
+function parseCsvLine(line: string): string[] {
+  const result: string[] = [];
+  let current = '';
+  let inQuotes = false;
+
+  for (let i = 0; i < line.length; i += 1) {
+    const char = line[i];
+    const next = line[i + 1];
+
+    if (inQuotes) {
+      if (char === '"' && next === '"') {
+        current += '"';
+        i += 1; // skip escaped quote
+      } else if (char === '"') {
+        inQuotes = false;
+      } else {
+        current += char;
+      }
+    } else if (char === ',') {
+      result.push(current);
+      current = '';
+    } else if (char === '"') {
+      inQuotes = true;
+    } else {
+      current += char;
+    }
+  }
+
+  result.push(current);
+  return result;
+}
+
+export function parseCsv(content: string): Record<string, string>[] {
+  const lines = content.split(/\r?\n/).filter((line) => line.trim().length > 0);
+  if (lines.length === 0) {
+    return [];
+  }
+
+  const headers = parseCsvLine(lines[0]);
+  return lines.slice(1).map((line) => {
+    const values = parseCsvLine(line);
+    const row: Record<string, string> = {};
+    headers.forEach((header, idx) => {
+      row[header] = values[idx] ?? '';
+    });
+    return row;
+  });
+}
+
+function escapeCsvValue(value: unknown): string {
+  if (value === undefined || value === null) {
+    return '';
+  }
+  const str = String(value);
+  const needsQuotes = /[",\n\r]/.test(str);
+  const escaped = str.replace(/"/g, '""');
+  return needsQuotes ? `"${escaped}"` : escaped;
+}
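+
+// Illustrative example of the quoting contract (hypothetical value, not part
+// of the original patch): a field containing quotes and a comma is wrapped in
+// quotes and its inner quotes doubled:
+//   escapeCsvValue('a "b", c') === '"a ""b"", c"'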
`"${escaped}"` : escaped; +} + +function collectHeaders(rows: Record[]): string[] { + const headers: string[] = []; + rows.forEach((row) => { + Object.keys(row).forEach((key) => { + if (!headers.includes(key)) { + headers.push(key); + } + }); + }); + return headers; +} + +export function serializeCsv(rows: Record[]): string { + if (rows.length === 0) { + return ''; + } + const headers = collectHeaders(rows); + const headerLine = headers.map(escapeCsvValue).join(','); + const dataLines = rows.map((row) => + headers.map((header) => escapeCsvValue(row[header])).join(','), + ); + return [headerLine, ...dataLines].join('\n'); +} + +export function loadDataset(datasetPath: string): { data: any[]; format: DatasetFormat } { + const datasetFullPath = path.resolve(datasetPath); + const rawData = fs.readFileSync(datasetFullPath, 'utf8'); + const format = detectDatasetFormat(datasetFullPath); + + if (format === 'json') { + const parsed = JSON.parse(rawData); + if (!Array.isArray(parsed)) { + throw new Error('Dataset JSON must be an array of records'); + } + return { data: parsed, format }; + } + + const parsed = parseCsv(rawData); + return { data: parsed, format }; +} + +export function writeDataset( + outputDir: string, + rows: RunReturn[], + format: DatasetFormat, + config: Config, +): void { + const outputDirPath = path.resolve(outputDir); + fs.mkdirSync(outputDirPath, { recursive: true }); + + const datasetFilename = DATASET_FILENAMES[format]; + const datasetPath = path.join(outputDirPath, datasetFilename); + const configPath = path.join(outputDirPath, 'config.json'); + + if (format === 'json') { + fs.writeFileSync(datasetPath, JSON.stringify(rows, null, 4), 'utf8'); + } else { + const csvContent = serializeCsv(rows); + fs.writeFileSync(datasetPath, csvContent, 'utf8'); + } + + fs.writeFileSync(configPath, JSON.stringify(config, null, 4), 'utf8'); + + console.info(`Output written to ${datasetPath}`); + console.info(`Config written to ${configPath}`); +} + class CLIHandler { private run: (...args: any[]) => Promise; @@ -46,10 +190,7 @@ class CLIHandler { const options = program.opts(); const { datasetPath, outputDir } = options; - // Load dataset - const datasetFullPath = path.resolve(datasetPath); - const rawData = fs.readFileSync(datasetFullPath, 'utf8'); - const dataset = JSON.parse(rawData); + const { data: dataset, format } = loadDataset(datasetPath); // Process each item in the dataset dynamically Promise.all( @@ -64,7 +205,7 @@ class CLIHandler { * Wait for all rows to be run * Write results now to output dir or log to console */ - this.writeOutput(results, outputDir); + this.writeOutput(results, outputDir, format); console.log('Results processing completed. 
+
 class CLIHandler {
   private run: (...args: any[]) => Promise<RunReturn>;
 
@@ -46,10 +190,7 @@ class CLIHandler {
     const options = program.opts();
     const { datasetPath, outputDir } = options;
 
-    // Load dataset
-    const datasetFullPath = path.resolve(datasetPath);
-    const rawData = fs.readFileSync(datasetFullPath, 'utf8');
-    const dataset = JSON.parse(rawData);
+    const { data: dataset, format } = loadDataset(datasetPath);
 
     // Process each item in the dataset dynamically
     Promise.all(
@@ -64,7 +205,7 @@
        * Wait for all rows to be run
        * Write results now to output dir or log to console
        */
-      this.writeOutput(results, outputDir);
+      this.writeOutput(results, outputDir, format);
       console.log('Results processing completed. Check console for output.');
     })
     .catch((err) => {
@@ -72,24 +213,13 @@ class CLIHandler {
     });
   }
 
-  private writeOutput(results: RunReturn[], outputDir: string) {
+  private writeOutput(results: RunReturn[], outputDir: string, format: DatasetFormat) {
     const config: Config = {
       metadata: { outputTimestamp: Date.now() },
      outputColumnName: 'output',
     };
 
-    // Construct an output directory {outputDir}/{datasetName}/
-    const outputDirPath = path.resolve(outputDir);
-    fs.mkdirSync(outputDirPath, { recursive: true });
-
-    const datasetPath = path.join(outputDirPath, 'dataset.json');
-    const configPath = path.join(outputDirPath, 'config.json');
-
-    fs.writeFileSync(datasetPath, JSON.stringify(results, null, 4), 'utf8');
-    fs.writeFileSync(configPath, JSON.stringify(config, null, 4), 'utf8');
-
-    console.info(`Output written to ${datasetPath}`);
-    console.info(`Config written to ${configPath}`);
+    writeDataset(outputDir, results, format, config);
   }
 }
diff --git a/tests/cli-dataset.test.ts b/tests/cli-dataset.test.ts
new file mode 100644
index 00000000..8cb501d3
--- /dev/null
+++ b/tests/cli-dataset.test.ts
@@ -0,0 +1,109 @@
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+
+import {
+  Config,
+  loadDataset,
+  parseCsv,
+  serializeCsv,
+  writeDataset,
+} from '../src/lib/core/cli';
+
+describe('CLI dataset helpers', () => {
+  const makeTempDir = (): string => fs.mkdtempSync(path.join(os.tmpdir(), 'ol-cli-'));
+
+  it('loads JSON datasets', () => {
+    const dir = makeTempDir();
+    const filePath = path.join(dir, 'dataset.json');
+    const payload = [{ a: 1 }, { a: 2 }];
+    fs.writeFileSync(filePath, JSON.stringify(payload), 'utf8');
+
+    const { data, format } = loadDataset(filePath);
+
+    expect(format).toBe('json');
+    expect(data).toEqual(payload);
+  });
+
+  it('loads CSV datasets', () => {
+    const dir = makeTempDir();
+    const filePath = path.join(dir, 'dataset.csv');
+    const payload = 'id,name\n1,Alice\n2,Bob\n';
+    fs.writeFileSync(filePath, payload, 'utf8');
+
+    const { data, format } = loadDataset(filePath);
+
+    expect(format).toBe('csv');
+    expect(data).toEqual([
+      { id: '1', name: 'Alice' },
+      { id: '2', name: 'Bob' },
+    ]);
+  });
+
+  it('writes JSON outputs and config', () => {
+    const dir = makeTempDir();
+    const config: Config = {
+      metadata: { outputTimestamp: 123 },
+      outputColumnName: 'output',
+    };
+    const rows = [
+      { input: 'a', output: 'x', otherFields: {}, latency: 10 },
+      { input: 'b', output: 'y', otherFields: {}, latency: 20 },
+    ];
+
+    writeDataset(dir, rows, 'json', config);
+
+    const datasetPath = path.join(dir, 'dataset.json');
+    const configPath = path.join(dir, 'config.json');
+    expect(fs.existsSync(datasetPath)).toBe(true);
+    expect(fs.existsSync(configPath)).toBe(true);
+
+    const writtenDataset = JSON.parse(fs.readFileSync(datasetPath, 'utf8'));
+    const writtenConfig = JSON.parse(fs.readFileSync(configPath, 'utf8'));
+
+    expect(writtenDataset).toEqual(rows);
+    expect(writtenConfig).toEqual(config);
+  });
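+
+  // Note (editorial comment, not in the original patch): CSV is untyped, so
+  // numeric fields come back as strings after a CSV round-trip (latency 1
+  // becomes '1' below), while the JSON path above preserves types.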
+
+  it('writes CSV outputs preserving headers and quoting values', () => {
+    const dir = makeTempDir();
+    const config: Config = {
+      metadata: { outputTimestamp: 456 },
+      outputColumnName: 'output',
+    };
+    const rows = [
+      { prompt: 'hello', output: 'hi', latency: 1 },
+      { prompt: 'quote, "comma"', output: 'ok', latency: 2 },
+    ];
+
+    writeDataset(dir, rows, 'csv', config);
+
+    const datasetPath = path.join(dir, 'dataset.csv');
+    const configPath = path.join(dir, 'config.json');
+    expect(fs.existsSync(datasetPath)).toBe(true);
+    expect(fs.existsSync(configPath)).toBe(true);
+
+    const csvContent = fs.readFileSync(datasetPath, 'utf8');
+    const parsed = parseCsv(csvContent);
+
+    expect(parsed).toEqual([
+      { prompt: 'hello', output: 'hi', latency: '1' },
+      { prompt: 'quote, "comma"', output: 'ok', latency: '2' },
+    ]);
+  });
+
+  it('serializes and parses CSV round-trip', () => {
+    const rows = [
+      { a: '1', b: 'text' },
+      { a: '2', b: 'text, with comma' },
+    ];
+    const serialized = serializeCsv(rows);
+    const parsed = parseCsv(serialized);
+
+    expect(parsed).toEqual([
+      { a: '1', b: 'text' },
+      { a: '2', b: 'text, with comma' },
+    ]);
+  });
+});
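+
+// Known limitation, noted here rather than tested (an observation about the
+// implementation, not original to this patch): serializeCsv quotes values
+// containing newlines, but parseCsv splits input on /\r?\n/ before tracking
+// quote state, so embedded newlines do not survive a CSV round-trip.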