Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 148 additions & 18 deletions src/lib/core/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,150 @@
outputColumnName: string;
}

/** Supported on-disk dataset encodings. */
export type DatasetFormat = 'csv' | 'json';

/** Canonical output filename for each dataset format. */
const DATASET_FILENAMES: Record<DatasetFormat, string> = {
  csv: 'dataset.csv',
  json: 'dataset.json',
};

/**
 * Infers the dataset format from the file path's extension
 * (case-insensitive). Throws for anything other than .csv / .json.
 */
export function detectDatasetFormat(datasetPath: string): DatasetFormat {
  switch (path.extname(datasetPath).toLowerCase()) {
    case '.csv':
      return 'csv';
    case '.json':
      return 'json';
    default:
      throw new Error(`Unsupported dataset format: ${datasetPath}`);
  }
}

/**
 * Splits one CSV line into its fields. Double quotes open/close a
 * quoted region; commas inside quotes are literal; a doubled quote
 * ("") inside a quoted region is an escaped quote character.
 */
function parseCsvLine(line: string): string[] {
  const fields: string[] = [];
  let buffer = '';
  let quoted = false;
  let pos = 0;

  while (pos < line.length) {
    const ch = line[pos];
    if (quoted) {
      if (ch === '"' && line[pos + 1] === '"') {
        buffer += '"'; // escaped quote: emit one, consume both
        pos += 1;
      } else if (ch === '"') {
        quoted = false;
      } else {
        buffer += ch;
      }
    } else if (ch === '"') {
      quoted = true;
    } else if (ch === ',') {
      fields.push(buffer);
      buffer = '';
    } else {
      buffer += ch;
    }
    pos += 1;
  }

  fields.push(buffer);
  return fields;
}

/**
 * Parses CSV content into records keyed by the header row.
 *
 * Fix: the previous implementation split the content on newlines
 * BEFORE interpreting quotes, so a quoted field containing an embedded
 * newline (which escapeCsvValue legitimately produces) was torn across
 * two rows. Tokenization now happens character-by-character over the
 * whole content, honouring quoting across line breaks (RFC 4180).
 *
 * Blank (whitespace-only) rows are skipped, mirroring the previous
 * empty-line filter; missing trailing fields default to ''.
 */
export function parseCsv(content: string): Record<string, string>[] {
  const records = parseCsvRecords(content);
  if (records.length === 0) {
    return [];
  }

  const [headers, ...dataRows] = records;
  return dataRows.map((values) => {
    const row: Record<string, string> = {};
    headers.forEach((header, idx) => {
      row[header] = values[idx] ?? '';
    });
    return row;
  });
}

// Tokenizes raw CSV text into rows of fields, honouring quoting across
// commas and line breaks. A lone CR (not part of CRLF) stays in the
// field, matching the old /\r?\n/ split semantics.
function parseCsvRecords(content: string): string[][] {
  const records: string[][] = [];
  let row: string[] = [];
  let field = '';
  let inQuotes = false;

  const endRow = (): void => {
    row.push(field);
    field = '';
    // Drop rows that are a single, whitespace-only field (blank lines).
    if (row.length > 1 || row[0].trim().length > 0) {
      records.push(row);
    }
    row = [];
  };

  for (let i = 0; i < content.length; i += 1) {
    const char = content[i];
    if (inQuotes) {
      if (char === '"' && content[i + 1] === '"') {
        field += '"';
        i += 1; // skip escaped quote
      } else if (char === '"') {
        inQuotes = false;
      } else {
        field += char; // commas and newlines are literal inside quotes
      }
    } else if (char === '"') {
      inQuotes = true;
    } else if (char === ',') {
      row.push(field);
      field = '';
    } else if (char === '\n' || (char === '\r' && content[i + 1] === '\n')) {
      if (char === '\r') {
        i += 1; // consume the LF of a CRLF pair
      }
      endRow();
    } else {
      field += char;
    }
  }

  endRow();
  return records;
}

/**
 * Renders a single value as a CSV field. null/undefined become the
 * empty string; values containing quotes, commas or line breaks are
 * wrapped in double quotes with embedded quotes doubled.
 */
function escapeCsvValue(value: unknown): string {
  if (value == null) {
    return '';
  }
  const text = String(value);
  if (!/[",\n\r]/.test(text)) {
    return text;
  }
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Returns the union of keys across all rows, in first-seen order.
 *
 * Uses a Set for O(1) membership instead of the previous
 * Array.includes scan, which was O(headers) per key and made the
 * whole pass quadratic in the number of distinct columns.
 */
function collectHeaders(rows: Record<string, unknown>[]): string[] {
  const seen = new Set<string>();
  for (const row of rows) {
    for (const key of Object.keys(row)) {
      seen.add(key); // Set preserves insertion order, like the old array
    }
  }
  return [...seen];
}

export function serializeCsv(rows: Record<string, unknown>[]): string {
if (rows.length === 0) {
return '';
}
const headers = collectHeaders(rows);
const headerLine = headers.map(escapeCsvValue).join(',');
const dataLines = rows.map((row) =>

Check failure on line 127 in src/lib/core/cli.ts

View workflow job for this annotation

GitHub Actions / lint

Replace `⏎····headers.map((header)·=>·escapeCsvValue(row[header])).join(','),⏎··` with `·headers.map((header)·=>·escapeCsvValue(row[header])).join(',')`
headers.map((header) => escapeCsvValue(row[header])).join(','),
);
return [headerLine, ...dataLines].join('\n');
}

/**
 * Reads a dataset file from disk and parses it according to its
 * extension. JSON datasets must be a top-level array of records;
 * CSV datasets are parsed into string-valued records via parseCsv.
 *
 * Returns both the parsed rows and the detected format so callers can
 * write output in the same format they read.
 */
export function loadDataset(datasetPath: string): { data: any[]; format: DatasetFormat } {
  const resolvedPath = path.resolve(datasetPath);
  const rawContent = fs.readFileSync(resolvedPath, 'utf8');
  const format = detectDatasetFormat(resolvedPath);

  if (format === 'csv') {
    return { data: parseCsv(rawContent), format };
  }

  const parsed = JSON.parse(rawContent);
  if (!Array.isArray(parsed)) {
    throw new Error('Dataset JSON must be an array of records');
  }
  return { data: parsed, format };
}

export function writeDataset(
outputDir: string,
rows: RunReturn[],
format: DatasetFormat,
config: Config,
): void {
const outputDirPath = path.resolve(outputDir);
fs.mkdirSync(outputDirPath, { recursive: true });

const datasetFilename = DATASET_FILENAMES[format];
const datasetPath = path.join(outputDirPath, datasetFilename);
const configPath = path.join(outputDirPath, 'config.json');

if (format === 'json') {
fs.writeFileSync(datasetPath, JSON.stringify(rows, null, 4), 'utf8');
} else {
const csvContent = serializeCsv(rows);
fs.writeFileSync(datasetPath, csvContent, 'utf8');
}

fs.writeFileSync(configPath, JSON.stringify(config, null, 4), 'utf8');

console.info(`Output written to ${datasetPath}`);
console.info(`Config written to ${configPath}`);
}

class CLIHandler {
private run: (...args: any[]) => Promise<any>;

Expand All @@ -46,10 +190,7 @@
const options = program.opts();
const { datasetPath, outputDir } = options;

// Load dataset
const datasetFullPath = path.resolve(datasetPath);
const rawData = fs.readFileSync(datasetFullPath, 'utf8');
const dataset = JSON.parse(rawData);
const { data: dataset, format } = loadDataset(datasetPath);

// Process each item in the dataset dynamically
Promise.all(
Expand All @@ -64,32 +205,21 @@
* Wait for all rows to be run
* Write results now to output dir or log to console
*/
this.writeOutput(results, outputDir);
this.writeOutput(results, outputDir, format);
console.log('Results processing completed. Check console for output.');
})
.catch((err) => {
console.error(`Error processing dataset: ${err}`);
});
}

private writeOutput(results: RunReturn[], outputDir: string) {
private writeOutput(results: RunReturn[], outputDir: string, format: DatasetFormat) {
const config: Config = {
metadata: { outputTimestamp: Date.now() },
outputColumnName: 'output',
};

// Construct an output directory {outputDir}/{datasetName}/
const outputDirPath = path.resolve(outputDir);
fs.mkdirSync(outputDirPath, { recursive: true });

const datasetPath = path.join(outputDirPath, 'dataset.json');
const configPath = path.join(outputDirPath, 'config.json');

fs.writeFileSync(datasetPath, JSON.stringify(results, null, 4), 'utf8');
fs.writeFileSync(configPath, JSON.stringify(config, null, 4), 'utf8');

console.info(`Output written to ${datasetPath}`);
console.info(`Config written to ${configPath}`);
writeDataset(outputDir, results, format, config);
}
}

Expand Down
109 changes: 109 additions & 0 deletions tests/cli-dataset.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';

import {

Check failure on line 5 in tests/cli-dataset.test.ts

View workflow job for this annotation

GitHub Actions / lint

Replace `⏎··Config,⏎··loadDataset,⏎··parseCsv,⏎··serializeCsv,⏎··writeDataset,⏎` with `·Config,·loadDataset,·parseCsv,·serializeCsv,·writeDataset·`
Config,
loadDataset,
parseCsv,
serializeCsv,
writeDataset,
} from '../src/lib/core/cli';

describe('CLI dataset helpers', () => {
const makeTempDir = (): string => fs.mkdtempSync(path.join(os.tmpdir(), 'ol-cli-'));

Check failure on line 15 in tests/cli-dataset.test.ts

View workflow job for this annotation

GitHub Actions / lint

Delete `····`
it('loads JSON datasets', () => {
const dir = makeTempDir();
const filePath = path.join(dir, 'dataset.json');
const payload = [{ a: 1 }, { a: 2 }];
fs.writeFileSync(filePath, JSON.stringify(payload), 'utf8');

const { data, format } = loadDataset(filePath);

expect(format).toBe('json');
expect(data).toEqual(payload);
});

it('loads CSV datasets', () => {
const dir = makeTempDir();
const filePath = path.join(dir, 'dataset.csv');
const payload = 'id,name\n1,Alice\n2,Bob\n';
fs.writeFileSync(filePath, payload, 'utf8');

const { data, format } = loadDataset(filePath);

expect(format).toBe('csv');
expect(data).toEqual([
{ id: '1', name: 'Alice' },
{ id: '2', name: 'Bob' },
]);
});

it('writes JSON outputs and config', () => {
const dir = makeTempDir();
const config: Config = {
metadata: { outputTimestamp: 123 },
outputColumnName: 'output',
};
const rows = [
{ input: 'a', output: 'x', otherFields: {}, latency: 10 },
{ input: 'b', output: 'y', otherFields: {}, latency: 20 },
];

writeDataset(dir, rows, 'json', config);

const datasetPath = path.join(dir, 'dataset.json');
const configPath = path.join(dir, 'config.json');
expect(fs.existsSync(datasetPath)).toBe(true);
expect(fs.existsSync(configPath)).toBe(true);

const writtenDataset = JSON.parse(fs.readFileSync(datasetPath, 'utf8'));
const writtenConfig = JSON.parse(fs.readFileSync(configPath, 'utf8'));

expect(writtenDataset).toEqual(rows);
expect(writtenConfig).toEqual(config);
});

it('writes CSV outputs preserving headers and quoting values', () => {
const dir = makeTempDir();
const config: Config = {
metadata: { outputTimestamp: 456 },
outputColumnName: 'output',
};
const rows = [
{ prompt: 'hello', output: 'hi', latency: 1 },
{ prompt: 'quote, "comma"', output: 'ok', latency: 2 },
];

writeDataset(dir, rows, 'csv', config);

const datasetPath = path.join(dir, 'dataset.csv');
const configPath = path.join(dir, 'config.json');
expect(fs.existsSync(datasetPath)).toBe(true);
expect(fs.existsSync(configPath)).toBe(true);

const csvContent = fs.readFileSync(datasetPath, 'utf8');
const parsed = parseCsv(csvContent);

expect(parsed).toEqual([
{ prompt: 'hello', output: 'hi', latency: '1' },
{ prompt: 'quote, "comma"', output: 'ok', latency: '2' },
]);
});

it('serializes and parses CSV round-trip', () => {
const rows = [
{ a: '1', b: 'text' },
{ a: '2', b: 'text, with comma' },
];
const serialized = serializeCsv(rows);
const parsed = parseCsv(serialized);

expect(parsed).toEqual([
{ a: '1', b: 'text' },
{ a: '2', b: 'text, with comma' },
]);
});
});

Check failure on line 109 in tests/cli-dataset.test.ts

View workflow job for this annotation

GitHub Actions / lint

Delete `⏎`
Loading