Parallel Processing
Scale workloads across multiple sandboxes for massive parallelization.
The Opportunity
Many AI tasks are embarrassingly parallel:
- Processing multiple documents
- Running experiments with different parameters
- Evaluating code across test cases
- Batch transformations
Akira lets you spin up hundreds of sandboxes to process work in parallel.
Basic Pattern
import SandboxSDK from '@akiralabs/sandbox-sdk';
// Single shared client used by every example below; reads the API key from the environment.
const client = new SandboxSDK({ apiKey: process.env.AKIRA_API_KEY });
/**
 * Fan out one sandbox per item and run `process.py` on each in parallel.
 *
 * Each sandbox is deleted in a `finally` block, so cleanup happens even
 * when execution fails. Rejects as a whole if any single job rejects
 * (standard Promise.all semantics).
 */
async function processInParallel(items: string[]) {
  const runOne = async (item: string) => {
    // One fresh sandbox per work item.
    const sandbox = await client.sandboxes.create({
      image: 'akiralabs/akira-default-sandbox',
    });
    try {
      const result = await client.sandboxes.execute(sandbox.id, {
        command: `python process.py "${item}"`,
        timeout: 60,
      });
      return { item, result: result.stdout };
    } finally {
      // Always release the sandbox, even if execute() threw.
      await client.sandboxes.delete(sandbox.id);
    }
  };
  return Promise.all(items.map(runOne));
}
// Process 100 items in parallel
const results = await processInParallel(items);
Worker Pool Pattern
For more control, maintain a pool of reusable sandboxes:
/**
 * Fixed-size pool of reusable sandboxes.
 *
 * Callers `acquire()` a sandbox id, run work against it, then `release()`
 * it so another caller can reuse it. `destroy()` deletes every sandbox
 * the pool is tracking.
 */
class SandboxPool {
  // Ids ready to be handed out.
  private available: string[] = [];
  // Ids currently checked out by callers.
  private inUse: Set<string> = new Set();

  constructor(
    private client: SandboxSDK,
    private poolSize: number,
    private image: string
  ) {}

  /** Create all sandboxes up front, in parallel. */
  async initialize() {
    const promises = Array(this.poolSize).fill(null).map(async () => {
      const sandbox = await this.client.sandboxes.create({
        image: this.image,
      });
      this.available.push(sandbox.id);
    });
    await Promise.all(promises);
  }

  /**
   * Check out a sandbox id, polling until one is free.
   * Safe under concurrent callers: there is no await between the
   * emptiness check and the pop, so two acquirers cannot race.
   */
  async acquire(): Promise<string> {
    while (this.available.length === 0) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    const id = this.available.pop()!;
    this.inUse.add(id);
    return id;
  }

  /**
   * Return a sandbox to the pool. Ignores ids the pool did not hand
   * out: without this guard, a double release would push a duplicate
   * id into `available` and two callers could acquire the same sandbox.
   */
  release(id: string) {
    if (!this.inUse.delete(id)) {
      return;
    }
    this.available.push(id);
  }

  /** Delete every pooled sandbox (idle and in-use) and reset pool state. */
  async destroy() {
    const all = [...this.available, ...this.inUse];
    this.available = [];
    this.inUse.clear();
    await Promise.all(all.map(id => this.client.sandboxes.delete(id)));
  }
}
// Usage
// A pool of 10 sandboxes bounds concurrency: at most 10 items execute at once,
// even though all items are mapped eagerly.
const pool = new SandboxPool(client, 10, 'akiralabs/akira-default-sandbox');
await pool.initialize();
const results = await Promise.all(items.map(async (item) => {
const sandboxId = await pool.acquire();
try {
return await client.sandboxes.execute(sandboxId, {
command: `python process.py "${item}"`,
});
} finally {
// Hand the sandbox back even if execution failed, so waiters can proceed.
pool.release(sandboxId);
}
}));
await pool.destroy();
Example: Batch Document Processing
Process a batch of documents with AI:
/**
 * Extract text from a batch of PDF documents using a bounded pool of
 * 20 sandboxes. The pool is always destroyed, even when a job fails.
 */
async function processDocuments(documents: Buffer[]) {
  const pool = new SandboxPool(client, 20, 'akiralabs/akira-default-sandbox');
  await pool.initialize();
  try {
    const extractOne = async (doc: Buffer, i: number) => {
      const sandboxId = await pool.acquire();
      try {
        // Stage the document inside the sandbox.
        await client.sandboxes.upload(sandboxId, {
          path: `/tmp/doc_${i}.pdf`,
          content: doc,
        });
        // Run the Python extraction script against it.
        const result = await client.sandboxes.execute(sandboxId, {
          command: `python extract_text.py /tmp/doc_${i}.pdf`,
          timeout: 120,
        });
        return {
          index: i,
          text: result.stdout,
          success: result.exit_code === 0,
        };
      } finally {
        // Return the sandbox for the next document.
        pool.release(sandboxId);
      }
    };
    return await Promise.all(documents.map(extractOne));
  } finally {
    await pool.destroy();
  }
}
Example: Hyperparameter Search
Run ML experiments in parallel using snapshots to share training data:
/** One training configuration to evaluate during the search. */
interface Hyperparameters {
/** Optimizer learning rate, passed to train.py as --lr. */
learningRate: number;
/** Mini-batch size, passed as --batch-size. */
batchSize: number;
/** Number of training epochs, passed as --epochs. */
epochs: number;
}
/**
 * Grid-search training configurations in parallel.
 *
 * Prepares training data once in a base sandbox, snapshots it, and
 * restores one sandbox per config from the snapshot so every experiment
 * starts from an identical prepared environment.
 *
 * Returns the { config, metrics } pair with the highest accuracy.
 * Throws if `configs` is empty.
 */
async function hyperparameterSearch(configs: Hyperparameters[], trainingData: Buffer) {
  if (configs.length === 0) {
    throw new Error('hyperparameterSearch: configs must not be empty');
  }
  // Create a base sandbox and set up training data once.
  const baseSandbox = await client.sandboxes.create({
    image: 'akiralabs/akira-default-sandbox',
    // 'cpus' matches the resource key used elsewhere in this file ('cpu' was a typo).
    resources: { cpus: 4, memory: 8192 },
  });
  let snapshot;
  try {
    // Upload training data to the base sandbox.
    await client.sandboxes.upload(baseSandbox.id, {
      path: '/data/training.tar.gz',
      content: trainingData,
    });
    // Extract and prepare data.
    await client.sandboxes.execute(baseSandbox.id, {
      command: 'tar -xzf /data/training.tar.gz -C /data && python prepare_data.py',
      timeout: 300,
    });
    // Snapshot the prepared environment so each experiment can restore it.
    snapshot = await client.sandboxes.snapshot(baseSandbox.id);
  } finally {
    // The base sandbox is no longer needed once the snapshot exists —
    // and must not leak if setup failed.
    await client.sandboxes.delete(baseSandbox.id);
  }
  try {
    // Run experiments in parallel, each restored from the snapshot.
    const experiments = configs.map(async (config) => {
      const sandbox = await client.snapshots.restore(snapshot.id, {
        name: `experiment-${config.learningRate}-${config.batchSize}`,
        resources: { cpus: 4, memory: 8192 },
      });
      try {
        const result = await client.sandboxes.execute(sandbox.id, {
          command: `python train.py \
--lr ${config.learningRate} \
--batch-size ${config.batchSize} \
--epochs ${config.epochs}`,
          timeout: 3600,
        });
        // train.py is expected to print its metrics as JSON on stdout.
        const metrics = JSON.parse(result.stdout);
        return { config, metrics };
      } finally {
        await client.sandboxes.delete(sandbox.id);
      }
    });
    const results = await Promise.all(experiments);
    // Highest accuracy wins.
    return results.reduce((a, b) =>
      a.metrics.accuracy > b.metrics.accuracy ? a : b
    );
  } finally {
    // Clean up the snapshot even if an experiment failed.
    await client.snapshots.delete(snapshot.id);
  }
}
Example: Clone-Based Parallel Workers
Use clone for the fastest way to spin up parallel workers from a configured base:
/**
 * Clone-based fan-out: configure one base sandbox, clone it per task,
 * run all tasks in parallel, and report per-task success/failure.
 *
 * The base sandbox is always deleted, even when the dependency install
 * or cloning fails (the original version leaked it in those cases).
 */
async function runParallelTasks(tasks: string[]) {
  // Set up a base sandbox with all dependencies.
  const baseSandbox = await client.sandboxes.create({
    image: 'akiralabs/akira-default-sandbox',
    resources: { cpus: 2, memory: 4096 },
  });
  try {
    // Install dependencies once; every clone inherits them.
    await client.sandboxes.execute(baseSandbox.id, {
      command: 'pip install torch numpy pandas transformers',
      timeout: 300,
    });
    // Clone the base for each task (cloning is faster than creating fresh sandboxes).
    const workers = await Promise.all(
      tasks.map(async (task, i) => {
        const result = await client.sandboxes.clone(baseSandbox.id, {
          name: `worker-${i}`,
          wait_for_ready: true,
        });
        return { sandbox: result.sandbox, task };
      })
    );
    // Execute tasks in parallel on all clones; each clone cleans itself up.
    return await Promise.all(
      workers.map(async ({ sandbox, task }) => {
        try {
          const result = await client.sandboxes.execute(sandbox.id, {
            command: `python process.py "${task}"`,
            timeout: 600,
          });
          return { task, success: true, output: result.stdout };
        } catch (error) {
          // Record the failure instead of rejecting the whole batch.
          return { task, success: false, error: String(error) };
        } finally {
          await client.sandboxes.delete(sandbox.id);
        }
      })
    );
  } finally {
    // Clean up the base sandbox on every path.
    await client.sandboxes.delete(baseSandbox.id);
  }
}
This pattern is ideal when:
- Each worker needs the same dependencies
- Setup time is significant (installing packages, downloading models)
- You want identical starting state for each worker
Example: Code Testing at Scale
Test code across multiple test cases:
/**
 * Run a candidate solution against every test case, one sandbox per case.
 * A case passes when trimmed stdout matches the trimmed expected output.
 */
async function runTestSuite(code: string, testCases: TestCase[]) {
  const runCase = async (test: TestCase, i: number) => {
    const sandbox = await client.sandboxes.create({
      image: 'akiralabs/akira-default-sandbox',
    });
    try {
      // Stage the solution and this case's input file.
      await client.sandboxes.upload(sandbox.id, {
        path: '/app/solution.py',
        content: Buffer.from(code),
      });
      await client.sandboxes.upload(sandbox.id, {
        path: '/app/input.txt',
        content: Buffer.from(test.input),
      });
      // Feed the input file to the solution on stdin.
      const result = await client.sandboxes.execute(sandbox.id, {
        command: 'python /app/solution.py < /app/input.txt',
        timeout: 10,
      });
      return {
        testCase: i,
        passed: result.stdout.trim() === test.expected.trim(),
        output: result.stdout,
        expected: test.expected,
      };
    } finally {
      await client.sandboxes.delete(sandbox.id);
    }
  };
  const results = await Promise.all(testCases.map(runCase));
  return {
    total: testCases.length,
    passed: results.filter(r => r.passed).length,
    results,
  };
}
Scaling Considerations
Concurrency Limits
⚠️
API Rate Limits
Check your plan's concurrency limits. Use a pool pattern to stay within bounds.
Resource Usage
Each sandbox consumes resources:
- Balance parallelism vs. resource costs
- Use appropriate instance sizes
- Clean up promptly
Error Handling
Parallel jobs can fail individually:
// allSettled never rejects: collect every job's outcome instead of failing fast.
const results = await Promise.allSettled(jobs);
// Type-predicate filters narrow each settled result before reading .value/.reason.
const successful = results
.filter((r): r is PromiseFulfilledResult<any> => r.status === 'fulfilled')
.map(r => r.value);
const failed = results
.filter((r): r is PromiseRejectedResult => r.status === 'rejected')
.map(r => r.reason);
Best Practices
- Use cloning for configured environments - Clone a pre-configured sandbox instead of recreating from scratch
- Use connection pooling - Reuse sandboxes when possible
- Handle failures gracefully - Use Promise.allSettled for resilience
- Monitor resource usage - Track sandbox count and costs
- Set appropriate timeouts - Prevent stuck jobs
- Clean up on failure - Always delete sandboxes, even on error
Next Steps
- Learn about autonomous agents
- Explore RL environments for training
- Review sandbox configuration