How to Build a Multi-Step AI Agent in Node.js (Without Frameworks)
Most "agent" tutorials reach for LangChain or AutoGPT immediately. Here's why you shouldn't — and how to build a robust, production-ready agent from scratch in 200 lines of Node.js.
Frameworks like LangChain, AutoGen, and CrewAI have exploded in popularity. They promise to make building AI agents easy. And they do — right up until the moment your agent does something unexpected, and you spend three days reading 10,000 lines of framework source code to understand why.
This article takes a different approach. We'll build a fully functional multi-step AI agent — capable of using tools, maintaining memory, and completing complex tasks — from scratch. No frameworks. No magic. Just Claude's API and clean Node.js.
By the end, you'll understand exactly how agents work, and you'll have a solid base you can extend without fighting someone else's abstractions.
What Makes an AI Agent an Agent?
A simple LLM call is: user sends message → LLM responds → done.
An agent is a loop:
- LLM receives task + tools + history
- LLM either responds with an answer OR calls a tool
- If tool call: execute the tool, add result to history
- Return to step 1 with the updated history
- The loop ends when the LLM produces a final answer instead of a tool call
That's it. The "intelligence" is in the LLM deciding which tools to call and in what order.
Architecture Overview
Our agent will have four components:
┌─────────────────────────────────────────────────────┐
│ Agent Loop │
│ │
│ ┌──────────┐ ┌──────────┐ ┌──────────────┐ │
│ │ Memory │───▶│ LLM │───▶│ Tool Router │ │
│ │ Manager │ │ (Claude) │ │ │ │
│ └──────────┘ └──────────┘ └──────┬───────┘ │
│ ▲ │ │
│ │ ┌──────────────────────┘ │
│ │ ▼ │
│ │ ┌─────────────┐ │
│ └───│ Tool Store │ │
│ │ (Executor) │ │
│ └─────────────┘ │
└─────────────────────────────────────────────────────┘
Step 1: Define the Tool Interface
Tools are functions the agent can call. We'll define a clean interface:
// agent/types.js
/**
* @typedef {Object} Tool
* @property {string} name - Unique identifier for the tool
* @property {string} description - What this tool does (the LLM reads this)
* @property {Object} inputSchema - JSON Schema for the tool's parameters
* @property {Function} execute - The actual implementation
*/
/**
* @typedef {Object} ToolResult
* @property {boolean} success - Whether the tool executed successfully
* @property {any} output - The tool's output (serialized to string for context)
* @property {string} [error] - Error message if success is false
*/
/**
* @typedef {Object} Message
* @property {'user'|'assistant'|'tool_result'} role
* @property {string|Array} content
*/
Step 2: Build the Tool Registry
// agent/tools.js
import { execSync } from 'child_process';
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { resolve } from 'path';
class ToolRegistry {
  constructor() {
    // Map of tool name -> tool definition { name, description, inputSchema, execute }
    this.tools = new Map();
  }

  /**
   * Register a tool definition.
   * @param {Tool} tool
   * @returns {ToolRegistry} this, for chaining
   * @throws {Error} if the name is already taken or required fields are missing
   */
  register(tool) {
    if (this.tools.has(tool.name)) {
      throw new Error(`Tool '${tool.name}' is already registered`);
    }
    // Validate tool has required fields
    if (!tool.name || !tool.description || !tool.inputSchema || !tool.execute) {
      throw new Error(`Tool '${tool.name}' is missing required fields`);
    }
    this.tools.set(tool.name, tool);
    return this; // Chainable
  }

  /** Look up a tool by name; undefined if not registered. */
  get(name) {
    return this.tools.get(name);
  }

  /**
   * Format registered tools for Claude's API `tools` parameter.
   * FIX: `description` was the literal string "tool.description," instead of
   * the tool's actual description — the LLM would have seen garbage for every tool.
   */
  toAnthropicFormat() {
    return Array.from(this.tools.values()).map((tool) => ({
      name: tool.name,
      description: tool.description,
      input_schema: tool.inputSchema,
    }));
  }

  /**
   * Execute a tool by name. Never throws: failures are reported as
   * { success: false, error } so the agent loop can relay them to the LLM.
   * @returns {Promise<ToolResult>}
   */
  async execute(name, input) {
    const tool = this.tools.get(name);
    if (!tool) {
      return { success: false, error: `Unknown tool: ${name}` };
    }
    try {
      const output = await tool.execute(input);
      return {
        success: true,
        // Non-string outputs are serialized so they can go into LLM context.
        output: typeof output === 'string' ? output : JSON.stringify(output, null, 2),
      };
    } catch (error) {
      return {
        success: false,
        error: error.message,
      };
    }
  }
}
// Create and export the default registry with built-in tools
export const registry = new ToolRegistry();

// Tool: Execute shell commands (sandboxed)
// FIX: the description values were string literals containing their own quotes
// and commas (e.g. "'Execute...',"), so the LLM saw mangled text.
registry.register({
  name: 'shell',
  description: 'Execute a shell command and return its output. Use for file operations, git commands, npm, etc. Commands run in the current directory.',
  inputSchema: {
    type: 'object',
    properties: {
      command: {
        type: 'string',
        description: 'The shell command to execute',
      },
      cwd: {
        type: 'string',
        description: 'Working directory for the command (optional)',
      },
    },
    required: ['command'],
  },
  execute: async ({ command, cwd }) => {
    // Basic safety: block obviously dangerous commands.
    // NOTE(review): a denylist is not a real sandbox — for untrusted tasks,
    // run the agent inside a container or VM.
    const BLOCKED = ['rm -rf /', 'mkfs', 'dd if=', ':(){ :|:& };:'];
    if (BLOCKED.some((b) => command.includes(b))) {
      throw new Error(`Command blocked for safety: ${command}`);
    }
    const output = execSync(command, {
      cwd: cwd || process.cwd(),
      timeout: 30_000,        // kill runaway commands after 30s
      maxBuffer: 1024 * 1024, // 1MB
      encoding: 'utf8',
    });
    return output.trim() || '(no output)';
  },
});
// Tool: Read a file
registry.register({
  name: 'read_file',
  description: 'Read the contents of a file. Returns the file content as a string.',
  inputSchema: {
    type: 'object',
    properties: {
      path: {
        type: 'string',
        description: 'Absolute or relative path to the file',
      },
      maxLines: {
        type: 'number',
        description: 'Maximum number of lines to return (default: 200)',
      },
    },
    required: ['path'],
  },
  execute: async ({ path, maxLines = 200 }) => {
    const fullPath = resolve(path);
    if (!existsSync(fullPath)) {
      throw new Error(`File not found: ${fullPath}`);
    }
    const content = readFileSync(fullPath, 'utf8');
    // FIX: the separators were raw line breaks inside single-quoted string
    // literals (a syntax error in JS); they must be the escape sequence '\n'.
    const lines = content.split('\n');
    if (lines.length > maxLines) {
      // Truncate long files so a single read cannot blow the context window.
      return lines.slice(0, maxLines).join('\n') + `\n... (${lines.length - maxLines} more lines)`;
    }
    return content;
  },
});
// Tool: Write a file
registry.register({
  name: 'write_file',
  description: 'Write content to a file, creating it if it does not exist.',
  inputSchema: {
    type: 'object',
    properties: {
      path: {
        type: 'string',
        description: 'Path to write to',
      },
      content: {
        type: 'string',
        description: 'Content to write',
      },
    },
    required: ['path', 'content'],
  },
  execute: async ({ path, content }) => {
    const fullPath = resolve(path);
    writeFileSync(fullPath, content, 'utf8');
    // FIX: `content.length` counts UTF-16 code units, not bytes; report the
    // actual number of bytes written for the chosen 'utf8' encoding.
    return `Written ${Buffer.byteLength(content, 'utf8')} bytes to ${fullPath}`;
  },
});
// Tool: HTTP fetch
registry.register({
  name: 'http_get',
  description: 'Make an HTTP GET request and return the response body.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'URL to fetch',
      },
      headers: {
        type: 'object',
        description: 'Optional request headers',
      },
    },
    required: ['url'],
  },
  execute: async ({ url, headers = {} }) => {
    const response = await fetch(url, {
      // Caller-supplied headers win over the default User-Agent.
      headers: { 'User-Agent': 'AI-Agent/1.0', ...headers },
      signal: AbortSignal.timeout(15_000), // abort slow requests after 15s
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    const text = await response.text();
    // Truncate large responses so one fetch cannot flood the context window.
    return text.length > 5000 ? text.slice(0, 5000) + '... (truncated)' : text;
  },
});
Step 3: Memory Manager
Context management is the most underrated part of agent design. The naive approach — appending every message to one array — hits context limits fast. We need smarter memory:
// agent/memory.js
// FIX: save()/load() below use fs functions that this module never imported.
import { readFileSync, writeFileSync, existsSync } from 'fs';

export class MemoryManager {
  /**
   * @param {Object} [options]
   * @param {number} [options.maxMessages=50] - Soft cap on stored messages (informational for now)
   * @param {number} [options.maxContextTokens=80_000] - Token budget hint (informational for now)
   * @param {number} [options.summaryThreshold=30] - Message count above which the middle is summarized
   */
  constructor({
    maxMessages = 50,
    maxContextTokens = 80_000,
    summaryThreshold = 30,
  } = {}) {
    this.messages = [];
    this.maxMessages = maxMessages;
    this.maxContextTokens = maxContextTokens;
    this.summaryThreshold = summaryThreshold;
    this.sessionSummary = null;
  }

  /** Append a message, stamping it with the current time. */
  add(message) {
    this.messages.push({
      ...message,
      timestamp: Date.now(),
    });
  }

  /**
   * Returns messages formatted for Claude's API, with intelligent truncation:
   * at or below `summaryThreshold` everything is passed through; above it we
   * keep the first 5 (task context) and last 20 (recent activity) and replace
   * the middle with a textual summary plus an acknowledgement turn.
   */
  getContextWindow() {
    if (this.messages.length <= this.summaryThreshold) {
      return this.messages.map(({ role, content }) => ({ role, content }));
    }
    // Keep: first 5 messages (task context), last 20 messages (recent activity)
    const head = this.messages.slice(0, 5);
    const tail = this.messages.slice(-20);
    // Build summary of the middle
    const middle = this.messages.slice(5, -20);
    const summarized = this.summarizeMessages(middle);
    return [
      ...head.map(({ role, content }) => ({ role, content })),
      {
        role: 'user',
        content: `[Context summary: ${summarized}]`,
      },
      {
        role: 'assistant',
        content: 'Understood, I have the context from earlier in our session.',
      },
      ...tail.map(({ role, content }) => ({ role, content })),
    ];
  }

  /** Build a one-line summary of a message slice from its tool-use activity. */
  summarizeMessages(messages) {
    // Extract key actions from tool calls.
    // FIX: removed the unused `toolResults` local the original computed.
    const toolCalls = messages.filter(
      (m) => Array.isArray(m.content) && m.content.some((c) => c.type === 'tool_use')
    );
    const actions = toolCalls.map((m) => {
      const toolUse = m.content.find((c) => c.type === 'tool_use');
      return `Called ${toolUse.name}(${JSON.stringify(toolUse.input).slice(0, 100)})`;
    });
    return `${messages.length} messages, ${toolCalls.length} tool calls. Recent actions: ${actions.slice(-5).join(', ')}`;
  }

  /**
   * Persist memory to disk for cross-session continuity.
   */
  save(path) {
    const data = {
      messages: this.messages,
      sessionSummary: this.sessionSummary,
      savedAt: new Date().toISOString(),
    };
    writeFileSync(path, JSON.stringify(data, null, 2));
  }

  /**
   * Load memory from disk; silently a no-op if the file does not exist.
   */
  load(path) {
    if (!existsSync(path)) return;
    const data = JSON.parse(readFileSync(path, 'utf8'));
    this.messages = data.messages || [];
    this.sessionSummary = data.sessionSummary;
    console.log(`Loaded ${this.messages.length} messages from memory`);
  }

  get messageCount() {
    return this.messages.length;
  }
}
Step 4: The Agent Loop
This is the heart of the system — the loop that drives the agent until it reaches a final answer:
// agent/agent.js
import Anthropic from '@anthropic-ai/sdk';
import { MemoryManager } from './memory.js';
import { registry } from './tools.js';
// Default system prompt: steers the model through the tool-use loop
// (plan -> act via tools -> verify -> final answer). The template-literal
// text below is sent verbatim to the API; override per-instance via the
// Agent constructor's `systemPrompt` option.
const DEFAULT_SYSTEM_PROMPT = `You are a helpful AI assistant with access to tools.
When given a task:
1. Break it into clear steps
2. Use available tools to gather information or take actions
3. Think carefully about each tool call's output before proceeding
4. If a tool fails, try an alternative approach
5. Provide a clear, concise final answer when the task is complete
Guidelines:
- Always verify your work (re-read files you've written, check commands succeeded)
- Be explicit about what you're doing and why
- If you're uncertain about something, use a tool to check rather than guessing
- Stop when the task is complete — don't keep calling tools unnecessarily`;
export class Agent {
  /**
   * @param {Object} [config]
   * @param {string} [config.model] - Anthropic model id
   * @param {string} [config.systemPrompt] - System prompt (defaults to DEFAULT_SYSTEM_PROMPT)
   * @param {number} [config.maxIterations=20] - Hard cap on agent-loop turns
   * @param {ToolRegistry} [config.tools] - Tool registry (defaults to the built-in one)
   * @param {boolean} [config.verbose] - Log loop progress to stdout
   */
  constructor({
    model = 'claude-opus-4-5',
    systemPrompt = DEFAULT_SYSTEM_PROMPT,
    maxIterations = 20,
    tools = registry,
    verbose = false,
  } = {}) {
    this.model = model;
    this.systemPrompt = systemPrompt;
    this.maxIterations = maxIterations;
    this.tools = tools;
    this.verbose = verbose;
    this.memory = new MemoryManager();
    // NOTE(review): reads the API key from the environment (ANTHROPIC_API_KEY) — confirm.
    this.client = new Anthropic();
  }

  /** Log only when verbose mode is on. */
  log(...args) {
    if (this.verbose) console.log('[Agent]', ...args);
  }

  /**
   * Run the agent on a task.
   * @param {string} task - The user's task
   * @returns {Promise<string>} - The agent's final response
   * @throws {Error} if the loop exceeds maxIterations
   */
  async run(task) {
    this.log(`Starting task: ${task}`);
    // Add the initial task to memory
    this.memory.add({
      role: 'user',
      content: task,
    });

    let iterations = 0;
    while (iterations < this.maxIterations) {
      iterations++;
      this.log(`Iteration ${iterations}/${this.maxIterations}`);

      // Call the LLM
      const response = await this.callLLM();

      // Extract text and tool use from response
      const textContent = response.content.filter((c) => c.type === 'text');
      const toolUseContent = response.content.filter((c) => c.type === 'tool_use');

      // Add assistant response to memory
      this.memory.add({
        role: 'assistant',
        content: response.content,
      });

      // If stop reason is 'end_turn' or no tool calls, we're done.
      if (response.stop_reason === 'end_turn' || toolUseContent.length === 0) {
        // FIX: the join separator was a raw line break inside a string
        // literal (a syntax error); it must be the escape sequence '\n'.
        const finalText = textContent.map((c) => c.text).join('\n').trim();
        this.log(`Task complete after ${iterations} iterations`);
        return finalText;
      }

      // Process tool calls and feed results back as the next user turn.
      const toolResults = await this.executeTools(toolUseContent);
      this.memory.add({
        role: 'user',
        content: toolResults,
      });
    }
    throw new Error(`Agent exceeded maximum iterations (${this.maxIterations})`);
  }

  /**
   * Call Claude with current memory as context.
   */
  async callLLM() {
    const messages = this.memory.getContextWindow();
    const response = await this.client.messages.create({
      model: this.model,
      max_tokens: 4096,
      system: this.systemPrompt,
      tools: this.tools.toAnthropicFormat(),
      messages,
    });
    this.log(`LLM response: stop_reason=${response.stop_reason}, tool_calls=${
      response.content.filter((c) => c.type === 'tool_use').length
    }`);
    return response;
  }

  /**
   * Execute all tool calls from a response, in parallel.
   * Returns Claude-formatted tool_result blocks (errors flagged via is_error).
   */
  async executeTools(toolUseBlocks) {
    return Promise.all(
      toolUseBlocks.map(async (toolUse) => {
        this.log(`Calling tool: ${toolUse.name}`, toolUse.input);
        const result = await this.tools.execute(toolUse.name, toolUse.input);
        this.log(`Tool result (${toolUse.name}): ${result.success ? 'success' : 'error'}`);
        return {
          type: 'tool_result',
          tool_use_id: toolUse.id,
          content: result.success
            ? result.output
            : `ERROR: ${result.error}`,
          is_error: !result.success,
        };
      })
    );
  }

  /**
   * Run agent in streaming mode (shows progress in real-time).
   * @param {string} task
   * @param {(text: string) => void} [onChunk] - Called with each text delta
   */
  async runStreaming(task, onChunk) {
    // Similar to run() but uses the streaming API.
    this.memory.add({ role: 'user', content: task });

    let iterations = 0;
    while (iterations < this.maxIterations) {
      iterations++;
      const stream = this.client.messages.stream({
        model: this.model,
        max_tokens: 4096,
        system: this.systemPrompt,
        tools: this.tools.toAnthropicFormat(),
        messages: this.memory.getContextWindow(),
      });

      let fullResponse = '';
      // FIX: removed the unused `toolUseBlocks` local the original declared.
      for await (const event of stream) {
        if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
          fullResponse += event.delta.text;
          onChunk?.(event.delta.text);
        }
      }

      const finalMessage = await stream.finalMessage();
      this.memory.add({
        role: 'assistant',
        content: finalMessage.content,
      });

      if (finalMessage.stop_reason === 'end_turn') {
        return fullResponse;
      }

      const toolUses = finalMessage.content.filter((c) => c.type === 'tool_use');
      // FIX: the original only *conditionally* executed tools but always
      // looped again — a response with no tool calls and a stop_reason other
      // than 'end_turn' (e.g. 'max_tokens') spun until maxIterations.
      if (toolUses.length === 0) {
        return fullResponse;
      }
      const results = await this.executeTools(toolUses);
      this.memory.add({ role: 'user', content: results });
    }
    throw new Error('Max iterations exceeded');
  }
}
Step 5: Putting It Together
Now let's use the agent to accomplish a real task:
// examples/code-analyzer.js
import { Agent } from '../agent/agent.js';
import { registry } from '../agent/tools.js';
// Add a custom tool for this use case
registry.register({
  name: 'count_lines',
  description: 'Count lines of code in a directory, broken down by file type.',
  inputSchema: {
    type: 'object',
    properties: {
      directory: { type: 'string', description: 'Directory path to analyze' },
    },
    required: ['directory'],
  },
  execute: async ({ directory }) => {
    const { execSync } = await import('child_process');
    // FIX: single-quote the path so paths with spaces or shell metacharacters
    // (the LLM chooses this value!) cannot break or inject into the command.
    const safeDir = `'${directory.replaceAll("'", "'\\''")}'`;
    const output = execSync(
      `find ${safeDir} -type f \\( -name "*.js" -o -name "*.ts" -o -name "*.py" \\) | xargs wc -l 2>/dev/null | sort -n`,
      { encoding: 'utf8' }
    );
    return output;
  },
});
const agent = new Agent({
  verbose: true,
  maxIterations: 15,
});

// Run a multi-step analysis task
const result = await agent.run(`
Analyze the Node.js project in the current directory and provide:
1. The total lines of code by file type
2. The 5 largest files
3. Any obvious code quality issues you can spot
4. A brief summary of what the project does based on its structure
Use the available tools to explore the codebase. Start with package.json.
`);

// FIX: the banner was a single-quoted string containing raw line breaks
// (a syntax error); use explicit \n escapes instead.
console.log('\n=== ANALYSIS RESULT ===\n');
console.log(result);
Step 6: Advanced Memory Patterns
Semantic Memory (Long-term Facts)
For agents that run over multiple sessions, you need persistent memory:
// agent/semantic-memory.js
import { readFileSync, writeFileSync, existsSync } from 'fs';
export class SemanticMemory {
  /** @param {string} [storagePath] - JSON file used to persist facts across sessions */
  constructor(storagePath = './agent-memory.json') {
    this.storagePath = storagePath;
    this.facts = [];
    this.load();
  }

  /** Load facts from disk; a missing storage file leaves `facts` empty. */
  load() {
    if (existsSync(this.storagePath)) {
      const data = JSON.parse(readFileSync(this.storagePath, 'utf8'));
      this.facts = data.facts || [];
    }
  }

  /** Persist the current fact list to disk. */
  save() {
    writeFileSync(this.storagePath, JSON.stringify({ facts: this.facts }, null, 2));
  }

  /** Record a fact with a category and timestamp, persisting immediately. */
  remember(fact, category = 'general') {
    this.facts.push({
      fact,
      category,
      timestamp: new Date().toISOString(),
    });
    this.save();
  }

  /**
   * Return facts as newline-separated "[category] fact" lines,
   * optionally filtered by category.
   */
  recall(category = null) {
    const relevant = category
      ? this.facts.filter((f) => f.category === category)
      : this.facts;
    // FIX: the join separator was a raw line break inside a string literal
    // (a syntax error); it must be the escape sequence '\n'.
    return relevant.map((f) => `[${f.category}] ${f.fact}`).join('\n');
  }

  // Build a memory prefix to inject at the start of system prompt
  buildMemoryContext() {
    if (this.facts.length === 0) return '';
    return `## Your Memory
You remember the following from previous sessions:
${this.recall()}
`;
  }
}
// Usage: inject memory into agent
const memory = new SemanticMemory('./my-agent-memory.json');
const agent = new Agent({
  systemPrompt: `${memory.buildMemoryContext()}
You are a helpful assistant...`,
});

// Register a "remember" tool so the agent can save things.
// FIX: the description values were string literals that swallowed their own
// quotes and trailing `},` (a syntax error in two cases).
registry.register({
  name: 'remember',
  description: 'Save an important fact to long-term memory for future sessions.',
  inputSchema: {
    type: 'object',
    properties: {
      fact: { type: 'string', description: 'The fact to remember' },
      category: { type: 'string', description: 'Category: user_preference, task_result, technical_fact' },
    },
    required: ['fact'],
  },
  execute: async ({ fact, category }) => {
    // `category` may be omitted; SemanticMemory.remember defaults it to 'general'.
    memory.remember(fact, category);
    return `Remembered: "${fact}"`;
  },
});
Working Memory (Task-specific State)
// Sometimes agents need shared state across tool calls
export class WorkingMemory {
  constructor() {
    // FIX: use a null-prototype object so agent-chosen keys like "__proto__"
    // or "constructor" are plain data properties instead of polluting or
    // shadowing Object.prototype (the LLM controls these keys).
    this.state = Object.create(null);
  }

  /** Store a value under a key, overwriting any previous value. */
  set(key, value) {
    this.state[key] = value;
  }

  /** Retrieve a value; undefined if the key was never set. */
  get(key) {
    return this.state[key];
  }

  /** Serialize the whole state as pretty-printed JSON (for prompts/debugging). */
  dump() {
    return JSON.stringify(this.state, null, 2);
  }
}
const workingMemory = new WorkingMemory();

// Tools can read/write shared state
registry.register({
  name: 'set_variable',
  description: 'Store a value in working memory for later use.',
  inputSchema: {
    type: 'object',
    properties: {
      key: { type: 'string' },
      value: { type: 'string' },
    },
    required: ['key', 'value'],
  },
  execute: async ({ key, value }) => {
    workingMemory.set(key, value);
    return `Stored ${key} = ${value}`;
  },
});
Step 7: Multi-Agent Orchestration (Without a Framework)
Sometimes one agent isn't enough. Here's how to orchestrate multiple agents:
// agent/orchestrator.js
import { Agent } from './agent.js';
export class Orchestrator {
  constructor() {
    // name -> Agent instance
    this.agents = new Map();
  }

  /** Register a named agent built from the given Agent config. Chainable. */
  addAgent(name, config) {
    this.agents.set(name, new Agent(config));
    return this;
  }

  /**
   * Run a pipeline of agents where each passes results to the next.
   * Each stage is { agent, task, transform? }; `task` may be a string or a
   * function of the previous stage's result.
   * @returns {Promise<any>} the final stage's (possibly transformed) result
   * @throws {Error} on an unknown agent name
   */
  async pipeline(stages) {
    let result = null;
    for (const stage of stages) {
      const agent = this.agents.get(stage.agent);
      if (!agent) throw new Error(`Unknown agent: ${stage.agent}`);
      const task = typeof stage.task === 'function'
        ? stage.task(result) // Dynamic task based on previous result
        : stage.task;
      console.log(`\n=== Running ${stage.agent} ===`);
      result = await agent.run(task);
      if (stage.transform) {
        result = stage.transform(result);
      }
    }
    return result;
  }

  /**
   * Run agents in parallel and collect { agent, result } pairs.
   * FIX: now validates agent names like pipeline() does, instead of failing
   * with an opaque TypeError when a name is unknown.
   */
  async parallel(tasks) {
    return Promise.all(
      tasks.map(async ({ agent: agentName, task }) => {
        const agent = this.agents.get(agentName);
        if (!agent) throw new Error(`Unknown agent: ${agentName}`);
        const result = await agent.run(task);
        return { agent: agentName, result };
      })
    );
  }
}
// Example: Research + Write pipeline
const orchestrator = new Orchestrator();
orchestrator
.addAgent('researcher', {
systemPrompt: 'You are a research assistant. Find facts, summarize information, and identify key points.',
maxIterations: 10,
})
.addAgent('writer', {
systemPrompt: 'You are a technical writer. Take research notes and turn them into clear, structured documentation.',
maxIterations: 8,
});
const documentation = await orchestrator.pipeline([
{
agent