feat: add model management tools to add-ollama-tool skill

Adds four new MCP tools to the existing ollama integration, consolidating
model management (from #1331) into the single add-ollama-tool skill as
requested by @gavrielc:

- ollama_pull_model  — pull a model from the Ollama registry
- ollama_delete_model — delete a local model to free disk space
- ollama_show_model  — inspect modelfile, parameters, and architecture
- ollama_list_running — list models loaded in memory with VRAM/processor info

All four tools follow the existing patterns in this file: OLLAMA_HOST env
var, ollamaFetch() with host.docker.internal fallback, log() and
writeStatus() helpers. No changes to index.ts or container-runner.ts
needed — OLLAMA_HOST is already forwarded via sdkEnv.

Also updates SKILL.md description, tool list, verify steps, and adds a
troubleshooting entry for large-model pull timeouts.

Closes #1331.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Gary Walker
2026-03-26 12:08:54 +11:00
parent 69348510e9
commit 54a8648c95
2 changed files with 156 additions and 7 deletions

View File

@@ -143,5 +143,139 @@ server.tool(
},
);
server.tool(
'ollama_pull_model',
'Pull (download) a model from the Ollama registry by name. Returns the final status once the pull is complete. Use model names like "llama3.2", "mistral", "gemma2:9b".',
{
model: z.string().describe('Model name to pull, e.g. "llama3.2", "mistral", "gemma2:9b"'),
},
async (args) => {
log(`Pulling model: ${args.model}...`);
writeStatus('pulling', `Pulling ${args.model}`);
try {
const res = await ollamaFetch('/api/pull', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: args.model, stream: false }),
});
if (!res.ok) {
const errorText = await res.text();
return {
content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }],
isError: true,
};
}
const data = await res.json() as { status: string };
log(`Pull complete: ${args.model}${data.status}`);
writeStatus('done', `Pulled ${args.model}`);
return { content: [{ type: 'text' as const, text: `Pull complete: ${args.model}${data.status}` }] };
} catch (err) {
return {
content: [{ type: 'text' as const, text: `Failed to pull model: ${err instanceof Error ? err.message : String(err)}` }],
isError: true,
};
}
},
);
server.tool(
'ollama_delete_model',
'Delete a locally installed Ollama model to free up disk space.',
{
model: z.string().describe('Model name to delete, e.g. "llama3.2", "mistral:latest"'),
},
async (args) => {
log(`Deleting model: ${args.model}...`);
writeStatus('deleting', `Deleting ${args.model}`);
try {
const res = await ollamaFetch('/api/delete', {
method: 'DELETE',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: args.model }),
});
if (!res.ok) {
const errorText = await res.text();
return {
content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }],
isError: true,
};
}
log(`Deleted: ${args.model}`);
writeStatus('done', `Deleted ${args.model}`);
return { content: [{ type: 'text' as const, text: `Deleted model: ${args.model}` }] };
} catch (err) {
return {
content: [{ type: 'text' as const, text: `Failed to delete model: ${err instanceof Error ? err.message : String(err)}` }],
isError: true,
};
}
},
);
server.tool(
'ollama_show_model',
'Show details for a locally installed Ollama model: modelfile, parameters, template, system prompt, and architecture info (context length, parameter count, etc.).',
{
model: z.string().describe('Model name to inspect, e.g. "llama3.2", "mistral:latest"'),
},
async (args) => {
log(`Showing model info: ${args.model}...`);
try {
const res = await ollamaFetch('/api/show', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: args.model }),
});
if (!res.ok) {
const errorText = await res.text();
return {
content: [{ type: 'text' as const, text: `Ollama error (${res.status}): ${errorText}` }],
isError: true,
};
}
const data = await res.json();
return { content: [{ type: 'text' as const, text: JSON.stringify(data, null, 2) }] };
} catch (err) {
return {
content: [{ type: 'text' as const, text: `Failed to show model info: ${err instanceof Error ? err.message : String(err)}` }],
isError: true,
};
}
},
);
server.tool(
'ollama_list_running',
'List Ollama models currently loaded in memory with their memory usage, processor type (CPU/GPU), and time until they are unloaded.',
{},
async () => {
log('Listing running models...');
try {
const res = await ollamaFetch('/api/ps');
if (!res.ok) {
return {
content: [{ type: 'text' as const, text: `Ollama API error: ${res.status} ${res.statusText}` }],
isError: true,
};
}
const data = await res.json() as { models?: Array<{ name: string; size_vram: number; processor: string; expires_at: string }> };
const models = data.models || [];
if (models.length === 0) {
return { content: [{ type: 'text' as const, text: 'No models currently loaded in memory.' }] };
}
const list = models
.map(m => `- ${m.name} (${(m.size_vram / 1e9).toFixed(1)}GB ${m.processor}, unloads at ${m.expires_at})`)
.join('\n');
log(`${models.length} model(s) running`);
return { content: [{ type: 'text' as const, text: `Models loaded in memory:\n${list}` }] };
} catch (err) {
return {
content: [{ type: 'text' as const, text: `Failed to list running models: ${err instanceof Error ? err.message : String(err)}` }],
isError: true,
};
}
},
);
const transport = new StdioServerTransport();
await server.connect(transport);