Full JSON Schema#
The formal JSON Schema for BioLM Protocol YAML (inputs, tasks, execution, MLflow outputs) is defined below. The Python client validates protocol YAML against this schema (e.g. biolmai protocol validate).
protocol_schema.json#
1{
2 "$schema": "https://json-schema.org/draft/2020-12/schema",
3 "$id": "https://biolm.ai/schemas/protocol/v1",
4 "title": "BioLM Protocol YAML Schema",
5 "description": "Schema for BioLM Protocol YAMLs. A Protocol defines name, description, about, schema_version, protocol_version, example_inputs, inputs (InputSpec), progress, ranking, writing, concurrency, tasks, optional outputs (MLflow), and an optional legacy execution block.",
6 "type": "object",
7 "required": ["name", "inputs", "tasks"],
8 "additionalProperties": false,
9 "properties": {
10 "name": {
11 "type": "string",
12 "minLength": 1,
13 "description": "Protocol name identifier"
14 },
15 "description": {
16 "type": "string",
17 "description": "Human-readable description of the protocol"
18 },
19 "about": {
20 "$ref": "#/$defs/About"
21 },
22 "schema_version": {
23 "oneOf": [
24 { "type": "integer" },
25 { "$ref": "#/$defs/ExprString" }
26 ],
27 "default": 1,
28 "description": "Schema version. Optional; defaults to 1."
29 },
30 "protocol_version": {
31 "anyOf": [
32 { "type": "string" },
33 { "$ref": "#/$defs/ExprString" }
34 ],
35 "description": "Protocol version (optional). Plain string or template expression."
36 },
37 "example_inputs": {
38 "type": "object",
39 "additionalProperties": true,
40 "description": "Example input values (literals) for documentation or UI. Keys are input names."
41 },
42 "inputs": {
43 "type": "object",
44 "additionalProperties": { "$ref": "#/$defs/InputSpec" },
45 "description": "Input definitions. Each key is an input name; value is an InputSpec (type, label, required/optional, initial, min/max, choices, etc.)."
46 },
47 "progress": {
48 "$ref": "#/$defs/Progress",
49 "description": "Progress tracking. Top-level; total_expected can be integer or expression."
50 },
51 "ranking": {
52 "$ref": "#/$defs/Ranking",
53 "description": "Top-N ranking for real-time updates. Top-level; field, order, top_n."
54 },
55 "writing": {
56 "$ref": "#/$defs/Writing",
57 "description": "Output writing (deduplicate, max_dedupe_size). Top-level."
58 },
59 "concurrency": {
60 "$ref": "#/$defs/Concurrency",
61 "description": "Concurrency control (workflow, tasks). Top-level."
62 },
63 "outputs": {
64 "type": "array",
65 "minItems": 1,
66 "items": { "$ref": "#/$defs/OutputRule" },
67 "description": "Output rules for MLflow logging. Unchanged from issue49."
68 },
69 "execution": {
70 "$ref": "#/$defs/Execution",
71 "description": "Legacy nested execution (progress, ranking, concurrency, writing). Prefer top-level keys."
72 },
73 "tasks": {
74 "type": "array",
75 "minItems": 1,
76 "items": { "$ref": "#/$defs/Task" },
77 "description": "List of workflow tasks (model tasks or gather tasks)."
78 }
79 },
80 "$defs": {
81 "ExprString": {
82 "type": "string",
83 "pattern": "^\\$\\{\\{[\\s\\S]+\\}\\}$",
84 "description": "Template expression (${{ ... }})."
85 },
86 "StringOrExpr": {
87 "anyOf": [
88 { "type": "string" },
89 { "$ref": "#/$defs/ExprString" }
90 ],
91 "description": "Plain string or template expression."
92 },
93 "ExprInteger": {
94 "oneOf": [
95 { "type": "integer" },
96 { "$ref": "#/$defs/ExprString" }
97 ],
98 "description": "Integer or template expression."
99 },
100 "ExprNumber": {
101 "oneOf": [
102 { "type": "number" },
103 { "$ref": "#/$defs/ExprString" }
104 ],
105 "description": "Number or template expression."
106 },
107 "ExprBoolean": {
108 "oneOf": [
109 { "type": "boolean" },
110 { "$ref": "#/$defs/ExprString" }
111 ],
112 "description": "Boolean or template expression."
113 },
114 "InputSpec": {
115 "type": "object",
116 "description": "Input definition: type, label, required/optional, help_text, initial, min/max, min_length/max_length, choices, advanced, step. Server format.",
117 "additionalProperties": true,
118 "properties": {
119 "type": {
120 "type": "string",
121 "description": "Input type: text, float, integer, boolean, select, list_of_str, pdb_text, multiselect, etc."
122 },
123 "label": { "type": "string" },
124 "required": { "type": "boolean" },
125 "optional": { "type": "boolean" },
126 "help_text": { "type": "string" },
127 "initial": {},
128 "min": { "type": "number" },
129 "max": { "type": "number" },
130 "min_length": { "type": "integer" },
131 "max_length": { "type": "integer" },
132 "choices": {
133 "type": "array",
134 "items": { "type": "string" }
135 },
136 "advanced": { "type": "boolean" },
137 "step": { "type": "number" }
138 }
139 },
140 "About": {
141 "type": "object",
142 "additionalProperties": false,
143 "properties": {
144 "title": { "type": "string" },
145 "description": { "type": "string" },
146 "authors": {
147 "type": "array",
148 "items": {
149 "type": "object",
150 "required": ["name"],
151 "additionalProperties": false,
152 "properties": {
153 "name": { "type": "string" },
154 "affiliation": { "type": "string" },
155 "email": { "type": "string" },
156 "orcid": { "type": "string" }
157 }
158 }
159 },
160 "keywords": { "type": "array", "items": { "type": "string" } },
161 "doi": { "type": "string" },
162 "cite": { "type": "string" },
163 "links": { "type": "object", "additionalProperties": { "type": "string" } }
164 }
165 },
166 "Execution": {
167 "type": "object",
168 "additionalProperties": false,
169 "properties": {
170 "progress": { "$ref": "#/$defs/Progress" },
171 "ranking": { "$ref": "#/$defs/Ranking" },
172 "concurrency": { "$ref": "#/$defs/Concurrency" },
173 "writing": { "$ref": "#/$defs/Writing" }
174 }
175 },
176 "Progress": {
177 "type": "object",
178 "additionalProperties": false,
179 "properties": {
180 "total_expected": { "$ref": "#/$defs/ExprInteger" }
181 }
182 },
183 "Ranking": {
184 "type": "object",
185 "required": ["field", "order", "top_n"],
186 "additionalProperties": false,
187 "properties": {
188 "field": { "type": "string" },
189 "order": { "type": "string", "enum": ["ascending", "descending"] },
190 "top_n": { "allOf": [{ "$ref": "#/$defs/ExprInteger" }, { "minimum": 1 }] }
191 }
192 },
193 "Concurrency": {
194 "type": "object",
195 "required": ["workflow", "tasks"],
196 "additionalProperties": false,
197 "properties": {
198 "workflow": { "allOf": [{ "$ref": "#/$defs/ExprInteger" }, { "minimum": 1 }] },
199 "tasks": { "allOf": [{ "$ref": "#/$defs/ExprInteger" }, { "minimum": 1 }] }
200 }
201 },
202 "Writing": {
203 "type": "object",
204 "additionalProperties": false,
205 "properties": {
206 "deduplicate": { "$ref": "#/$defs/ExprBoolean" },
207 "max_dedupe_size": { "allOf": [{ "$ref": "#/$defs/ExprInteger" }, { "minimum": 1 }] }
208 }
209 },
210 "TaskBase": {
211 "type": "object",
212 "required": ["id"],
213 "additionalProperties": true,
214 "description": "Base for all tasks. ApiTask and GatherTask add their own properties (slug and action for model tasks; type/from/fields for gather).",
215 "properties": {
216 "id": { "type": "string" },
217 "depends_on": { "type": "array", "items": { "type": "string" } },
218 "foreach": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
219 "skip_if": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
220 "skip_if_empty": { "$ref": "#/$defs/ExprBoolean" },
221 "response_mapping": { "$ref": "#/$defs/ResponseMapping" }
222 }
223 },
224 "ApiTask": {
225 "description": "Model task. Identify the target either with slug and action (e.g. predict, encode, generate) or with class, app, and method (one form or the other, not both). Requires request_body (items and optional params).",
226 "allOf": [
227 { "$ref": "#/$defs/TaskBase" },
228 {
229 "type": "object",
230 "required": ["request_body"],
231 "properties": {
232 "type": { "enum": ["task"], "default": "task" },
233 "slug": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
234 "action": { "type": "string", "description": "Action name (predict, encode, generate, similarity, predict_log_prob, etc.)" },
235 "class": { "type": "string" },
236 "app": { "type": "string" },
237 "method": { "type": "string" },
238 "request_body": { "$ref": "#/$defs/RequestBody" },
239 "fail_on_error": { "$ref": "#/$defs/ExprBoolean", "default": true },
240 "subtasks": {
241 "type": "object",
242 "additionalProperties": false,
243 "properties": {
244 "count": { "$ref": "#/$defs/ExprInteger" },
245 "split_params": {
246 "type": "object",
247 "additionalProperties": { "$ref": "#/$defs/ExprString" }
248 }
249 }
250 }
251 }
252 }
253 ],
254 "oneOf": [
255 { "required": ["slug", "action"] },
256 { "required": ["class", "app", "method"] }
257 ]
258 },
259 "GatherTask": {
260 "description": "Gather task: collects fields from another task or from an input. from is a task ID or input name; fields lists keys to collect; optional into (integer).",
261 "allOf": [
262 { "$ref": "#/$defs/TaskBase" },
263 {
264 "type": "object",
265 "required": ["type", "from", "fields"],
266 "properties": {
267 "type": { "const": "gather" },
268 "from": {
269 "type": "string",
270 "description": "Source task ID or input name to gather from."
271 },
272 "fields": { "type": "array", "items": { "type": "string" }, "minItems": 1 },
273 "into": { "allOf": [{ "$ref": "#/$defs/ExprInteger" }, { "minimum": 1 }] }
274 }
275 }
276 ]
277 },
278 "Task": {
279 "oneOf": [
280 { "$ref": "#/$defs/ApiTask" },
281 { "$ref": "#/$defs/GatherTask" }
282 ]
283 },
284 "RequestBody": {
285 "description": "Request payload for a model task: items (array, object, or expression) and optional params object.",
286 "type": "object",
287 "required": ["items"],
288 "additionalProperties": false,
289 "properties": {
290 "items": {
291 "oneOf": [
292 { "type": "array" },
293 { "type": "object" },
294 { "$ref": "#/$defs/ExprString" }
295 ]
296 },
297 "params": { "type": "object" }
298 }
299 },
300 "ResponseMapping": {
301 "type": "object",
302 "minProperties": 1,
303 "additionalProperties": {
304 "oneOf": [
305 { "$ref": "#/$defs/ExprString" },
306 { "type": "string", "not": { "$ref": "#/$defs/ExprString" } },
307 {
308 "type": "object",
309 "required": ["path"],
310 "additionalProperties": false,
311 "properties": {
312 "path": { "oneOf": [{ "$ref": "#/$defs/ExprString" }, { "type": "string", "not": { "$ref": "#/$defs/ExprString" } }] },
313 "explode": { "type": "boolean" },
314 "prefix": { "type": "string" }
315 }
316 }
317 ]
318 },
319 "description": "Response field mappings. Value can be string expression or object with path, optional explode, optional prefix."
320 },
321 "OutputRule": {
322 "type": "object",
323 "required": [],
324 "additionalProperties": false,
325 "properties": {
326 "where": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
327 "order_by": { "type": "array", "items": { "$ref": "#/$defs/OrderBy" } },
328 "limit": { "$ref": "#/$defs/ExprInteger", "default": 200 },
329 "run": {
330 "type": "object",
331 "additionalProperties": false,
332 "properties": { "name": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] } }
333 },
334 "log": { "$ref": "#/$defs/LogSpec" }
335 }
336 },
337 "OrderBy": {
338 "type": "object",
339 "required": ["field", "order"],
340 "additionalProperties": false,
341 "properties": {
342 "field": { "type": "string" },
343 "order": { "type": "string", "enum": ["asc", "desc"] }
344 }
345 },
346 "LogSpec": {
347 "type": "object",
348 "additionalProperties": false,
349 "properties": {
350 "params": { "$ref": "#/$defs/KeyToScalarOrExpr" },
351 "metrics": { "$ref": "#/$defs/KeyToScalarOrExpr" },
352 "tags": { "$ref": "#/$defs/KeyToScalarOrExpr" },
353 "aggregates": { "type": "array", "items": { "$ref": "#/$defs/AggregateSpec" } },
354 "artifacts": { "type": "array", "items": { "$ref": "#/$defs/ArtifactSpec" } }
355 }
356 },
357 "KeyToScalarOrExpr": {
358 "type": "object",
359 "additionalProperties": {
360 "anyOf": [
361 { "type": "string" },
362 { "type": "number" },
363 { "type": "boolean" },
364 { "$ref": "#/$defs/ExprString" }
365 ]
366 }
367 },
368 "AggregateSpec": {
369 "type": "object",
370 "required": ["field", "ops"],
371 "additionalProperties": false,
372 "properties": {
373 "field": { "type": "string" },
374 "ops": {
375 "type": "array",
376 "items": { "enum": ["count", "mean", "sum", "min", "max", "p50", "p90", "p95", "p99", "std"] }
377 }
378 }
379 },
380 "ArtifactSpec": {
381 "type": "object",
382 "required": ["type"],
383 "additionalProperties": false,
384 "properties": {
385 "name": { "type": "string" },
386 "type": { "type": "string", "enum": ["seqparse", "pdb", "fasta", "table", "msa", "plot", "json", "text"] },
387 "path": { "type": "string" },
388 "content": { "anyOf": [{ "type": "string" }, { "type": "object" }, { "$ref": "#/$defs/ExprString" }] },
389 "entries": { "type": "array", "items": { "$ref": "#/$defs/SequenceEntry" } },
390 "rows": { "oneOf": [{ "type": "array" }, { "$ref": "#/$defs/ExprString" }] },
391 "format": { "type": "string" },
392 "spec": { "type": "object" }
393 }
394 },
395 "SequenceEntry": {
396 "type": "object",
397 "required": ["sequence"],
398 "additionalProperties": false,
399 "properties": {
400 "id": { "type": "string" },
401 "sequence": { "type": "string" },
402 "metadata": { "type": "object", "additionalProperties": { "oneOf": [{ "type": "string" }, { "type": "number" }, { "type": "boolean" }] } }
403 }
404 }
405 }
406}