Full JSON Schema

The formal JSON Schema for BioLM Protocol YAML files (inputs, tasks, execution, and MLflow outputs) is given below. The Python client validates protocol YAML against this schema, for example via the biolmai protocol validate command.

protocol_schema.json
  1{
  2  "$schema": "https://json-schema.org/draft/2020-12/schema",
  3  "$id": "https://biolm.ai/schemas/protocol/v1",
  4  "title": "BioLM Protocol YAML Schema",
  5  "description": "Schema for BioLM Protocol YAMLs. A Protocol defines name, description, example_inputs, inputs (InputSpec), progress, ranking, writing, concurrency, tasks, and optional outputs (MLflow).",
  6  "type": "object",
  7  "required": ["name", "inputs", "tasks"],
  8  "additionalProperties": false,
  9  "properties": {
 10    "name": {
 11      "type": "string",
 12      "minLength": 1,
 13      "description": "Protocol name identifier"
 14    },
 15    "description": {
 16      "type": "string",
 17      "description": "Human-readable description of the protocol"
 18    },
 19    "about": {
 20      "$ref": "#/$defs/About"
 21    },
 22    "schema_version": {
 23      "oneOf": [
 24        { "type": "integer" },
 25        { "$ref": "#/$defs/ExprString" }
 26      ],
 27      "default": 1,
 28      "description": "Schema version. Optional; defaults to 1."
 29    },
 30    "protocol_version": {
 31      "anyOf": [
 32        { "type": "string" },
 33        { "$ref": "#/$defs/ExprString" }
 34      ],
 35      "description": "Protocol version (optional). Plain string or template expression."
 36    },
 37    "example_inputs": {
 38      "type": "object",
 39      "additionalProperties": true,
 40      "description": "Example input values (literals) for documentation or UI. Keys are input names."
 41    },
 42    "inputs": {
 43      "type": "object",
 44      "additionalProperties": { "$ref": "#/$defs/InputSpec" },
 45      "description": "Input definitions. Each key is an input name; value is an InputSpec (type, label, required/optional, initial, min/max, choices, etc.)."
 46    },
 47    "progress": {
 48      "$ref": "#/$defs/Progress",
 49      "description": "Progress tracking. Top-level; total_expected can be integer or expression."
 50    },
 51    "ranking": {
 52      "$ref": "#/$defs/Ranking",
 53      "description": "Top-N ranking for real-time updates. Top-level; field, order, top_n."
 54    },
 55    "writing": {
 56      "$ref": "#/$defs/Writing",
 57      "description": "Output writing (deduplicate, max_dedupe_size). Top-level."
 58    },
 59    "concurrency": {
 60      "$ref": "#/$defs/Concurrency",
 61      "description": "Concurrency control (workflow, tasks). Top-level."
 62    },
 63    "outputs": {
 64      "type": "array",
 65      "minItems": 1,
 66      "items": { "$ref": "#/$defs/OutputRule" },
 67      "description": "Output rules for MLflow logging. Unchanged from issue49."
 68    },
 69    "execution": {
 70      "$ref": "#/$defs/Execution",
 71      "description": "Legacy nested execution (progress, ranking, concurrency, writing). Prefer top-level keys."
 72    },
 73    "tasks": {
 74      "type": "array",
 75      "minItems": 1,
 76      "items": { "$ref": "#/$defs/Task" },
 77      "description": "List of workflow tasks (model tasks or gather tasks)."
 78    }
 79  },
 80  "$defs": {
 81    "ExprString": {
 82      "description": "Template expression (${{ ... }}).",
 83      "type": "string",
 84      "pattern": "^\\$\\{\\{[\\s\\S]+\\}\\}$"
 85    },
 86    "StringOrExpr": {
 87      "anyOf": [
 88        { "type": "string" },
 89        { "$ref": "#/$defs/ExprString" }
 90      ],
 91      "description": "Plain string or template expression. Uses anyOf rather than oneOf because a template expression is itself a string and therefore matches both branches."
 92    },
 93    "ExprInteger": {
 94      "description": "Integer or template expression.",
 95      "oneOf": [
 96        { "$ref": "#/$defs/ExprString" },
 97        { "type": "integer" }
 98      ]
 99    },
100    "ExprNumber": {
101      "description": "Number or template expression.",
102      "oneOf": [
103        { "$ref": "#/$defs/ExprString" },
104        { "type": "number" }
105      ]
106    },
107    "ExprBoolean": {
108      "description": "Boolean or template expression.",
109      "oneOf": [
110        { "$ref": "#/$defs/ExprString" },
111        { "type": "boolean" }
112      ]
113    },
114    "InputSpec": {
115      "description": "Input definition: type, label, required/optional, help_text, initial, min/max, min_length/max_length, choices, advanced, step. Server format.",
116      "type": "object",
117      "additionalProperties": true,
118      "properties": {
119        "type": {
120          "description": "Input type: text, float, integer, boolean, select, list_of_str, pdb_text, multiselect, etc.",
121          "type": "string"
122        },
123        "label": { "type": "string" },
124        "help_text": { "type": "string" },
125        "required": { "type": "boolean" },
126        "optional": { "type": "boolean" },
127        "advanced": { "type": "boolean" },
128        "initial": {},
129        "choices": {
130          "items": { "type": "string" },
131          "type": "array"
132        },
133        "min": { "type": "number" },
134        "max": { "type": "number" },
135        "step": { "type": "number" },
136        "min_length": { "type": "integer" },
137        "max_length": { "type": "integer" }
138      }
139    },
140    "About": {
141      "additionalProperties": false,
142      "type": "object",
143      "properties": {
144        "authors": {
145          "items": {
146            "additionalProperties": false,
147            "required": ["name"],
148            "type": "object",
149            "properties": {
150              "affiliation": { "type": "string" },
151              "email": { "type": "string" },
152              "name": { "type": "string" },
153              "orcid": { "type": "string" }
154            }
155          },
156          "type": "array"
157        },
158        "cite": { "type": "string" },
159        "description": { "type": "string" },
160        "doi": { "type": "string" },
161        "keywords": { "items": { "type": "string" }, "type": "array" },
162        "links": { "additionalProperties": { "type": "string" }, "type": "object" },
163        "title": { "type": "string" }
164      }
165    },
166    "Execution": {
167      "additionalProperties": false,
168      "type": "object",
169      "properties": {
170        "concurrency": { "$ref": "#/$defs/Concurrency" },
171        "progress": { "$ref": "#/$defs/Progress" },
172        "ranking": { "$ref": "#/$defs/Ranking" },
173        "writing": { "$ref": "#/$defs/Writing" }
174      }
175    },
176    "Progress": {
177      "additionalProperties": false,
178      "type": "object",
179      "properties": {
180        "total_expected": { "$ref": "#/$defs/ExprInteger" }
181      }
182    },
183    "Ranking": {
184      "type": "object",
185      "additionalProperties": false,
186      "required": ["field", "order", "top_n"],
187      "properties": {
188        "top_n": { "allOf": [{ "minimum": 1 }, { "$ref": "#/$defs/ExprInteger" }] },
189        "order": { "enum": ["ascending", "descending"], "type": "string" },
190        "field": { "type": "string" }
191      }
192    },
193    "Concurrency": {
194      "type": "object",
195      "additionalProperties": false,
196      "required": ["workflow", "tasks"],
197      "properties": {
198        "tasks": { "allOf": [{ "minimum": 1 }, { "$ref": "#/$defs/ExprInteger" }] },
199        "workflow": { "allOf": [{ "minimum": 1 }, { "$ref": "#/$defs/ExprInteger" }] }
200      }
201    },
202    "Writing": {
203      "additionalProperties": false,
204      "type": "object",
205      "properties": {
206        "max_dedupe_size": { "allOf": [{ "minimum": 1 }, { "$ref": "#/$defs/ExprInteger" }] },
207        "deduplicate": { "$ref": "#/$defs/ExprBoolean" }
208      }
209    },
210    "TaskBase": {
211      "type": "object",
212      "required": ["id"],
213      "additionalProperties": true,
214      "description": "Base for all tasks. ApiTask and GatherTask add their own properties (slug and action for model tasks; type/from/fields for gather). foreach and skip_if accept a plain string or a template expression.",
215      "properties": {
216        "id": { "type": "string" },
217        "depends_on": { "type": "array", "items": { "type": "string" } },
218        "foreach": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
219        "skip_if": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
220        "skip_if_empty": { "$ref": "#/$defs/ExprBoolean" },
221        "response_mapping": { "$ref": "#/$defs/ResponseMapping" }
222      }
223    },
224    "ApiTask": {
225      "description": "Model task. Use slug and action (e.g. predict, encode, generate). Requires request_body (items and optional params).",
226      "allOf": [
227        { "$ref": "#/$defs/TaskBase" },
228        {
229          "type": "object",
230          "required": ["request_body"],
231          "properties": {
232            "type": { "enum": ["task"], "default": "task" },
233            "slug": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
234            "action": { "type": "string", "description": "Action name (predict, encode, generate, similarity, predict_log_prob, etc.)" },
235            "class": { "type": "string" },
236            "app": { "type": "string" },
237            "method": { "type": "string" },
238            "request_body": { "$ref": "#/$defs/RequestBody" },
239            "fail_on_error": { "$ref": "#/$defs/ExprBoolean", "default": true },
240            "subtasks": {
241              "type": "object",
242              "additionalProperties": false,
243              "properties": {
244                "count": { "$ref": "#/$defs/ExprInteger" },
245                "split_params": {
246                  "type": "object",
247                  "additionalProperties": { "$ref": "#/$defs/ExprString" }
248                }
249              }
250            }
251          }
252        }
253      ],
254      "oneOf": [
255        { "required": ["slug", "action"] },
256        { "required": ["class", "app", "method"] }
257      ]
258    },
259    "GatherTask": {
260      "description": "Gather task: collects fields from another task or from an input. from is a task ID or input name; fields lists keys to collect; optional into (integer).",
261      "allOf": [
262        { "$ref": "#/$defs/TaskBase" },
263        {
264          "required": ["type", "from", "fields"],
265          "type": "object",
266          "properties": {
267            "type": { "const": "gather" },
268            "from": {
269              "description": "Source task ID or input name to gather from.",
270              "type": "string"
271            },
272            "into": { "allOf": [{ "minimum": 1 }, { "$ref": "#/$defs/ExprInteger" }] },
273            "fields": { "minItems": 1, "items": { "type": "string" }, "type": "array" }
274          }
275        }
276      ]
277    },
278    "Task": {
279      "oneOf": [
280        { "$ref": "#/$defs/GatherTask" },
281        { "$ref": "#/$defs/ApiTask" }
282      ]
283    },
284    "RequestBody": {
285      "type": "object",
286      "additionalProperties": false,
287      "required": ["items"],
288      "description": "Request payload for a model task: items (array, object, or expression) and optional params object.",
289      "properties": {
290        "params": { "type": "object" },
291        "items": {
292          "oneOf": [
293            { "$ref": "#/$defs/ExprString" },
294            { "type": "object" },
295            { "type": "array" }
296          ]
297        }
298      }
299    },
300    "ResponseMapping": {
301      "description": "Response field mappings. Value can be string expression or object with path, optional explode, optional prefix.",
302      "type": "object",
303      "minProperties": 1,
304      "additionalProperties": {
305        "oneOf": [
306          {
307            "type": "object",
308            "additionalProperties": false,
309            "required": ["path"],
310            "properties": {
311              "explode": { "type": "boolean" },
312              "prefix": { "type": "string" },
313              "path": { "oneOf": [{ "not": { "$ref": "#/$defs/ExprString" }, "type": "string" }, { "$ref": "#/$defs/ExprString" }] }
314            }
315          },
316          { "not": { "$ref": "#/$defs/ExprString" }, "type": "string" },
317          { "$ref": "#/$defs/ExprString" }
318        ]
319      }
320    },
321    "OutputRule": {
322      "type": "object",
323      "required": [],
324      "additionalProperties": false,
325      "properties": {
326        "where": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] },
327        "order_by": { "type": "array", "items": { "$ref": "#/$defs/OrderBy" } },
328        "limit": { "$ref": "#/$defs/ExprInteger", "default": 200 },
329        "run": {
330          "type": "object",
331          "additionalProperties": false,
332          "properties": { "name": { "anyOf": [{ "type": "string" }, { "$ref": "#/$defs/ExprString" }] } }
333        },
334        "log": { "$ref": "#/$defs/LogSpec" }
335      }
336    },
337    "OrderBy": {
338      "type": "object",
339      "additionalProperties": false,
340      "required": ["field", "order"],
341      "properties": {
342        "order": { "enum": ["asc", "desc"], "type": "string" },
343        "field": { "type": "string" }
344      }
345    },
346    "LogSpec": {
347      "additionalProperties": false,
348      "type": "object",
349      "properties": {
350        "tags": { "$ref": "#/$defs/KeyToScalarOrExpr" },
351        "params": { "$ref": "#/$defs/KeyToScalarOrExpr" },
352        "metrics": { "$ref": "#/$defs/KeyToScalarOrExpr" },
353        "artifacts": { "items": { "$ref": "#/$defs/ArtifactSpec" }, "type": "array" },
354        "aggregates": { "items": { "$ref": "#/$defs/AggregateSpec" }, "type": "array" }
355      }
356    },
357    "KeyToScalarOrExpr": {
358      "type": "object",
359      "additionalProperties": {
360        "anyOf": [
361          { "type": "string" },
362          { "type": "number" },
363          { "type": "boolean" },
364          { "$ref": "#/$defs/ExprString" }
365        ]
366      }
367    },
368    "AggregateSpec": {
369      "type": "object",
370      "additionalProperties": false,
371      "required": ["field", "ops"],
372      "properties": {
373        "ops": {
374          "items": { "enum": ["count", "mean", "sum", "min", "max", "p50", "p90", "p95", "p99", "std"] },
375          "type": "array"
376        },
377        "field": { "type": "string" }
378      }
379    },
380    "ArtifactSpec": {
381      "type": "object",
382      "required": ["type"],
383      "additionalProperties": false,
384      "properties": {
385        "name": { "type": "string" },
386        "type": { "type": "string", "enum": ["seqparse", "pdb", "fasta", "table", "msa", "plot", "json", "text"] },
387        "path": { "type": "string" },
388        "content": { "anyOf": [{ "type": "string" }, { "type": "object" }, { "$ref": "#/$defs/ExprString" }] },
389        "entries": { "type": "array", "items": { "$ref": "#/$defs/SequenceEntry" } },
390        "rows": { "oneOf": [{ "type": "array" }, { "$ref": "#/$defs/ExprString" }] },
391        "format": { "type": "string" },
392        "spec": { "type": "object" }
393      }
394    },
395    "SequenceEntry": {
396      "type": "object",
397      "additionalProperties": false,
398      "required": ["sequence"],
399      "properties": {
400        "sequence": { "type": "string" },
401        "id": { "type": "string" },
402        "metadata": { "additionalProperties": { "oneOf": [{ "type": "boolean" }, { "type": "number" }, { "type": "string" }] }, "type": "object" }
403      }
404    }
405  }
406}