NebiusForge
Model Catalog | Training | Agents | Docs | Status | Account | Sign in

API Docs

Forge exposes an OpenAI-compatible API surface plus a generic inference endpoint for native schemas. Start with the Quickstart, then dive into the full API reference.

Quickstart

OpenAI-compatible chat

Use the same API key with Forge's proxy base URL. List or search public models, narrow to a chat-capable model, then run one exported MODEL_OR_FAMILY_SLUG line. The reliability snippet returns fastest and lowest-cost verified GPU/region exports before inference. The smoke test copies as one block, prefers a warm chat-capable model when available, and verifies auth, model discovery, and chat routing together. The native route snippet pairs model or family slugs with their public /v1/inference/... routes, while the OpenAI route finder lists chat, completions, and embeddings model values.

Deploy and optimize

These snippets call action-guide endpoints only. They print related endpoints, request templates, and next steps without creating private endpoints or optimized model builds.

First-run path
  1. Set shell env: Copy the API base, key, and cold-start timeout exports once.
  2. Check platform: Confirm auth, routing, and platform status before inference.
  3. Run smoke test: Send one warm-model chat request and print the response.

Client auth: Set FORGE_API_KEY to a real Forge API key before running copied curl, fetch, or SDK snippets. Browser SSO only authenticates this web session.

Open Account
Snippet API base

Effective base: /api/proxy/v1

Auth header
Authorization: Bearer $FORGE_API_KEY

One-command smoke test

Auto-picks a warm chat-capable model when available, sends one completion, and prints the response text.

# One-command smoke test: discover a chat-capable model, send one chat
# completion through the OpenAI-compatible route, and print the reply.
# -e/-u/pipefail make any failed command, unset variable, or failed pipeline
# stage abort the snippet.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
# NOTE(review): the fallback has no scheme or host, and curl needs an absolute
# URL -- confirm the value readers are expected to export.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT. When
# the base already ends in /v1 and ENDPOINT starts with /v1, the endpoint's
# leading /v1 is dropped so the URL does not contain /v1/v1.
# endpoint and base are not declared local, so they stay set after the call.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Select the model: list text models, keep those whose playground api_mapping
# endpoint is /v1/chat/completions, and take the best by status
# (warm, then starting, then cold, then anything else), with response order as
# the tie-break. The printed value is model_family when present, else slug.
# The embedded Python exits 1 with a stderr message when nothing qualifies.
# --fail-with-body requires curl 7.76.0 or newer.
MODEL_OR_FAMILY_SLUG="$(
  curl -sS --fail-with-body "$(forge_api_url '/v1/models?modality=text&sort=recently_added')" \
    --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
    -H "Authorization: Bearer ${FORGE_API_KEY}" | \
  python3 -c 'import json, sys

payload = json.load(sys.stdin)
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
def chat_status_rank(status):
    status = str(status or "").lower()
    return {"warm": 0, "starting": 1, "cold": 2}.get(status, 3)
rows = []
for model in models:
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    if endpoint != "/v1/chat/completions":
        continue
    model_ref = model.get("model_family") or model.get("slug")
    if not model_ref:
        continue
    rows.append((chat_status_rank(model.get("status")), len(rows), model_ref))

if rows:
    print(min(rows)[2])
    raise SystemExit(0)

print("No chat-capable models returned. Open the Forge catalog at / for native inference routes.", file=sys.stderr)
raise SystemExit(1)'
)"
# Exported so the request builder below can read it from os.environ.
export MODEL_OR_FAMILY_SLUG
echo "Using ${MODEL_OR_FAMILY_SLUG}"
# Build the request JSON in Python so the model name is JSON-escaped, stream it
# into curl as the POST body (-d @- reads stdin), then print
# choices[0].message.content, or the whole response pretty-printed when that
# field is missing or empty.
python3 - <<'PY' |
import json
import os

print(json.dumps({
    "model": os.environ["MODEL_OR_FAMILY_SLUG"],
    "messages": [
        {"role": "user", "content": "Write a one sentence status update."},
    ],
}))
PY
curl -sS --fail-with-body "$(forge_api_url '/v1/chat/completions')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" \
  -H "Content-Type: application/json" \
  -d @- | \
python3 -c 'import json, sys

data = json.load(sys.stdin)
message = (((data.get("choices") or [{}])[0].get("message") or {}).get("content"))
if message:
    print(message)
else:
    print(json.dumps(data, indent=2))'

Shell setup

# Shared environment for the other snippets on this page.
# FORGE_API_BASE is always reset to the proxy path here.
# NOTE(review): the value has no scheme or host -- confirm whether readers must
# prepend their deployment origin before using it with curl.
export FORGE_API_BASE='/api/proxy'
# Keeps an existing key; otherwise stores the placeholder that the snippets'
# key check rejects.
export FORGE_API_KEY="${FORGE_API_KEY:-replace-with-your-forge-api-key}"
# Per-request curl --max-time in seconds; defaults to 600 when unset or empty.
export FORGE_REQUEST_TIMEOUT_SECONDS="${FORGE_REQUEST_TIMEOUT_SECONDS:-600}"

Install Python packages

# Installs or upgrades the openai and requests packages for the python3 on PATH.
python3 -m pip install --upgrade openai requests

Platform status

# Platform status: GET /v1/status and print the overall status, the number of
# regions, and a name=status summary of each region entry.
# The request is sent without an Authorization header.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# The '\'' sequences below close the shell string, emit one literal single
# quote, and reopen it, so Python receives ordinary single-quoted keys.
curl -sS --fail-with-body "$(forge_api_url '/v1/status')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" | \
python3 -c 'import json, sys

payload = json.load(sys.stdin)
regions = payload.get("regions") or []
status = payload.get("status") or "unknown"
region_summary = ", ".join(
    f"{row.get('\''name'\'')}={row.get('\''status'\'')}"
    for row in regions
    if isinstance(row, dict)
)
print(f"Forge status={status} regions={len(regions)}")
if region_summary:
    print(region_summary)'

OpenAPI schema

# OpenAPI schema: GET /v1/openapi.json and print a one-line summary with the
# API title, API version, OpenAPI spec version, and number of paths.
# The request is sent without an Authorization header.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Missing fields fall back to "Forge API" for the title and "unknown" for versions.
curl -sS --fail-with-body "$(forge_api_url '/v1/openapi.json')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" | \
python3 -c 'import json, sys

schema = json.load(sys.stdin)
info = schema.get("info") or {}
paths = schema.get("paths") or {}
title = info.get("title") or "Forge API"
version = info.get("version") or "unknown"
openapi_version = schema.get("openapi") or "unknown"
print(f"{title} {version} openapi={openapi_version} paths={len(paths)}")'

List models

# List models: GET /v1/models into a temp file and pretty-print the response.
# This snippet does not enable set -e; it captures curl's exit status itself so
# the temp file is removed on both the success and the failure path.
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# forge_print_response FILE
# Prints "(empty response)" when FILE is missing or zero-length. Otherwise
# pretty-prints it with python3 -m json.tool, falling back to cat when python3
# is not installed or the content does not parse as JSON.
# response_file is not declared local, so it overwrites the caller's variable
# of the same name (here with the same value).
forge_print_response() {
  response_file="$1"
  if [ ! -s "$response_file" ]; then
    printf '%s\n' '(empty response)'
    return 0
  fi
  if command -v python3 >/dev/null 2>&1; then
    python3 -m json.tool "$response_file" 2>/dev/null || cat "$response_file"
  else
    cat "$response_file"
  fi
}
response_file="$(mktemp)"
# On success the body is pretty-printed to stdout; on failure the body that
# --fail-with-body saved is copied to stderr. Either way the temp file is
# deleted, and (exit "$status") sets $? from a subshell instead of calling exit
# in the current shell.
if curl -sS --fail-with-body "$(forge_api_url '/v1/models')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" \
  -o "$response_file"; then
  forge_print_response "$response_file"
  status=$?
  rm -f "$response_file"
  (exit "$status")
else
  status=$?
  cat "$response_file" >&2
  rm -f "$response_file"
  (exit "$status")
fi

Search models

# Search models: GET /v1/models?search=<query>&sort=recently_added and print up
# to 10 matches as "model_ref  endpoint  # name (status, category)".
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Search text, exported because both Python helpers read it from os.environ;
# defaults to protein when unset or empty.
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'protein'}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Build the path with urlencode so the query text is percent-encoded; a
# whitespace-only query falls back to protein.
search_path="$(python3 -c 'import os
from urllib.parse import urlencode

query = os.environ.get("FORGE_MODEL_SEARCH_QUERY", "protein").strip()
if not query:
    query = "protein"
print("/v1/models?" + urlencode({"search": query, "sort": "recently_added"}))')"
# Each row uses model_family when present, else slug; models with neither are
# skipped. A model without an api_mapping endpoint is shown as "route pending".
# The embedded Python exits 1 with a stderr message when no rows remain.
curl -sS --fail-with-body "$(forge_api_url "$search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, os, sys

payload = json.load(sys.stdin)
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
query = os.environ.get("FORGE_MODEL_SEARCH_QUERY", "protein").strip()
rows = []
for model in models:
    model_ref = model.get("model_family") or model.get("slug")
    if not model_ref:
        continue
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    name = model.get("name") or model_ref
    status = model.get("status", "unknown")
    category = model.get("category", "uncategorized")
    rows.append((model_ref, endpoint or "route pending", name, status, category))

if not rows:
    print(f"No models matched {query!r}. Try another FORGE_MODEL_SEARCH_QUERY.", file=sys.stderr)
    raise SystemExit(1)
for model_ref, endpoint, name, status, category in rows[:10]:
    print(f"{model_ref}  {endpoint}  # {name} ({status}, {category})")'

Rank models

# Rank models: query /v1/search (sort defaults to latency) and print one
# copy-paste "export ..." line per result, followed by up to three match
# reasons as shell comments.
# Optional filters are read from FORGE_GPU_TYPE, FORGE_REGION,
# FORGE_SEARCH_MAX_LATENCY_MS, FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD and
# FORGE_SEARCH_PARETO_ONLY; the query builder reads os.environ, so export them.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Search inputs, exported so the embedded Python can read them.
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'chat'}
export FORGE_SEARCH_CAPABILITIES=${FORGE_SEARCH_CAPABILITIES:-'chat'}
export FORGE_SEARCH_SORT=${FORGE_SEARCH_SORT:-'latency'}
export FORGE_SEARCH_LIMIT=${FORGE_SEARCH_LIMIT:-'5'}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Build the /v1/search path. Capabilities may be comma- or newline-separated
# and are sent as repeated capability= parameters (urlencode with doseq=True).
ranked_search_path="$(python3 -c 'import os
from urllib.parse import urlencode

def env_value(name):
    value = os.environ.get(name, "").strip()
    return value or None

params = {
    "q": os.environ.get("FORGE_MODEL_SEARCH_QUERY", "chat").strip() or "chat",
    "sort": env_value("FORGE_SEARCH_SORT") or "latency",
    "limit": env_value("FORGE_SEARCH_LIMIT") or "5",
}
capabilities = [
    value.strip()
    for value in os.environ.get("FORGE_SEARCH_CAPABILITIES", "chat").replace(",", "\n").splitlines()
    if value.strip()
]
if capabilities:
    params["capability"] = capabilities
for query_key, env_name in (
    ("gpu_type", "FORGE_GPU_TYPE"),
    ("region", "FORGE_REGION"),
    ("max_latency_ms", "FORGE_SEARCH_MAX_LATENCY_MS"),
    ("max_warm_request_cost_usd", "FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD"),
):
    value = env_value(env_name)
    if value:
        params[query_key] = value
if os.environ.get("FORGE_SEARCH_PARETO_ONLY", "").strip().lower() in {"1", "true", "yes", "on"}:
    params["pareto_only"] = "true"
print("/v1/search?" + urlencode(params, doseq=True))')"
# Print the ranked results. Assigned values are shell-quoted with shlex.quote;
# free text placed after "#" is collapsed to a single line first.
curl -sS --fail-with-body "$(forge_api_url "$ranked_search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

def one_line(value):
    # API text lands in shell comments that users paste into a terminal;
    # collapsing whitespace keeps a stray line break from starting a new command.
    return " ".join(str(value).split())

payload = json.load(sys.stdin)
results = payload.get("results") or []
if not results:
    filters = payload.get("filters") or {}
    print("No ranked models matched the requested search filters.", file=sys.stderr)
    if filters:
        print(json.dumps({"filters": filters}, indent=2), file=sys.stderr)
    raise SystemExit(1)

printed = 0
for item in results[:10]:
    model = item.get("model") or {}
    model_family = model.get("model_family")
    model_ref = model_family or model.get("slug")
    if not model_ref:
        continue
    target = item.get("matched_target") or {}
    assignments = [f"MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}"]
    # A version key is only exported alongside a family reference.
    version_key = model.get("version_key") if model_family else None
    if version_key:
        assignments.append(f"FORGE_MODEL_VERSION={shlex.quote(str(version_key))}")
    gpu_type = target.get("gpu_type")
    if gpu_type:
        assignments.append(f"FORGE_GPU_TYPE={shlex.quote(str(gpu_type))}")
    region = target.get("region")
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(str(region))}")
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    comment = str(model.get("name") or model_ref)
    if details:
        comment += "; " + ", ".join(details)
    print("export " + " ".join(assignments) + "  # " + one_line(comment))
    printed += 1
    for reason in (item.get("match_reasons") or [])[:3]:
        print("#   " + one_line(reason))

# Results without a slug or family are skipped above; report that instead of
# finishing silently with nothing to copy.
if not printed:
    print("Ranked results did not include a model slug or family.", file=sys.stderr)
    raise SystemExit(1)'

Low-cost models

# Low-cost models: query /v1/search (sort defaults to cost) and print one
# copy-paste "export ..." line per result, followed by up to three match
# reasons as shell comments.
# Optional filters are read from FORGE_GPU_TYPE, FORGE_REGION,
# FORGE_SEARCH_MAX_LATENCY_MS, FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD and
# FORGE_SEARCH_PARETO_ONLY; the query builder reads os.environ, so export them.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Search inputs, exported so the embedded Python can read them.
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'chat'}
export FORGE_SEARCH_CAPABILITIES=${FORGE_SEARCH_CAPABILITIES:-'chat'}
export FORGE_SEARCH_SORT=${FORGE_SEARCH_SORT:-'cost'}
export FORGE_SEARCH_LIMIT=${FORGE_SEARCH_LIMIT:-'5'}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Build the /v1/search path. Capabilities may be comma- or newline-separated
# and are sent as repeated capability= parameters (urlencode with doseq=True).
ranked_search_path="$(python3 -c 'import os
from urllib.parse import urlencode

def env_value(name):
    value = os.environ.get(name, "").strip()
    return value or None

params = {
    "q": os.environ.get("FORGE_MODEL_SEARCH_QUERY", "chat").strip() or "chat",
    "sort": env_value("FORGE_SEARCH_SORT") or "cost",
    "limit": env_value("FORGE_SEARCH_LIMIT") or "5",
}
capabilities = [
    value.strip()
    for value in os.environ.get("FORGE_SEARCH_CAPABILITIES", "chat").replace(",", "\n").splitlines()
    if value.strip()
]
if capabilities:
    params["capability"] = capabilities
for query_key, env_name in (
    ("gpu_type", "FORGE_GPU_TYPE"),
    ("region", "FORGE_REGION"),
    ("max_latency_ms", "FORGE_SEARCH_MAX_LATENCY_MS"),
    ("max_warm_request_cost_usd", "FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD"),
):
    value = env_value(env_name)
    if value:
        params[query_key] = value
if os.environ.get("FORGE_SEARCH_PARETO_ONLY", "").strip().lower() in {"1", "true", "yes", "on"}:
    params["pareto_only"] = "true"
print("/v1/search?" + urlencode(params, doseq=True))')"
# Print the ranked results. Assigned values are shell-quoted with shlex.quote;
# free text placed after "#" is collapsed to a single line first.
curl -sS --fail-with-body "$(forge_api_url "$ranked_search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

def one_line(value):
    # API text lands in shell comments that users paste into a terminal;
    # collapsing whitespace keeps a stray line break from starting a new command.
    return " ".join(str(value).split())

payload = json.load(sys.stdin)
results = payload.get("results") or []
if not results:
    filters = payload.get("filters") or {}
    print("No ranked models matched the requested search filters.", file=sys.stderr)
    if filters:
        print(json.dumps({"filters": filters}, indent=2), file=sys.stderr)
    raise SystemExit(1)

printed = 0
for item in results[:10]:
    model = item.get("model") or {}
    model_family = model.get("model_family")
    model_ref = model_family or model.get("slug")
    if not model_ref:
        continue
    target = item.get("matched_target") or {}
    assignments = [f"MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}"]
    # A version key is only exported alongside a family reference.
    version_key = model.get("version_key") if model_family else None
    if version_key:
        assignments.append(f"FORGE_MODEL_VERSION={shlex.quote(str(version_key))}")
    gpu_type = target.get("gpu_type")
    if gpu_type:
        assignments.append(f"FORGE_GPU_TYPE={shlex.quote(str(gpu_type))}")
    region = target.get("region")
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(str(region))}")
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    comment = str(model.get("name") or model_ref)
    if details:
        comment += "; " + ", ".join(details)
    print("export " + " ".join(assignments) + "  # " + one_line(comment))
    printed += 1
    for reason in (item.get("match_reasons") or [])[:3]:
        print("#   " + one_line(reason))

# Results without a slug or family are skipped above; report that instead of
# finishing silently with nothing to copy.
if not printed:
    print("Ranked results did not include a model slug or family.", file=sys.stderr)
    raise SystemExit(1)'

Tool-calling models

# Tool-calling models: query /v1/search with the tool_calling capability by
# default (sort defaults to latency) and print one copy-paste "export ..." line
# per result, followed by up to three match reasons as shell comments.
# Optional filters are read from FORGE_GPU_TYPE, FORGE_REGION,
# FORGE_SEARCH_MAX_LATENCY_MS, FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD and
# FORGE_SEARCH_PARETO_ONLY; the query builder reads os.environ, so export them.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Search inputs, exported so the embedded Python can read them.
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'chat'}
export FORGE_SEARCH_CAPABILITIES=${FORGE_SEARCH_CAPABILITIES:-'tool_calling'}
export FORGE_SEARCH_SORT=${FORGE_SEARCH_SORT:-'latency'}
export FORGE_SEARCH_LIMIT=${FORGE_SEARCH_LIMIT:-'5'}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Build the /v1/search path. Capabilities may be comma- or newline-separated
# and are sent as repeated capability= parameters (urlencode with doseq=True).
ranked_search_path="$(python3 -c 'import os
from urllib.parse import urlencode

def env_value(name):
    value = os.environ.get(name, "").strip()
    return value or None

params = {
    "q": os.environ.get("FORGE_MODEL_SEARCH_QUERY", "chat").strip() or "chat",
    "sort": env_value("FORGE_SEARCH_SORT") or "latency",
    "limit": env_value("FORGE_SEARCH_LIMIT") or "5",
}
capabilities = [
    value.strip()
    for value in os.environ.get("FORGE_SEARCH_CAPABILITIES", "tool_calling").replace(",", "\n").splitlines()
    if value.strip()
]
if capabilities:
    params["capability"] = capabilities
for query_key, env_name in (
    ("gpu_type", "FORGE_GPU_TYPE"),
    ("region", "FORGE_REGION"),
    ("max_latency_ms", "FORGE_SEARCH_MAX_LATENCY_MS"),
    ("max_warm_request_cost_usd", "FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD"),
):
    value = env_value(env_name)
    if value:
        params[query_key] = value
if os.environ.get("FORGE_SEARCH_PARETO_ONLY", "").strip().lower() in {"1", "true", "yes", "on"}:
    params["pareto_only"] = "true"
print("/v1/search?" + urlencode(params, doseq=True))')"
# Print the ranked results. Assigned values are shell-quoted with shlex.quote;
# free text placed after "#" is collapsed to a single line first.
curl -sS --fail-with-body "$(forge_api_url "$ranked_search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

def one_line(value):
    # API text lands in shell comments that users paste into a terminal;
    # collapsing whitespace keeps a stray line break from starting a new command.
    return " ".join(str(value).split())

payload = json.load(sys.stdin)
results = payload.get("results") or []
if not results:
    filters = payload.get("filters") or {}
    print("No ranked models matched the requested search filters.", file=sys.stderr)
    if filters:
        print(json.dumps({"filters": filters}, indent=2), file=sys.stderr)
    raise SystemExit(1)

printed = 0
for item in results[:10]:
    model = item.get("model") or {}
    model_family = model.get("model_family")
    model_ref = model_family or model.get("slug")
    if not model_ref:
        continue
    target = item.get("matched_target") or {}
    assignments = [f"MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}"]
    # A version key is only exported alongside a family reference.
    version_key = model.get("version_key") if model_family else None
    if version_key:
        assignments.append(f"FORGE_MODEL_VERSION={shlex.quote(str(version_key))}")
    gpu_type = target.get("gpu_type")
    if gpu_type:
        assignments.append(f"FORGE_GPU_TYPE={shlex.quote(str(gpu_type))}")
    region = target.get("region")
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(str(region))}")
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    comment = str(model.get("name") or model_ref)
    if details:
        comment += "; " + ", ".join(details)
    print("export " + " ".join(assignments) + "  # " + one_line(comment))
    printed += 1
    for reason in (item.get("match_reasons") or [])[:3]:
        print("#   " + one_line(reason))

# Results without a slug or family are skipped above; report that instead of
# finishing silently with nothing to copy.
if not printed:
    print("Ranked results did not include a model slug or family.", file=sys.stderr)
    raise SystemExit(1)'

Model versions

# Model versions: GET /v1/model-families/<MODEL_OR_FAMILY_SLUG> and print one
# copy-paste "export FORGE_MODEL_VERSION=..." line per version, annotated with
# its label and any default/selected/stability/status badges.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# The path builder reads the slug from os.environ, so export it here; a plain
# (non-exported) shell assignment would otherwise be invisible to Python.
# ${VAR:?message} aborts with the message when the slug is unset or empty.
export MODEL_OR_FAMILY_SLUG="${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from search or route finder output before listing versions.}"
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Percent-encode the slug as a single path segment (safe="" also encodes "/").
family_path="$(python3 -c 'import os
import sys
from urllib.parse import quote

model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    print("Set MODEL_OR_FAMILY_SLUG from search or route finder output before listing versions.", file=sys.stderr)
    raise SystemExit(1)
print("/v1/model-families/" + quote(model, safe=""))')"
# Print the versions. The version key is shell-quoted with shlex.quote; free
# text placed after "#" is collapsed to a single line first.
curl -sS --fail-with-body "$(forge_api_url "$family_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

def one_line(value):
    # API text lands in shell comments that users paste into a terminal;
    # collapsing whitespace keeps a stray line break from starting a new command.
    return " ".join(str(value).split())

family = json.load(sys.stdin)
versions = family.get("versions") or []
default_key = family.get("default_version_key")
selected_key = family.get("selected_version_key")
if not versions:
    print("No active versions returned for this model family.", file=sys.stderr)
    raise SystemExit(1)

printed = 0
for version in versions:
    version_key = version.get("version_key") or version.get("slug")
    if not version_key:
        continue
    badges = []
    if version_key == default_key or version.get("is_default"):
        badges.append("default")
    if version_key == selected_key:
        badges.append("selected")
    stability = version.get("stability")
    if stability:
        badges.append(str(stability))
    status = version.get("status")
    if status:
        badges.append(str(status))
    comment = str(version.get("version_label") or version_key)
    if badges:
        comment += " (" + ", ".join(badges) + ")"
    print(f"export FORGE_MODEL_VERSION={shlex.quote(str(version_key))}  # {one_line(comment)}")
    printed += 1

# Versions without a key or slug are skipped above; report that instead of
# finishing silently with nothing to copy.
if not printed:
    print("Returned versions did not include a version key or slug.", file=sys.stderr)
    raise SystemExit(1)'

Model reliability

# Model reliability: GET /v1/models/<MODEL_OR_FAMILY_SLUG>/reliability and print
# a summary line, any filters the response echoes back, and one copy-paste
# "export FORGE_GPU_TYPE=... [FORGE_REGION=...]" line per distinct
# fastest / lowest-cost supported target.
# Optional FORGE_MODEL_VERSION, FORGE_GPU_TYPE and FORGE_REGION are read from
# os.environ by the path builder, so they must be exported to take effect.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# ${VAR:?message} aborts with the message when the slug is unset or empty.
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from search or route finder output before checking reliability.}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Percent-encode the slug as a single path segment and append only the filters
# that are actually set.
reliability_path="$(python3 -c 'import os
from urllib.parse import quote, urlencode

model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    raise SystemExit("Set MODEL_OR_FAMILY_SLUG from search or route finder output before checking reliability.")
params = {}
model_version = os.environ.get("FORGE_MODEL_VERSION", "").strip()
if model_version:
    params["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE", "").strip()
if gpu_type:
    params["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION", "").strip()
if region:
    params["region"] = region
path = "/v1/models/" + quote(model, safe="") + "/reliability"
if params:
    path += "?" + urlencode(params)
print(path)')"
# Print the report. Assigned values are shell-quoted with shlex.quote; free
# text placed after "#" is collapsed to a single line first.
curl -sS --fail-with-body "$(forge_api_url "$reliability_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

def one_line(value):
    # API text lands in shell comments that users paste into a terminal;
    # collapsing whitespace keeps a stray line break from starting a new command.
    return " ".join(str(value).split())

payload = json.load(sys.stdin)
slug = payload.get("slug")
reliability_status = payload.get("reliability_status")
supported_rows = payload.get("supported_rows", 0)
total_rows = payload.get("total_rows", 0)
print(f"{slug} reliability={reliability_status} supported={supported_rows}/{total_rows}")
filters = payload.get("filters") or {}
if filters:
    print("filters: " + ", ".join(f"{key}={value}" for key, value in filters.items()))

def describe_target(target):
    # Summarise latency, cost, and observed success for one target; fall back
    # to its status, then to the word "supported", when no metric is present.
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    gpu_hour_cost = target.get("cost_per_gpu_hour_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    elif gpu_hour_cost is not None:
        details.append(f"gpu_hour_usd={gpu_hour_cost}")
    success_rate = target.get("observed_success_rate")
    if isinstance(success_rate, (int, float)):
        details.append(f"success={success_rate:.0%}")
    return ", ".join(details) or target.get("status") or "supported"

# Group by (gpu_type, region) so a target that is both fastest and cheapest is
# printed once with both labels.
exports = {}
for label, key in (
    ("fastest supported", "fastest_supported_target"),
    ("lowest-cost supported", "lowest_cost_supported_target"),
):
    target = payload.get(key) or {}
    gpu_type = target.get("gpu_type")
    if not gpu_type:
        continue
    identity = (str(gpu_type), str(target.get("region") or ""))
    exports.setdefault(identity, {"labels": [], "target": target})["labels"].append(label)

if not exports:
    # Failure diagnostics go to stderr with the message so stdout only ever
    # carries the summary and export lines.
    print("No supported GPU/region target returned.", file=sys.stderr)
    print(json.dumps({
        "status_counts": payload.get("status_counts", {}),
        "failure_reason_counts": payload.get("failure_reason_counts", {}),
    }, indent=2), file=sys.stderr)
    raise SystemExit(1)

for (gpu_type, region), entry in exports.items():
    assignments = [f"FORGE_GPU_TYPE={shlex.quote(gpu_type)}"]
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(region)}")
    labels = " + ".join(entry["labels"])
    details = describe_target(entry["target"])
    print("export " + " ".join(assignments) + "  # " + one_line(f"{labels}: {details}"))'

Deploy action guide

# Deploy action guide: POST (with no request body) to
# /v1/models/<MODEL_OR_FAMILY_SLUG>/deploy and print the returned guide:
# a status line, message, routing hints, selected related endpoints, request
# templates, and next steps.
# Optional FORGE_MODEL_VERSION, FORGE_GPU_TYPE and FORGE_REGION are read from
# os.environ by the path builder, so they must be exported to take effect.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# ${VAR:?message} aborts with the message when the slug is unset or empty.
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Percent-encode the slug as a single path segment and append only the filters
# that are actually set.
action_guide_path="$(python3 -c 'import os
from urllib.parse import quote, urlencode

model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    raise SystemExit("Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.")
params = {}
model_version = os.environ.get("FORGE_MODEL_VERSION", "").strip()
if model_version:
    params["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE", "").strip()
if gpu_type:
    params["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION", "").strip()
if region:
    params["region"] = region
path = "/v1/models/" + quote(model, safe="") + "/deploy"
if params:
    path += "?" + urlencode(params)
print(path)')"
# Missing response fields fall back to "model", "deploy", "unknown", and
# "external workflow" in the status line. Only the related endpoints named in
# the key tuple below are printed, in that order.
curl -sS --fail-with-body "$(forge_api_url "$action_guide_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -X POST \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, sys

payload = json.load(sys.stdin)
slug = payload.get("slug") or "model"
action = payload.get("action") or "deploy"
status = payload.get("status") or "unknown"
managed_by = payload.get("managed_by") or "external workflow"
print(f"{slug} {action}_guide status={status} managed_by={managed_by}")
message = payload.get("message")
if message:
    print("message: " + message)
routing_hints = payload.get("routing_hints") or {}
if routing_hints:
    print("routing_hints: " + ", ".join(f"{key}={value}" for key, value in routing_hints.items()))
print("guide_only: this request does not create private endpoints or optimized model builds")
related = payload.get("related_endpoints") or {}
for key in ("regional_deployment", "reliability", "run_estimate", "status", "start", "keep_warm", "stop"):
    value = related.get(key)
    if value:
        print(f"{key}: {value}")
templates = payload.get("request_templates") or {}
if templates:
    print("request_templates:")
    print(json.dumps(templates, indent=2, sort_keys=True))
steps = payload.get("next_steps") or []
if steps:
    print("next_steps:")
    for step in steps:
        print(f"- {step}")'

Optimize action guide

# Optimize action guide: POST (with no request body) to
# /v1/models/<MODEL_OR_FAMILY_SLUG>/optimize and print the returned guide:
# a status line, message, routing hints, selected related endpoints, request
# templates, and next steps.
# Optional FORGE_MODEL_VERSION, FORGE_GPU_TYPE and FORGE_REGION are read from
# os.environ by the path builder, so they must be exported to take effect.
set -euo pipefail
# Keep a caller-provided FORGE_API_BASE; otherwise fall back to the proxy path.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# ${VAR:?message} aborts with the message when the slug is unset or empty.
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.}
# Stop early when the key is unset, empty, or still the placeholder text.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT, and
# drops the endpoint's leading /v1 when the base already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Percent-encode the slug as a single path segment and append only the filters
# that are actually set.
action_guide_path="$(python3 -c 'import os
from urllib.parse import quote, urlencode

model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    raise SystemExit("Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.")
params = {}
model_version = os.environ.get("FORGE_MODEL_VERSION", "").strip()
if model_version:
    params["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE", "").strip()
if gpu_type:
    params["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION", "").strip()
if region:
    params["region"] = region
path = "/v1/models/" + quote(model, safe="") + "/optimize"
if params:
    path += "?" + urlencode(params)
print(path)')"
# Missing response fields fall back to "model", "optimize", "unknown", and
# "external workflow" in the status line. Only the related endpoints named in
# the key tuple below are printed, in that order.
curl -sS --fail-with-body "$(forge_api_url "$action_guide_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -X POST \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, sys

payload = json.load(sys.stdin)
slug = payload.get("slug") or "model"
action = payload.get("action") or "optimize"
status = payload.get("status") or "unknown"
managed_by = payload.get("managed_by") or "external workflow"
print(f"{slug} {action}_guide status={status} managed_by={managed_by}")
message = payload.get("message")
if message:
    print("message: " + message)
routing_hints = payload.get("routing_hints") or {}
if routing_hints:
    print("routing_hints: " + ", ".join(f"{key}={value}" for key, value in routing_hints.items()))
print("guide_only: this request does not create private endpoints or optimized model builds")
related = payload.get("related_endpoints") or {}
for key in ("regional_deployment", "reliability", "run_estimate", "status", "start", "keep_warm", "stop"):
    value = related.get(key)
    if value:
        print(f"{key}: {value}")
templates = payload.get("request_templates") or {}
if templates:
    print("request_templates:")
    print(json.dumps(templates, indent=2, sort_keys=True))
steps = payload.get("next_steps") or []
if steps:
    print("next_steps:")
    for step in steps:
        print(f"- {step}")'

OpenAI route finder

# OpenAI route finder: lists up to ten catalog models that are served through
# an OpenAI-compatible route and prints one copyable export line per model.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Stop early when the key is unset, empty, or still the documentation placeholder.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT. When
# the base already ends in /v1 and ENDPOINT starts with /v1, the duplicate
# prefix is dropped so the URL contains /v1 only once.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Fetch the catalog and keep only models mapped to an OpenAI-compatible route.
curl -sS --fail-with-body "$(forge_api_url '/v1/models?sort=recently_added')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

payload = json.load(sys.stdin)
# The catalog may arrive as a bare list or wrapped under "models" or "data".
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    # Coerce to a string with one leading slash and no trailing slash.
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
openai_endpoints = {
    "/v1/chat/completions",
    "/v1/completions",
    "/v1/embeddings",
}
rows = []
for model in models:
    if not isinstance(model, dict):
        # Ignore malformed catalog entries instead of crashing on .get().
        continue
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    if endpoint not in openai_endpoints:
        continue
    model_ref = model.get("model_family") or model.get("slug")
    if not model_ref:
        # Without a family or slug the export line would read "None".
        continue
    name = model.get("name") or model_ref
    # A null status is reported as "unknown", the same as a missing one.
    status = str(model.get("status") or "unknown")
    rows.append((endpoint, model_ref, name, status))

if not rows:
    print("No OpenAI-compatible routes returned. Use the native route finder for /v1/inference/... routes.", file=sys.stderr)
    raise SystemExit(1)
for endpoint, model_ref, name, status in rows[:10]:
    print(f"export MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}  # {endpoint} - {name} ({status})")'

Native route finder

# Native route finder: lists up to ten catalog models whose mapped endpoint is
# NOT one of the OpenAI-compatible routes, pairing each model or family slug
# with its native route in a copyable export line.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Stop early when the key is unset, empty, or still the documentation placeholder.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac
# forge_api_url ENDPOINT
# Prints FORGE_API_BASE (minus one trailing slash) joined with ENDPOINT. When
# the base already ends in /v1 and ENDPOINT starts with /v1, the duplicate
# prefix is dropped so the URL contains /v1 only once.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Fetch the catalog and keep only models that expose a native route.
curl -sS --fail-with-body "$(forge_api_url '/v1/models?sort=recently_added')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

payload = json.load(sys.stdin)
# The catalog may arrive as a bare list or wrapped under "models" or "data".
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    # Coerce to a string with one leading slash and no trailing slash.
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
openai_endpoints = {
    "/v1/chat/completions",
    "/v1/completions",
    "/v1/embeddings",
}
rows = []
for model in models:
    if not isinstance(model, dict):
        # Ignore malformed catalog entries instead of crashing on .get().
        continue
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    if not endpoint or endpoint in openai_endpoints:
        continue
    # Reading the family into a local keeps this program free of single
    # quotes, which would otherwise need shell escaping inside python3 -c.
    family = model.get("model_family")
    if endpoint.startswith("/v1/inference/") and family:
        # Show the family-level route instead of the per-model one.
        endpoint = f"/v1/inference/{family}"
    model_ref = family or model.get("slug")
    if not model_ref:
        # Without a family or slug the export line would read "None".
        continue
    name = model.get("name") or model_ref
    # A null status is reported as "unknown", the same as a missing one.
    status = str(model.get("status") or "unknown")
    rows.append((endpoint, model_ref, name, status))

if not rows:
    print("No native inference routes returned. Use the OpenAI-compatible snippets for chat, completions, and embeddings.", file=sys.stderr)
    raise SystemExit(1)
for endpoint, model_ref, name, status in rows[:10]:
    print(f"export MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}  # {endpoint} - {name} ({status})")'

Find chat model

# Find chat model: lists up to ten text models mapped to /v1/chat/completions,
# warm models first, as copyable export lines.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Refuse to run with a missing, empty, or placeholder API key.
if [ -z "${FORGE_API_KEY:-}" ] || [ "${FORGE_API_KEY:-}" = 'replace-with-your-forge-api-key' ]; then
  echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
  exit 1
fi
# forge_api_url ENDPOINT
# Prints the request URL for ENDPOINT under FORGE_API_BASE, collapsing a
# doubled /v1 when the base itself already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Query the text-modality catalog and rank chat-capable models by readiness.
curl -sS --fail-with-body "$(forge_api_url '/v1/models?modality=text&sort=recently_added')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys

CHAT_ENDPOINT = "/v1/chat/completions"
# Lower rank sorts first; any other status gets rank 3.
STATUS_ORDER = {"warm": 0, "starting": 1, "cold": 2}


def catalog_entries(body):
    # Accept a bare list or a dict wrapping the list under "models" or "data".
    if isinstance(body, list):
        return body
    if isinstance(body, dict):
        return body.get("models") or body.get("data") or []
    return []


def mapped_endpoint(entry):
    # Normalized playground endpoint: one leading slash, no trailing slash.
    raw = ((entry.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint")
    text = str(raw or "").strip()
    if not text:
        return ""
    if not text.startswith("/"):
        text = "/" + text
    return text.rstrip("/") or "/"


candidates = []
for entry in catalog_entries(json.load(sys.stdin)):
    if mapped_endpoint(entry) != CHAT_ENDPOINT:
        continue
    ref = entry.get("model_family") or entry.get("slug")
    if not ref:
        continue
    state = str(entry.get("status") or "unknown")
    candidates.append((ref, entry.get("name") or ref, state))

if not candidates:
    print("No chat-capable models returned. Open the Forge catalog at / for native inference routes.", file=sys.stderr)
    raise SystemExit(1)
# list.sort is stable, so models with equal rank keep their catalog order.
candidates.sort(key=lambda row: STATUS_ORDER.get(row[2].lower(), 3))
for ref, label, state in candidates[:10]:
    print(f"export MODEL_OR_FAMILY_SLUG={shlex.quote(ref)}  # {label} ({state})")'

Chat curl

set -euo pipefail
# Forge API smoke test
# Sends one chat completion for MODEL_OR_FAMILY_SLUG and prints the reply text.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from the model picker output}
# Refuse to run with a missing, empty, or placeholder API key.
if [ -z "${FORGE_API_KEY:-}" ] || [ "${FORGE_API_KEY:-}" = 'replace-with-your-forge-api-key' ]; then
  echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
  exit 1
fi
# forge_api_url ENDPOINT
# Prints the request URL for ENDPOINT under FORGE_API_BASE, collapsing a
# doubled /v1 when the base itself already ends in /v1.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}
# Stage 1 builds the JSON request body, stage 2 posts it (curl reads the body
# from stdin via -d @-), stage 3 prints the assistant text or the raw JSON.
python3 - <<'PY' |
import json
import os

# Required model plus a single user turn.
body = {
    "model": os.environ["MODEL_OR_FAMILY_SLUG"],
    "messages": [
        {"role": "user", "content": "Write a one sentence status update."},
    ],
}
# Optional routing overrides are added only when the variable is non-empty.
for field, variable in (
    ("model_version", "FORGE_MODEL_VERSION"),
    ("gpu_type", "FORGE_GPU_TYPE"),
    ("region", "FORGE_REGION"),
):
    override = os.environ.get(variable)
    if override:
        body[field] = override
print(json.dumps(body))
PY
curl -sS --fail-with-body "$(forge_api_url '/v1/chat/completions')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -X POST \
  -H "Authorization: Bearer ${FORGE_API_KEY}" \
  -H "Content-Type: application/json" \
  -d @- | \
python3 -c 'import json, sys

reply = json.load(sys.stdin)
# A missing or empty "choices" list is treated as one empty choice.
choices = reply.get("choices") or [{}]
first_choice = choices[0]
text = (first_choice.get("message") or {}).get("content")
if not text:
    # No assistant text: show the whole response for debugging.
    print(json.dumps(reply, indent=2))
else:
    print(text)'

Python SDK

import os

from openai import OpenAI

# Resolve the OpenAI-compatible base URL. An unset OR empty FORGE_API_BASE
# falls back to the default, matching the shell snippets (${FORGE_API_BASE:-...}).
api_base = (os.environ.get("FORGE_API_BASE") or "/api/proxy").rstrip("/")
# FORGE_OPENAI_BASE_URL, when set, overrides the derived base entirely.
openai_base = os.environ.get("FORGE_OPENAI_BASE_URL", "").strip().rstrip("/")
if not openai_base:
    # The OpenAI client expects the base to end in /v1; append it only once.
    openai_base = api_base if api_base.endswith("/v1") else f"{api_base}/v1"

# Parse the request timeout defensively: an empty, non-numeric, non-finite, or
# non-positive value falls back to 600 seconds instead of raising a traceback
# (the same fallback the JavaScript snippet applies).
default_timeout_seconds = 600.0
try:
    request_timeout_seconds = float(os.environ.get("FORGE_REQUEST_TIMEOUT_SECONDS") or default_timeout_seconds)
except ValueError:
    request_timeout_seconds = default_timeout_seconds
if not 0 < request_timeout_seconds < float("inf"):
    # Comparisons with NaN are False, so NaN also lands here.
    request_timeout_seconds = default_timeout_seconds

api_key = os.environ.get("FORGE_API_KEY")
if not api_key or api_key == "replace-with-your-forge-api-key":
    raise SystemExit("Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.")

model = os.environ.get("MODEL_OR_FAMILY_SLUG")
if not model:
    raise SystemExit("Set MODEL_OR_FAMILY_SLUG from the model picker output.")

# Optional routing overrides are passed through extra_body because they are
# not named parameters of chat.completions.create.
model_version = os.environ.get("FORGE_MODEL_VERSION")
extra_body = {}
if model_version:
    extra_body["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE")
if gpu_type:
    extra_body["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION")
if region:
    extra_body["region"] = region

client = OpenAI(
    api_key=api_key,
    base_url=openai_base,
    timeout=request_timeout_seconds,
)

response = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "user", "content": "Write a one sentence status update."},
    ],
    # Omit extra_body entirely when no override is set.
    **({"extra_body": extra_body} if extra_body else {}),
)
# Print the assistant text when present; otherwise dump the full response.
message = response.choices[0].message if response.choices else None
content = message.content if message else None
if isinstance(content, str) and content.strip():
    print(content)
else:
    print(response.model_dump_json(indent=2))

JavaScript fetch

/**
 * Sends one chat completion request to the Forge `/v1/chat/completions` route
 * and prints the assistant reply, or the full JSON body when no text is returned.
 *
 * Environment variables read:
 * - `FORGE_API_KEY` (required): bearer token; the documentation placeholder is rejected.
 * - `MODEL_OR_FAMILY_SLUG` (required): value sent as `model`.
 * - `FORGE_API_BASE` (optional): API base; unset or empty falls back to `/api/proxy`.
 * - `FORGE_REQUEST_TIMEOUT_SECONDS` (optional): request timeout; invalid or
 *   non-positive values fall back to 600 seconds.
 * - `FORGE_MODEL_VERSION`, `FORGE_GPU_TYPE`, `FORGE_REGION` (optional): added
 *   to the request body when non-empty.
 *
 * @returns {Promise<void>}
 * @throws {Error} When required variables are missing, the response status is
 *   not OK, or the response body is not valid JSON.
 */
async function main() {
  // `||` rather than `??`: an empty FORGE_API_BASE must fall back to the
  // default, matching the shell snippets' `${FORGE_API_BASE:-...}` behavior.
  const apiBase = (process.env.FORGE_API_BASE || "/api/proxy").replace(/\/+$/, "");
  const apiKey = process.env.FORGE_API_KEY;
  const requestTimeoutSeconds = Number(process.env.FORGE_REQUEST_TIMEOUT_SECONDS ?? "600");
  const requestTimeoutMs =
    Number.isFinite(requestTimeoutSeconds) && requestTimeoutSeconds > 0 ? requestTimeoutSeconds * 1000 : 600000;
  if (!apiKey || apiKey === "replace-with-your-forge-api-key") {
    throw new Error("Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.");
  }
  const model = process.env.MODEL_OR_FAMILY_SLUG;
  if (!model) {
    throw new Error("Set MODEL_OR_FAMILY_SLUG from the model picker output.");
  }

  // Joins the base and endpoint, dropping the endpoint's `/v1` prefix when the
  // base already ends in `/v1` so the URL never contains `/v1/v1`.
  function forgeApiUrl(endpoint) {
    if (apiBase.endsWith("/v1") && (endpoint === "/v1" || endpoint.startsWith("/v1/") || endpoint.startsWith("/v1?"))) {
      return `${apiBase}${endpoint.slice(3)}`;
    }
    return `${apiBase}${endpoint}`;
  }

  const payload = {
    model,
    messages: [
      { role: "user", content: "Write a one sentence status update." },
    ],
  };
  // Optional routing overrides are sent only when set to a non-empty value.
  const modelVersion = process.env.FORGE_MODEL_VERSION;
  if (modelVersion) payload.model_version = modelVersion;
  const gpuType = process.env.FORGE_GPU_TYPE;
  if (gpuType) payload.gpu_type = gpuType;
  const region = process.env.FORGE_REGION;
  if (region) payload.region = region;

  const response = await fetch(forgeApiUrl("/v1/chat/completions"), {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(requestTimeoutMs),
  });
  // Read the body as text first so error responses can be reported verbatim.
  const responseText = await response.text();
  if (!response.ok) {
    throw new Error(`/v1/chat/completions failed with ${response.status}: ${responseText || response.statusText}`);
  }

  let data;
  try {
    data = responseText.trim() ? JSON.parse(responseText) : {};
  } catch (error) {
    // Name the route instead of surfacing a bare SyntaxError; keep the
    // original parse error as the cause.
    throw new Error("/v1/chat/completions returned a response that is not valid JSON", { cause: error });
  }
  // `data?.` guards against a JSON `null` body.
  const content = data?.choices?.[0]?.message?.content;
  if (typeof content === "string" && content.trim()) {
    console.log(content);
  } else {
    console.log(JSON.stringify(data, null, 2));
  }
}

/**
 * Logs a failure from main() to stderr and ends the process with exit code 1.
 *
 * @param {unknown} failure - The rejection reason from main().
 */
function reportFailureAndExit(failure) {
  console.error(failure);
  process.exit(1);
}

main().catch(reportFailureAndExit);