Forge exposes an OpenAI-compatible API surface plus a generic inference endpoint for native schemas. Start with the Quickstart, then dive into the full API reference.
Use the same API key with Forge's proxy base URL. List or search public models, narrow to a chat-capable model, then run one exported MODEL_OR_FAMILY_SLUG line. The reliability snippet returns fastest and lowest-cost verified GPU/region exports before inference. The smoke test copies as one block, prefers a warm chat-capable model when available, and verifies auth, model discovery, and chat routing together. The native route snippet pairs model or family slugs with their public /v1/inference/... routes, while the OpenAI route finder lists chat, completions, and embeddings model values.
These snippets call action-guide endpoints only. They print related endpoints, request templates, and next steps without creating private endpoints or optimized model builds.
Client auth: Set FORGE_API_KEY to a real Forge API key before running copied curl, fetch, or SDK snippets. Browser SSO only authenticates this web session.
Open Account
Effective base: /api/proxy/v1
Auto-picks a warm chat-capable model when available, sends one completion, and prints the response text.
# Smoke test: pick a chat-capable model, send one chat completion, and print
# the reply text. Requires FORGE_API_KEY; honours FORGE_API_BASE and
# FORGE_REQUEST_TIMEOUT_SECONDS when set.
set -euo pipefail
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Choose the model: keep entries whose mapped endpoint is /v1/chat/completions,
# rank them warm < starting < cold < anything else, and break ties by the order
# the API returned them.
MODEL_OR_FAMILY_SLUG="$(
  curl -sS --fail-with-body "$(forge_api_url '/v1/models?modality=text&sort=recently_added')" \
    --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
    -H "Authorization: Bearer ${FORGE_API_KEY}" | \
  python3 -c 'import json, sys
payload = json.load(sys.stdin)
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
def chat_status_rank(status):
    status = str(status or "").lower()
    return {"warm": 0, "starting": 1, "cold": 2}.get(status, 3)
rows = []
for model in models:
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    if endpoint != "/v1/chat/completions":
        continue
    model_ref = model.get("model_family") or model.get("slug")
    if not model_ref:
        continue
    rows.append((chat_status_rank(model.get("status")), len(rows), model_ref))
if rows:
    print(min(rows)[2])
    raise SystemExit(0)
print("No chat-capable models returned. Open the Forge catalog at / for native inference routes.", file=sys.stderr)
raise SystemExit(1)'
)"
export MODEL_OR_FAMILY_SLUG
echo "Using ${MODEL_OR_FAMILY_SLUG}"

# Build the JSON request body in Python (so the model value is escaped
# correctly) and stream it to curl on stdin via -d @-.
python3 - <<'PY' |
import json
import os
print(json.dumps({
    "model": os.environ["MODEL_OR_FAMILY_SLUG"],
    "messages": [
        {"role": "user", "content": "Write a one sentence status update."},
    ],
}))
PY
curl -sS --fail-with-body "$(forge_api_url '/v1/chat/completions')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" \
  -H "Content-Type: application/json" \
  -d @- | \
python3 -c 'import json, sys
data = json.load(sys.stdin)
message = (((data.get("choices") or [{}])[0].get("message") or {}).get("content"))
if message:
    print(message)
else:
    print(json.dumps(data, indent=2))'

# Next snippet begins here.
export FORGE_API_BASE='/api/proxy'
# Shell environment defaults used by the copied snippets. The placeholder key
# is rejected by the snippets that check FORGE_API_KEY.
export FORGE_API_KEY="${FORGE_API_KEY:-replace-with-your-forge-api-key}"
export FORGE_REQUEST_TIMEOUT_SECONDS="${FORGE_REQUEST_TIMEOUT_SECONDS:-600}"

# Install or upgrade the Python client libraries.
python3 -m pip install --upgrade openai requests

# Next snippet begins here.
set -euo pipefail
# Status check: fetch /v1/status (no Authorization header is sent) and print
# the overall status plus a name=status summary per region.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

curl -sS --fail-with-body "$(forge_api_url '/v1/status')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" | \
python3 -c 'import json, sys
payload = json.load(sys.stdin)
regions = payload.get("regions") or []
status = payload.get("status") or "unknown"
region_summary = ", ".join(
    f"{row.get('\''name'\'')}={row.get('\''status'\'')}"
    for row in regions
    if isinstance(row, dict)
)
print(f"Forge status={status} regions={len(regions)}")
if region_summary:
    print(region_summary)'

# Next snippet begins here.
set -euo pipefail
# OpenAPI summary: fetch /v1/openapi.json (no Authorization header is sent)
# and print the API title, version, OpenAPI version, and number of paths.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

curl -sS --fail-with-body "$(forge_api_url '/v1/openapi.json')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" | \
python3 -c 'import json, sys
schema = json.load(sys.stdin)
info = schema.get("info") or {}
paths = schema.get("paths") or {}
title = info.get("title") or "Forge API"
version = info.get("version") or "unknown"
openapi_version = schema.get("openapi") or "unknown"
print(f"{title} {version} openapi={openapi_version} paths={len(paths)}")'

# Next snippet begins here.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# List models: download /v1/models into a temp file, then pretty-print it on
# success or dump the body to stderr on failure.

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Print a saved response body: a placeholder when the file is empty or missing,
# otherwise JSON pretty-printed with python3 when it is installed, falling back
# to the raw file contents when python3 is absent or the body is not JSON.
forge_print_response() {
  response_file="$1"
  if [ ! -s "$response_file" ]; then
    printf '%s\n' '(empty response)'
    return 0
  fi
  if command -v python3 >/dev/null 2>&1; then
    python3 -m json.tool "$response_file" 2>/dev/null || cat "$response_file"
  else
    cat "$response_file"
  fi
}

response_file="$(mktemp)"
if curl -sS --fail-with-body "$(forge_api_url '/v1/models')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" \
  -o "$response_file"; then
  forge_print_response "$response_file"
  status=$?
  rm -f "$response_file"
  # Re-raise the saved status through a subshell so $? reflects it without
  # calling exit in the current shell.
  (exit "$status")
else
  status=$?
  cat "$response_file" >&2
  rm -f "$response_file"
  (exit "$status")
fi

# Next snippet begins here.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Model search: query /v1/models with FORGE_MODEL_SEARCH_QUERY (default
# "protein") and print up to ten "ref endpoint # name (status, category)" rows.
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'protein'}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# URL-encode the query in Python; a blank query falls back to "protein".
search_path="$(python3 -c 'import os
from urllib.parse import urlencode
query = os.environ.get("FORGE_MODEL_SEARCH_QUERY", "protein").strip()
if not query:
    query = "protein"
print("/v1/models?" + urlencode({"search": query, "sort": "recently_added"}))')"

curl -sS --fail-with-body "$(forge_api_url "$search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, os, sys
payload = json.load(sys.stdin)
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
query = os.environ.get("FORGE_MODEL_SEARCH_QUERY", "protein").strip()
rows = []
for model in models:
    model_ref = model.get("model_family") or model.get("slug")
    if not model_ref:
        continue
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    name = model.get("name") or model_ref
    status = model.get("status", "unknown")
    category = model.get("category", "uncategorized")
    rows.append((model_ref, endpoint or "route pending", name, status, category))
if not rows:
    print(f"No models matched {query!r}. Try another FORGE_MODEL_SEARCH_QUERY.", file=sys.stderr)
    raise SystemExit(1)
for model_ref, endpoint, name, status, category in rows[:10]:
    print(f"{model_ref} {endpoint} # {name} ({status}, {category})")'

# Next snippet begins here.
set -euo pipefail
# Ranked search (defaults: capability "chat", sort "latency"): query
# /v1/search and print one copyable "export ..." line per result, followed by
# up to three match reasons as comments.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'chat'}
export FORGE_SEARCH_CAPABILITIES=${FORGE_SEARCH_CAPABILITIES:-'chat'}
export FORGE_SEARCH_SORT=${FORGE_SEARCH_SORT:-'latency'}
export FORGE_SEARCH_LIMIT=${FORGE_SEARCH_LIMIT:-'5'}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Build the query string from the FORGE_SEARCH_* / FORGE_GPU_TYPE /
# FORGE_REGION environment variables. FORGE_SEARCH_CAPABILITIES may be comma-
# or newline-separated; each value becomes its own "capability" parameter.
ranked_search_path="$(python3 -c 'import os
from urllib.parse import urlencode
def env_value(name):
    value = os.environ.get(name, "").strip()
    return value or None
params = {
    "q": os.environ.get("FORGE_MODEL_SEARCH_QUERY", "chat").strip() or "chat",
    "sort": env_value("FORGE_SEARCH_SORT") or "latency",
    "limit": env_value("FORGE_SEARCH_LIMIT") or "5",
}
capabilities = [
    value.strip()
    for value in os.environ.get("FORGE_SEARCH_CAPABILITIES", "chat").replace(",", "\n").splitlines()
    if value.strip()
]
if capabilities:
    params["capability"] = capabilities
for query_key, env_name in (
    ("gpu_type", "FORGE_GPU_TYPE"),
    ("region", "FORGE_REGION"),
    ("max_latency_ms", "FORGE_SEARCH_MAX_LATENCY_MS"),
    ("max_warm_request_cost_usd", "FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD"),
):
    value = env_value(env_name)
    if value:
        params[query_key] = value
if os.environ.get("FORGE_SEARCH_PARETO_ONLY", "").strip().lower() in {"1", "true", "yes", "on"}:
    params["pareto_only"] = "true"
print("/v1/search?" + urlencode(params, doseq=True))')"

curl -sS --fail-with-body "$(forge_api_url "$ranked_search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys
payload = json.load(sys.stdin)
results = payload.get("results") or []
if not results:
    filters = payload.get("filters") or {}
    print("No ranked models matched the requested search filters.", file=sys.stderr)
    if filters:
        print(json.dumps({"filters": filters}, indent=2), file=sys.stderr)
    raise SystemExit(1)
for item in results[:10]:
    model = item.get("model") or {}
    model_family = model.get("model_family")
    model_ref = model_family or model.get("slug")
    if not model_ref:
        continue
    target = item.get("matched_target") or {}
    assignments = [f"MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}"]
    # A version is only exported when the reference is a model family.
    version_key = model.get("version_key") if model_family else None
    if version_key:
        assignments.append(f"FORGE_MODEL_VERSION={shlex.quote(str(version_key))}")
    gpu_type = target.get("gpu_type")
    if gpu_type:
        assignments.append(f"FORGE_GPU_TYPE={shlex.quote(str(gpu_type))}")
    region = target.get("region")
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(str(region))}")
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    name = model.get("name") or model_ref
    suffix = f" # {name}"
    if details:
        suffix += "; " + ", ".join(details)
    print(f"export {'\'' '\''.join(assignments)}{suffix}")
    for reason in (item.get("match_reasons") or [])[:3]:
        print(f"# {reason}")'

# Next snippet begins here.
set -euo pipefail
# Ranked search (defaults: capability "chat", sort "cost"): query /v1/search
# and print one copyable "export ..." line per result, followed by up to three
# match reasons as comments.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'chat'}
export FORGE_SEARCH_CAPABILITIES=${FORGE_SEARCH_CAPABILITIES:-'chat'}
export FORGE_SEARCH_SORT=${FORGE_SEARCH_SORT:-'cost'}
export FORGE_SEARCH_LIMIT=${FORGE_SEARCH_LIMIT:-'5'}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Build the query string from the FORGE_SEARCH_* / FORGE_GPU_TYPE /
# FORGE_REGION environment variables. FORGE_SEARCH_CAPABILITIES may be comma-
# or newline-separated; each value becomes its own "capability" parameter.
ranked_search_path="$(python3 -c 'import os
from urllib.parse import urlencode
def env_value(name):
    value = os.environ.get(name, "").strip()
    return value or None
params = {
    "q": os.environ.get("FORGE_MODEL_SEARCH_QUERY", "chat").strip() or "chat",
    "sort": env_value("FORGE_SEARCH_SORT") or "cost",
    "limit": env_value("FORGE_SEARCH_LIMIT") or "5",
}
capabilities = [
    value.strip()
    for value in os.environ.get("FORGE_SEARCH_CAPABILITIES", "chat").replace(",", "\n").splitlines()
    if value.strip()
]
if capabilities:
    params["capability"] = capabilities
for query_key, env_name in (
    ("gpu_type", "FORGE_GPU_TYPE"),
    ("region", "FORGE_REGION"),
    ("max_latency_ms", "FORGE_SEARCH_MAX_LATENCY_MS"),
    ("max_warm_request_cost_usd", "FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD"),
):
    value = env_value(env_name)
    if value:
        params[query_key] = value
if os.environ.get("FORGE_SEARCH_PARETO_ONLY", "").strip().lower() in {"1", "true", "yes", "on"}:
    params["pareto_only"] = "true"
print("/v1/search?" + urlencode(params, doseq=True))')"

curl -sS --fail-with-body "$(forge_api_url "$ranked_search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys
payload = json.load(sys.stdin)
results = payload.get("results") or []
if not results:
    filters = payload.get("filters") or {}
    print("No ranked models matched the requested search filters.", file=sys.stderr)
    if filters:
        print(json.dumps({"filters": filters}, indent=2), file=sys.stderr)
    raise SystemExit(1)
for item in results[:10]:
    model = item.get("model") or {}
    model_family = model.get("model_family")
    model_ref = model_family or model.get("slug")
    if not model_ref:
        continue
    target = item.get("matched_target") or {}
    assignments = [f"MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}"]
    # A version is only exported when the reference is a model family.
    version_key = model.get("version_key") if model_family else None
    if version_key:
        assignments.append(f"FORGE_MODEL_VERSION={shlex.quote(str(version_key))}")
    gpu_type = target.get("gpu_type")
    if gpu_type:
        assignments.append(f"FORGE_GPU_TYPE={shlex.quote(str(gpu_type))}")
    region = target.get("region")
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(str(region))}")
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    name = model.get("name") or model_ref
    suffix = f" # {name}"
    if details:
        suffix += "; " + ", ".join(details)
    print(f"export {'\'' '\''.join(assignments)}{suffix}")
    for reason in (item.get("match_reasons") or [])[:3]:
        print(f"# {reason}")'

# Next snippet begins here.
set -euo pipefail
# Ranked search (defaults: capability "tool_calling", sort "latency"): query
# /v1/search and print one copyable "export ..." line per result, followed by
# up to three match reasons as comments.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
export FORGE_MODEL_SEARCH_QUERY=${FORGE_MODEL_SEARCH_QUERY:-'chat'}
export FORGE_SEARCH_CAPABILITIES=${FORGE_SEARCH_CAPABILITIES:-'tool_calling'}
export FORGE_SEARCH_SORT=${FORGE_SEARCH_SORT:-'latency'}
export FORGE_SEARCH_LIMIT=${FORGE_SEARCH_LIMIT:-'5'}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Build the query string from the FORGE_SEARCH_* / FORGE_GPU_TYPE /
# FORGE_REGION environment variables. FORGE_SEARCH_CAPABILITIES may be comma-
# or newline-separated; each value becomes its own "capability" parameter.
ranked_search_path="$(python3 -c 'import os
from urllib.parse import urlencode
def env_value(name):
    value = os.environ.get(name, "").strip()
    return value or None
params = {
    "q": os.environ.get("FORGE_MODEL_SEARCH_QUERY", "chat").strip() or "chat",
    "sort": env_value("FORGE_SEARCH_SORT") or "latency",
    "limit": env_value("FORGE_SEARCH_LIMIT") or "5",
}
capabilities = [
    value.strip()
    for value in os.environ.get("FORGE_SEARCH_CAPABILITIES", "tool_calling").replace(",", "\n").splitlines()
    if value.strip()
]
if capabilities:
    params["capability"] = capabilities
for query_key, env_name in (
    ("gpu_type", "FORGE_GPU_TYPE"),
    ("region", "FORGE_REGION"),
    ("max_latency_ms", "FORGE_SEARCH_MAX_LATENCY_MS"),
    ("max_warm_request_cost_usd", "FORGE_SEARCH_MAX_WARM_REQUEST_COST_USD"),
):
    value = env_value(env_name)
    if value:
        params[query_key] = value
if os.environ.get("FORGE_SEARCH_PARETO_ONLY", "").strip().lower() in {"1", "true", "yes", "on"}:
    params["pareto_only"] = "true"
print("/v1/search?" + urlencode(params, doseq=True))')"

curl -sS --fail-with-body "$(forge_api_url "$ranked_search_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys
payload = json.load(sys.stdin)
results = payload.get("results") or []
if not results:
    filters = payload.get("filters") or {}
    print("No ranked models matched the requested search filters.", file=sys.stderr)
    if filters:
        print(json.dumps({"filters": filters}, indent=2), file=sys.stderr)
    raise SystemExit(1)
for item in results[:10]:
    model = item.get("model") or {}
    model_family = model.get("model_family")
    model_ref = model_family or model.get("slug")
    if not model_ref:
        continue
    target = item.get("matched_target") or {}
    assignments = [f"MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))}"]
    # A version is only exported when the reference is a model family.
    version_key = model.get("version_key") if model_family else None
    if version_key:
        assignments.append(f"FORGE_MODEL_VERSION={shlex.quote(str(version_key))}")
    gpu_type = target.get("gpu_type")
    if gpu_type:
        assignments.append(f"FORGE_GPU_TYPE={shlex.quote(str(gpu_type))}")
    region = target.get("region")
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(str(region))}")
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    name = model.get("name") or model_ref
    suffix = f" # {name}"
    if details:
        suffix += "; " + ", ".join(details)
    print(f"export {'\'' '\''.join(assignments)}{suffix}")
    for reason in (item.get("match_reasons") or [])[:3]:
        print(f"# {reason}")'

# Next snippet begins here.
set -euo pipefail
# Family versions: fetch /v1/model-families/<MODEL_OR_FAMILY_SLUG> and print
# one "export FORGE_MODEL_VERSION=..." line per version, annotated with
# default/selected/stability/status badges.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Percent-encode the slug (including any "/") into a single path segment.
# MODEL_OR_FAMILY_SLUG is read from the environment, so it must be exported.
family_path="$(python3 -c 'import os
import sys
from urllib.parse import quote
model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    print("Set MODEL_OR_FAMILY_SLUG from search or route finder output before listing versions.", file=sys.stderr)
    raise SystemExit(1)
print("/v1/model-families/" + quote(model, safe=""))')"

curl -sS --fail-with-body "$(forge_api_url "$family_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys
family = json.load(sys.stdin)
versions = family.get("versions") or []
default_key = family.get("default_version_key")
selected_key = family.get("selected_version_key")
if not versions:
    print("No active versions returned for this model family.", file=sys.stderr)
    raise SystemExit(1)
for version in versions:
    version_key = version.get("version_key") or version.get("slug")
    if not version_key:
        continue
    badges = []
    if version_key == default_key or version.get("is_default"):
        badges.append("default")
    if version_key == selected_key:
        badges.append("selected")
    stability = version.get("stability")
    if stability:
        badges.append(str(stability))
    status = version.get("status")
    if status:
        badges.append(str(status))
    label = version.get("version_label") or version_key
    suffix = f" ({'\'', '\''.join(badges)})" if badges else ""
    print(f"export FORGE_MODEL_VERSION={shlex.quote(str(version_key))} # {label}{suffix}")'

# Next snippet begins here.
set -euo pipefail
# Reliability: fetch /v1/models/<MODEL_OR_FAMILY_SLUG>/reliability (optionally
# filtered by FORGE_MODEL_VERSION, FORGE_GPU_TYPE, FORGE_REGION) and print an
# "export FORGE_GPU_TYPE=... FORGE_REGION=..." line for the fastest and the
# lowest-cost supported targets.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from search or route finder output before checking reliability.}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Percent-encode the slug into one path segment and append only the filters
# that are set in the environment.
reliability_path="$(python3 -c 'import os
from urllib.parse import quote, urlencode
model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    raise SystemExit("Set MODEL_OR_FAMILY_SLUG from search or route finder output before checking reliability.")
params = {}
model_version = os.environ.get("FORGE_MODEL_VERSION", "").strip()
if model_version:
    params["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE", "").strip()
if gpu_type:
    params["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION", "").strip()
if region:
    params["region"] = region
path = "/v1/models/" + quote(model, safe="") + "/reliability"
if params:
    path += "?" + urlencode(params)
print(path)')"

curl -sS --fail-with-body "$(forge_api_url "$reliability_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys
payload = json.load(sys.stdin)
print(
    f"{payload.get('\''slug'\'')} reliability={payload.get('\''reliability_status'\'')} "
    f"supported={payload.get('\''supported_rows'\'', 0)}/{payload.get('\''total_rows'\'', 0)}"
)
filters = payload.get("filters") or {}
if filters:
    print("filters: " + ", ".join(f"{key}={value}" for key, value in filters.items()))
def describe_target(target):
    details = []
    request_ms = target.get("request_ms_p50") or target.get("request_ms")
    if request_ms is not None:
        details.append(f"p50={request_ms}ms")
    warm_cost = target.get("estimated_warm_request_cost_usd")
    if warm_cost is not None:
        details.append(f"warm_cost_usd={warm_cost}")
    elif target.get("cost_per_gpu_hour_usd") is not None:
        details.append(f"gpu_hour_usd={target['\''cost_per_gpu_hour_usd'\'']}")
    success_rate = target.get("observed_success_rate")
    if isinstance(success_rate, (int, float)):
        details.append(f"success={success_rate:.0%}")
    return ", ".join(details) or target.get("status") or "supported"
# Group by (gpu_type, region) so a target that is both fastest and cheapest
# is printed once with both labels.
exports = {}
for label, key in (
    ("fastest supported", "fastest_supported_target"),
    ("lowest-cost supported", "lowest_cost_supported_target"),
):
    target = payload.get(key) or {}
    gpu_type = target.get("gpu_type")
    if not gpu_type:
        continue
    identity = (str(gpu_type), str(target.get("region") or ""))
    exports.setdefault(identity, {"labels": [], "target": target})["labels"].append(label)
if not exports:
    print("No supported GPU/region target returned.", file=sys.stderr)
    # Diagnostics go to stderr, matching the ranked-search snippets, so stdout
    # only ever carries copyable output.
    print(json.dumps({
        "status_counts": payload.get("status_counts", {}),
        "failure_reason_counts": payload.get("failure_reason_counts", {}),
    }, indent=2), file=sys.stderr)
    raise SystemExit(1)
for (gpu_type, region), entry in exports.items():
    assignments = [f"FORGE_GPU_TYPE={shlex.quote(gpu_type)}"]
    if region:
        assignments.append(f"FORGE_REGION={shlex.quote(region)}")
    labels = " + ".join(entry["labels"])
    details = describe_target(entry["target"])
    print(f"export {'\'' '\''.join(assignments)} # {labels}: {details}")'

# Next snippet begins here.
set -euo pipefail
# Deploy action guide: POST to /v1/models/<MODEL_OR_FAMILY_SLUG>/deploy
# (optionally with model_version, gpu_type, region query parameters) and print
# the guide summary, related endpoints, request templates, and next steps.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Percent-encode the slug into one path segment and append only the routing
# hints that are set in the environment.
action_guide_path="$(python3 -c 'import os
from urllib.parse import quote, urlencode
model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    raise SystemExit("Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.")
params = {}
model_version = os.environ.get("FORGE_MODEL_VERSION", "").strip()
if model_version:
    params["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE", "").strip()
if gpu_type:
    params["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION", "").strip()
if region:
    params["region"] = region
path = "/v1/models/" + quote(model, safe="") + "/deploy"
if params:
    path += "?" + urlencode(params)
print(path)')"

curl -sS --fail-with-body "$(forge_api_url "$action_guide_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -X POST \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, sys
payload = json.load(sys.stdin)
slug = payload.get("slug") or "model"
action = payload.get("action") or "deploy"
status = payload.get("status") or "unknown"
managed_by = payload.get("managed_by") or "external workflow"
print(f"{slug} {action}_guide status={status} managed_by={managed_by}")
message = payload.get("message")
if message:
    print("message: " + message)
routing_hints = payload.get("routing_hints") or {}
if routing_hints:
    print("routing_hints: " + ", ".join(f"{key}={value}" for key, value in routing_hints.items()))
# NOTE(review): printed unconditionally; the source lost its indentation, so
# confirm this notice was not meant to sit under the routing_hints branch.
print("guide_only: this request does not create private endpoints or optimized model builds")
related = payload.get("related_endpoints") or {}
for key in ("regional_deployment", "reliability", "run_estimate", "status", "start", "keep_warm", "stop"):
    value = related.get(key)
    if value:
        print(f"{key}: {value}")
templates = payload.get("request_templates") or {}
if templates:
    print("request_templates:")
    print(json.dumps(templates, indent=2, sort_keys=True))
steps = payload.get("next_steps") or []
if steps:
    print("next_steps:")
    for step in steps:
        print(f"- {step}")'

# Next snippet begins here.
set -euo pipefail
# Optimize action guide: POST to /v1/models/<MODEL_OR_FAMILY_SLUG>/optimize
# (optionally with model_version, gpu_type, region query parameters) and print
# the guide summary, related endpoints, request templates, and next steps.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.}

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

# Percent-encode the slug into one path segment and append only the routing
# hints that are set in the environment.
action_guide_path="$(python3 -c 'import os
from urllib.parse import quote, urlencode
model = os.environ.get("MODEL_OR_FAMILY_SLUG", "").strip()
if not model:
    raise SystemExit("Set MODEL_OR_FAMILY_SLUG from search or route finder output before requesting an action guide.")
params = {}
model_version = os.environ.get("FORGE_MODEL_VERSION", "").strip()
if model_version:
    params["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE", "").strip()
if gpu_type:
    params["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION", "").strip()
if region:
    params["region"] = region
path = "/v1/models/" + quote(model, safe="") + "/optimize"
if params:
    path += "?" + urlencode(params)
print(path)')"

curl -sS --fail-with-body "$(forge_api_url "$action_guide_path")" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -X POST \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, sys
payload = json.load(sys.stdin)
slug = payload.get("slug") or "model"
action = payload.get("action") or "optimize"
status = payload.get("status") or "unknown"
managed_by = payload.get("managed_by") or "external workflow"
print(f"{slug} {action}_guide status={status} managed_by={managed_by}")
message = payload.get("message")
if message:
    print("message: " + message)
routing_hints = payload.get("routing_hints") or {}
if routing_hints:
    print("routing_hints: " + ", ".join(f"{key}={value}" for key, value in routing_hints.items()))
# NOTE(review): printed unconditionally; the source lost its indentation, so
# confirm this notice was not meant to sit under the routing_hints branch.
print("guide_only: this request does not create private endpoints or optimized model builds")
related = payload.get("related_endpoints") or {}
for key in ("regional_deployment", "reliability", "run_estimate", "status", "start", "keep_warm", "stop"):
    value = related.get(key)
    if value:
        print(f"{key}: {value}")
templates = payload.get("request_templates") or {}
if templates:
    print("request_templates:")
    print(json.dumps(templates, indent=2, sort_keys=True))
steps = payload.get("next_steps") or []
if steps:
    print("next_steps:")
    for step in steps:
        print(f"- {step}")'

# Next snippet begins here.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# OpenAI route finder: list up to ten models whose mapped endpoint is
# /v1/chat/completions, /v1/completions, or /v1/embeddings, each as a copyable
# "export MODEL_OR_FAMILY_SLUG=..." line.

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

curl -sS --fail-with-body "$(forge_api_url '/v1/models?sort=recently_added')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys
payload = json.load(sys.stdin)
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
openai_endpoints = {
    "/v1/chat/completions",
    "/v1/completions",
    "/v1/embeddings",
}
rows = []
for model in models:
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    if endpoint not in openai_endpoints:
        continue
    model_ref = model.get("model_family") or model.get("slug")
    # Skip entries with no usable reference; otherwise the export line would
    # carry the literal text None.
    if not model_ref:
        continue
    name = model.get("name") or model_ref
    status = model.get("status", "unknown")
    rows.append((endpoint, model_ref, name, status))
if not rows:
    print("No OpenAI-compatible routes returned. Use the native route finder for /v1/inference/... routes.", file=sys.stderr)
    raise SystemExit(1)
for endpoint, model_ref, name, status in rows[:10]:
    print(f"export MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))} # {endpoint} - {name} ({status})")'

# Next snippet begins here.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Native route finder: list up to ten models whose mapped endpoint is set and
# is not one of the OpenAI-compatible routes, each as a copyable
# "export MODEL_OR_FAMILY_SLUG=..." line annotated with its route.

# Refuse to run with a missing or placeholder API key.
case "${FORGE_API_KEY:-}" in
  ""|replace-with-your-forge-api-key)
    echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
    exit 1
    ;;
esac

# Join FORGE_API_BASE (minus one trailing slash) with an endpoint path. When
# the base already ends in /v1, the endpoint's leading /v1 is dropped so the
# segment is not duplicated.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}"
  case "$base:$endpoint" in
    */v1:/v1|*/v1:/v1/*|*/v1:/v1\?*) printf '%s%s\n' "$base" "${endpoint#/v1}" ;;
    *) printf '%s%s\n' "$base" "$endpoint" ;;
  esac
}

curl -sS --fail-with-body "$(forge_api_url '/v1/models?sort=recently_added')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
python3 -c 'import json, shlex, sys
payload = json.load(sys.stdin)
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []
def normalize_endpoint(value):
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"
openai_endpoints = {
    "/v1/chat/completions",
    "/v1/completions",
    "/v1/embeddings",
}
rows = []
for model in models:
    endpoint = normalize_endpoint(((model.get("playground_config") or {}).get("api_mapping") or {}).get("endpoint"))
    if not endpoint:
        continue
    if endpoint in openai_endpoints:
        continue
    # For family-backed models, show the family-level inference route.
    if endpoint.startswith("/v1/inference/") and model.get("model_family"):
        endpoint = f"/v1/inference/{model['\''model_family'\'']}"
    model_ref = model.get("model_family") or model.get("slug")
    # Skip entries with no usable reference; otherwise the export line would
    # carry the literal text None.
    if not model_ref:
        continue
    name = model.get("name") or model_ref
    status = model.get("status", "unknown")
    rows.append((endpoint, model_ref, name, status))
if not rows:
    print("No native inference routes returned. Use the OpenAI-compatible snippets for chat, completions, and embeddings.", file=sys.stderr)
    raise SystemExit(1)
for endpoint, model_ref, name, status in rows[:10]:
    print(f"export MODEL_OR_FAMILY_SLUG={shlex.quote(str(model_ref))} # {endpoint} - {name} ({status})")'

# Next snippet begins here.
FORGE_API_BASE=${FORGE_API_BASE:-'/api/proxy'}
# Stop early when the API key is missing or still the documentation placeholder.
if [ -z "${FORGE_API_KEY:-}" ] || [ "${FORGE_API_KEY:-}" = 'replace-with-your-forge-api-key' ]; then
  echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
  exit 1
fi
# Join FORGE_API_BASE and an endpoint path without doubling the /v1 segment.
#   $1 - endpoint path such as /v1/models (may carry a query string)
# Prints the joined URL on stdout.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}" # drop one trailing slash from the base
  case "$base:$endpoint" in
    */v1:/v1 | */v1:/v1/* | */v1:/v1\?*)
      # The base already ends in /v1, so strip that prefix from the endpoint.
      printf '%s%s\n' "$base" "${endpoint#/v1}"
      ;;
    *)
      printf '%s%s\n' "$base" "$endpoint"
      ;;
  esac
}
# List text models and print one export line (at most ten) per chat-capable
# model, ordered warm, then starting, then cold, then any other status.
curl -sS --fail-with-body "$(forge_api_url '/v1/models?modality=text&sort=recently_added')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -H "Authorization: Bearer ${FORGE_API_KEY}" | \
  python3 -c 'import json, shlex, sys

# The catalog may arrive as a bare list or wrapped under "models" or "data".
payload = json.load(sys.stdin)
if isinstance(payload, list):
    models = payload
elif isinstance(payload, dict):
    models = payload.get("models") or payload.get("data") or []
else:
    models = []


def normalize_endpoint(value):
    # Trim whitespace, add a leading slash when missing, drop trailing slashes.
    endpoint = str(value or "").strip()
    if not endpoint:
        return ""
    if not endpoint.startswith("/"):
        endpoint = f"/{endpoint}"
    return endpoint.rstrip("/") or "/"


def chat_status_rank(status):
    # Lower rank sorts first; unrecognized statuses go last.
    status = str(status or "").lower()
    return {"warm": 0, "starting": 1, "cold": 2}.get(status, 3)


rows = []
for model in models:
    mapping = (model.get("playground_config") or {}).get("api_mapping") or {}
    if normalize_endpoint(mapping.get("endpoint")) != "/v1/chat/completions":
        continue
    model_ref = model.get("model_family") or model.get("slug")
    if not model_ref:
        continue
    status = str(model.get("status") or "unknown")
    # len(rows) records arrival order, so models keep the API order within a
    # rank and tuple comparison never reaches the remaining fields.
    rows.append((chat_status_rank(status), len(rows), model_ref, model.get("name") or model_ref, status))

if not rows:
    print("No chat-capable models returned. Open the Forge catalog at / for native inference routes.", file=sys.stderr)
    raise SystemExit(1)

for _, _, slug, name, status in sorted(rows)[:10]:
    # str() keeps shlex.quote from raising when the API returns a non-string slug.
    print(f"export MODEL_OR_FAMILY_SLUG={shlex.quote(str(slug))} # {name} ({status})")'

set -euo pipefail
# Forge API smoke test
# Default to the Forge proxy base unless FORGE_API_BASE is already set and non-empty.
FORGE_API_BASE="${FORGE_API_BASE:-/api/proxy}"
# Abort with a hint when no model was chosen; export it so the Python payload builder can read it.
export MODEL_OR_FAMILY_SLUG=${MODEL_OR_FAMILY_SLUG:?Set MODEL_OR_FAMILY_SLUG from the model picker output}
# Stop early when the API key is missing or still the documentation placeholder.
if [ -z "${FORGE_API_KEY:-}" ] || [ "${FORGE_API_KEY:-}" = 'replace-with-your-forge-api-key' ]; then
  echo 'Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.' >&2
  exit 1
fi
# Join FORGE_API_BASE and an endpoint path without doubling the /v1 segment.
#   $1 - endpoint path such as /v1/chat/completions (may carry a query string)
# Prints the joined URL on stdout.
forge_api_url() {
  endpoint="$1"
  base="${FORGE_API_BASE%/}" # drop one trailing slash from the base
  case "$base:$endpoint" in
    */v1:/v1 | */v1:/v1/* | */v1:/v1\?*)
      # The base already ends in /v1, so strip that prefix from the endpoint.
      printf '%s%s\n' "$base" "${endpoint#/v1}"
      ;;
    *)
      printf '%s%s\n' "$base" "$endpoint"
      ;;
  esac
}
# Build the JSON request body in Python, stream it to curl on stdin, then print
# the assistant message (or the whole response when no message text is present).
python3 - <<'PY' |
import json
import os

payload = {
    "model": os.environ["MODEL_OR_FAMILY_SLUG"],
    "messages": [
        {"role": "user", "content": "Write a one sentence status update."},
    ],
}
# Optional routing overrides are sent only when the matching variable is set and non-empty.
for env_name, field in (
    ("FORGE_MODEL_VERSION", "model_version"),
    ("FORGE_GPU_TYPE", "gpu_type"),
    ("FORGE_REGION", "region"),
):
    value = os.environ.get(env_name)
    if value:
        payload[field] = value
print(json.dumps(payload))
PY
curl -sS --fail-with-body "$(forge_api_url '/v1/chat/completions')" \
  --max-time "${FORGE_REQUEST_TIMEOUT_SECONDS:-600}" \
  -X POST \
  -H "Authorization: Bearer ${FORGE_API_KEY}" \
  -H "Content-Type: application/json" \
  -d @- | \
  python3 -c 'import json, sys

data = json.load(sys.stdin)
# Take the first choice and tolerate missing or empty "choices" and "message".
message = (((data.get("choices") or [{}])[0].get("message") or {}).get("content"))
if message:
    print(message)
else:
    print(json.dumps(data, indent=2))'

import os
from openai import OpenAI
api_base = os.environ.get("FORGE_API_BASE", "/api/proxy").rstrip("/")
openai_base = os.environ.get("FORGE_OPENAI_BASE_URL", "").strip().rstrip("/")
if not openai_base:
openai_base = api_base if api_base.endswith("/v1") else f"{api_base}/v1"
request_timeout_seconds = float(os.environ.get("FORGE_REQUEST_TIMEOUT_SECONDS", "600"))
api_key = os.environ.get("FORGE_API_KEY")
if not api_key or api_key == "replace-with-your-forge-api-key":
raise SystemExit("Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.")
model = os.environ.get("MODEL_OR_FAMILY_SLUG")
if not model:
raise SystemExit("Set MODEL_OR_FAMILY_SLUG from the model picker output.")
model_version = os.environ.get("FORGE_MODEL_VERSION")
extra_body = {}
if model_version:
extra_body["model_version"] = model_version
gpu_type = os.environ.get("FORGE_GPU_TYPE")
if gpu_type:
extra_body["gpu_type"] = gpu_type
region = os.environ.get("FORGE_REGION")
if region:
extra_body["region"] = region
client = OpenAI(
api_key=api_key,
base_url=openai_base,
timeout=request_timeout_seconds,
)
response = client.chat.completions.create(
model=model,
messages=[
{"role": "user", "content": "Write a one sentence status update."},
],
**({"extra_body": extra_body} if extra_body else {}),
)
message = response.choices[0].message if response.choices else None
content = message.content if message else None
if isinstance(content, str) and content.strip():
print(content)
else:
print(response.model_dump_json(indent=2))async function main() {
const apiBase = (process.env.FORGE_API_BASE ?? "/api/proxy").replace(/\/+$/, "");
const apiKey = process.env.FORGE_API_KEY;
const requestTimeoutSeconds = Number(process.env.FORGE_REQUEST_TIMEOUT_SECONDS ?? "600");
const requestTimeoutMs = Number.isFinite(requestTimeoutSeconds) && requestTimeoutSeconds > 0 ? requestTimeoutSeconds * 1000 : 600000;
if (!apiKey || apiKey === "replace-with-your-forge-api-key") {
throw new Error("Set FORGE_API_KEY to a real Forge API key before running this snippet; browser SSO sessions are not sent to copied curl or SDK clients.");
}
const model = process.env.MODEL_OR_FAMILY_SLUG;
if (!model) {
throw new Error("Set MODEL_OR_FAMILY_SLUG from the model picker output.");
}
function forgeApiUrl(endpoint) {
if (apiBase.endsWith("/v1") && (endpoint === "/v1" || endpoint.startsWith("/v1/") || endpoint.startsWith("/v1?"))) {
return `${apiBase}${endpoint.slice(3)}`;
}
return `${apiBase}${endpoint}`;
}
const payload = {
model,
messages: [
{ role: "user", content: "Write a one sentence status update." },
],
};
const modelVersion = process.env.FORGE_MODEL_VERSION;
if (modelVersion) payload.model_version = modelVersion;
const gpuType = process.env.FORGE_GPU_TYPE;
if (gpuType) payload.gpu_type = gpuType;
const region = process.env.FORGE_REGION;
if (region) payload.region = region;
const response = await fetch(forgeApiUrl("/v1/chat/completions"), {
method: "POST",
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify(payload),
signal: AbortSignal.timeout(requestTimeoutMs),
});
const responseText = await response.text();
if (!response.ok) {
throw new Error(`/v1/chat/completions failed with ${response.status}: ${responseText || response.statusText}`);
}
const data = responseText.trim() ? JSON.parse(responseText) : {};
const message = data.choices?.[0]?.message;
const content = message?.content;
if (typeof content === "string" && content.trim()) {
console.log(content);
} else {
console.log(JSON.stringify(data, null, 2));
}
}
main().catch((error) => {
console.error(error);
process.exit(1);
});