From 3dfd04bc6468f2f98d47e93858c7ec39acab889b Mon Sep 17 00:00:00 2001 From: Ramon Figueiredo Date: Fri, 12 Jun 2026 00:37:00 -0700 Subject: [PATCH 01/27] [cueweb] Add Services (Facility Service Defaults) page (CueCommander parity) Wire the existing CueCommander "Services" menu item to a new /services page mirroring CueGUI's "Facility Service Defaults" tab (ServiceDialog): a left pane listing facility default services and a right-pane edit form. (In CueGUI the Services tab is the Facility Service Defaults tab, so the page lives at /services where the sidebar item already points.) - Form fields match CueGUI: Name, Threadable, Min/Max Threads (100 = 1 thread), Min Memory MB, Min Gpu Memory MB, Timeout, Timeout LLU, OOM Increase MB, predefined Tags (two-column order) plus a Custom Tags toggle. Memory converts MB<->KB (x1024); threads stored as cores*100. - Save validates (name length/charset, non-negative numbers, min <= max threads when max > 0, OOM > 0, tag charset) then shows a facility-wide confirmation before Create (new) or Update (existing). New / Del in the left pane (Del confirms first). - Proxy routes under app/api/service/: getdefaultservices, create, update, delete. Service type + getDefaultServices in get_utils; create/update/ deleteService in action_utils. 
--- cueweb/app/api/service/create/route.ts | 45 +++ cueweb/app/api/service/delete/route.ts | 45 +++ .../api/service/getdefaultservices/route.ts | 50 ++++ cueweb/app/api/service/update/route.ts | 45 +++ cueweb/app/services/page.tsx | 175 +++++++++++ cueweb/app/utils/action_utils.ts | 25 +- cueweb/app/utils/get_utils.ts | 29 ++ .../components/ui/service-defaults-form.tsx | 271 ++++++++++++++++++ 8 files changed, 684 insertions(+), 1 deletion(-) create mode 100644 cueweb/app/api/service/create/route.ts create mode 100644 cueweb/app/api/service/delete/route.ts create mode 100644 cueweb/app/api/service/getdefaultservices/route.ts create mode 100644 cueweb/app/api/service/update/route.ts create mode 100644 cueweb/app/services/page.tsx create mode 100644 cueweb/components/ui/service-defaults-form.tsx diff --git a/cueweb/app/api/service/create/route.ts b/cueweb/app/api/service/create/route.ts new file mode 100644 index 000000000..ad6dcdb24 --- /dev/null +++ b/cueweb/app/api/service/create/route.ts @@ -0,0 +1,45 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Create a new facility-wide default service. Request: { data: Service }. +// RPC: /service.ServiceInterface/CreateService. 
+export async function POST(request: NextRequest) {
+  const endpoint = "/service.ServiceInterface/CreateService";
+  const method = request.method;
+  if (method !== 'POST') {
+    return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 });
+  }
+
+  let jsonBody: any;
+  try {
+    jsonBody = await request.json();
+  } catch {
+    return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 });
+  }
+  if (!jsonBody || typeof jsonBody !== 'object' || !jsonBody.data || typeof jsonBody.data.name !== 'string') {
+    return NextResponse.json({ error: 'Invalid request body: data with a name is required' }, { status: 400 });
+  }
+
+  const body = JSON.stringify(jsonBody);
+  const response = await handleRoute(method, endpoint, body, true);
+  const responseData = await response.json();
+  // Relay the gateway's error text, or a generic message when the reply has none (otherwise the client receives {}).
+  if (!response.ok) return NextResponse.json({ error: responseData?.error ?? 'Failed to create service' }, { status: response.status });
+  return NextResponse.json({ data: responseData.data }, { status: response.status });
+}
diff --git a/cueweb/app/api/service/delete/route.ts b/cueweb/app/api/service/delete/route.ts
new file mode 100644
index 000000000..312ff5226
--- /dev/null
+++ b/cueweb/app/api/service/delete/route.ts
@@ -0,0 +1,45 @@
+/*
+ * Copyright Contributors to the OpenCue Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { handleRoute } from '@/app/utils/api_utils';
+import { NextRequest, NextResponse } from "next/server";
+
+// Delete a facility-wide default service. Request: { service: Service }.
+// RPC: /service.ServiceInterface/Delete.
+export async function POST(request: NextRequest) {
+  const endpoint = "/service.ServiceInterface/Delete";
+  const method = request.method;
+  if (method !== 'POST') {
+    return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 });
+  }
+
+  let jsonBody: any;
+  try {
+    jsonBody = await request.json();
+  } catch {
+    return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 });
+  }
+  if (!jsonBody || typeof jsonBody !== 'object' || !jsonBody.service) {
+    return NextResponse.json({ error: 'Invalid request body: service is required' }, { status: 400 });
+  }
+
+  const body = JSON.stringify(jsonBody);
+  const response = await handleRoute(method, endpoint, body, true);
+  const responseData = await response.json();
+  // Relay the gateway's error text, or a generic message when the reply has none (otherwise the client receives {}).
+  if (!response.ok) return NextResponse.json({ error: responseData?.error ?? 'Failed to delete service' }, { status: response.status });
+  return NextResponse.json({ data: responseData.data }, { status: response.status });
+}
diff --git a/cueweb/app/api/service/getdefaultservices/route.ts b/cueweb/app/api/service/getdefaultservices/route.ts
new file mode 100644
index 000000000..bc11009c4
--- /dev/null
+++ b/cueweb/app/api/service/getdefaultservices/route.ts
@@ -0,0 +1,50 @@
+/*
+ * Copyright Contributors to the OpenCue Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Lists the facility-wide default services (the left pane of the Facility +// Service Defaults page, mirroring CueGUI's opencue.api.getDefaultServices()). +// The gateway double-nests the result as { services: { services: [...] } }; +// we unwrap to a flat array. RPC: /service.ServiceInterface/GetDefaultServices. +export async function POST(request: NextRequest) { + const endpoint = "/service.ServiceInterface/GetDefaultServices"; + const method = request.method; + if (method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 }); + } + + let parsed: unknown = {}; + try { + parsed = await request.json(); + } catch { + // Empty body is acceptable - GetDefaultServices takes no parameters. + } + const body = JSON.stringify(parsed ?? {}); + + const response = await handleRoute(method, endpoint, body); + const responseData = await response.json(); + + if (!response.ok) { + return NextResponse.json( + { error: responseData?.error ?? "Failed to fetch services" }, + { status: response.status }, + ); + } + const services = responseData?.data?.services?.services ?? []; + return NextResponse.json({ data: services }, { status: response.status }); +} diff --git a/cueweb/app/api/service/update/route.ts b/cueweb/app/api/service/update/route.ts new file mode 100644 index 000000000..1784d3df9 --- /dev/null +++ b/cueweb/app/api/service/update/route.ts @@ -0,0 +1,45 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { handleRoute } from '@/app/utils/api_utils';
+import { NextRequest, NextResponse } from "next/server";
+
+// Update an existing facility-wide default service. Request: { service: Service }.
+// RPC: /service.ServiceInterface/Update.
+export async function POST(request: NextRequest) {
+  const endpoint = "/service.ServiceInterface/Update";
+  const method = request.method;
+  if (method !== 'POST') {
+    return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 });
+  }
+
+  let jsonBody: any;
+  try {
+    jsonBody = await request.json();
+  } catch {
+    return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 });
+  }
+  if (!jsonBody || typeof jsonBody !== 'object' || !jsonBody.service || typeof jsonBody.service.name !== 'string') {
+    return NextResponse.json({ error: 'Invalid request body: service with a name is required' }, { status: 400 });
+  }
+
+  const body = JSON.stringify(jsonBody);
+  const response = await handleRoute(method, endpoint, body, true);
+  const responseData = await response.json();
+  // Relay the gateway's error text, or a generic message when the reply has none (otherwise the client receives {}).
+  if (!response.ok) return NextResponse.json({ error: responseData?.error ?? 'Failed to update service' }, { status: response.status });
+  return NextResponse.json({ data: responseData.data }, { status: response.status });
+}
diff --git a/cueweb/app/services/page.tsx b/cueweb/app/services/page.tsx
new file mode 100644
index 000000000..e99a0a490
--- /dev/null
+++ b/cueweb/app/services/page.tsx
@@ -0,0 +1,175 @@
+"use client";
+
+/*
+ * Copyright Contributors to the OpenCue Project
+ *
+ * Licensed under the Apache
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import * as React from "react"; + +import { Service, getDefaultServices } from "@/app/utils/get_utils"; +import { deleteService } from "@/app/utils/action_utils"; +import { handleError, toastSuccess } from "@/app/utils/notify_utils"; +import { Button } from "@/components/ui/button"; +import { ConfirmDialog } from "@/components/ui/confirm-dialog"; +import { ServiceDefaultsForm } from "@/components/ui/service-defaults-form"; +import { Skeleton } from "@/components/ui/skeleton"; +import { cn } from "@/lib/utils"; + +export default function FacilityServiceDefaultsPage() { + const [services, setServices] = React.useState(null); + const [selectedName, setSelectedName] = React.useState(null); + const [isNew, setIsNew] = React.useState(false); + const [deleteOpen, setDeleteOpen] = React.useState(false); + + const load = React.useCallback(async (isCancelled?: () => boolean) => { + try { + const data = await getDefaultServices(); + if (isCancelled?.()) return; + setServices(data); + } catch (err) { + if (isCancelled?.()) return; + handleError(err, "Could not load services"); + setServices((prev) => prev ?? []); + } + }, []); + + React.useEffect(() => { + let cancelled = false; + load(() => cancelled); + return () => { + cancelled = true; + }; + }, [load]); + + const sorted = React.useMemo( + () => (services ? 
[...services].sort((a, b) => a.name.localeCompare(b.name)) : null), + [services], + ); + const selectedService = React.useMemo( + () => services?.find((s) => s.name === selectedName) ?? null, + [services, selectedName], + ); + + function selectService(name: string) { + setIsNew(false); + setSelectedName(name); + } + + function startNew() { + setSelectedName(null); + setIsNew(true); + } + + async function handleSaved(name: string) { + await load(); + setIsNew(false); + setSelectedName(name); + } + + async function handleDeleteConfirm() { + if (!selectedService) return; + const ok = await deleteService(selectedService); + if (ok) { + toastSuccess(`Deleted service ${selectedService.name}`); + setSelectedName(null); + setIsNew(false); + await load(); + } + } + + const showForm = isNew || selectedService !== null; + + return ( +
+

Facility Service Defaults

+ +
+ {/* Left pane: service list + New/Del. */} +
+
+ {sorted === null ? ( +
+ + + +
+ ) : sorted.length === 0 ? ( +

No services defined.

+ ) : ( +
    + {sorted.map((s) => ( +
  • + +
  • + ))} +
+ )} +
+
+ + +
+
+ + {/* Right pane: edit form. Keyed so it re-initializes on selection. */} +
+ {showForm ? ( + + ) : ( +

+ Select a service to edit, or click New to create one. +

+ )} +
+
+ + +
+ ); +} diff --git a/cueweb/app/utils/action_utils.ts b/cueweb/app/utils/action_utils.ts index 182b437d0..c53990de9 100644 --- a/cueweb/app/utils/action_utils.ts +++ b/cueweb/app/utils/action_utils.ts @@ -20,7 +20,7 @@ import * as React from "react"; import { Frame } from "../frames/frame-columns"; import { Layer } from "../layers/layer-columns"; import { accessActionApi, accessGetApi } from "./api_utils"; -import { getFrameLogDir, getJobForLayer, Host, JobComment, Show } from "./get_utils"; +import { getFrameLogDir, getJobForLayer, Host, JobComment, Service, Show } from "./get_utils"; import { handleError, toastSuccess, toastWarning } from "./notify_utils"; /**************************************/ @@ -48,6 +48,29 @@ export async function performAction(endpoint: string, bodyAr: string[], successM } } +/**************************************/ +// Facility default services (CueGUI ServiceDialog parity) +/**************************************/ + +// These call accessActionApi directly (no per-call success toast) so the +// Facility Service Defaults form can show a single toast after the call +// resolves. Errors are still surfaced as toasts by accessActionApi. Returns +// true on success so the form can gate its refresh on it. 
+export async function createService(data: Service): Promise { + const result = await accessActionApi("/api/service/create", JSON.stringify({ data })); + return !!result?.success; +} + +export async function updateService(service: Service): Promise { + const result = await accessActionApi("/api/service/update", JSON.stringify({ service })); + return !!result?.success; +} + +export async function deleteService(service: Service): Promise { + const result = await accessActionApi("/api/service/delete", JSON.stringify({ service })); + return !!result?.success; +} + /**************************************/ // Kill Jobs, Layers, and Frames /**************************************/ diff --git a/cueweb/app/utils/get_utils.ts b/cueweb/app/utils/get_utils.ts index 12e5f92ff..61eca8830 100644 --- a/cueweb/app/utils/get_utils.ts +++ b/cueweb/app/utils/get_utils.ts @@ -173,6 +173,27 @@ export type Allocation = { }; }; +// Service shape - mirrors service.Service. This is a facility-wide default +// service template (Facility Service Defaults page). Cores are stored as +// cores*100 (the UI calls them "threads", 100 = 1 thread); memory fields are +// stored in KB and shown as MB (divide by 1024). int64 memory fields can +// arrive from the gateway as strings, so callers coerce with Number(). +export type Service = { + id: string; + name: string; + threadable: boolean; + minCores: number; + maxCores: number; + minMemory: number | string; // KB (int64) + minGpuMemory: number | string; // KB (int64) + tags: string[]; + timeout: number; // minutes + timeoutLlu: number; // minutes + minGpus?: number; + maxGpus?: number; + minMemoryIncrease: number; // KB (OOM increase) +}; + // Fetch a single frame based on the request body export async function getFrame(body: string): Promise { const ENDPOINT = "/api/frame/getframe"; @@ -343,6 +364,14 @@ export async function getAllocations(): Promise { return Array.isArray(response) ? 
response : []; } +// Fetch the facility-wide default services (the Facility Service Defaults +// page). Mirrors CueGUI's opencue.api.getDefaultServices(). +export async function getDefaultServices(): Promise { + const ENDPOINT = "/api/service/getdefaultservices"; + const response = await accessGetApi(ENDPOINT, JSON.stringify({})); + return Array.isArray(response) ? response : []; +} + // Fetch all comments for a given job export async function getJobComments(job: Job): Promise { const ENDPOINT = "/api/job/getcomments"; diff --git a/cueweb/components/ui/service-defaults-form.tsx b/cueweb/components/ui/service-defaults-form.tsx new file mode 100644 index 000000000..b2f1d83e3 --- /dev/null +++ b/cueweb/components/ui/service-defaults-form.tsx @@ -0,0 +1,271 @@ +"use client"; + +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import * as React from "react"; + +import type { Service } from "@/app/utils/get_utils"; +import { createService, updateService } from "@/app/utils/action_utils"; +import { toastSuccess, toastWarning } from "@/app/utils/notify_utils"; +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { ConfirmDialog } from "@/components/ui/confirm-dialog"; +import { Input } from "@/components/ui/input"; + +/** + * Facility Service Defaults edit form (CueGUI ServiceForm parity). 
The right + * pane of the Facility Service Defaults page: an editable form for one service + * template. Memory fields are MB in the UI but KB in the proto (x1024), and + * threads are stored as cores*100 (100 = 1 thread, shown directly). Save shows + * a facility-wide confirmation, then calls Create (new) or Update (existing). + * + * The parent remounts this via a `key` when the selected service changes, so + * state initializes straight from props. + */ + +// Predefined tag order matches CueGUI's CheckBoxSelectionMatrix (row-major, +// two columns): general/desktop, playblast/util, preprocess/wan, cuda/splathw, +// naiad/massive. +const PREDEFINED_TAGS = [ + "general", + "desktop", + "playblast", + "util", + "preprocess", + "wan", + "cuda", + "splathw", + "naiad", + "massive", +]; + +const toMb = (kb: number | string | undefined) => Math.round(Number(kb ?? 0) / 1024); +const parseCustomTags = (text: string) => text.split(/[\s,|]+/).filter(Boolean); + +function Field({ label, children }: { label: string; children: React.ReactNode }) { + return ( +
+ + {children} +
+ ); +} + +export function ServiceDefaultsForm({ + service, + onSaved, +}: { + // null => creating a new service. + service: Service | null; + onSaved: (name: string) => void; +}) { + const isNew = service === null; + + const initAllPredefined = !service || service.tags.every((t) => PREDEFINED_TAGS.includes(t)); + + const [name, setName] = React.useState(service?.name ?? ""); + const [threadable, setThreadable] = React.useState(service?.threadable ?? false); + const [minThreads, setMinThreads] = React.useState(String(service?.minCores ?? 100)); + const [maxThreads, setMaxThreads] = React.useState(String(service?.maxCores ?? 100)); + const [minMemoryMb, setMinMemoryMb] = React.useState(String(service ? toMb(service.minMemory) : 3276)); + const [minGpuMemoryMb, setMinGpuMemoryMb] = React.useState(String(service ? toMb(service.minGpuMemory) : 0)); + const [timeout, setTimeoutMin] = React.useState(String(service?.timeout ?? 0)); + const [timeoutLlu, setTimeoutLlu] = React.useState(String(service?.timeoutLlu ?? 0)); + const [oomIncreaseMb, setOomIncreaseMb] = React.useState(String(service ? toMb(service.minMemoryIncrease) : 2048)); + + const [useCustomTags, setUseCustomTags] = React.useState(!initAllPredefined); + const [selectedTags, setSelectedTags] = React.useState>( + new Set(initAllPredefined ? service?.tags ?? ["general"] : []), + ); + const [customTags, setCustomTags] = React.useState(initAllPredefined ? "" : (service?.tags ?? []).join(" ")); + + const [confirmOpen, setConfirmOpen] = React.useState(false); + + function toggleTag(tag: string, checked: boolean) { + setSelectedTags((prev) => { + const next = new Set(prev); + if (checked) next.add(tag); + else next.delete(tag); + return next; + }); + } + + function resolveTags(): string[] { + return useCustomTags ? parseCustomTags(customTags) : PREDEFINED_TAGS.filter((t) => selectedTags.has(t)); + } + + // Returns an error message, or null when the form is valid. 
+  function validate(): string | null {
+    const n = name.trim();
+    if (n.length < 3) return "Service name must be at least 3 characters.";
+    if (!/^[a-zA-Z0-9|/_-]+$/.test(n)) return "Service name may only contain letters, numbers, and | / - _";
+    // Whole numbers only: a fraction would be submitted as e.g. minCores 1.5. NOTE(review): confirm the proto fields are all integers.
+    const numericFields: [string, string][] = [
+      ["Min Threads", minThreads],
+      ["Max Threads", maxThreads],
+      ["Min Memory MB", minMemoryMb],
+      ["Min Gpu Memory MB", minGpuMemoryMb],
+      ["Timeout", timeout],
+      ["Timeout LLU", timeoutLlu],
+      ["OOM Increase MB", oomIncreaseMb],
+    ];
+    for (const [fieldLabel, value] of numericFields) {
+      const x = Number(value);
+      if (value.trim() === "" || !Number.isInteger(x) || x < 0) {
+        return `${fieldLabel} must be a non-negative whole number.`;
+      }
+    }
+    if (Number(maxThreads) > 0 && Number(minThreads) > Number(maxThreads)) {
+      return "Min Threads cannot exceed Max Threads.";
+    }
+    if (Number(oomIncreaseMb) <= 0) {
+      return "OOM Increase must be greater than 0 MB.";
+    }
+    if (useCustomTags) {
+      const tags = parseCustomTags(customTags);
+      if (tags.length === 0) return "Enter at least one custom tag.";
+      if (tags.some((t) => !/^[a-zA-Z0-9_-]+$/.test(t))) {
+        return "Custom tags may only contain letters, numbers, _ and -.";
+      }
+    } else if (selectedTags.size === 0) {
+      return "Select at least one tag.";
+    }
+    return null;
+  }
+
+  function handleSaveClick() {
+    const err = validate();
+    if (err) {
+      toastWarning(err);
+      return;
+    }
+    setConfirmOpen(true);
+  }
+
+  async function handleConfirm() {
+    const payload: Service = {
+      id: service?.id ?? "",
+      name: name.trim(),
+      threadable,
+      minCores: Number(minThreads),
+      maxCores: Number(maxThreads),
+      minMemory: Number(minMemoryMb) * 1024,
+      minGpuMemory: Number(minGpuMemoryMb) * 1024,
+      tags: resolveTags(),
+      timeout: Number(timeout),
+      timeoutLlu: Number(timeoutLlu),
+      minGpus: service?.minGpus ?? 0,
+      maxGpus: service?.maxGpus ?? 0,
+      minMemoryIncrease: Number(oomIncreaseMb) * 1024,
+    };
+    const ok = isNew ?
await createService(payload) : await updateService(payload); + if (ok) { + toastSuccess(isNew ? `Created service ${payload.name}` : `Saved service ${payload.name}`); + onSaved(payload.name); + } + } + + return ( +
+ + setName(e.target.value)} aria-label="Name" /> + + + setThreadable(!!c)} + aria-label="Threadable" + /> + + + setMinThreads(e.target.value)} aria-label="Min Threads" /> + + + setMaxThreads(e.target.value)} aria-label="Max Threads" /> + + + setMinMemoryMb(e.target.value)} aria-label="Min Memory MB" /> + + + setMinGpuMemoryMb(e.target.value)} aria-label="Min Gpu Memory MB" /> + + + setTimeoutMin(e.target.value)} aria-label="Timeout" /> + + + setTimeoutLlu(e.target.value)} aria-label="Timeout LLU" /> + + + setOomIncreaseMb(e.target.value)} aria-label="OOM Increase MB" /> + + +
+ Tags +
+ {PREDEFINED_TAGS.map((tag) => ( + + ))} +
+
+ +
+ + setCustomTags(e.target.value)} + disabled={!useCustomTags} + placeholder="space- or comma-separated tags" + aria-label="Custom Tags value" + className="flex-1" + /> +
+ +
+ +
+ + {/* CueGUI shows a facility-wide confirmation before persisting. The + original references an internal team name; genericized here. */} + +
+ ); +} From 311cfb17fd75e683bac3a5f00661e003ffc915ed Mon Sep 17 00:00:00 2001 From: Ramon Figueiredo Date: Fri, 12 Jun 2026 00:59:42 -0700 Subject: [PATCH 02/27] [cueweb] Add Stuck Frames page (CueCommander parity) Add a /stuck-frames route listing frames in RUNNING state longer than a configurable threshold, with per-row Retry and Kill actions. - Threshold slider (1-48h, default 8h) persisted to localStorage; filtering is client-side so the slider is instant. - Server-side aggregation route /api/stuck-frames lists unfinished jobs (GetJobs) and fans out GetFrames per job filtered to RUNNING (FrameState 2), so the browser makes a single request. - Retry/Kill reuse retryFrames/killFrames; kill records the signed-in user in the reason. Polls every 30s. --- cueweb/app/api/stuck-frames/route.ts | 83 ++++++++++ cueweb/app/stuck-frames/page.tsx | 221 +++++++++++++++++++++++++++ cueweb/app/utils/get_utils.ts | 12 ++ 3 files changed, 316 insertions(+) create mode 100644 cueweb/app/api/stuck-frames/route.ts create mode 100644 cueweb/app/stuck-frames/page.tsx diff --git a/cueweb/app/api/stuck-frames/route.ts b/cueweb/app/api/stuck-frames/route.ts new file mode 100644 index 000000000..301c230be --- /dev/null +++ b/cueweb/app/api/stuck-frames/route.ts @@ -0,0 +1,83 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { fetchObjectFromRestGateway } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Server-side aggregation for the Stuck Frames page. There is no single RPC +// that returns every running frame, so we list the unfinished jobs +// (GetJobs) and fan out a GetFrames call per job filtered to RUNNING +// (FrameState 2), then flatten. Doing the fan-out here keeps the browser to a +// single request and avoids leaking the N+1 to the client. The page applies +// the running-time threshold locally so the slider stays instant. +// +// RPCs: /job.JobInterface/GetJobs, /job.JobInterface/GetFrames. + +const RUNNING_STATE = 2; // FrameState.RUNNING (proto/src/job.proto) +const MAX_FRAMES_PER_JOB = 1000; + +export async function POST(_request: NextRequest) { + try { + // 1. All unfinished jobs. + const jobsResp = await fetchObjectFromRestGateway( + "/job.JobInterface/GetJobs", + "POST", + JSON.stringify({ r: { include_finished: false } }), + ); + const jobsJson = await jobsResp.json(); + if (jobsJson?.error) { + return NextResponse.json({ error: jobsJson.error }, { status: 500 }); + } + const jobs: any[] = jobsJson?.data?.jobs?.jobs ?? []; + + // 2. Running frames per job, in parallel. A single job's failure drops to + // an empty list rather than failing the whole page. + const perJob = await Promise.all( + jobs.map(async (job) => { + const body = JSON.stringify({ + job: { id: job.id, name: job.name }, + req: { + include_finished: false, + page: 1, + limit: MAX_FRAMES_PER_JOB, + states: { frame_states: [RUNNING_STATE] }, + }, + }); + try { + const framesResp = await fetchObjectFromRestGateway( + "/job.JobInterface/GetFrames", + "POST", + body, + ); + const framesJson = await framesResp.json(); + if (framesJson?.error) return []; + const frames: any[] = framesJson?.data?.frames?.frames ?? 
[]; + // Defensive: keep only RUNNING even if the state filter was ignored, + // and stamp the parent job so the table can show / act on it. + return frames + .filter((f) => f.state === "RUNNING") + .map((f) => ({ ...f, jobId: job.id, jobName: job.name })); + } catch { + return []; + } + }), + ); + + return NextResponse.json({ data: perJob.flat() }, { status: 200 }); + } catch (error) { + return NextResponse.json({ error: (error as Error).message }, { status: 500 }); + } +} diff --git a/cueweb/app/stuck-frames/page.tsx b/cueweb/app/stuck-frames/page.tsx new file mode 100644 index 000000000..888ceb486 --- /dev/null +++ b/cueweb/app/stuck-frames/page.tsx @@ -0,0 +1,221 @@ +"use client"; + +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import * as React from "react"; +import { useSession } from "next-auth/react"; + +import type { Frame } from "@/app/frames/frame-columns"; +import { StuckFrame, getStuckFrames } from "@/app/utils/get_utils"; +import { killFrames, retryFrames } from "@/app/utils/action_utils"; +import { handleError } from "@/app/utils/notify_utils"; +import { Button } from "@/components/ui/button"; +import { Skeleton } from "@/components/ui/skeleton"; + +const REFRESH_MS = 30000; +const THRESHOLD_KEY = "cueweb.stuck-frames.thresholdHours"; +const DEFAULT_HOURS = 8; +const MIN_HOURS = 1; +const MAX_HOURS = 48; + +function fmtDuration(seconds: number): string { + if (!Number.isFinite(seconds) || seconds < 0) return "—"; + const h = Math.floor(seconds / 3600); + const m = Math.floor((seconds % 3600) / 60); + const s = Math.floor(seconds % 60); + return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}`; +} + +// last_resource is "host/procid"; show just the host. +const hostOf = (lastResource: string) => (lastResource || "").split("/")[0] || "—"; + +export default function StuckFramesPage() { + const { data: session } = useSession(); + const username = session?.user?.name ?? session?.user?.email ?? "cueweb"; + + const [frames, setFrames] = React.useState(null); + const [thresholdHours, setThresholdHours] = React.useState(DEFAULT_HOURS); + const [now, setNow] = React.useState(() => Date.now() / 1000); + const [busyId, setBusyId] = React.useState(null); + + const load = React.useCallback(async (isCancelled?: () => boolean) => { + try { + const data = await getStuckFrames(); + if (isCancelled?.()) return; + setFrames(data); + setNow(Date.now() / 1000); + } catch (err) { + if (isCancelled?.()) return; + handleError(err, "Could not load stuck frames"); + setFrames((prev) => prev ?? []); + } + }, []); + + React.useEffect(() => { + // Restore the persisted threshold on mount (kept out of the initial state + // to avoid an SSR/client hydration mismatch). 
+ const stored = window.localStorage.getItem(THRESHOLD_KEY); + if (stored) { + const n = Number(stored); + if (Number.isFinite(n) && n >= MIN_HOURS && n <= MAX_HOURS) setThresholdHours(n); + } + let cancelled = false; + const isCancelled = () => cancelled; + load(isCancelled); + const interval = setInterval(() => load(isCancelled), REFRESH_MS); + return () => { + cancelled = true; + clearInterval(interval); + }; + }, [load]); + + function changeThreshold(hours: number) { + setThresholdHours(hours); + window.localStorage.setItem(THRESHOLD_KEY, String(hours)); + } + + const runtimeOf = React.useCallback( + (f: StuckFrame) => (f.startTime ? now - f.startTime : 0), + [now], + ); + + const stuck = React.useMemo(() => { + if (!frames) return null; + const thresholdSeconds = thresholdHours * 3600; + return frames + .filter((f) => runtimeOf(f) > thresholdSeconds) + .sort((a, b) => runtimeOf(b) - runtimeOf(a)); + }, [frames, thresholdHours, runtimeOf]); + + // Strip the page-only jobId/jobName before sending the frame to a Cuebot RPC + // (they are not Frame proto fields). + function toFrame(sf: StuckFrame): Frame { + const { jobId: _jobId, jobName: _jobName, ...frame } = sf; + return frame as Frame; + } + + async function handleRetry(sf: StuckFrame) { + setBusyId(sf.id); + try { + await retryFrames([toFrame(sf)]); + await load(); + } finally { + setBusyId(null); + } + } + + async function handleKill(sf: StuckFrame) { + setBusyId(sf.id); + try { + await killFrames([toFrame(sf)], username, `Manual frame kill from CueWeb Stuck Frames by ${username}`); + await load(); + } finally { + setBusyId(null); + } + } + + return ( +
+

Stuck Frames

+ +
+ + {stuck ? ( + + {stuck.length} {stuck.length === 1 ? "frame" : "frames"} + + ) : null} +
+ + {stuck === null ? ( +
+ + + +
+ ) : stuck.length === 0 ? ( +

+ No frames have been running longer than {thresholdHours}{" "} + {thresholdHours === 1 ? "hour" : "hours"}. +

+ ) : ( +
+ + + + + + + + + + + + + {stuck.map((f) => ( + + + + + + + + + ))} + +
JobLayerFrameHostRuntimeActions
{f.jobName}{f.layerName}{f.number}{hostOf(f.lastResource)}{fmtDuration(runtimeOf(f))} +
+ + +
+
+
+ )} +
+ ); +} diff --git a/cueweb/app/utils/get_utils.ts b/cueweb/app/utils/get_utils.ts index 12e5f92ff..448fdad6a 100644 --- a/cueweb/app/utils/get_utils.ts +++ b/cueweb/app/utils/get_utils.ts @@ -187,6 +187,18 @@ export async function getFrames(body: string): Promise { return response ? response : []; } +// A running frame plus its parent job, for the Stuck Frames page. +export type StuckFrame = Frame & { jobId: string; jobName: string }; + +// Fetch every RUNNING frame across all unfinished jobs (server-aggregated via +// /api/stuck-frames). The Stuck Frames page applies the running-time threshold +// locally so the slider stays instant. +export async function getStuckFrames(): Promise { + const ENDPOINT = "/api/stuck-frames"; + const response = await accessGetApi(ENDPOINT, JSON.stringify({})); + return Array.isArray(response) ? response : []; +} + // Fetch a pending job based on the request body export async function getPendingJob(body: string): Promise { const ENDPOINT = "/api/job/getjob"; From e5502dea37a8b095722089c9168f5c7ff616379f Mon Sep 17 00:00:00 2001 From: Ramon Figueiredo Date: Fri, 12 Jun 2026 01:25:19 -0700 Subject: [PATCH 03/27] [cueweb] Stuck Frames: full StuckFramePlugin parity (CueCommander) Replace the running-time MVP with CueGUI's StuckFramePlugin behavior: - Detection predicate (LLU / % stuck / avg-completion / runtime / >500s, %stuck<1.1) applied live client-side. - Multi-service filter bar (catch-all + per-service filters) with % Run Since LLU, Min LLU, % Avg Completion, Total Runtime, Exclude Keywords, Enable; CueGUI service defaults; persisted to localStorage. - Job-grouped table: Name, Comment, Frame, Host, LLU, Runtime, % Stuck, Average, Last Line (lazy rqlog tail). - Frame menu: Tail/View/Last Log, Retry/Eat/Kill, Log + Log-and-X, Frame Not Stuck, Add/Exclude Job, Core Up, View Host. Job menu: View Comments, Job Not Stuck, Add/Exclude Job, Core Up. Auto-refresh, Notification, Refresh, Clear. 
- Aggregation route attaches service/avgFrameSec/layer via GetLayers; add /api/stuck-frames/lastline and /api/layer/action/setmincores. --- .../app/api/layer/action/setmincores/route.ts | 45 ++ cueweb/app/api/stuck-frames/lastline/route.ts | 44 ++ cueweb/app/api/stuck-frames/route.ts | 115 ++-- cueweb/app/stuck-frames/page.tsx | 573 ++++++++++++++---- cueweb/app/utils/action_utils.ts | 7 + cueweb/app/utils/get_utils.ts | 34 +- cueweb/components/ui/stuck-frame-filters.tsx | 192 ++++++ 7 files changed, 852 insertions(+), 158 deletions(-) create mode 100644 cueweb/app/api/layer/action/setmincores/route.ts create mode 100644 cueweb/app/api/stuck-frames/lastline/route.ts create mode 100644 cueweb/components/ui/stuck-frame-filters.tsx diff --git a/cueweb/app/api/layer/action/setmincores/route.ts b/cueweb/app/api/layer/action/setmincores/route.ts new file mode 100644 index 000000000..5d398a100 --- /dev/null +++ b/cueweb/app/api/layer/action/setmincores/route.ts @@ -0,0 +1,45 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Set a layer's minimum cores (CueGUI Stuck Frame "Core Up"). Request: +// { layer, cores }. RPC: /job.LayerInterface/SetMinCores. 
+export async function POST(request: NextRequest) { + const endpoint = "/job.LayerInterface/SetMinCores"; + const method = request.method; + if (method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 }); + } + + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody || typeof jsonBody !== 'object' || !jsonBody.layer || typeof jsonBody.cores !== 'number') { + return NextResponse.json({ error: 'Invalid request body: layer and numeric cores are required' }, { status: 400 }); + } + + const body = JSON.stringify(jsonBody); + const response = await handleRoute(method, endpoint, body, true); + const responseData = await response.json(); + + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/stuck-frames/lastline/route.ts b/cueweb/app/api/stuck-frames/lastline/route.ts new file mode 100644 index 000000000..c25cbc09c --- /dev/null +++ b/cueweb/app/api/stuck-frames/lastline/route.ts @@ -0,0 +1,44 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { NextRequest, NextResponse } from "next/server"; +import { execFile as execFileCallback } from "child_process"; +import { promisify } from "util"; + +const execFile = promisify(execFileCallback); + +// Returns the last non-empty line of a frame's .rqlog (the Stuck Frames +// "Last Line" column, mirroring CueGUI's getLastLine). Best-effort: if the log +// filesystem isn't mounted in this deployment, or the file is missing, it +// returns an empty line rather than erroring. execFile (no shell) + an rqlog +// path allowlist keep the caller-supplied path from being abused. +export async function GET(request: NextRequest) { + const path = request.nextUrl.searchParams.get("path"); + if (!path || !path.endsWith(".rqlog") || path.includes("..")) { + return NextResponse.json({ lastLine: "" }, { status: 200 }); + } + try { + // tail the file, then keep the last non-blank line. + const { stdout } = await execFile("tail", ["-n", "20", "--", path], { + timeout: 5000, + maxBuffer: 1024 * 1024, + }); + const lines = stdout.split("\n").map((l) => l.trimEnd()).filter((l) => l.trim() !== ""); + return NextResponse.json({ lastLine: lines.length ? lines[lines.length - 1] : "" }, { status: 200 }); + } catch { + return NextResponse.json({ lastLine: "" }, { status: 200 }); + } +} diff --git a/cueweb/app/api/stuck-frames/route.ts b/cueweb/app/api/stuck-frames/route.ts index 301c230be..5690fe29a 100644 --- a/cueweb/app/api/stuck-frames/route.ts +++ b/cueweb/app/api/stuck-frames/route.ts @@ -17,62 +17,97 @@ import { fetchObjectFromRestGateway } from '@/app/utils/api_utils'; import { NextRequest, NextResponse } from "next/server"; -// Server-side aggregation for the Stuck Frames page. There is no single RPC -// that returns every running frame, so we list the unfinished jobs -// (GetJobs) and fan out a GetFrames call per job filtered to RUNNING -// (FrameState 2), then flatten. 
Doing the fan-out here keeps the browser to a -// single request and avoids leaking the N+1 to the client. The page applies -// the running-time threshold locally so the slider stays instant. +// Server-side data gathering for the Stuck Frames page. CueGUI's +// StuckFramePlugin walks every show's procs; we approximate by listing the +// unfinished jobs (GetJobs) and, per job, fetching its RUNNING frames +// (GetFrames, FrameState 2) and its layers (GetLayers, for the per-service +// average frame time). Each frame is stamped with its job, service and the +// layer's average frame time so the client can apply the full CueGUI +// stuck-detection predicate (LLU / % stuck / avg-completion / runtime) live +// against the user's per-service filter thresholds. // -// RPCs: /job.JobInterface/GetJobs, /job.JobInterface/GetFrames. +// RPCs: /job.JobInterface/GetJobs, /job.JobInterface/GetFrames, +// /job.JobInterface/GetLayers. const RUNNING_STATE = 2; // FrameState.RUNNING (proto/src/job.proto) const MAX_FRAMES_PER_JOB = 1000; +async function gatewayJson(endpoint: string, body: string): Promise { + try { + const resp = await fetchObjectFromRestGateway(endpoint, "POST", body); + const json = await resp.json(); + if (json?.error) return null; + return json?.data ?? null; + } catch { + return null; + } +} + export async function POST(_request: NextRequest) { try { - // 1. All unfinished jobs. - const jobsResp = await fetchObjectFromRestGateway( + const jobsData = await gatewayJson( "/job.JobInterface/GetJobs", - "POST", JSON.stringify({ r: { include_finished: false } }), ); - const jobsJson = await jobsResp.json(); - if (jobsJson?.error) { - return NextResponse.json({ error: jobsJson.error }, { status: 500 }); + if (jobsData === null) { + return NextResponse.json({ error: "Failed to list jobs" }, { status: 500 }); } - const jobs: any[] = jobsJson?.data?.jobs?.jobs ?? []; + const jobs: any[] = jobsData?.jobs?.jobs ?? []; - // 2. Running frames per job, in parallel. 
A single job's failure drops to - // an empty list rather than failing the whole page. const perJob = await Promise.all( jobs.map(async (job) => { - const body = JSON.stringify({ - job: { id: job.id, name: job.name }, - req: { - include_finished: false, - page: 1, - limit: MAX_FRAMES_PER_JOB, - states: { frame_states: [RUNNING_STATE] }, - }, - }); - try { - const framesResp = await fetchObjectFromRestGateway( + const [framesData, layersData] = await Promise.all([ + gatewayJson( "/job.JobInterface/GetFrames", - "POST", - body, - ); - const framesJson = await framesResp.json(); - if (framesJson?.error) return []; - const frames: any[] = framesJson?.data?.frames?.frames ?? []; - // Defensive: keep only RUNNING even if the state filter was ignored, - // and stamp the parent job so the table can show / act on it. - return frames - .filter((f) => f.state === "RUNNING") - .map((f) => ({ ...f, jobId: job.id, jobName: job.name })); - } catch { - return []; + JSON.stringify({ + job: { id: job.id, name: job.name }, + req: { + include_finished: false, + page: 1, + limit: MAX_FRAMES_PER_JOB, + states: { frame_states: [RUNNING_STATE] }, + }, + }), + ), + gatewayJson( + "/job.JobInterface/GetLayers", + JSON.stringify({ job: { id: job.id, name: job.name } }), + ), + ]); + + const layers: any[] = layersData?.layers?.layers ?? []; + // layerName -> details for attaching to each frame (service + average + // frame time for detection; id + minCores for the Core Up action). + const layerInfo = new Map< + string, + { id: string; service: string; avgFrameSec: number; minCores: number } + >(); + for (const layer of layers) { + layerInfo.set(layer.name, { + id: layer.id ?? "", + service: Array.isArray(layer.services) && layer.services.length ? layer.services[0] : "", + avgFrameSec: Number(layer.layerStats?.avgFrameSec ?? 0), + minCores: Number(layer.minCores ?? 0), + }); } + + const frames: any[] = framesData?.frames?.frames ?? 
[]; + return frames + .filter((f) => f.state === "RUNNING") + .map((f) => { + const info = layerInfo.get(f.layerName); + return { + ...f, + jobId: job.id, + jobName: job.name, + jobLogDir: job.logDir ?? "", + jobHasComment: !!job.hasComment, + service: info?.service ?? "", + avgFrameSec: info?.avgFrameSec ?? 0, + layerId: info?.id ?? "", + layerMinCores: info?.minCores ?? 0, + }; + }); }), ); diff --git a/cueweb/app/stuck-frames/page.tsx b/cueweb/app/stuck-frames/page.tsx index 888ceb486..1aa0d5ed1 100644 --- a/cueweb/app/stuck-frames/page.tsx +++ b/cueweb/app/stuck-frames/page.tsx @@ -18,204 +18,549 @@ import * as React from "react"; import { useSession } from "next-auth/react"; +import { useRouter } from "next/navigation"; +import { MessageSquare } from "lucide-react"; import type { Frame } from "@/app/frames/frame-columns"; -import { StuckFrame, getStuckFrames } from "@/app/utils/get_utils"; -import { killFrames, retryFrames } from "@/app/utils/action_utils"; -import { handleError } from "@/app/utils/notify_utils"; +import { StuckFrame, getStuckFrames, getStuckFrameLastLine } from "@/app/utils/get_utils"; +import { eatFrames, killFrames, retryFrames, setLayerMinCores } from "@/app/utils/action_utils"; +import { handleError, toastSuccess } from "@/app/utils/notify_utils"; import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; import { Skeleton } from "@/components/ui/skeleton"; +import { + DEFAULT_FILTER, + StuckFrameFilters, + type StuckFilter, +} from "@/components/ui/stuck-frame-filters"; -const REFRESH_MS = 30000; -const THRESHOLD_KEY = "cueweb.stuck-frames.thresholdHours"; -const DEFAULT_HOURS = 8; -const MIN_HOURS = 1; -const MAX_HOURS = 48; +const AUTO_REFRESH_MS = 60000; +const FILTERS_KEY = "cueweb.stuck-frames.filters"; -function 
fmtDuration(seconds: number): string { - if (!Number.isFinite(seconds) || seconds < 0) return "—"; +// --- formatting ----------------------------------------------------------- +function fmtDur(seconds: number): string { + if (!Number.isFinite(seconds) || seconds <= 0) return ""; const h = Math.floor(seconds / 3600); const m = Math.floor((seconds % 3600) / 60); const s = Math.floor(seconds % 60); return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}`; } +const hostOf = (lastResource: string) => (lastResource || "").split("/")[0] || ""; -// last_resource is "host/procid"; show just the host. -const hostOf = (lastResource: string) => (lastResource || "").split("/")[0] || "—"; +// --- detection (CueGUI StuckFramePlugin parity) --------------------------- +type Metrics = { runtime: number; llu: number; percentStuck: number; avg: number }; + +function metricsOf(f: StuckFrame, now: number): Metrics { + const runtime = f.startTime ? now - f.startTime : 0; + const llu = f.state === "RUNNING" && f.lluTime ? now - f.lluTime : 0; + const percentStuck = runtime > 0 ? llu / runtime : 0; + return { runtime, llu, percentStuck, avg: f.avgFrameSec }; +} + +// The catch-all filter (index 0) applies unless a later, service-specific +// filter matches the frame's service. +function pickFilter(f: StuckFrame, filters: StuckFilter[]): StuckFilter | undefined { + const specific = filters.find((flt, i) => i > 0 && flt.service && flt.service === f.service); + return specific ?? 
filters[0]; +} + +function isExcluded(f: StuckFrame, filter: StuckFilter): boolean { + const keywords = filter.regex.split(",").map((s) => s.trim()).filter(Boolean); + return keywords.some((kw) => { + try { + const re = new RegExp(kw, "i"); + return re.test(f.jobName) || re.test(f.layerName); + } catch { + const k = kw.toLowerCase(); + return f.jobName.toLowerCase().includes(k) || f.layerName.toLowerCase().includes(k); + } + }); +} + +// Mirrors CueGUI: lluTime > minLLU AND %stuck > threshold AND runtime > +// avg*avgComp% AND %stuck < 1.1 AND runtime > 500s. +function isStuck(f: StuckFrame, filter: StuckFilter | undefined, now: number): boolean { + if (!filter || !filter.enabled) return false; + if (isExcluded(f, filter)) return false; + const { runtime, llu, percentStuck, avg } = metricsOf(f, now); + return ( + llu > filter.minLlu * 60 && + percentStuck * 100 > filter.percentStuck && + runtime > (avg * filter.avgComp) / 100 && + percentStuck < 1.1 && + runtime > 500 + ); +} + +type MenuState = + | { kind: "frame"; x: number; y: number; frame: StuckFrame } + | { kind: "job"; x: number; y: number; jobName: string }; export default function StuckFramesPage() { + const router = useRouter(); const { data: session } = useSession(); const username = session?.user?.name ?? session?.user?.email ?? "cueweb"; - const [frames, setFrames] = React.useState(null); - const [thresholdHours, setThresholdHours] = React.useState(DEFAULT_HOURS); + const [raw, setRaw] = React.useState(null); const [now, setNow] = React.useState(() => Date.now() / 1000); + const [filters, setFilters] = React.useState([{ ...DEFAULT_FILTER }]); + const [autoRefresh, setAutoRefresh] = React.useState(false); + const [notify, setNotify] = React.useState(false); + const [loading, setLoading] = React.useState(false); + + // Client-side removals: "Frame/Job Not Stuck". 
+ const [hiddenFrames, setHiddenFrames] = React.useState>(new Set()); + const [hiddenJobs, setHiddenJobs] = React.useState>(new Set()); + + const [lastLines, setLastLines] = React.useState>({}); + const [menu, setMenu] = React.useState(null); + const [coreUp, setCoreUp] = React.useState<{ targets: { id: string; name: string }[]; cores: string } | null>(null); const [busyId, setBusyId] = React.useState(null); + // Restore persisted filters on mount. + React.useEffect(() => { + const stored = window.localStorage.getItem(FILTERS_KEY); + if (stored) { + try { + const parsed = JSON.parse(stored); + if (Array.isArray(parsed) && parsed.length > 0) setFilters(parsed); + } catch { + /* ignore corrupt value */ + } + } + }, []); + + function persistFilters(next: StuckFilter[]) { + setFilters(next); + window.localStorage.setItem(FILTERS_KEY, JSON.stringify(next)); + } + const load = React.useCallback(async (isCancelled?: () => boolean) => { + setLoading(true); try { const data = await getStuckFrames(); if (isCancelled?.()) return; - setFrames(data); + setRaw(data); setNow(Date.now() / 1000); } catch (err) { if (isCancelled?.()) return; handleError(err, "Could not load stuck frames"); - setFrames((prev) => prev ?? []); + setRaw((prev) => prev ?? []); + } finally { + if (!isCancelled?.()) setLoading(false); } }, []); React.useEffect(() => { - // Restore the persisted threshold on mount (kept out of the initial state - // to avoid an SSR/client hydration mismatch). 
- const stored = window.localStorage.getItem(THRESHOLD_KEY); - if (stored) { - const n = Number(stored); - if (Number.isFinite(n) && n >= MIN_HOURS && n <= MAX_HOURS) setThresholdHours(n); - } let cancelled = false; - const isCancelled = () => cancelled; - load(isCancelled); - const interval = setInterval(() => load(isCancelled), REFRESH_MS); + load(() => cancelled); return () => { cancelled = true; - clearInterval(interval); }; }, [load]); - function changeThreshold(hours: number) { - setThresholdHours(hours); - window.localStorage.setItem(THRESHOLD_KEY, String(hours)); + // Auto-refresh. CueGUI refreshes ~every 30 min; a web monitor wants fresher + // data, so this polls every 60s while enabled. Fires a desktop notification + // on completion when armed and stuck frames are present. + React.useEffect(() => { + if (!autoRefresh) return; + let cancelled = false; + const id = setInterval(async () => { + await load(() => cancelled); + if (cancelled) return; + if (notify && typeof Notification !== "undefined" && Notification.permission === "granted") { + new Notification("CueWeb: stuck-frame scan complete"); + } + }, AUTO_REFRESH_MS); + return () => { + cancelled = true; + clearInterval(id); + }; + }, [autoRefresh, notify, load]); + + function toggleNotify(checked: boolean) { + setNotify(checked); + if (checked && typeof Notification !== "undefined" && Notification.permission === "default") { + Notification.requestPermission(); + } } - const runtimeOf = React.useCallback( - (f: StuckFrame) => (f.startTime ? now - f.startTime : 0), - [now], - ); + // Services present in the data, for the add-filter service dropdown. + const availableServices = React.useMemo(() => { + const set = new Set(); + (raw ?? []).forEach((f) => f.service && set.add(f.service)); + return Array.from(set).sort(); + }, [raw]); + + // Apply detection + client-side removals, group by job. 
+ const groups = React.useMemo(() => { + if (!raw) return null; + const stuck = raw.filter( + (f) => + !hiddenFrames.has(f.id) && + !hiddenJobs.has(f.jobName) && + isStuck(f, pickFilter(f, filters), now), + ); + const byJob = new Map(); + for (const f of stuck) { + const arr = byJob.get(f.jobName) ?? []; + arr.push(f); + byJob.set(f.jobName, arr); + } + return Array.from(byJob.entries()) + .sort((a, b) => a[0].localeCompare(b[0])) + .map(([jobName, frames]) => ({ + jobName, + frames: frames.sort((a, b) => metricsOf(b, now).runtime - metricsOf(a, now).runtime), + })); + }, [raw, filters, now, hiddenFrames, hiddenJobs]); - const stuck = React.useMemo(() => { - if (!frames) return null; - const thresholdSeconds = thresholdHours * 3600; - return frames - .filter((f) => runtimeOf(f) > thresholdSeconds) - .sort((a, b) => runtimeOf(b) - runtimeOf(a)); - }, [frames, thresholdHours, runtimeOf]); + const totalStuck = groups?.reduce((n, g) => n + g.frames.length, 0) ?? 0; - // Strip the page-only jobId/jobName before sending the frame to a Cuebot RPC - // (they are not Frame proto fields). + // Lazily fetch the last log line for visible stuck frames. + React.useEffect(() => { + if (!groups) return; + const pending = groups + .flatMap((g) => g.frames) + .filter((f) => f.jobLogDir && lastLines[f.id] === undefined) + .slice(0, 50); // bound per pass + if (pending.length === 0) return; + let cancelled = false; + (async () => { + const entries = await Promise.all( + pending.map(async (f) => { + const logPath = `${f.jobLogDir}/${f.jobName}.${f.name}.rqlog`; + const line = await getStuckFrameLastLine(logPath); + return [f.id, line] as const; + }), + ); + if (cancelled) return; + setLastLines((prev) => { + const next = { ...prev }; + for (const [id, line] of entries) next[id] = line; + return next; + }); + })(); + return () => { + cancelled = true; + }; + }, [groups, lastLines]); + + // Close the context menu on any outside interaction. 
+ React.useEffect(() => { + if (!menu) return; + const close = () => setMenu(null); + const onKey = (e: KeyboardEvent) => e.key === "Escape" && setMenu(null); + window.addEventListener("click", close); + window.addEventListener("scroll", close, true); + window.addEventListener("keydown", onKey); + return () => { + window.removeEventListener("click", close); + window.removeEventListener("scroll", close, true); + window.removeEventListener("keydown", onKey); + }; + }, [menu]); + + // --- helpers ------------------------------------------------------------- function toFrame(sf: StuckFrame): Frame { - const { jobId: _jobId, jobName: _jobName, ...frame } = sf; + const { + jobId: _a, jobName: _b, jobLogDir: _c, jobHasComment: _d, + service: _e, avgFrameSec: _f, layerId: _g, layerMinCores: _h, + ...frame + } = sf; return frame as Frame; } - async function handleRetry(sf: StuckFrame) { - setBusyId(sf.id); - try { - await retryFrames([toFrame(sf)]); - await load(); - } finally { - setBusyId(null); + function openLog(f: StuckFrame) { + const logDir = `${f.jobLogDir}/${f.jobName}.${f.name}.rqlog`; + const params = new URLSearchParams({ frameId: f.id, frameLogDir: logDir, username }); + window.open(`/frames/${encodeURIComponent(f.name)}?${params.toString()}`, "_blank", "noopener,noreferrer"); + } + + function exportLog(frames: StuckFrame[]) { + // Web adaptation of CueGUI's YAML "stuck_frames_db" file: a JSON download + // (the browser can't write to a fileshare). + const db: Record> = {}; + for (const f of frames) { + const { runtime, llu, avg } = metricsOf(f, now); + const byJob = db[f.jobName] ?? (db[f.jobName] = {}); + byJob[`${f.number}-${Math.floor(now)}`] = { + layer: f.layerName, + host: f.lastResource, + llu, + runtime, + average: avg, + log: lastLines[f.id] ?? 
"", + }; } + const blob = new Blob([JSON.stringify(db, null, 2)], { type: "application/json" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = "stuck_frames.json"; + a.click(); + URL.revokeObjectURL(url); + toastSuccess(`Logged ${frames.length} stuck frame(s)`); } - async function handleKill(sf: StuckFrame) { - setBusyId(sf.id); + function hideFrame(f: StuckFrame) { + setHiddenFrames((prev) => new Set(prev).add(f.id)); + } + function hideJob(jobName: string) { + setHiddenJobs((prev) => new Set(prev).add(jobName)); + } + function addJobToExcludes(jobName: string) { + // Append the job name to the catch-all filter's exclude keywords. + persistFilters( + filters.map((flt, i) => + i === 0 + ? { ...flt, regex: flt.regex ? `${flt.regex}, ${jobName}` : jobName } + : flt, + ), + ); + toastSuccess(`Excluded ${jobName}`); + } + + async function act(f: StuckFrame, fn: () => Promise) { + setBusyId(f.id); + setMenu(null); try { - await killFrames([toFrame(sf)], username, `Manual frame kill from CueWeb Stuck Frames by ${username}`); + await fn(); + hideFrame(f); await load(); } finally { setBusyId(null); } } + const retry = (f: StuckFrame) => act(f, () => retryFrames([toFrame(f)])); + const eat = (f: StuckFrame) => act(f, () => eatFrames([toFrame(f)])); + const kill = (f: StuckFrame) => + act(f, () => killFrames([toFrame(f)], username, `Manual frame kill from CueWeb Stuck Frames by ${username}`)); + + function openCoreUpForFrame(f: StuckFrame) { + setMenu(null); + if (!f.layerId) return; + setCoreUp({ targets: [{ id: f.layerId, name: f.layerName }], cores: String(Math.max(1, f.layerMinCores || 1)) }); + } + function openCoreUpForJob(jobName: string) { + setMenu(null); + const frames = (raw ?? 
[]).filter((f) => f.jobName === jobName && f.layerId); + const seen = new Map(); + frames.forEach((f) => seen.set(f.layerId, f.layerName)); + if (seen.size === 0) return; + setCoreUp({ targets: Array.from(seen.entries()).map(([id, name]) => ({ id, name })), cores: "1" }); + } + async function applyCoreUp() { + if (!coreUp) return; + const cores = Number(coreUp.cores); + if (!Number.isFinite(cores) || cores < 0) return; + await Promise.all(coreUp.targets.map((t) => setLayerMinCores(t, cores))); + setCoreUp(null); + await load(); + } + + // --- render -------------------------------------------------------------- + const menuItemCls = "block w-full rounded px-2 py-1.5 text-left hover:bg-accent disabled:opacity-50"; + return (
-

Stuck Frames

- -
- - {stuck ? ( - - {stuck.length} {stuck.length === 1 ? "frame" : "frames"} - - ) : null} +
+

Stuck Frames

+
+ + + + +
- {stuck === null ? ( +
+ +
+ + {groups === null ? (
- ) : stuck.length === 0 ? ( + ) : totalStuck === 0 ? (

- No frames have been running longer than {thresholdHours}{" "} - {thresholdHours === 1 ? "hour" : "hours"}. + No stuck frames detected with the current filters.

) : (
- - + + + - + + + - {stuck.map((f) => ( - - - - - - - { + e.preventDefault(); + setMenu({ kind: "job", x: e.clientX, y: e.clientY, jobName: g.jobName }); + }} + > + + + + {g.frames.map((f) => { + const m = metricsOf(f, now); + return ( + { + e.preventDefault(); + setMenu({ kind: "frame", x: e.clientX, y: e.clientY, frame: f }); + }} > - Retry - - - - - + + + + + + + + + + ); + })} + ))}
JobLayerName{/* comment icon col */} Frame HostLLU RuntimeActions% StuckAverageLast Line
{f.jobName}{f.layerName}{f.number}{hostOf(f.lastResource)}{fmtDuration(runtimeOf(f))} -
-
{g.jobName} + {g.frames[0]?.jobHasComment ? ( + + ) : null} + +
{f.layerName} + {f.number}{hostOf(f.lastResource)}{fmtDur(m.llu)}{fmtDur(m.runtime)}{(m.percentStuck * 100).toFixed(2)}{fmtDur(m.avg)} + {lastLines[f.id] ?? ""} + {busyId === f.id ? " …" : ""} +
)} + + {!loading && groups !== null ? ( +

+ {totalStuck} stuck frame(s) across {groups.length} job(s). +

+ ) : null} + + {/* Context menu */} + {menu ? ( +
e.stopPropagation()} + > + {menu.kind === "frame" ? ( + <> + + + {menu.frame.retryCount >= 1 ? ( + + ) : null} +
+ + + +
+ + + + +
+ + + +
+ + + + ) : ( + <> + +
+ + + +
+ + + )} +
+ ) : null} + + {/* Core Up dialog */} + !o && setCoreUp(null)}> + + + Core Up + +
+

+ Set minimum cores for {coreUp?.targets.length === 1 ? `layer "${coreUp.targets[0].name}"` : `${coreUp?.targets.length ?? 0} layer(s)`}. +

+ setCoreUp((c) => (c ? { ...c, cores: e.target.value } : c))} + aria-label="Minimum cores" + /> +
+ + + + +
+
); } diff --git a/cueweb/app/utils/action_utils.ts b/cueweb/app/utils/action_utils.ts index 182b437d0..c291c0acf 100644 --- a/cueweb/app/utils/action_utils.ts +++ b/cueweb/app/utils/action_utils.ts @@ -163,6 +163,13 @@ export async function retryFrames(frames: Frame[]) { await performAction(endpoint, bodyAr, `Retried ${frames.length} frame(s)`); } +// Set a layer's minimum cores (CueGUI Stuck Frame "Core Up"). cores is a float +// core count. Returns success so callers can gate a refresh. +export async function setLayerMinCores(layer: { id: string; name?: string }, cores: number): Promise { + const endpoint = "/api/layer/action/setmincores"; + return performAction(endpoint, [JSON.stringify({ layer, cores })], `Set min cores to ${cores}`); +} + /**************************************/ // Unbook /**************************************/ diff --git a/cueweb/app/utils/get_utils.ts b/cueweb/app/utils/get_utils.ts index 448fdad6a..cac6e5b48 100644 --- a/cueweb/app/utils/get_utils.ts +++ b/cueweb/app/utils/get_utils.ts @@ -187,18 +187,44 @@ export async function getFrames(body: string): Promise { return response ? response : []; } -// A running frame plus its parent job, for the Stuck Frames page. -export type StuckFrame = Frame & { jobId: string; jobName: string }; +// A running frame plus the job/layer context the Stuck Frames page needs to +// apply CueGUI's per-service stuck-detection predicate (service + average +// frame time) and to act on the row (job, log dir, comment flag). +export type StuckFrame = Frame & { + jobId: string; + jobName: string; + jobLogDir: string; + jobHasComment: boolean; + service: string; + avgFrameSec: number; + layerId: string; + layerMinCores: number; +}; // Fetch every RUNNING frame across all unfinished jobs (server-aggregated via -// /api/stuck-frames). The Stuck Frames page applies the running-time threshold -// locally so the slider stays instant. +// /api/stuck-frames), each stamped with its service and average frame time. 
+// The Stuck Frames page applies the detection thresholds locally so the +// filters stay instant. export async function getStuckFrames(): Promise { const ENDPOINT = "/api/stuck-frames"; const response = await accessGetApi(ENDPOINT, JSON.stringify({})); return Array.isArray(response) ? response : []; } +// Best-effort fetch of a frame log's last line (the "Last Line" column). Empty +// when the log filesystem isn't reachable from the web server. +export async function getStuckFrameLastLine(logPath: string): Promise { + if (!logPath) return ""; + const base = process.env.NEXT_PUBLIC_URL ?? ""; + try { + const resp = await fetch(`${base}/api/stuck-frames/lastline?path=${encodeURIComponent(logPath)}`); + const json = await resp.json(); + return typeof json?.lastLine === "string" ? json.lastLine : ""; + } catch { + return ""; + } +} + // Fetch a pending job based on the request body export async function getPendingJob(body: string): Promise { const ENDPOINT = "/api/job/getjob"; diff --git a/cueweb/components/ui/stuck-frame-filters.tsx b/cueweb/components/ui/stuck-frame-filters.tsx new file mode 100644 index 000000000..04d7ac98f --- /dev/null +++ b/cueweb/components/ui/stuck-frame-filters.tsx @@ -0,0 +1,192 @@ +"use client"; + +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import * as React from "react"; +import { Plus, X } from "lucide-react"; + +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Input } from "@/components/ui/input"; + +// One detection filter (CueGUI StuckFrameBar). service === "" is the catch-all +// ("All" when it's the only filter, "All Other Types" when service filters +// exist). The four thresholds mirror CueGUI's spinboxes. +export type StuckFilter = { + service: string; + regex: string; // exclude keywords, comma-separated + percentStuck: number; // % of runtime since last log update + minLlu: number; // minutes + avgComp: number; // % of average completion time + runtime: number; // minutes + enabled: boolean; +}; + +// CueGUI defaults: [percentStuck, minLlu, avgComp, runtime]. +export const SERVICE_DEFAULTS: Record = { + preprocess: [1, 1, 115, 10], + nuke: [50, 5, 115, 10], + arnold: [50, 60, 115, 120], +}; + +export const DEFAULT_FILTER: StuckFilter = { + service: "", + regex: "", + percentStuck: 50, + minLlu: 30, + avgComp: 115, + runtime: 60, + enabled: true, +}; + +export function makeServiceFilter(service: string): StuckFilter { + const d = SERVICE_DEFAULTS[service]; + return d + ? { service, regex: "", percentStuck: d[0], minLlu: d[1], avgComp: d[2], runtime: d[3], enabled: true } + : { ...DEFAULT_FILTER, service }; +} + +const NUM = "h-8 w-20 text-right"; + +function NumberField({ + label, + suffix, + value, + disabled, + onChange, +}: { + label: string; + suffix: string; + value: number; + disabled: boolean; + onChange: (n: number) => void; +}) { + return ( + + ); +} + +export function StuckFrameFilters({ + filters, + onChange, + availableServices, +}: { + filters: StuckFilter[]; + onChange: (filters: StuckFilter[]) => void; + availableServices: string[]; +}) { + function update(index: number, patch: Partial) { + onChange(filters.map((f, i) => (i === index ? 
{ ...f, ...patch } : f))); + } + function addFilter() { + // Default the new filter to the first available service not already used. + const used = new Set(filters.map((f) => f.service)); + const next = availableServices.find((s) => !used.has(s)) ?? ""; + onChange([...filters, makeServiceFilter(next)]); + } + function removeFilter(index: number) { + onChange(filters.filter((_, i) => i !== index)); + } + + const hasServiceFilters = filters.some((f, i) => i > 0); + + return ( +
+ {filters.map((f, i) => { + const isCatchAll = i === 0; + const disabled = !f.enabled; + return ( +
+
+ Layer Service + {isCatchAll ? ( + + {hasServiceFilters ? "All Other Types" : "All"} + + ) : ( + + )} +
+ + update(i, { percentStuck: n })} /> + update(i, { minLlu: n })} /> + update(i, { avgComp: n })} /> + update(i, { runtime: n })} /> + + + + + + {isCatchAll ? ( + + ) : ( + + )} +
+ ); + })} +
+ ); +} From 791ddde170853b343d8c1c7d9b306a9d33d401e4 Mon Sep 17 00:00:00 2001 From: Ramon Figueiredo Date: Fri, 12 Jun 2026 02:37:20 -0700 Subject: [PATCH 04/27] [cueweb] Monitor Hosts: Full CueCommander parity Extend /hosts to match CueGUI's Monitor Hosts window: - Full column set with Swap/Physical/GPU/Temp red/green bars, Load %, GPU + Temp Free columns, comment icon, and row coloring by hardware/ lock state (new SimpleDataTable getRowClassName hook). - Filter bar: name/regex + Filter Allocation/HardwareState/LockState/OS multi-selects, Auto-refresh, Refresh, Clear. - Context menu: Comments, View Procs, Lock/Unlock, Edit Tags, Rename Tag, Change Allocation, Reboot, Reboot when idle, Delete Host, Set/Clear Repair State (Take Ownership shown disabled). - New dialogs: host Comments (view/add), Rename Tag, Change Allocation, Delete confirm. - Bottom Proc monitor panel (View Procs) with View Job / Unbook / Kill / Unbook and Kill. - Routes: host renametag/setallocation/delete/sethardwarestate/addcomment, proc kill/unbookone, proc getprocs. Extended Host/Proc types + actions. 
--- cueweb/README.md | 1 + .../components/frame-range-selector.test.tsx | 146 ++++++++ .../app/api/host/action/addcomment/route.ts | 40 ++ cueweb/app/api/host/action/delete/route.ts | 40 ++ cueweb/app/api/host/action/renametag/route.ts | 40 ++ .../api/host/action/setallocation/route.ts | 40 ++ .../api/host/action/sethardwarestate/route.ts | 41 +++ cueweb/app/api/proc/action/kill/route.ts | 40 ++ cueweb/app/api/proc/action/unbookone/route.ts | 40 ++ cueweb/app/api/proc/getprocs/route.ts | 45 +++ cueweb/app/hosts/columns.tsx | 234 +++++++++--- cueweb/app/hosts/page.tsx | 180 +++++++-- cueweb/app/utils/action_utils.ts | 114 +++++- cueweb/app/utils/get_utils.ts | 22 ++ .../ui/context_menus/action-context-menu.tsx | 84 ++++- cueweb/components/ui/frame-range-selector.tsx | 330 +++++++++++++++++ cueweb/components/ui/host-action-events.ts | 33 +- cueweb/components/ui/host-monitor-dialogs.tsx | 347 ++++++++++++++++++ cueweb/components/ui/proc-monitor-panel.tsx | 232 ++++++++++++ cueweb/components/ui/simple-data-table.tsx | 19 +- 20 files changed, 1985 insertions(+), 83 deletions(-) create mode 100644 cueweb/app/__tests__/components/frame-range-selector.test.tsx create mode 100644 cueweb/app/api/host/action/addcomment/route.ts create mode 100644 cueweb/app/api/host/action/delete/route.ts create mode 100644 cueweb/app/api/host/action/renametag/route.ts create mode 100644 cueweb/app/api/host/action/setallocation/route.ts create mode 100644 cueweb/app/api/host/action/sethardwarestate/route.ts create mode 100644 cueweb/app/api/proc/action/kill/route.ts create mode 100644 cueweb/app/api/proc/action/unbookone/route.ts create mode 100644 cueweb/app/api/proc/getprocs/route.ts create mode 100644 cueweb/components/ui/frame-range-selector.tsx create mode 100644 cueweb/components/ui/host-monitor-dialogs.tsx create mode 100644 cueweb/components/ui/proc-monitor-panel.tsx diff --git a/cueweb/README.md b/cueweb/README.md index 1fa74e596..49dc24db0 100644 --- a/cueweb/README.md +++ 
b/cueweb/README.md @@ -104,6 +104,7 @@ CueWeb replicates the core functionality of CueGUI (Cuetopia and Cuecommander) i - Frame navigation with hyperlinks to logs and data pages. - Stacked job progress bar with a hover tooltip showing per-state frame counts and percentages (succeeded / running / waiting / depend / dead). The Layers table reuses the same `` renderer with `getLayerProgressSegments` so per-layer progress matches the per-job style. - Frame state filter chips above the frames table (`WAITING`, `RUNNING`, `SUCCEEDED`, `DEAD`, `EATEN`, `DEPEND`) with per-state counts, OR-combined selection, and selection mirrored to the `frameStates` URL query parameter for bookmarkable/shareable filtered views. + - Visual **frame range selector** above the frames table (CueGUI `FrameRangeSelection.py` parity): a horizontal strip with one state-colored cell per frame in ascending frame order. Click-drag selects a contiguous range, shift-click extends the selection from the anchor, and the selected subset feeds straight into the same **Retry** / **Eat** / **Kill** frame actions as the right-click menu (with a confirm step; Kill is destructive). The strip reflects the active frame-state filter and survives the 5s auto-refresh. - CueGUI-parity right-click menus on every row, following the CueGUI Monitor Jobs and Monitor Job Details menu structure. Menus scroll instead of overflowing on small viewports; items not yet implemented surface a friendly placeholder toast. - Mobile-friendly equivalent of right-click: every Jobs / Layers / Frames row has a small `⋮` button as its leftmost cell. Tapping it opens the same context menu the desktop right-click opens, so touch users get the full action set without a right-click event. - Wired copy actions: **Copy Job Name** (Job menu); **Copy Layer Name** (Layer menu); **Copy Frame Name** + **Copy Log Path** (Frame menu). Each pushes the value to the clipboard with a confirmation toast. 
Works whether CueWeb is served from `localhost` or from a LAN IP over plain HTTP. diff --git a/cueweb/app/__tests__/components/frame-range-selector.test.tsx b/cueweb/app/__tests__/components/frame-range-selector.test.tsx new file mode 100644 index 000000000..9113d950d --- /dev/null +++ b/cueweb/app/__tests__/components/frame-range-selector.test.tsx @@ -0,0 +1,146 @@ +/** + * @jest-environment jsdom + */ + +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import "@testing-library/jest-dom"; +import { fireEvent, render, screen, waitFor, within } from "@testing-library/react"; +import type { Frame } from "@/app/frames/frame-columns"; +import { eatFrames, killFrames, retryFrames } from "@/app/utils/action_utils"; +import { FrameRangeSelector } from "@/components/ui/frame-range-selector"; + +// Mock the action layer so the test asserts the selected subset that gets +// handed off, not the network behavior (which has its own coverage). +jest.mock("@/app/utils/action_utils", () => ({ + retryFrames: jest.fn(), + eatFrames: jest.fn(), + killFrames: jest.fn(), +})); + +// The safety flag hook reads localStorage + window events; stub it "enabled". 
+jest.mock("@/app/utils/use_disable_job_interaction", () => ({ + useDisableJobInteraction: () => ({ disabled: false, setDisabled: jest.fn(), toggle: jest.fn() }), +})); + +function makeFrame(number: number, state = "DEAD"): Frame { + return { + id: `frame-${number}`, + name: `${number}-layer`, + layerName: "layer", + number, + state, + retryCount: 0, + exitStatus: 0, + dispatchOrder: number, + startTime: 0, + stopTime: 0, + maxRss: "0", + usedMemory: "0", + reservedMemory: "0", + reservedGpuMemory: "0", + lastResource: "/", + checkpointState: "", + checkpointCount: 0, + totalCoreTime: 0, + lluTime: 0, + totalGpuTime: 0, + maxGpuMemory: "0", + usedGpuMemory: "0", + frameStateDisplayOverride: "", + }; +} + +const FRAMES = [1, 2, 3, 4, 5].map((n) => makeFrame(n)); + +function cell(number: number): HTMLElement { + const el = document.querySelector(`[data-frame-number="${number}"]`); + if (!el) throw new Error(`cell #${number} not found`); + return el as HTMLElement; +} + +beforeEach(() => { + jest.clearAllMocks(); +}); + +describe("FrameRangeSelector", () => { + it("drag selects a contiguous range and feeds it into Retry", async () => { + render(); + + // Drag from frame #2 to frame #4 -> selects {2,3,4}. + fireEvent.mouseDown(cell(2)); + fireEvent.mouseEnter(cell(3)); + fireEvent.mouseEnter(cell(4)); + fireEvent.mouseUp(window); + + expect(screen.getByText(/Selected 3 frames \(#2–#4\)/)).toBeInTheDocument(); + + fireEvent.click(screen.getByRole("button", { name: "Retry" })); + + // Confirm in the dialog (there are two "Retry" buttons now; pick the + // one inside the dialog). 
+ const dialog = await screen.findByRole("dialog"); + fireEvent.click(within(dialog).getByRole("button", { name: "Retry" })); + + await waitFor(() => expect(retryFrames).toHaveBeenCalledTimes(1)); + const handed = (retryFrames as jest.Mock).mock.calls[0][0] as Frame[]; + expect(handed.map((f) => f.number).sort((a, b) => a - b)).toEqual([2, 3, 4]); + expect(eatFrames).not.toHaveBeenCalled(); + expect(killFrames).not.toHaveBeenCalled(); + }); + + it("shift-click extends the selection from the anchor", async () => { + render(); + + // Anchor at #2 (single click), then shift-click #5 -> {2,3,4,5}. + fireEvent.mouseDown(cell(2)); + fireEvent.mouseUp(window); + fireEvent.mouseDown(cell(5), { shiftKey: true }); + + expect(screen.getByText(/Selected 4 frames \(#2–#5\)/)).toBeInTheDocument(); + }); + + it("routes Kill through a destructive confirm with the selected subset", async () => { + render(); + + fireEvent.mouseDown(cell(1)); + fireEvent.mouseEnter(cell(2)); + fireEvent.mouseUp(window); + + fireEvent.click(screen.getByRole("button", { name: "Kill" })); + const dialog = await screen.findByRole("dialog"); + fireEvent.click(within(dialog).getByRole("button", { name: "Kill" })); + + await waitFor(() => expect(killFrames).toHaveBeenCalledTimes(1)); + const [handed, username, reason] = (killFrames as jest.Mock).mock.calls[0]; + expect((handed as Frame[]).map((f) => f.number)).toEqual([1, 2]); + expect(username).toBe("tester"); + expect(reason).toMatch(/frame range selector/i); + }); + + it("Clear removes the current selection", () => { + render(); + + fireEvent.mouseDown(cell(1)); + fireEvent.mouseEnter(cell(3)); + fireEvent.mouseUp(window); + expect(screen.getByText(/Selected 3 frames/)).toBeInTheDocument(); + + fireEvent.click(screen.getByRole("button", { name: "Clear" })); + expect(screen.getByText(/Drag to select a range of 5 frames/)).toBeInTheDocument(); + }); +}); diff --git a/cueweb/app/api/host/action/addcomment/route.ts 
b/cueweb/app/api/host/action/addcomment/route.ts new file mode 100644 index 000000000..f8b0d6790 --- /dev/null +++ b/cueweb/app/api/host/action/addcomment/route.ts @@ -0,0 +1,40 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Add a comment to a host. Request: { host, new_comment: { user, subject, +// message } }. RPC: /host.HostInterface/AddComment. +export async function POST(request: NextRequest) { + const endpoint = "/host.HostInterface/AddComment"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' 
}, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.host || !jsonBody?.new_comment) { + return NextResponse.json({ error: 'Invalid request body: host and new_comment required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody), true); + const responseData = await response.json(); + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/host/action/delete/route.ts b/cueweb/app/api/host/action/delete/route.ts new file mode 100644 index 000000000..773cc2072 --- /dev/null +++ b/cueweb/app/api/host/action/delete/route.ts @@ -0,0 +1,40 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Delete a host (CueGUI "Delete Host", admin-only). Request: { host }. +// RPC: /host.HostInterface/Delete. +export async function POST(request: NextRequest) { + const endpoint = "/host.HostInterface/Delete"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' 
}, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.host) { + return NextResponse.json({ error: 'Invalid request body: host required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody), true); + const responseData = await response.json(); + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/host/action/renametag/route.ts b/cueweb/app/api/host/action/renametag/route.ts new file mode 100644 index 000000000..b58cdfb37 --- /dev/null +++ b/cueweb/app/api/host/action/renametag/route.ts @@ -0,0 +1,40 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Rename a host tag. Request: { host, old_tag, new_tag }. +// RPC: /host.HostInterface/RenameTag. +export async function POST(request: NextRequest) { + const endpoint = "/host.HostInterface/RenameTag"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' 
}, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.host || typeof jsonBody.old_tag !== 'string' || typeof jsonBody.new_tag !== 'string') { + return NextResponse.json({ error: 'Invalid request body: host, old_tag, new_tag required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody), true); + const responseData = await response.json(); + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/host/action/setallocation/route.ts b/cueweb/app/api/host/action/setallocation/route.ts new file mode 100644 index 000000000..27701ec1c --- /dev/null +++ b/cueweb/app/api/host/action/setallocation/route.ts @@ -0,0 +1,40 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Move a host to another allocation (CueGUI "Change Allocation"). Request: +// { host, allocation_id }. RPC: /host.HostInterface/SetAllocation. 
+export async function POST(request: NextRequest) { + const endpoint = "/host.HostInterface/SetAllocation"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.host || typeof jsonBody.allocation_id !== 'string' || !jsonBody.allocation_id) { + return NextResponse.json({ error: 'Invalid request body: host and allocation_id required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody), true); + const responseData = await response.json(); + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/host/action/sethardwarestate/route.ts b/cueweb/app/api/host/action/sethardwarestate/route.ts new file mode 100644 index 000000000..4d9ea0fca --- /dev/null +++ b/cueweb/app/api/host/action/sethardwarestate/route.ts @@ -0,0 +1,41 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Set a host's hardware state (CueGUI "Set/Clear Repair State"). Request: +// { host, state } where state is a HardwareState enum name (e.g. "REPAIR", +// "DOWN", "UP"). RPC: /host.HostInterface/SetHardwareState. +export async function POST(request: NextRequest) { + const endpoint = "/host.HostInterface/SetHardwareState"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.host || typeof jsonBody.state !== 'string') { + return NextResponse.json({ error: 'Invalid request body: host and state required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody), true); + const responseData = await response.json(); + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/proc/action/kill/route.ts b/cueweb/app/api/proc/action/kill/route.ts new file mode 100644 index 000000000..6e9870d40 --- /dev/null +++ b/cueweb/app/api/proc/action/kill/route.ts @@ -0,0 +1,40 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Kill a single proc (Proc monitor "Kill"). Request: { proc }. +// RPC: /host.ProcInterface/Kill. +export async function POST(request: NextRequest) { + const endpoint = "/host.ProcInterface/Kill"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.proc) { + return NextResponse.json({ error: 'Invalid request body: proc required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody), true); + const responseData = await response.json(); + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/proc/action/unbookone/route.ts b/cueweb/app/api/proc/action/unbookone/route.ts new file mode 100644 index 000000000..f81342fc3 --- /dev/null +++ b/cueweb/app/api/proc/action/unbookone/route.ts @@ -0,0 +1,40 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// Unbook a single proc, optionally killing it (Proc monitor "Unbook" / +// "Unbook and Kill"). Request: { proc, kill }. RPC: /host.ProcInterface/Unbook. +export async function POST(request: NextRequest) { + const endpoint = "/host.ProcInterface/Unbook"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.proc || typeof jsonBody.kill !== 'boolean') { + return NextResponse.json({ error: 'Invalid request body: proc and kill required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody), true); + const responseData = await response.json(); + if (!response.ok) return NextResponse.json({ error: responseData.error }, { status: response.status }); + return NextResponse.json({ data: responseData.data }, { status: response.status }); +} diff --git a/cueweb/app/api/proc/getprocs/route.ts b/cueweb/app/api/proc/getprocs/route.ts new file mode 100644 index 000000000..db71c8907 --- /dev/null +++ b/cueweb/app/api/proc/getprocs/route.ts @@ -0,0 +1,45 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { handleRoute } from '@/app/utils/api_utils'; +import { NextRequest, NextResponse } from "next/server"; + +// List procs matching a ProcSearchCriteria (the Monitor Hosts proc panel, +// filtered by host names). Request: { r: { hosts: [...] } }. The gateway +// double-nests as { procs: { procs: [...] } }; we unwrap to a flat array. +// RPC: /host.ProcInterface/GetProcs. +export async function POST(request: NextRequest) { + const endpoint = "/host.ProcInterface/GetProcs"; + if (request.method !== 'POST') { + return NextResponse.json({ error: 'Invalid method. Only POST is allowed.' }, { status: 405 }); + } + let jsonBody: any; + try { + jsonBody = await request.json(); + } catch { + return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 }); + } + if (!jsonBody?.r || typeof jsonBody.r !== 'object') { + return NextResponse.json({ error: 'Invalid request body: r (ProcSearchCriteria) required' }, { status: 400 }); + } + const response = await handleRoute(request.method, endpoint, JSON.stringify(jsonBody)); + const responseData = await response.json(); + if (!response.ok) { + return NextResponse.json({ error: responseData?.error ?? "Failed to fetch procs" }, { status: response.status }); + } + const procs = responseData?.data?.procs?.procs ?? 
[]; + return NextResponse.json({ data: procs }, { status: response.status }); +} diff --git a/cueweb/app/hosts/columns.tsx b/cueweb/app/hosts/columns.tsx index 540494682..fa5b221ee 100644 --- a/cueweb/app/hosts/columns.tsx +++ b/cueweb/app/hosts/columns.tsx @@ -17,12 +17,12 @@ */ import { ColumnDef } from "@tanstack/react-table"; -import { ArrowUpDown } from "lucide-react"; +import { ArrowUpDown, MessageSquare } from "lucide-react"; import Link from "next/link"; import { Button } from "@/components/ui/button"; -import { Status } from "@/components/ui/status"; import { Host } from "@/app/utils/get_utils"; -import { idleRatio, kbStringToHuman, kbStringToNumber } from "@/app/hosts/host_format_utils"; +import { kbStringToHuman, kbStringToNumber } from "@/app/hosts/host_format_utils"; +import { OPEN_HOST_COMMENTS_EVENT } from "@/components/ui/host-action-events"; function sortableHeader(label: string) { // eslint-disable-next-line react/display-name @@ -39,65 +39,213 @@ function sortableHeader(label: string) { ); } +// Red (used) + green (free) horizontal bar, mirroring CueGUI's Host*BarDelegate. +function MemBar({ usedKb, totalKb }: { usedKb: number; totalKb: number }) { + const pct = totalKb > 0 ? Math.min(100, Math.max(0, (usedKb / totalKb) * 100)) : 0; + return ( +
+
+
+
+ ); +} + +const kb = (v?: string) => kbStringToNumber(v ?? ""); + +function formatBootTime(epoch: number): string { + if (!epoch) return ""; + const d = new Date(epoch * 1000); + const mm = String(d.getMonth() + 1).padStart(2, "0"); + const dd = String(d.getDate()).padStart(2, "0"); + const hh = String(d.getHours()).padStart(2, "0"); + const mi = String(d.getMinutes()).padStart(2, "0"); + return `${mm}/${dd} ${hh}:${mi}`; +} + +function openComments(host: Host) { + if (typeof window === "undefined") return; + window.dispatchEvent(new CustomEvent(OPEN_HOST_COMMENTS_EVENT, { detail: { hosts: [host] } })); +} + +// Full CueGUI Monitor Hosts column set. Header labels mirror CueGUI; numeric / +// memory columns sort by their underlying value, the bar columns by free space. export const hostColumns: ColumnDef[] = [ { accessorKey: "name", header: sortableHeader("Name"), - // Link into the host detail page (procs / comments / tags). stopPropagation - // so the click doesn't also trigger any row-level handler. cell: ({ row }) => ( - e.stopPropagation()} - > - {row.original.name} - +
+ e.stopPropagation()} + > + {row.original.name} + + {row.original.hasComment ? ( + + ) : null} +
), }, + { + id: "load", + header: sortableHeader("Load %"), + accessorFn: (h) => (h.cores ? (h.load ?? 0) / h.cores : 0), + cell: ({ row }) => { + const h = row.original; + const pct = h.cores ? (h.load ?? 0) / h.cores : 0; + return {Math.round(pct)}%; + }, + }, + { + id: "swap", + header: sortableHeader("Swap"), + accessorFn: (h) => kb(h.freeSwap), + cell: ({ row }) => , + }, + { + id: "physical", + header: sortableHeader("Physical"), + accessorFn: (h) => kb(h.freeMemory), + cell: ({ row }) => , + }, + { + id: "gpuMemoryBar", + header: sortableHeader("GPU Memory"), + accessorFn: (h) => kb(h.freeGpuMemory), + cell: ({ row }) => , + }, + { + id: "totalMemory", + header: sortableHeader("Total Memory"), + accessorFn: (h) => kb(h.memory), + cell: ({ row }) => {kbStringToHuman(row.original.memory)}, + }, + { + id: "idleMemory", + header: sortableHeader("Idle Memory"), + accessorFn: (h) => kb(h.idleMemory), + cell: ({ row }) => {kbStringToHuman(row.original.idleMemory)}, + }, + { + id: "temp", + header: sortableHeader("Temp"), + accessorFn: (h) => kb(h.freeMcp), + cell: ({ row }) => , + }, + { + id: "tempFree", + header: sortableHeader("Temp Free"), + accessorFn: (h) => kb(h.freeMcp), + cell: ({ row }) => {kbStringToHuman(row.original.freeMcp)}, + }, + { + id: "tempFreePct", + header: sortableHeader("Temp Free %"), + accessorFn: (h) => (kb(h.totalMcp) ? kb(h.freeMcp) / kb(h.totalMcp) : 0), + cell: ({ row }) => { + const total = kb(row.original.totalMcp); + if (!total) return ; + return {Math.round((100 * kb(row.original.freeMcp)) / total)}%; + }, + }, + { + id: "cores", + header: sortableHeader("Cores"), + accessorFn: (h) => h.cores, + cell: ({ row }) => {row.original.cores.toFixed(2)}, + }, + { + id: "idleCores", + header: sortableHeader("Idle Cores"), + accessorFn: (h) => h.idleCores, + cell: ({ row }) => {row.original.idleCores.toFixed(2)}, + }, + { + id: "gpus", + header: sortableHeader("GPUs"), + accessorFn: (h) => h.gpus ?? 
0, + cell: ({ row }) => {row.original.gpus ?? 0}, + }, + { + id: "idleGpus", + header: sortableHeader("Idle GPUs"), + accessorFn: (h) => h.idleGpus ?? 0, + cell: ({ row }) => {row.original.idleGpus ?? 0}, + }, + { + id: "gpuMem", + header: sortableHeader("GPU Mem"), + accessorFn: (h) => kb(h.gpuMemory), + cell: ({ row }) => {kbStringToHuman(row.original.gpuMemory ?? "")}, + }, + { + id: "gpuMemIdle", + header: sortableHeader("GPU Mem Idle"), + accessorFn: (h) => kb(h.idleGpuMemory), + cell: ({ row }) => {kbStringToHuman(row.original.idleGpuMemory ?? "")}, + }, + { + id: "ping", + header: sortableHeader("Ping"), + accessorFn: (h) => h.pingTime ?? 0, + cell: ({ row }) => { + const ping = row.original.pingTime ? Math.max(0, Math.round(Date.now() / 1000 - row.original.pingTime)) : 0; + return {ping}; + }, + }, + { + id: "bootTime", + header: sortableHeader("Boot Time"), + accessorFn: (h) => h.bootTime ?? 0, + cell: ({ row }) => {formatBootTime(row.original.bootTime)}, + }, { accessorKey: "state", - header: sortableHeader("State"), - cell: ({ row }) => , + id: "hardware", + header: sortableHeader("Hardware"), + cell: ({ row }) => {row.original.state}, }, { accessorKey: "lockState", id: "locked", header: sortableHeader("Locked"), - cell: ({ row }) => , + cell: ({ row }) => {row.original.lockState}, }, { - accessorKey: "nimbyEnabled", - id: "nimby", - header: sortableHeader("NIMBY"), - cell: ({ row }) => {row.original.nimbyEnabled ? "Yes" : "No"}, + id: "threadMode", + header: sortableHeader("ThreadMode"), + accessorFn: (h) => h.threadMode ?? "", + cell: ({ row }) => {row.original.threadMode ?? ""}, }, { - id: "cores", - header: sortableHeader("Cores (Idle/Total)"), - // Sort by idle ratio so "most free" sorts together regardless of host size. - accessorFn: (h) => idleRatio(h.idleCores, h.cores), - cell: ({ row }) => ( - - {row.original.idleCores.toFixed(2)} / {row.original.cores.toFixed(2)} - - ), + id: "os", + header: sortableHeader("OS"), + accessorFn: (h) => h.os ?? 
"", + cell: ({ row }) => {row.original.os ?? ""}, }, { - id: "memory", - header: sortableHeader("Memory (Idle/Total)"), - // Sort by idle ratio (matching Cores), not the formatted string. - accessorFn: (h) => idleRatio(kbStringToNumber(h.idleMemory), kbStringToNumber(h.totalMemory)), - cell: ({ row }) => ( - - {kbStringToHuman(row.original.idleMemory)} / {kbStringToHuman(row.original.totalMemory)} - - ), - }, - { - id: "freeMcp", - header: sortableHeader("Free /mcp"), - accessorFn: (h) => kbStringToNumber(h.freeMcp), - cell: ({ row }) => {kbStringToHuman(row.original.freeMcp)}, + id: "tags", + header: sortableHeader("Tags"), + accessorFn: (h) => (h.tags ?? []).join(","), + cell: ({ row }) => {(row.original.tags ?? []).join(", ")}, }, ]; + +// Row tint by state/lock (CueGUI HostWidgetItem BackgroundRole). Returns a +// Tailwind class for SimpleDataTable's getRowClassName hook. +export function hostRowClassName(host: Host): string | undefined { + if (host.state === "REBOOT_WHEN_IDLE") return "bg-amber-950/40"; + if (host.state !== "UP") return "bg-red-950/40"; + if (host.lockState === "LOCKED") return "bg-yellow-950/40"; + return undefined; +} diff --git a/cueweb/app/hosts/page.tsx b/cueweb/app/hosts/page.tsx index 319648f72..8785beab0 100644 --- a/cueweb/app/hosts/page.tsx +++ b/cueweb/app/hosts/page.tsx @@ -17,27 +17,93 @@ */ import * as React from "react"; +import { ChevronDown } from "lucide-react"; + import { Host, getHosts } from "@/app/utils/get_utils"; -import { hostColumns } from "@/app/hosts/columns"; +import { hostColumns, hostRowClassName } from "@/app/hosts/columns"; import { SimpleDataTable } from "@/components/ui/simple-data-table"; import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { + DropdownMenu, + DropdownMenuCheckboxItem, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuSeparator, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; +import { Input } from 
"@/components/ui/input"; import { Skeleton } from "@/components/ui/skeleton"; import { HostLockDialog } from "@/components/ui/host-lock-dialog"; import { HostRebootDialog } from "@/components/ui/host-reboot-dialog"; import { EditHostTagsDialog } from "@/components/ui/edit-host-tags-dialog"; -import { - HOSTS_CHANGED_EVENT, - type HostsChangedDetail, -} from "@/components/ui/host-action-events"; +import { HostMonitorDialogs } from "@/components/ui/host-monitor-dialogs"; +import { ProcMonitorPanel } from "@/components/ui/proc-monitor-panel"; +import { HOSTS_CHANGED_EVENT, type HostsChangedDetail } from "@/components/ui/host-action-events"; const REFRESH_MS = 30000; +const HARDWARE_STATES = ["UP", "DOWN", "REBOOTING", "REBOOT_WHEN_IDLE", "REPAIR"]; +const LOCK_STATES = ["OPEN", "LOCKED", "NIMBY_LOCKED"]; + +function FilterMenu({ + label, + options, + selected, + onChange, +}: { + label: string; + options: string[]; + selected: Set; + onChange: (next: Set) => void; +}) { + function toggle(value: string, checked: boolean) { + const next = new Set(selected); + if (checked) next.add(value); + else next.delete(value); + onChange(next); + } + return ( + + + + + + onChange(new Set())}>Clear + + {options.length === 0 ? ( +
None
+ ) : ( + options.map((o) => ( + toggle(o, !!c)} + onSelect={(e) => e.preventDefault()} + > + {o} + + )) + )} +
+
+ ); +} export default function HostsPage() { const [hosts, setHosts] = React.useState(null); const [error, setError] = React.useState(null); + const [autoRefresh, setAutoRefresh] = React.useState(true); + + const [search, setSearch] = React.useState(""); + const [allocFilter, setAllocFilter] = React.useState>(new Set()); + const [hwFilter, setHwFilter] = React.useState>(new Set()); + const [lockFilter, setLockFilter] = React.useState>(new Set()); + const [osFilter, setOsFilter] = React.useState>(new Set()); - // isCancelled lets the polling effect drop a late response after unmount; - // the Retry button omits it. const load = React.useCallback(async (isCancelled?: () => boolean) => { try { const data = await getHosts(); @@ -46,8 +112,6 @@ export default function HostsPage() { setError(null); } catch (err) { if (isCancelled?.()) return; - // Keep previously loaded rows on a failed poll; only blank to [] if we - // never loaded anything. getHosts already toasts via handleError. setError(err instanceof Error ? err.message : String(err)); setHosts((prev) => prev ?? []); } @@ -57,39 +121,96 @@ export default function HostsPage() { let cancelled = false; const isCancelled = () => cancelled; load(isCancelled); + if (!autoRefresh) return () => { cancelled = true; }; const interval = setInterval(() => load(isCancelled), REFRESH_MS); return () => { cancelled = true; clearInterval(interval); }; - }, [load]); + }, [load, autoRefresh]); - // After a lock/unlock/reboot the dialogs fire cueweb:hosts-changed. - // Optimistically apply the patch (lockState and/or state) to the affected - // rows so the table reflects the change immediately, then kick off a fetch - // to reconcile with Cuebot (the gateway may take a beat to settle, and a - // request it rejects will be corrected on the next poll). 
React.useEffect(() => { function handler(e: Event) { const detail = (e as CustomEvent).detail; if (!detail?.hostIds?.length || !detail.patch) return; const ids = new Set(detail.hostIds); - setHosts((prev) => - prev - ? prev.map((h) => (ids.has(h.id) ? { ...h, ...detail.patch } : h)) - : prev, - ); + setHosts((prev) => (prev ? prev.map((h) => (ids.has(h.id) ? { ...h, ...detail.patch } : h)) : prev)); load(); } window.addEventListener(HOSTS_CHANGED_EVENT, handler); return () => window.removeEventListener(HOSTS_CHANGED_EVENT, handler); }, [load]); + const allocOptions = React.useMemo( + () => Array.from(new Set((hosts ?? []).map((h) => h.allocName).filter(Boolean) as string[])).sort(), + [hosts], + ); + const osOptions = React.useMemo( + () => Array.from(new Set((hosts ?? []).map((h) => h.os).filter(Boolean) as string[])).sort(), + [hosts], + ); + + const filtered = React.useMemo(() => { + if (!hosts) return null; + let nameRe: RegExp | null = null; + if (search.trim()) { + try { + nameRe = new RegExp(search.trim(), "i"); + } catch { + nameRe = null; + } + } + const term = search.trim().toLowerCase(); + return hosts.filter((h) => { + if (search.trim()) { + const ok = nameRe ? nameRe.test(h.name) : h.name.toLowerCase().includes(term); + if (!ok) return false; + } + if (allocFilter.size && !(h.allocName && allocFilter.has(h.allocName))) return false; + if (hwFilter.size && !hwFilter.has(h.state)) return false; + if (lockFilter.size && !lockFilter.has(h.lockState)) return false; + if (osFilter.size && !(h.os && osFilter.has(h.os))) return false; + return true; + }); + }, [hosts, search, allocFilter, hwFilter, lockFilter, osFilter]); + + function clearFilters() { + setSearch(""); + setAllocFilter(new Set()); + setHwFilter(new Set()); + setLockFilter(new Set()); + setOsFilter(new Set()); + } + return (

Monitor Hosts

- {hosts === null ? ( + {/* Filter bar (CueGUI parity). */} +
+ setSearch(e.target.value)} + placeholder="Filter hosts (name / regex)" + className="h-8 w-64" + aria-label="Filter hosts" + /> + + + + + +
+ + + +
+
+ + {filtered === null ? (
@@ -97,30 +218,31 @@ export default function HostsPage() {
) : ( <> - {error && hosts.length === 0 ? ( + {error && hosts && hosts.length === 0 ? (
Could not load hosts from Cuebot. - +
) : null} )} - {/* Dialogs opened by the host row context menu: Lock / Unlock - (cueweb:open-host-lock), immediate Reboot (cueweb:open-host-reboot), - and Edit Tags (cueweb:open-host-tags). */} + {/* Bottom proc panel (View Procs). */} + + + {/* Dialogs opened by the host row context menu. */} +
); } diff --git a/cueweb/app/utils/action_utils.ts b/cueweb/app/utils/action_utils.ts index 182b437d0..dc4dbb168 100644 --- a/cueweb/app/utils/action_utils.ts +++ b/cueweb/app/utils/action_utils.ts @@ -20,7 +20,7 @@ import * as React from "react"; import { Frame } from "../frames/frame-columns"; import { Layer } from "../layers/layer-columns"; import { accessActionApi, accessGetApi } from "./api_utils"; -import { getFrameLogDir, getJobForLayer, Host, JobComment, Show } from "./get_utils"; +import { getFrameLogDir, getJobForLayer, Host, JobComment, Proc, Show } from "./get_utils"; import { handleError, toastSuccess, toastWarning } from "./notify_utils"; /**************************************/ @@ -789,6 +789,118 @@ export function editHostTagsGivenRow(row: Row) { ); } +/**************************************/ +// Host actions: rename tag, allocation, delete, repair, comment +/**************************************/ + +// Rename a tag on every host (CueGUI renameTag). Batch-capable. +export async function renameHostTag(hosts: Host[], oldTag: string, newTag: string): Promise { + const endpoint = "/api/host/action/renametag"; + const bodyAr = hosts.map((host) => JSON.stringify({ host, old_tag: oldTag, new_tag: newTag })); + return performAction(endpoint, bodyAr, `Renamed tag on ${hosts.length} host(s)`); +} + +// Move every host to a new allocation (CueGUI changeAllocation). Batch-capable. +export async function setHostAllocation(hosts: Host[], allocationId: string): Promise { + const endpoint = "/api/host/action/setallocation"; + const bodyAr = hosts.map((host) => JSON.stringify({ host, allocation_id: allocationId })); + return performAction(endpoint, bodyAr, `Moved ${hosts.length} host(s) to a new allocation`); +} + +// Delete the given hosts (CueGUI delete, admin-only). Batch-capable. 
+export async function deleteHosts(hosts: Host[]): Promise { + const endpoint = "/api/host/action/delete"; + const bodyAr = hosts.map((host) => JSON.stringify({ host })); + return performAction(endpoint, bodyAr, `Deleted ${hosts.length} host(s)`); +} + +// Set/clear the REPAIR hardware state (CueGUI setRepair/clearRepair). clearRepair +// sets the state back to DOWN, matching CueGUI. Batch-capable. +export async function setHostHardwareState(hosts: Host[], state: "REPAIR" | "DOWN" | "UP"): Promise { + const endpoint = "/api/host/action/sethardwarestate"; + const bodyAr = hosts.map((host) => JSON.stringify({ host, state })); + const verb = state === "REPAIR" ? "Set repair state on" : "Cleared repair state on"; + return performAction(endpoint, bodyAr, `${verb} ${hosts.length} host(s)`); +} + +// Add a comment to a host (CueGUI host Comments dialog). +export async function addHostComment( + host: Host, + user: string, + subject: string, + message: string, +): Promise { + const endpoint = "/api/host/action/addcomment"; + const body = JSON.stringify({ host, new_comment: { user, subject, message } }); + return performAction(endpoint, [body], "Added comment"); +} + +/**************************************/ +// Proc monitor actions (kill / unbook) +/**************************************/ + +export async function killProcs(procs: Proc[]): Promise { + const endpoint = "/api/proc/action/kill"; + const bodyAr = procs.map((proc) => JSON.stringify({ proc })); + return performAction(endpoint, bodyAr, `Killed ${procs.length} proc(s)`); +} + +// Unbook procs; kill=true also kills the running frame (CueGUI "Unbook" / +// "Unbook and Kill"). +export async function unbookProcs(procs: Proc[], kill: boolean): Promise { + const endpoint = "/api/proc/action/unbookone"; + const bodyAr = procs.map((proc) => JSON.stringify({ proc, kill })); + return performAction(endpoint, bodyAr, kill ? 
`Unbooked and killed ${procs.length} proc(s)` : `Unbooked ${procs.length} proc(s)`); +} + +/**************************************/ +// Host context-menu dispatchers (new items) +/**************************************/ + +export function viewHostCommentsGivenRow(row: Row) { + if (typeof window === "undefined") return; + window.dispatchEvent(new CustomEvent("cueweb:open-host-comments", { detail: { hosts: [row.original as Host] } })); +} + +export function renameHostTagGivenRow(row: Row) { + if (typeof window === "undefined") return; + window.dispatchEvent(new CustomEvent("cueweb:open-host-rename-tag", { detail: { hosts: [row.original as Host] } })); +} + +export function changeHostAllocationGivenRow(row: Row) { + if (typeof window === "undefined") return; + window.dispatchEvent(new CustomEvent("cueweb:open-host-allocation", { detail: { hosts: [row.original as Host] } })); +} + +export function deleteHostGivenRow(row: Row) { + if (typeof window === "undefined") return; + window.dispatchEvent(new CustomEvent("cueweb:open-host-delete", { detail: { hosts: [row.original as Host] } })); +} + +export function viewHostProcsGivenRow(row: Row) { + if (typeof window === "undefined") return; + const host = row.original as Host; + window.dispatchEvent(new CustomEvent("cueweb:view-host-procs", { detail: { hostNames: [host.name] } })); +} + +// Set/Clear Repair State fire immediately (CueGUI does them silently), then +// optimistically patch the row's hardware state. 
+export function setRepairGivenRow(row: Row) { + const host = row.original as Host; + void setHostHardwareState([host], "REPAIR").then((ok) => { + if (!ok || typeof window === "undefined") return; + window.dispatchEvent(new CustomEvent("cueweb:hosts-changed", { detail: { hostIds: [host.id], patch: { state: "REPAIR" } } })); + }); +} + +export function clearRepairGivenRow(row: Row) { + const host = row.original as Host; + void setHostHardwareState([host], "DOWN").then((ok) => { + if (!ok || typeof window === "undefined") return; + window.dispatchEvent(new CustomEvent("cueweb:hosts-changed", { detail: { hostIds: [host.id], patch: { state: "DOWN" } } })); + }); +} + /**********************************************/ /* Per-row wrappers for the expanded job menu */ /**********************************************/ diff --git a/cueweb/app/utils/get_utils.ts b/cueweb/app/utils/get_utils.ts index 12e5f92ff..c435ce8b0 100644 --- a/cueweb/app/utils/get_utils.ts +++ b/cueweb/app/utils/get_utils.ts @@ -99,6 +99,17 @@ export type Host = { gpus?: number; idleGpus?: number; hasComment?: boolean; + // Extra fields the full Monitor Hosts table needs for the bar columns + // (Swap/Physical/GPU/Temp) and Load %. All memory values are KB and may + // arrive from the gateway as strings (int64). 
+ freeMemory?: string; // free physical RAM, KB + freeSwap?: string; // KB + totalSwap?: string; // KB + freeGpuMemory?: string; // KB + totalGpuMemory?: string; // KB + gpuMemory?: string; // total reservable GPU mem, KB + idleGpuMemory?: string; // KB + totalMcp?: string; // /mcp/ total, KB }; // Minimal Proc shape - the host.Proc proto fields the host detail page's @@ -116,6 +127,7 @@ export type Proc = { dispatchTime: number; reservedMemory: string; // KB, as string usedMemory: string; // KB, as string + reservedGpuMemory?: string; // KB, as string reservedCores: number; services: string[]; logPath: string; @@ -343,6 +355,16 @@ export async function getAllocations(): Promise { return Array.isArray(response) ? response : []; } +// Fetch the procs running on a set of hosts (the Monitor Hosts proc panel, +// CueGUI's "View Procs"). Uses ProcInterface.GetProcs with a host filter. +export async function getProcsByHosts(hostNames: string[]): Promise { + if (hostNames.length === 0) return []; + const ENDPOINT = "/api/proc/getprocs"; + const body = JSON.stringify({ r: { hosts: hostNames } }); + const response = await accessGetApi(ENDPOINT, body); + return Array.isArray(response) ? 
response : []; +} + // Fetch all comments for a given job export async function getJobComments(job: Job): Promise { const ENDPOINT = "/api/job/getcomments"; diff --git a/cueweb/components/ui/context_menus/action-context-menu.tsx b/cueweb/components/ui/context_menus/action-context-menu.tsx index 748147cce..0863443bc 100644 --- a/cueweb/components/ui/context_menus/action-context-menu.tsx +++ b/cueweb/components/ui/context_menus/action-context-menu.tsx @@ -33,6 +33,13 @@ import { eatJobsDeadFramesGivenRow, eatLayerFramesGivenRow, editHostTagsGivenRow, + viewHostCommentsGivenRow, + viewHostProcsGivenRow, + renameHostTagGivenRow, + changeHostAllocationGivenRow, + deleteHostGivenRow, + setRepairGivenRow, + clearRepairGivenRow, emailArtistGivenRow, killFrameGivenRow, killJobGivenRow, @@ -567,19 +574,68 @@ export const HostContextMenu: React.FC = ({ const canRebootWhenIdle = hardwareState !== "REBOOTING" && hardwareState !== "REBOOT_WHEN_IDLE"; + // CueGUI parity: a host is at rest in REPAIR when its hardware state is + // REPAIR; Clear Repair only makes sense then. + const inRepair = hardwareState === "REPAIR"; + const items: MenuItem[] = [ { - label: "Lock", + label: "Comments...", + onClick: viewHostCommentsGivenRow, + isActive: true, + component: , + }, + { + label: "View Procs", + onClick: viewHostProcsGivenRow, + isActive: true, + component: , + }, + + sep("group-lock"), + + { + label: "Lock Host", onClick: lockHostGivenRow, isActive: canLock, component: , }, { - label: "Unlock", + label: "Unlock Host", onClick: unlockHostGivenRow, isActive: canUnlock, component: , }, + { + // Owner/deed RPCs aren't wired in CueWeb yet; shown disabled to mirror + // CueGUI (which greys it out unless the host is NIMBY-locked). + label: "Take Ownership", + onClick: () => {}, + isActive: false, + component: , + }, + + sep("group-tags"), + + { + // CueWeb merges CueGUI's Add Tags / Remove Tags into one editor. 
+ label: "Edit Tags...", + onClick: editHostTagsGivenRow, + isActive: true, + component: , + }, + { + label: "Rename Tag...", + onClick: renameHostTagGivenRow, + isActive: true, + component: , + }, + { + label: "Change Allocation...", + onClick: changeHostAllocationGivenRow, + isActive: true, + component: , + }, sep("group-reboot"), @@ -590,19 +646,31 @@ export const HostContextMenu: React.FC = ({ component: , }, { - label: "Reboot When Idle", + label: "Reboot when idle", onClick: rebootHostWhenIdleGivenRow, isActive: canRebootWhenIdle, component: , }, + { + label: "Delete Host", + onClick: deleteHostGivenRow, + isActive: true, + component: , + }, - sep("group-tags"), + sep("group-repair"), { - label: "Edit Tags...", - onClick: editHostTagsGivenRow, - isActive: true, - component: , + label: "Set Repair State", + onClick: setRepairGivenRow, + isActive: !inRepair, + component: , + }, + { + label: "Clear Repair State", + onClick: clearRepairGivenRow, + isActive: inRepair, + component: , }, ]; diff --git a/cueweb/components/ui/frame-range-selector.tsx b/cueweb/components/ui/frame-range-selector.tsx new file mode 100644 index 000000000..91b449d81 --- /dev/null +++ b/cueweb/components/ui/frame-range-selector.tsx @@ -0,0 +1,330 @@ +"use client"; + +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Visual frame-range selector. 
Web adaptation of CueGUI's + * `cuegui/cuegui/FrameRangeSelection.py`: a horizontal strip where every + * frame is one cell, colored by its state, laid out in ascending frame + * order. The user click-drags across the strip to select a contiguous run + * of frames, or shift-clicks to extend the current selection to the clicked + * cell. The selected subset feeds straight into the same Retry / Eat / Kill + * actions the row context menu uses (retryFrames / eatFrames / killFrames), + * so the action wiring is shared, not duplicated. + * + * Accessibility note: the strip itself is a pointer affordance (one cell per + * frame would otherwise create thousands of tab stops). Keyboard / AT users + * keep the per-row context menu in the table for the same Retry/Eat/Kill + * actions; once a selection exists here the action buttons below the strip + * are fully keyboard reachable. + */ + +import * as React from "react"; +import { ChevronDown, ChevronRight } from "lucide-react"; +import { MdOutlineCancel } from "react-icons/md"; +import { TbPacman, TbReload } from "react-icons/tb"; + +import type { Frame } from "@/app/frames/frame-columns"; +import { eatFrames, killFrames, retryFrames } from "@/app/utils/action_utils"; +import { useDisableJobInteraction } from "@/app/utils/use_disable_job_interaction"; +import { Button } from "@/components/ui/button"; +import { ConfirmDialog } from "@/components/ui/confirm-dialog"; + +// Frame state -> solid swatch color. Mirrors the hues in +// components/ui/status.tsx but as filled cells so the strip reads like a +// heatmap of the job's frames. Unknown / EATEN-but-unlisted states fall +// back to a neutral gray. 
+const STATE_CELL_COLOR: Record = { + SUCCEEDED: "bg-green-500", + FINISHED: "bg-green-500", + RUNNING: "bg-yellow-400", + WAITING: "bg-blue-400", + PAUSED: "bg-blue-300", + DEPEND: "bg-purple-400", + DEPENDENCY: "bg-purple-400", + DEAD: "bg-red-500", + FAILING: "bg-red-500", + EATEN: "bg-orange-400", +}; +const DEFAULT_CELL_COLOR = "bg-gray-300 dark:bg-gray-600"; + +function cellColor(state: string): string { + return STATE_CELL_COLOR[(state ?? "").toUpperCase()] ?? DEFAULT_CELL_COLOR; +} + +type PendingAction = "retry" | "eat" | "kill"; + +const ACTION_LABEL: Record = { + retry: "Retry", + eat: "Eat", + kill: "Kill", +}; + +export interface FrameRangeSelectorProps { + // Frames currently shown in the table (already state-filtered upstream by + // SimpleDataTable). The strip mirrors exactly what the user sees. + frames: Frame[]; + username: string; + // Notified whenever the selected subset changes, so a parent can mirror + // the selection elsewhere (e.g. highlight the matching table rows). + onSelectionChange?: (frames: Frame[]) => void; +} + +export function FrameRangeSelector({ frames, username, onSelectionChange }: FrameRangeSelectorProps) { + const [collapsed, setCollapsed] = React.useState(false); + const [selectedIds, setSelectedIds] = React.useState>(() => new Set()); + // Anchor for shift-click extend and drag origin, tracked by frame id so it + // survives the 5s poll re-creating the frames array. + const [anchorId, setAnchorId] = React.useState(null); + const draggingRef = React.useRef(false); + const [pending, setPending] = React.useState(null); + + const { disabled: jobInteractionDisabled } = useDisableJobInteraction(); + + // Stable display order: ascending frame number, then layer name so + // same-numbered frames from different layers sit next to each other. 
+ const displayFrames = React.useMemo( + () => [...frames].sort((a, b) => a.number - b.number || a.layerName.localeCompare(b.layerName)), + [frames], + ); + + // Prune ids that no longer exist (frames removed by a poll) so the count + + // readout stay accurate. We deliberately do NOT clear the whole selection + // on every refresh - that would wipe an in-progress selection every 5s. + React.useEffect(() => { + setSelectedIds((prev) => { + if (prev.size === 0) return prev; + const present = new Set(displayFrames.map((f) => f.id)); + let changed = false; + const next = new Set(); + prev.forEach((id) => { + if (present.has(id)) next.add(id); + else changed = true; + }); + return changed ? next : prev; + }); + }, [displayFrames]); + + const selectedFrames = React.useMemo( + () => displayFrames.filter((f) => selectedIds.has(f.id)), + [displayFrames, selectedIds], + ); + + // Notify the parent without making onSelectionChange a render dependency + // (callers commonly pass an inline arrow). + const onSelectionChangeRef = React.useRef(onSelectionChange); + React.useEffect(() => { + onSelectionChangeRef.current = onSelectionChange; + }); + React.useEffect(() => { + onSelectionChangeRef.current?.(selectedFrames); + }, [selectedFrames]); + + // Finalize any drag on a global mouseup so releasing the button outside + // the strip still ends the drag. + React.useEffect(() => { + const stop = () => { + draggingRef.current = false; + }; + window.addEventListener("mouseup", stop); + return () => window.removeEventListener("mouseup", stop); + }, []); + + const selectRange = React.useCallback( + (a: number, b: number) => { + const lo = Math.min(a, b); + const hi = Math.max(a, b); + const ids = new Set(); + for (let i = lo; i <= hi; i++) { + const frame = displayFrames[i]; + if (frame) ids.add(frame.id); + } + setSelectedIds(ids); + }, + [displayFrames], + ); + + const handleMouseDown = React.useCallback( + (index: number, shiftKey: boolean) => { + const anchorIndex = anchorId ? 
displayFrames.findIndex((f) => f.id === anchorId) : -1; + if (shiftKey && anchorIndex >= 0) { + // Shift-click extends from the existing anchor; no drag is started. + selectRange(anchorIndex, index); + return; + } + // Fresh selection: this cell becomes the new anchor and the drag origin. + setAnchorId(displayFrames[index].id); + draggingRef.current = true; + selectRange(index, index); + }, + [anchorId, displayFrames, selectRange], + ); + + const handleMouseEnter = React.useCallback( + (index: number) => { + if (!draggingRef.current) return; + const anchorIndex = anchorId ? displayFrames.findIndex((f) => f.id === anchorId) : index; + selectRange(anchorIndex, index); + }, + [anchorId, displayFrames, selectRange], + ); + + const clearSelection = React.useCallback(() => { + setSelectedIds(new Set()); + setAnchorId(null); + }, []); + + // Selected-range readout: min/max frame number across the subset (the + // subset can span layers, so it isn't necessarily a single contiguous run + // of numbers - the readout reflects the actual endpoints). 
+ const readout = React.useMemo(() => { + if (selectedFrames.length === 0) return null; + let min = Infinity; + let max = -Infinity; + for (const f of selectedFrames) { + if (f.number < min) min = f.number; + if (f.number > max) max = f.number; + } + return { min, max }; + }, [selectedFrames]); + + async function runPending() { + if (!pending) return; + const targets = selectedFrames; + if (targets.length === 0) return; + if (pending === "retry") { + await retryFrames(targets); + } else if (pending === "eat") { + await eatFrames(targets); + } else if (pending === "kill") { + const reason = `Manual frame kill request in Cueweb's frame range selector by ${username}`; + await killFrames(targets, username, reason); + } + clearSelection(); + } + + if (displayFrames.length === 0) return null; + + const hasSelection = selectedFrames.length > 0; + const actionsDisabled = jobInteractionDisabled || !hasSelection; + const firstNumber = displayFrames[0].number; + const lastNumber = displayFrames[displayFrames.length - 1].number; + + return ( +
+
+ + + {hasSelection && readout + ? `Selected ${selectedFrames.length} frame${selectedFrames.length === 1 ? "" : "s"} (#${readout.min}–#${readout.max})` + : `Drag to select a range of ${displayFrames.length} frame${displayFrames.length === 1 ? "" : "s"}`} + +
+ + {!collapsed && ( +
+
+ {displayFrames.map((f, i) => { + const selected = selectedIds.has(f.id); + return ( +
{ + // Suppress the native text/drag selection so a drag across + // the strip reads as a range pick, not a text highlight. + e.preventDefault(); + handleMouseDown(i, e.shiftKey); + }} + onMouseEnter={() => handleMouseEnter(i)} + className={`h-full min-w-[6px] flex-1 cursor-pointer ${cellColor(f.state)} ${ + selected + ? "opacity-100 ring-1 ring-inset ring-foreground" + : hasSelection + ? "opacity-30" + : "opacity-90" + }`} + /> + ); + })} +
+ +
+ #{firstNumber} + #{lastNumber} +
+ +
+ + + + +
+
+ )} + + { + if (!o) setPending(null); + }} + title={ + pending + ? `${ACTION_LABEL[pending]} ${selectedFrames.length} frame${selectedFrames.length === 1 ? "" : "s"}?` + : "" + } + description={ + pending && readout + ? `This will ${pending} ${selectedFrames.length} frame${selectedFrames.length === 1 ? "" : "s"} in the range #${readout.min}–#${readout.max}.` + : undefined + } + variant={pending === "kill" ? "destructive" : "default"} + confirmLabel={pending ? ACTION_LABEL[pending] : "Confirm"} + onConfirm={runPending} + /> +
+ ); +} diff --git a/cueweb/components/ui/host-action-events.ts b/cueweb/components/ui/host-action-events.ts index 8272ff958..2418ce5b0 100644 --- a/cueweb/components/ui/host-action-events.ts +++ b/cueweb/components/ui/host-action-events.ts @@ -41,6 +41,37 @@ export type OpenHostTagsDetail = { hosts: Host[]; }; +// Opens the host comments dialog (HostCommentsDialog). +export const OPEN_HOST_COMMENTS_EVENT = "cueweb:open-host-comments"; +export type OpenHostCommentsDetail = { + hosts: Host[]; +}; + +// Opens the rename-tag dialog (HostRenameTagDialog). +export const OPEN_HOST_RENAME_TAG_EVENT = "cueweb:open-host-rename-tag"; +export type OpenHostRenameTagDetail = { + hosts: Host[]; +}; + +// Opens the change-allocation dialog (HostChangeAllocationDialog). +export const OPEN_HOST_ALLOCATION_EVENT = "cueweb:open-host-allocation"; +export type OpenHostAllocationDetail = { + hosts: Host[]; +}; + +// Opens the delete-host confirmation dialog (HostDeleteDialog). +export const OPEN_HOST_DELETE_EVENT = "cueweb:open-host-delete"; +export type OpenHostDeleteDetail = { + hosts: Host[]; +}; + +// Drives the bottom Proc monitor panel ("View Procs"): the host panel asks +// the proc panel to load procs for these host names. +export const VIEW_HOST_PROCS_EVENT = "cueweb:view-host-procs"; +export type ViewHostProcsDetail = { + hostNames: string[]; +}; + // Fired after a host action so the open hosts table can update the // affected rows immediately (optimistic) instead of waiting for the 30s // poll. 
The page applies `patch` to every row whose id is in hostIds, @@ -48,5 +79,5 @@ export type OpenHostTagsDetail = { export const HOSTS_CHANGED_EVENT = "cueweb:hosts-changed"; export type HostsChangedDetail = { hostIds: string[]; - patch: Partial>; + patch: Partial>; }; diff --git a/cueweb/components/ui/host-monitor-dialogs.tsx b/cueweb/components/ui/host-monitor-dialogs.tsx new file mode 100644 index 000000000..c233c2c5c --- /dev/null +++ b/cueweb/components/ui/host-monitor-dialogs.tsx @@ -0,0 +1,347 @@ +"use client"; + +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import * as React from "react"; +import { useSession } from "next-auth/react"; + +import type { Allocation, Host, JobComment } from "@/app/utils/get_utils"; +import { getAllocations, getHostComments } from "@/app/utils/get_utils"; +import { addHostComment, deleteHosts, renameHostTag, setHostAllocation } from "@/app/utils/action_utils"; +import { handleError, toastSuccess, toastWarning } from "@/app/utils/notify_utils"; +import { Button } from "@/components/ui/button"; +import { + Dialog, + DialogContent, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { + HOSTS_CHANGED_EVENT, + OPEN_HOST_ALLOCATION_EVENT, + OPEN_HOST_COMMENTS_EVENT, + OPEN_HOST_DELETE_EVENT, + OPEN_HOST_RENAME_TAG_EVENT, + type OpenHostAllocationDetail, + type OpenHostCommentsDetail, + type OpenHostDeleteDetail, + type OpenHostRenameTagDetail, +} from "@/components/ui/host-action-events"; + +function notifyChanged(hosts: Host[], patch: object) { + window.dispatchEvent( + new CustomEvent(HOSTS_CHANGED_EVENT, { detail: { hostIds: hosts.map((h) => h.id), patch } }), + ); +} + +const SELECT_CLASS = + "h-9 w-full rounded-md border border-input bg-background px-3 py-1 text-sm focus:outline-none focus:ring-2 focus:ring-ring disabled:opacity-50"; + +// --- Comments ------------------------------------------------------------- +function HostCommentsDialog() { + const { data: session } = useSession(); + const username = session?.user?.name ?? session?.user?.email ?? 
"cueweb"; + const [open, setOpen] = React.useState(false); + const [host, setHost] = React.useState(null); + const [comments, setComments] = React.useState([]); + const [subject, setSubject] = React.useState(""); + const [message, setMessage] = React.useState(""); + const [busy, setBusy] = React.useState(false); + + const reload = React.useCallback(async (h: Host) => { + try { + setComments(await getHostComments(h)); + } catch (err) { + handleError(err, "Could not load host comments"); + } + }, []); + + React.useEffect(() => { + function handler(e: Event) { + const h = (e as CustomEvent).detail.hosts[0]; + setHost(h); + setSubject(""); + setMessage(""); + setComments([]); + setOpen(true); + if (h) reload(h); + } + window.addEventListener(OPEN_HOST_COMMENTS_EVENT, handler); + return () => window.removeEventListener(OPEN_HOST_COMMENTS_EVENT, handler); + }, [reload]); + + async function save() { + if (!host) return; + if (!subject.trim()) { + toastWarning("A subject is required."); + return; + } + setBusy(true); + try { + const ok = await addHostComment(host, username, subject.trim(), message); + if (ok) { + toastSuccess("Added comment"); + setSubject(""); + setMessage(""); + await reload(host); + notifyChanged([host], { hasComment: true }); + } + } finally { + setBusy(false); + } + } + + return ( + + + + Comments — {host?.name} + +
+ {comments.length === 0 ? ( +

No comments.

+ ) : ( +
    + {comments.map((c) => ( +
  • +
    + {c.subject} + {c.user} +
    +

    {c.message}

    +
  • + ))} +
+ )} +
+
+ setSubject(e.target.value)} placeholder="Subject" aria-label="Subject" /> +