RCA based on graph operations (PoC)

Operations

Identify unreachable nodes and emit deduced alerts (PodUnreachable) on connected pods:

FOR node IN nodes
    FILTER node.kind == "node"
    FILTER node.deleted_at == null
    LET node_unreachable = FIRST(
        FOR node_alert, link IN 1..1 ANY node links
            FILTER node_alert.kind == "alert"
            FILTER node_alert.deleted_at == null
            FILTER node_alert.properties.name == "KubeNodeUnreachable"
            RETURN node_alert
        )
    FILTER node_unreachable != null
    FOR pod, link IN 1..1 ANY node links
        FILTER pod.kind == "pod"
        FILTER pod.deleted_at == null
        INSERT {origin: "openrca", kind: "alert", properties: {name: "PodUnreachable"}} INTO nodes
        LET pod_alert = NEW
        INSERT {origin: "openrca", _from: pod_alert._id, _to: pod._id } INTO links

Identify unreachable pods and emit deduced alerts (ServiceDegraded) on connected services:

FOR svc IN nodes
    FILTER svc.kind == "service"
    FILTER svc.deleted_at == null
    LET svc_pods = (
        FOR pod, link IN 1..1 ANY svc links
            FILTER pod.kind == "pod"
            FILTER pod.deleted_at == null
            RETURN pod
    )
    LET svc_unreachable_pod = FIRST(
        FOR pod IN svc_pods
            FOR pod_alert, link IN 1..1 ANY pod links
                FILTER pod_alert.kind == "alert"
                FILTER pod_alert.deleted_at == null
                FILTER pod_alert.properties.name == "PodUnreachable"
                RETURN pod_alert
    )
    FILTER svc_unreachable_pod != null
    INSERT {origin: "openrca", kind: "alert", properties: {name: "ServiceDegraded"}} INTO nodes
    LET svc_alert = NEW
    INSERT {origin: "openrca", _from: svc_alert._id, _to: svc._id } INTO links

Identify unreachable pods and emit deduced alerts (ReplicaSetDegraded) on connected replica sets:

FOR rs IN nodes
    FILTER rs.kind == "replica_set"
    FILTER rs.deleted_at == null
    LET rs_pods = (
        FOR pod, link IN 1..1 ANY rs links
            FILTER pod.kind == "pod"
            FILTER pod.deleted_at == null
            RETURN pod
    )
    LET rs_unreachable_pod = FIRST(
        FOR pod IN rs_pods
            FOR pod_alert, link IN 1..1 ANY pod links
                FILTER pod_alert.kind == "alert"
                FILTER pod_alert.deleted_at == null
                FILTER pod_alert.properties.name == "PodUnreachable"
                RETURN pod_alert
    )
    FILTER rs_unreachable_pod != null
    INSERT {origin: "openrca", kind: "alert", properties: {name: "ReplicaSetDegraded"}} INTO nodes
    LET rs_alert = NEW
    INSERT {origin: "openrca", _from: rs_alert._id, _to: rs._id } INTO links

Construct alerts graph:

FOR link IN links
    LET source = DOCUMENT(link._from)
    LET target = DOCUMENT(link._to)
    FILTER source.deleted_at == null
    FILTER target.deleted_at == null
    LET source_alert = FIRST(
        FOR n, l IN 1..1 ANY source links
            FILTER n.deleted_at == null
            FILTER n.kind == "alert"
            RETURN n
    )
    LET target_alert = FIRST(
        FOR n, l IN 1..1 ANY target links
            FILTER n.deleted_at == null
            FILTER n.kind == "alert"
            RETURN n
    )
    FILTER source_alert != null AND target_alert != null
    INSERT {origin: "openrca", _from: source_alert._id, _to: target_alert._id} INTO links

Traverse alerts graph:

FOR link IN links
    FILTER link.origin == "openrca"
    RETURN link

Cheatsheet

Remove all RCA nodes:

FOR node IN nodes
    FILTER node.origin == "openrca"
    REMOVE node IN nodes

Remove all RCA links:

FOR link IN links
    FILTER link.origin == "openrca"
    REMOVE link IN links

results matching ""

    No results matching ""