Skip to content

Query builder

Query is the fluent Cypher builder at the heart of the ORM. Every clause-building method returns self, so calls chain naturally; terminal methods such as build() return a value instead. build() returns (cypher_string, params_dict) — the same tuple shape used everywhere else in this library.

from cypher_validator import Query, Cond, NodeModel

class Person(NodeModel):
    __label__ = "Person"
    name: str
    age: int = 0

q = (Query()
     .match(Person, "p")
     .where(Cond("p.age", ">=", 18))
     .return_("p.name", "p.age")
     .order_by("p.name")
     .limit(10))

cypher, params = q.build()
# MATCH (p:Person) WHERE p.age >= 18 RETURN p.name, p.age ORDER BY p.name LIMIT 10
# params = {}   (Cond inlined the literal 18)

Method reference

MATCH / OPTIONAL MATCH

q.match(Person, "p")                          # MATCH (p:Person)
q.match(Person, "p", props={"name": "Alice"}) # MATCH (p:Person {name: $p_1})
q.match(pattern="(p:Person)-[:KNOWS]->(q)")   # raw pattern string
q.optional_match(Movie, "m")                  # OPTIONAL MATCH (m:Movie)

match(model_or_label=None, var=None, props=None, *, pattern=None) accepts:

  • model_or_label — a NodeModel subclass or a raw label string ("Person").
  • var — the Cypher variable name. Omit to produce (:Person).
  • props — inline property map, auto-parameterised through $var_N placeholders.
  • pattern (keyword-only) — full raw pattern string, bypasses model/label/props.

match_path — tuple syntax

For path matching, each "node" or "rel" is a 3-tuple (model_or_label, var, props):

q.match_path(
    (Person, "p", None),
    (ActedIn, "r", None),
    (Movie, "m", {"year": 1999}),
    direction="out",          # "in" / "both" also valid
    min_hops=1,
    max_hops=3,
    optional=False,
)
# MATCH (p:Person)-[r:ACTED_IN*1..3]->(m:Movie {year: $m_1})

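direction is one of "out" (default, -->), "in" (<--), "both" (--). min_hops / max_hops turn the relationship into a variable-length pattern (*1..3 in the example above).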

WHERE / AND / OR

Single-argument API

Query.where() accepts exactly one argument — either a Cond/CondGroup or a raw string. There is no kwargs-style auto-parameterisation. Bind values via .params(name=...) or pass a Cond (which inlines scalar literals — see API caveats).

q.where(Cond("p.name", "=", "$name")).params(name="Alice")
q.where("p.age > $min_age").params(min_age=30)
q.where(Cond("p.age", ">", 18) & Cond("p.age", "<", 65))   # AND
q.where(Cond("p.age", "<", 18) | Cond("p.age", ">", 65))   # OR

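and_where() / or_where() produce AND / OR continuation clauses for readability. The conditions are appended without parentheses, so Cypher's normal precedence applies (AND binds tighter than OR); build a CondGroup with & and | when you need explicit grouping: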

(Query()
 .match(Person, "p")
 .where(Cond("p.age", ">", 18))
 .and_where(Cond("p.age", "<", 65))
 .or_where(Cond("p.name", "STARTS WITH", "$prefix"))
 .return_("p"))
# MATCH (p:Person) WHERE p.age > 18 AND p.age < 65 OR p.name STARTS WITH $prefix RETURN p

CREATE / MERGE / CREATE path

q.create(Person, "p", props={"name": "Bob"})              # CREATE (p:Person {name: $p_1})
q.merge(Person, "p", props={"name": "Alice"})             # MERGE (p:Person {name: $p_1})
q.create_path((Person, "p", None),
              (ActedIn, "r", {"roles": ["Trinity"]}),
              (Movie, "m", {"title": "Matrix"}))
# CREATE (p:Person)-[r:ACTED_IN {roles: $r_1}]->(m:Movie {title: $m_2})

SET / REMOVE / DELETE

q.set("p.age = 31", "p.updated = timestamp()")            # raw assignments
q.set_props("p", {"age": 31, "email": "p@example.com"})   # parameterised
q.on_create_set("p.created = datetime()")                 # paired with MERGE
q.on_match_set("p.updated = datetime()")
q.remove("p.deprecated_field", "p:OldLabel")
q.delete("p")                                             # DELETE p
q.delete("p", "r", detach=True)                           # DETACH DELETE p, r

set_props(var, props) auto-parameterises — each value goes into a fresh $var_N slot. This is the safe default for user-supplied data.

WITH / RETURN

q.with_("p", "count(r) AS knows_count")
q.with_("p.name AS name", distinct=True)                  # WITH DISTINCT
q.return_("p.name", "p.age")
q.return_("p.name", distinct=True)                        # RETURN DISTINCT

ORDER BY / SKIP / LIMIT / UNWIND

q.order_by("p.age DESC", "p.name")
q.skip(20).limit(10)
q.unwind("[1, 2, 3]", "x")                                # UNWIND [1, 2, 3] AS x
q.unwind("$items", "item")                                # UNWIND $items AS item

CALL subquery

inner = Query().match(Movie, "m").where(Cond("m.year", ">", 2000)).return_("m")
q.call_subquery(inner)
# CALL {
#   MATCH (m:Movie) WHERE m.year > 2000 RETURN m
# }

q.call_subquery("MATCH (n:Tag) RETURN count(n) AS tag_count")   # raw string also works

When passing a Query, its parameters are merged into the outer query's params dict.

FOREACH / UNION / raw

q.foreach("x", "[1, 2, 3]", "CREATE (:Index {n: x})")
# FOREACH (x IN [1, 2, 3] | CREATE (:Index {n: x}))

q.union(other_query, all=False)                           # or all=True for UNION ALL

q.raw("CALL db.indexes() YIELD name")                     # append arbitrary text

Parameters

q.param("name", "Alice")            # single binding
q.params(name="Alice", age=30)      # multiple

.param() / .params() are the only way to pass scalar values into a where("…") string clause.

build() / build_cypher()

cypher, params = q.build()          # tuple
cypher = q.build_cypher()           # string only
print(q)                            # uses build_cypher()

to_dict() / to_json() / from_dict() / from_json()

Serialise a query for agent message passing:

d = q.to_dict()
# {
#   "cypher": "MATCH (p:Person) WHERE p.age >= 18 RETURN p",
#   "parameters": {},
#   "clauses": [("MATCH", "(p:Person)"), ("WHERE", "p.age >= 18"), ("RETURN", "p")],
# }

q2 = Query.from_dict(d)
assert q2.build() == q.build()

If "clauses" is present, the full builder is reconstructed (chainable downstream). Otherwise the query is rebuilt as a single raw clause.

validate(schema) and explain()

result = q.validate(schema)         # accepts CypherValidator, Schema, or GraphSchema
assert result.is_valid

print(q.explain())
# Find pattern: (p:Person)
# Filter: p.age >= 18
# Return: p.name, p.age

explain() produces a human-readable breakdown — handy for surfacing to non-technical users or as part of an LLM chain-of-thought.

Conditions — Cond, CondGroup, Op

Cond(left, op, right=None) builds a single condition. The op argument may be a string or an Op enum value:

from cypher_validator import Cond, Op

Cond("p.name", "=", "$name")
Cond("p.age", ">", 18)
Cond("p.status", "IS NULL")
Cond("p.email", "CONTAINS", "@example.com")
Cond("p.score", Op.GTE, 80)

Op enum values:

| Op | Cypher token |
|---|---|
| EQ | = |
| NEQ | <> |
| LT | < |
| LTE | <= |
| GT | > |
| GTE | >= |
| CONTAINS | CONTAINS |
| STARTS_WITH | STARTS WITH |
| ENDS_WITH | ENDS WITH |
| IN | IN |
| IS_NULL | IS NULL (right ignored) |
| IS_NOT_NULL | IS NOT NULL (right ignored) |
| REGEX | =~ |

How Cond renders the right-hand side

Cond inlines literal scalars directly into the Cypher string. This is intentional for readability, but it means values are not added to the params dict:

Cond("p.age", ">", 18).render()            # 'p.age > 18'
Cond("p.name", "=", "Alice").render()      # "p.name = 'Alice'"
Cond("p.active", "=", True).render()       # 'p.active = true'
Cond("p.deleted", "=", None).render()      # 'p.deleted = null'

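Note that the last form renders a literal = null comparison, which Cypher never evaluates to true; use the "IS NULL" / "IS NOT NULL" operators to test for missing values.

If the right-hand side is a string that starts with $, it is preserved as a parameter reference: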

Cond("p.name", "=", "$name").render()      # 'p.name = $name'

This is the safe way to bind user-supplied data — put the placeholder "$name" in the Cond and supply the actual value via q.params(name=actual_value). See API caveats for the full discussion.

Composing with & and |

c = (Cond("p.age", ">", 18) & Cond("p.age", "<", 65)) | Cond("p.is_admin", "=", True)
# (p.age > 18 AND p.age < 65) OR p.is_admin = true
q.where(c)

& and | return CondGroup objects. Parentheses are added automatically when a nested group has a different operator from its parent.

Vector search

vector_search() generates a CALL db.index.vector.queryNodes(...) clause for Neo4j vector similarity search (5.11+):

q = (Query()
     .vector_search("idx_document_embedding_vector", query_vector, top_k=5)
     .return_("node.title", "score"))
cypher, params = q.build()
# CALL db.index.vector.queryNodes('idx_document_embedding_vector', 5, $vec_1)
#   YIELD node AS node, score AS score
# RETURN node.title, score

vector_search(index_name, query_vector, top_k=10, node_var="node", score_var="score")

| Parameter | Type | Default | Notes |
|---|---|---|---|
| index_name | str | required | Must match ^[A-Za-z_][A-Za-z0-9_]*$ (prevents Cypher injection). |
| query_vector | list[float] | required | Auto-parameterised as $vec_N. |
| top_k | int | 10 | Number of nearest neighbors. |
| node_var | str | "node" | YIELD alias for the matched node. |
| score_var | str | "score" | YIELD alias for the similarity score. |

vector_search_model(model, property, query_vector, top_k=10, ...)

Convenience wrapper that derives the index name from the model's label and property using the canonical idx_<label_lower>_<prop>_vector pattern:

q = Query().vector_search_model(Document, "embedding", query_vector, top_k=5)
# Uses index_name = "idx_document_embedding_vector"

See Vector search for the full end-to-end workflow.

RawExpr

The escape hatch for arbitrary Cypher expressions:

from cypher_validator.models import RawExpr

q.where(RawExpr("apoc.text.levenshtein(p.name, $target) < 3"))

RawExpr.render() returns the expression verbatim — no escaping, no parameter binding.

PropExpr / NodeRef / RelRef

These let you write expressions that look like Python attribute access:

from cypher_validator import NodeRef

p = NodeRef(Person, "p")

q = (Query()
     .match(p)
     .where(p.age > 18)
     .where(p.name.starts_with("$prefix"))
     .return_(p))

p.age returns a PropExpr whose comparison operators (==, !=, <, <=, >, >=) return Cond objects. String / list methods produce the right Cond for CONTAINS, STARTS WITH, ENDS WITH, IN, IS NULL, IS NOT NULL, =~ (regex).

RelRef is the same idea for relationships:

from cypher_validator import RelRef
r = RelRef(ActedIn, "r")
q.where(r.year > 1990).return_(r.roles)

CypherFn / fn

Type-safe wrappers for common built-in functions. Use fn (alias of CypherFn) for brevity in projections:

from cypher_validator import fn, NodeRef

p = NodeRef(Person, "p")
q = (Query()
     .match(p)
     .return_(
         fn.count(p),
         fn.avg(p.age),
         fn.as_(fn.count_distinct(p.name), "unique_names"),
     ))
# RETURN count(p), avg(p.age), count(DISTINCT p.name) AS unique_names

Available helpers — count, count_distinct, sum, avg, min, max, collect, collect_distinct, coalesce, head, last, size, length, type, labels, id, element_id, to_lower, to_upper, trim, replace, substring, abs, ceil, floor, round, timestamp, date, datetime, as_.

PathBuilder

For complex multi-hop patterns, PathBuilder chains nodes and relationships fluently:

from cypher_validator import PathBuilder

path = (PathBuilder(Person, "actor")
        .rel(ActedIn, "r")
        .to(Movie, "movie")
        .rel("DIRECTED", direction="in")
        .to(Person, "director"))

q = (Query()
     .match(pattern=path.build())
     .return_("actor", "director"))
# MATCH (actor:Person)-[r:ACTED_IN]->(movie:Movie)<-[:DIRECTED]-(director:Person)
# RETURN actor, director

PathBuilder.params holds any literal property maps converted to parameters; use path.to_query() to obtain a Query pre-seeded with them.

QueryPlan / QueryStep / QueryResult

For multi-step agent workflows, QueryPlan lets you express dependencies between steps:

from cypher_validator import QueryPlan, QueryStep

plan = QueryPlan(
    goal="Find Alice's co-workers and their projects",
    steps=[
        QueryStep(
            description="Look up Alice",
            cypher="MATCH (a:Person {name: $name}) RETURN a",
            parameters={"name": "Alice"},
            depends_on=[],
            is_read=True,
        ),
        QueryStep(
            description="Find her colleagues",
            cypher="MATCH (a:Person {name: $name})-[:WORKS_WITH]->(c) RETURN c",
            parameters={"name": "Alice"},
            depends_on=[0],
        ),
    ],
)

# Topological sort into parallel waves
for wave in plan.to_execution_order():
    print("wave:", wave)
print(plan.explain())

plan.validate_all(schema) validates every step against a schema and returns a list of (step_index, ValidationResult) tuples.

QueryResult is the structured wrapper for execution output — useful when handing results back to an LLM:

from cypher_validator import QueryResult

qr = QueryResult(
    cypher=cypher,
    parameters=params,
    records=session.execute(cypher, params),
    summary="Found N actors",
)
print(qr.success)                # True if no error and validation passed
print(qr.count)                  # len(records)
print(qr.to_markdown())          # markdown table
print(qr.to_natural_language())  # one-sentence NL summary