Skip to main content

Language Grammar

This document describes the grammar of the Wvlet language.

warning

As of September 2024, Wvlet is still under active development. The language syntax and grammar structure may change, so use this grammar just as a reference.

packageDef: 'package' qualifiedId statement*

qualifiedId: identifier ('.' identifier)*

identifier : IDENTIFIER
| BACKQUOTED_IDENTIFIER
| '*'
| reserved # Necessary to use reserved words as identifiers
IDENTIFIER : (LETTER | '_') (LETTER | DIGIT | '_')*
BACKQUOTED_IDENTIFIER: '`' (~'`' | '``')+ '`'
// All reserved keywords (TokenType.Keyword)
reserved : 'from' | 'select' | 'agg' | 'where' | 'group' | 'by' | ...


statements: statement+
statement : importStatement
| modelDef
| query ';'?
| typeDef
| funDef
| showCommand queryOp*
| executeCommand expression

importStatement: 'import' importRef ('from' str)?
importRef : qualifiedId ('.' '*')?
| qualifiedId 'as' identifier

modelDef : 'model' identifier modelParams? (':' qualifiedId)? '=' modelBody
modelBody : query 'end'
modelParams: '(' modelParam (',' modelParam)* ')'
modelParam : identifier ':' identifier ('=' expression)?

// top-level query
query : queryBody update?
queryBody : querySingle queryBlock*
// A rule for sub queries
querySingle: 'from' relation (',' relation)* ','? queryBlock*
| 'select' selectItems queryBlock*
// For braced query, do not continue queryBlock for disambiguation
| '{' queryBody '}' ('as' identifier)?

// relation that can be used after 'from', 'join', 'concat' (set operation), etc.:
relation : relationPrimary ('as' identifier)?
relationPrimary: qualifiedId ('(' functionArg (',' functionArg)* ')')?
| querySingle
| str // file scan
| strInterpolation // embedded raw SQL
| arrayValue
arrayValue : '[' arrayValue (',' arrayValue)* ','? ']'

queryBlock: '|' queryBlock // pipe operator for explicit split
| joinExpr
| 'group' 'by' groupByItemList
| 'where' booleanExpression
| 'select' 'distinct'? selectItems
| 'agg' selectItems
| 'pivot' 'on' pivotKey (',' pivotKey)*
('group' 'by' groupByItemList)?
('agg' selectItems)?
| 'limit' INTEGER_VALUE
| 'order' 'by' sortItem (',' sortItem)* ','?
| 'add' selectItems
| 'exclude' identifier ((',' identifier)* ','?)?
| 'shift' identifier (',' identifier)* ','?
| 'test' testExpr
| 'show' identifier
| 'sample' sampleExpr
| 'concat' relation
| ('intersect' | 'except') 'all'? relation
| 'dedup'
| 'describe'
| 'debug' '{' (queryBlock | update)+ '}'

update : 'save' 'as' updateTarget saveOptions?
| 'append' 'to' updateTarget
| 'delete'
updateTarget : qualifiedId | stringLiteral
saveOptions: 'with' saveOption (',' saveOption)* ','?
saveOption : identifier ':' expression

joinExpr : 'asof'? joinType? 'join' relation joinCriteria
| 'cross' 'join' relation
joinType : 'inner' | 'left' | 'right' | 'full'
joinCriteria: 'on' booleanExpression
// using equi join keys
| 'on' identifier (',' identifier)*

groupByItemList: groupByItem (',' groupByItem)* ','?
groupByItem : expression ('as' identifier (':' identifier)?)?

selectItems: selectItem (',' selectItem)* ','?
selectItem : (identifier '=')? expression
| expression ('as' identifier)?

window : 'over' '(' windowSpec ')'
windowSpec : ('partition' 'by' expression (',' expression)*)?
('order' 'by' sortItem (',' sortItem)*)?
frameSpec?
frameSpec : ('rows' | 'range') frame
frame : '[' frameBound? ':' frameBound? ']'
frameBound : INTEGER_VALUE | INTEGER_VALUE 'days'

testExpr: booleanExpression

showCommand: 'show' identifier ('in' qualifiedId)?

executeCommand: 'execute' expression

sampleExpr: sampleSize
| ('reservoir' | 'system') '(' sampleSize ')'

sampleSize: ((integerLiteral 'rows'?) | (floatLiteral '%'))

sortItem: expression ('asc' | 'desc')?

pivotKey: identifier ('in' '(' (valueExpression (',' valueExpression)*) ')')?

typeDef : 'type' identifier typeParams? context? typeExtends? ':' typeElem* 'end'
typeParams : '[' typeParam (',' typeParam)* ']'
typeParam : identifier ('of' identifier)?
typeExtends: 'extends' qualifiedId (',' qualifiedId)*
typeElem : valDef | funDef

valDef : identifier ':' identifier typeParams? ('=' expression)?
funDef : 'def' funName defParams? (':' identifier '*'?)? ('=' expression)?
funName : identifier | symbol
symbol : '+' | '-' | '*' | '/' | '%' | '&' | '|' | '=' | '==' | '!=' | '<' | '<=' | '>' | '>=' | '&&' | '||'
defParams : '(' defParam (',' defParam)* ')'
defParam : identifier ':' identifier ('=' expression)?

context : '(' 'in' contextItem (',' contextItem)* ')'
contextItem: identifier (':' identifier)?

strInterpolation: identifier
| '"' stringPart* '"'
| '"""' stringPart* '"""' # triple quotes string
stringPart : stringLiteral | '${' expression '}'


expression : booleanExpression
booleanExpression : ('!' | 'not') booleanExpression
| valueExpression
| booleanExpression ('and' | 'or') booleanExpression
valueExpression : ('-' | '+') valueExpression
| primaryExpression
| valueExpression arithmeticOperator valueExpression
| valueExpression comparisonOperator valueExpression

arithmeticOperator: '+' | '-' | '*' | '/' | '%'
comparisonOperator: '=' | '==' | 'is' | '!=' | 'is' 'not' | '<' | '<=' | '>' | '>=' | 'like' | 'contains'

// Expression that can be chained with '.' operator
primaryExpression : 'this'
| '_'
| literal
| query
| 'case' expression? whenExpr+ elseExpr? # case-when
| '{' querySingle '}' # subquery
| '(' expression ')' # parenthesized expression
| '[' expression (',' expression)* ']' # array
| '{' rowElem (',' rowElem)* '}' # struct, row
| 'map' '{' rowElem (',' rowElem)* '}' # map value
| 'if' booleanExpression 'then' expression 'else' expression # if-then-else
| qualifiedId
| primaryExpression '.' primaryExpression
| primaryExpression '(' functionArg? (',' functionArg)* ')' window? # function call
| primaryExpression '[' expression ']' # array access
| primaryExpression identifier expression # function infix

rowElem : stringLiteral ':' expression
functionArg : (identifier '=')? expression

literal : 'null' | '-'? integerLiteral | '-'? floatLiteral | booleanLiteral | stringLiteral

whenExpr : 'when' booleanExpression 'then' expression
elseExpr : 'else' expression
lambdaExpr : lambdaParams '->' expression
lambdaParams : identifier
| '(' (identifier (',' identifier)*)? ')'