sqlglot.parser
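
A minimal usage sketch (illustrative only; it drives this module through the
public helpers of the top-level sqlglot package rather than anything defined
below):

    import sqlglot
    from sqlglot.parser import Parser

    # High-level: tokenize + parse in one call.
    expression = sqlglot.parse_one("SELECT a FROM t WHERE b > 1")

    # Low-level: drive the Parser with an explicit token stream.
    tokens = sqlglot.tokenize("SELECT 1")
    statements = Parser().parse(tokens, "SELECT 1")
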
from __future__ import annotations

import logging
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
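
# Illustrative sketch (not part of the original source): build_var_map pairs
# up alternating key/value arguments into a single VarMap node.
#
#     >>> from sqlglot import exp
#     >>> from sqlglot.parser import build_var_map
#     >>> node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(node, exp.VarMap)
#     True
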
def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
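
# Illustrative sketch of the wrapping above (matches the comment in build_mod):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     'SELECT (a + 1) % 7'
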
def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
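
    # Illustrative sketch: COALESCE, IFNULL and NVL all funnel through
    # build_coalesce above, so they canonicalize to a single Coalesce node.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT IFNULL(a, 0)").sql()
    #     'SELECT COALESCE(a, 0)'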
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS
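
    # Illustrative sketch: LEFT is a valid identifier (ID_VAR_TOKENS) but is
    # excluded from TABLE_ALIAS_TOKENS, so the statement below parses as a
    # join rather than as table "t" aliased to "LEFT".
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT * FROM t LEFT JOIN u ON t.id = u.id").sql()
    #     'SELECT * FROM t LEFT JOIN u ON t.id = u.id'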
    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
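
    # Illustrative sketch: the DCOLON entry above is what parses `::` casts.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT x::INT").sql()
    #     'SELECT CAST(x AS INT)'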
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
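
    # Illustrative sketch: statement dispatch keys off the first token of each
    # chunk, so the statement kind determines the root expression type.
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("UPDATE t SET x = 1")).__name__
    #     'Update'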
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "DROP": lambda self, query: self._parse_pipe_syntax_drop(query),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "SET": lambda self, query: self._parse_pipe_syntax_set(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
"DISTRIBUTED": lambda self: self._parse_distributed_property(), 981 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 982 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 983 "DISTKEY": lambda self: self._parse_distkey(), 984 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 985 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 986 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 987 "ENVIRONMENT": lambda self: self.expression( 988 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 989 ), 990 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 991 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 992 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 993 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 994 "FREESPACE": lambda self: self._parse_freespace(), 995 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 996 "HEAP": lambda self: self.expression(exp.HeapProperty), 997 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 998 "IMMUTABLE": lambda self: self.expression( 999 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1000 ), 1001 "INHERITS": lambda self: self.expression( 1002 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1003 ), 1004 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1005 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1006 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1007 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1008 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1009 "LIKE": lambda self: self._parse_create_like(), 1010 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1011 "LOCK": lambda self: self._parse_locking(), 1012 "LOCKING": lambda self: self._parse_locking(), 1013 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1014 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1015 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1016 "MODIFIES": lambda self: self._parse_modifies_property(), 1017 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1018 "NO": lambda self: self._parse_no_property(), 1019 "ON": lambda self: self._parse_on_property(), 1020 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1021 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1022 "PARTITION": lambda self: self._parse_partitioned_of(), 1023 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1024 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1025 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1026 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1027 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1028 "READS": lambda self: self._parse_reads_property(), 1029 "REMOTE": lambda self: self._parse_remote_with_connection(), 1030 "RETURNS": lambda self: self._parse_returns(), 1031 "STRICT": lambda self: self.expression(exp.StrictProperty), 1032 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1033 "ROW": lambda self: self._parse_row(), 1034 "ROW_FORMAT": lambda self: 
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression:
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their
            # arguments are in the right order:
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
self: ("limit", self._parse_limit()), 1259 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1260 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1261 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1262 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1263 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1264 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1265 TokenType.CLUSTER_BY: lambda self: ( 1266 "cluster", 1267 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1268 ), 1269 TokenType.DISTRIBUTE_BY: lambda self: ( 1270 "distribute", 1271 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1272 ), 1273 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1274 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1275 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1276 } 1277 1278 SET_PARSERS = { 1279 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1280 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1281 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1282 "TRANSACTION": lambda self: self._parse_set_transaction(), 1283 } 1284 1285 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1286 1287 TYPE_LITERAL_PARSERS = { 1288 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1289 } 1290 1291 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1292 1293 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1294 1295 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1296 1297 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1298 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1299 "ISOLATION": ( 1300 ("LEVEL", "REPEATABLE", "READ"), 1301 ("LEVEL", "READ", "COMMITTED"), 1302 ("LEVEL", "READ", "UNCOMITTED"), 1303 ("LEVEL", "SERIALIZABLE"), 1304 ), 1305 "READ": ("WRITE", "ONLY"), 1306 } 1307 1308 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1309 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1310 ) 1311 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1312 1313 CREATE_SEQUENCE: OPTIONS_TYPE = { 1314 "SCALE": ("EXTEND", "NOEXTEND"), 1315 "SHARD": ("EXTEND", "NOEXTEND"), 1316 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1317 **dict.fromkeys( 1318 ( 1319 "SESSION", 1320 "GLOBAL", 1321 "KEEP", 1322 "NOKEEP", 1323 "ORDER", 1324 "NOORDER", 1325 "NOCACHE", 1326 "CYCLE", 1327 "NOCYCLE", 1328 "NOMINVALUE", 1329 "NOMAXVALUE", 1330 "NOSCALE", 1331 "NOSHARD", 1332 ), 1333 tuple(), 1334 ), 1335 } 1336 1337 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1338 1339 USABLES: OPTIONS_TYPE = dict.fromkeys( 1340 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1341 ) 1342 1343 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1344 1345 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1346 "TYPE": ("EVOLUTION",), 1347 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1348 } 1349 1350 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1351 1352 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1353 1354 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1355 "NOT": ("ENFORCED",), 1356 "MATCH": ( 1357 "FULL", 1358 "PARTIAL", 1359 "SIMPLE", 1360 ), 1361 "INITIALLY": ("DEFERRED", 
"IMMEDIATE"), 1362 "USING": ( 1363 "BTREE", 1364 "HASH", 1365 ), 1366 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1367 } 1368 1369 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1370 "NO": ("OTHERS",), 1371 "CURRENT": ("ROW",), 1372 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1373 } 1374 1375 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1376 1377 CLONE_KEYWORDS = {"CLONE", "COPY"} 1378 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1379 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1380 1381 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1382 1383 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1384 1385 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1386 1387 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1388 1389 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1390 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1391 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1392 1393 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1394 1395 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1396 1397 ADD_CONSTRAINT_TOKENS = { 1398 TokenType.CONSTRAINT, 1399 TokenType.FOREIGN_KEY, 1400 TokenType.INDEX, 1401 TokenType.KEY, 1402 TokenType.PRIMARY_KEY, 1403 TokenType.UNIQUE, 1404 } 1405 1406 DISTINCT_TOKENS = {TokenType.DISTINCT} 1407 1408 NULL_TOKENS = {TokenType.NULL} 1409 1410 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1411 1412 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1413 1414 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1415 1416 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1417 1418 ODBC_DATETIME_LITERALS = { 1419 "d": exp.Date, 1420 "t": exp.Time, 1421 "ts": exp.Timestamp, 1422 } 1423 1424 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1425 1426 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1427 1428 # The style options for the DESCRIBE statement 1429 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1430 1431 # The style options for the ANALYZE statement 1432 ANALYZE_STYLES = { 1433 "BUFFER_USAGE_LIMIT", 1434 "FULL", 1435 "LOCAL", 1436 "NO_WRITE_TO_BINLOG", 1437 "SAMPLE", 1438 "SKIP_LOCKED", 1439 "VERBOSE", 1440 } 1441 1442 ANALYZE_EXPRESSION_PARSERS = { 1443 "ALL": lambda self: self._parse_analyze_columns(), 1444 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1445 "DELETE": lambda self: self._parse_analyze_delete(), 1446 "DROP": lambda self: self._parse_analyze_histogram(), 1447 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1448 "LIST": lambda self: self._parse_analyze_list(), 1449 "PREDICATE": lambda self: self._parse_analyze_columns(), 1450 "UPDATE": lambda self: self._parse_analyze_histogram(), 1451 "VALIDATE": lambda self: self._parse_analyze_validate(), 1452 } 1453 1454 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1455 1456 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1457 1458 OPERATION_MODIFIERS: t.Set[str] = set() 1459 1460 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1461 1462 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1463 1464 STRICT_CAST = True 1465 1466 PREFIXED_PIVOT_COLUMNS = False 1467 IDENTIFY_PIVOT_STRINGS = False 1468 1469 LOG_DEFAULTS_TO_LN = False 1470 1471 # Whether the table sample clause expects CSV 
    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True
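
    # Illustrative sketch (hypothetical dialect; not from the original source):
    # dialects tune parsing behavior by overriding flags like the ones above
    # on their own Parser subclass, e.g.:
    #
    #     class MyDialect(Dialect):
    #         class Parser(parser.Parser):
    #             STRING_ALIASES = True  # accept SELECT COUNT(*) 'count'
    #             LOG_DEFAULTS_TO_LN = False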
1580 """ 1581 return self._parse( 1582 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1583 ) 1584 1585 def parse_into( 1586 self, 1587 expression_types: exp.IntoType, 1588 raw_tokens: t.List[Token], 1589 sql: t.Optional[str] = None, 1590 ) -> t.List[t.Optional[exp.Expression]]: 1591 """ 1592 Parses a list of tokens into a given Expression type. If a collection of Expression 1593 types is given instead, this method will try to parse the token list into each one 1594 of them, stopping at the first for which the parsing succeeds. 1595 1596 Args: 1597 expression_types: The expression type(s) to try and parse the token list into. 1598 raw_tokens: The list of tokens. 1599 sql: The original SQL string, used to produce helpful debug messages. 1600 1601 Returns: 1602 The target Expression. 1603 """ 1604 errors = [] 1605 for expression_type in ensure_list(expression_types): 1606 parser = self.EXPRESSION_PARSERS.get(expression_type) 1607 if not parser: 1608 raise TypeError(f"No parser registered for {expression_type}") 1609 1610 try: 1611 return self._parse(parser, raw_tokens, sql) 1612 except ParseError as e: 1613 e.errors[0]["into_expression"] = expression_type 1614 errors.append(e) 1615 1616 raise ParseError( 1617 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1618 errors=merge_errors(errors), 1619 ) from errors[-1] 1620 1621 def _parse( 1622 self, 1623 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1624 raw_tokens: t.List[Token], 1625 sql: t.Optional[str] = None, 1626 ) -> t.List[t.Optional[exp.Expression]]: 1627 self.reset() 1628 self.sql = sql or "" 1629 1630 total = len(raw_tokens) 1631 chunks: t.List[t.List[Token]] = [[]] 1632 1633 for i, token in enumerate(raw_tokens): 1634 if token.token_type == TokenType.SEMICOLON: 1635 if token.comments: 1636 chunks.append([token]) 1637 1638 if i < total - 1: 1639 chunks.append([]) 1640 else: 1641 chunks[-1].append(token) 1642 1643 expressions = [] 1644 1645 for tokens in chunks: 1646 self._index = -1 1647 self._tokens = tokens 1648 self._advance() 1649 1650 expressions.append(parse_method(self)) 1651 1652 if self._index < len(self._tokens): 1653 self.raise_error("Invalid expression / Unexpected token") 1654 1655 self.check_errors() 1656 1657 return expressions 1658 1659 def check_errors(self) -> None: 1660 """Logs or raises any found errors, depending on the chosen error level setting.""" 1661 if self.error_level == ErrorLevel.WARN: 1662 for error in self.errors: 1663 logger.error(str(error)) 1664 elif self.error_level == ErrorLevel.RAISE and self.errors: 1665 raise ParseError( 1666 concat_messages(self.errors, self.max_errors), 1667 errors=merge_errors(self.errors), 1668 ) 1669 1670 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1671 """ 1672 Appends an error in the list of recorded errors or raises it, depending on the chosen 1673 error level setting. 1674 """ 1675 token = token or self._curr or self._prev or Token.string("") 1676 start = token.start 1677 end = token.end + 1 1678 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1679 highlight = self.sql[start:end] 1680 end_context = self.sql[end : end + self.error_message_context] 1681 1682 error = ParseError.new( 1683 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1684 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1685 description=message, 1686 line=token.line, 1687 col=token.col, 1688 start_context=start_context, 1689 highlight=highlight, 1690 end_context=end_context, 1691 ) 1692 1693 if self.error_level == ErrorLevel.IMMEDIATE: 1694 raise error 1695 1696 self.errors.append(error) 1697 1698 def expression( 1699 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1700 ) -> E: 1701 """ 1702 Creates a new, validated Expression. 1703 1704 Args: 1705 exp_class: The expression class to instantiate. 1706 comments: An optional list of comments to attach to the expression. 1707 kwargs: The arguments to set for the expression along with their respective values. 1708 1709 Returns: 1710 The target expression. 1711 """ 1712 instance = exp_class(**kwargs) 1713 instance.add_comments(comments) if comments else self._add_comments(instance) 1714 return self.validate_expression(instance) 1715 1716 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1717 if expression and self._prev_comments: 1718 expression.add_comments(self._prev_comments) 1719 self._prev_comments = None 1720 1721 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1722 """ 1723 Validates an Expression, making sure that all its mandatory arguments are set. 1724 1725 Args: 1726 expression: The expression to validate. 1727 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1728 1729 Returns: 1730 The validated expression. 1731 """ 1732 if self.error_level != ErrorLevel.IGNORE: 1733 for error_message in expression.error_messages(args): 1734 self.raise_error(error_message) 1735 1736 return expression 1737 1738 def _find_sql(self, start: Token, end: Token) -> str: 1739 return self.sql[start.start : end.end + 1] 1740 1741 def _is_connected(self) -> bool: 1742 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1743 1744 def _advance(self, times: int = 1) -> None: 1745 self._index += times 1746 self._curr = seq_get(self._tokens, self._index) 1747 self._next = seq_get(self._tokens, self._index + 1) 1748 1749 if self._index > 0: 1750 self._prev = self._tokens[self._index - 1] 1751 self._prev_comments = self._prev.comments 1752 else: 1753 self._prev = None 1754 self._prev_comments = None 1755 1756 def _retreat(self, index: int) -> None: 1757 if index != self._index: 1758 self._advance(index - self._index) 1759 1760 def _warn_unsupported(self) -> None: 1761 if len(self._tokens) <= 1: 1762 return 1763 1764 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1765 # interested in emitting a warning for the one being currently processed. 1766 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1767 1768 logger.warning( 1769 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1770 ) 1771 1772 def _parse_command(self) -> exp.Command: 1773 self._warn_unsupported() 1774 return self.expression( 1775 exp.Command, 1776 comments=self._prev_comments, 1777 this=self._prev.text.upper(), 1778 expression=self._parse_string(), 1779 ) 1780 1781 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1782 """ 1783 Attempts to backtrack if a parse function that contains a try/except internally raises an error.
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1785 solve this by setting & resetting the parser state accordingly. 1786 """ 1787 index = self._index 1788 error_level = self.error_level 1789 1790 self.error_level = ErrorLevel.IMMEDIATE 1791 try: 1792 this = parse_method() 1793 except ParseError: 1794 this = None 1795 finally: 1796 if not this or retreat: 1797 self._retreat(index) 1798 self.error_level = error_level 1799 1800 return this 1801 1802 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1803 start = self._prev 1804 exists = self._parse_exists() if allow_exists else None 1805 1806 self._match(TokenType.ON) 1807 1808 materialized = self._match_text_seq("MATERIALIZED") 1809 kind = self._match_set(self.CREATABLES) and self._prev 1810 if not kind: 1811 return self._parse_as_command(start) 1812 1813 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1814 this = self._parse_user_defined_function(kind=kind.token_type) 1815 elif kind.token_type == TokenType.TABLE: 1816 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1817 elif kind.token_type == TokenType.COLUMN: 1818 this = self._parse_column() 1819 else: 1820 this = self._parse_id_var() 1821 1822 self._match(TokenType.IS) 1823 1824 return self.expression( 1825 exp.Comment, 1826 this=this, 1827 kind=kind.text, 1828 expression=self._parse_string(), 1829 exists=exists, 1830 materialized=materialized, 1831 ) 1832 1833 def _parse_to_table( 1834 self, 1835 ) -> exp.ToTableProperty: 1836 table = self._parse_table_parts(schema=True) 1837 return self.expression(exp.ToTableProperty, this=table) 1838 1839 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1840 def _parse_ttl(self) -> exp.Expression: 1841 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1842 this = self._parse_bitwise() 1843 1844 if self._match_text_seq("DELETE"): 1845 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1846 if self._match_text_seq("RECOMPRESS"): 1847 return self.expression( 1848 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1849 ) 1850 if self._match_text_seq("TO", "DISK"): 1851 return self.expression( 1852 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1853 ) 1854 if self._match_text_seq("TO", "VOLUME"): 1855 return self.expression( 1856 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1857 ) 1858 1859 return this 1860 1861 expressions = self._parse_csv(_parse_ttl_action) 1862 where = self._parse_where() 1863 group = self._parse_group() 1864 1865 aggregates = None 1866 if group and self._match(TokenType.SET): 1867 aggregates = self._parse_csv(self._parse_set_item) 1868 1869 return self.expression( 1870 exp.MergeTreeTTL, 1871 expressions=expressions, 1872 where=where, 1873 group=group, 1874 aggregates=aggregates, 1875 ) 1876 1877 def _parse_statement(self) -> t.Optional[exp.Expression]: 1878 if self._curr is None: 1879 return None 1880 1881 if self._match_set(self.STATEMENT_PARSERS): 1882 comments = self._prev_comments 1883 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1884 stmt.add_comments(comments, prepend=True) 1885 return stmt 1886 1887 if self._match_set(self.dialect.tokenizer.COMMANDS): 1888 return self._parse_command() 1889 1890 expression = self._parse_expression() 1891 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1892 return self._parse_query_modifiers(expression)
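    # Editorial usage sketch (not part of the original source): _try_parse, defined
    # above, runs a speculative sub-parser under ErrorLevel.IMMEDIATE so that a
    # ParseError rolls back both the token index and the caller's error level.
    # A typical call, mirroring _parse_create further below:
    #
    #     table = self._try_parse(self._parse_table_parts)
    #     if table is None:
    #         ...  # parser state was restored, so it's safe to try another branch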
1893 1894 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1895 start = self._prev 1896 temporary = self._match(TokenType.TEMPORARY) 1897 materialized = self._match_text_seq("MATERIALIZED") 1898 1899 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1900 if not kind: 1901 return self._parse_as_command(start) 1902 1903 concurrently = self._match_text_seq("CONCURRENTLY") 1904 if_exists = exists or self._parse_exists() 1905 1906 if kind == "COLUMN": 1907 this = self._parse_column() 1908 else: 1909 this = self._parse_table_parts( 1910 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1911 ) 1912 1913 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1914 1915 if self._match(TokenType.L_PAREN, advance=False): 1916 expressions = self._parse_wrapped_csv(self._parse_types) 1917 else: 1918 expressions = None 1919 1920 return self.expression( 1921 exp.Drop, 1922 exists=if_exists, 1923 this=this, 1924 expressions=expressions, 1925 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1926 temporary=temporary, 1927 materialized=materialized, 1928 cascade=self._match_text_seq("CASCADE"), 1929 constraints=self._match_text_seq("CONSTRAINTS"), 1930 purge=self._match_text_seq("PURGE"), 1931 cluster=cluster, 1932 concurrently=concurrently, 1933 ) 1934 1935 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1936 return ( 1937 self._match_text_seq("IF") 1938 and (not not_ or self._match(TokenType.NOT)) 1939 and self._match(TokenType.EXISTS) 1940 ) 1941 1942 def _parse_create(self) -> exp.Create | exp.Command: 1943 # Note: this can't be None because we've matched a statement parser 1944 start = self._prev 1945 1946 replace = ( 1947 start.token_type == TokenType.REPLACE 1948 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1949 or self._match_pair(TokenType.OR, TokenType.ALTER) 1950 ) 1951 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1952 1953 unique = self._match(TokenType.UNIQUE) 1954 1955 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1956 clustered = True 1957 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1958 "COLUMNSTORE" 1959 ): 1960 clustered = False 1961 else: 1962 clustered = None 1963 1964 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1965 self._advance() 1966 1967 properties = None 1968 create_token = self._match_set(self.CREATABLES) and self._prev 1969 1970 if not create_token: 1971 # exp.Properties.Location.POST_CREATE 1972 properties = self._parse_properties() 1973 create_token = self._match_set(self.CREATABLES) and self._prev 1974 1975 if not properties or not create_token: 1976 return self._parse_as_command(start) 1977 1978 concurrently = self._match_text_seq("CONCURRENTLY") 1979 exists = self._parse_exists(not_=True) 1980 this = None 1981 expression: t.Optional[exp.Expression] = None 1982 indexes = None 1983 no_schema_binding = None 1984 begin = None 1985 end = None 1986 clone = None 1987 1988 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1989 nonlocal properties 1990 if properties and temp_props: 1991 properties.expressions.extend(temp_props.expressions) 1992 elif temp_props: 1993 properties = temp_props 1994 1995 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1996 this = self._parse_user_defined_function(kind=create_token.token_type) 1997 1998 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1999 
extend_props(self._parse_properties()) 2000 2001 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2002 extend_props(self._parse_properties()) 2003 2004 if not expression: 2005 if self._match(TokenType.COMMAND): 2006 expression = self._parse_as_command(self._prev) 2007 else: 2008 begin = self._match(TokenType.BEGIN) 2009 return_ = self._match_text_seq("RETURN") 2010 2011 if self._match(TokenType.STRING, advance=False): 2012 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2013 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2014 expression = self._parse_string() 2015 extend_props(self._parse_properties()) 2016 else: 2017 expression = self._parse_user_defined_function_expression() 2018 2019 end = self._match_text_seq("END") 2020 2021 if return_: 2022 expression = self.expression(exp.Return, this=expression) 2023 elif create_token.token_type == TokenType.INDEX: 2024 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2025 if not self._match(TokenType.ON): 2026 index = self._parse_id_var() 2027 anonymous = False 2028 else: 2029 index = None 2030 anonymous = True 2031 2032 this = self._parse_index(index=index, anonymous=anonymous) 2033 elif create_token.token_type in self.DB_CREATABLES: 2034 table_parts = self._parse_table_parts( 2035 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2036 ) 2037 2038 # exp.Properties.Location.POST_NAME 2039 self._match(TokenType.COMMA) 2040 extend_props(self._parse_properties(before=True)) 2041 2042 this = self._parse_schema(this=table_parts) 2043 2044 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2045 extend_props(self._parse_properties()) 2046 2047 has_alias = self._match(TokenType.ALIAS) 2048 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2049 # exp.Properties.Location.POST_ALIAS 2050 extend_props(self._parse_properties()) 2051 2052 if create_token.token_type == TokenType.SEQUENCE: 2053 expression = self._parse_types() 2054 extend_props(self._parse_properties()) 2055 else: 2056 expression = self._parse_ddl_select() 2057 2058 # Some dialects also support using a table as an alias instead of a SELECT. 2059 # Here we fall back to this as an alternative.
2060 if not expression and has_alias: 2061 expression = self._try_parse(self._parse_table_parts) 2062 2063 if create_token.token_type == TokenType.TABLE: 2064 # exp.Properties.Location.POST_EXPRESSION 2065 extend_props(self._parse_properties()) 2066 2067 indexes = [] 2068 while True: 2069 index = self._parse_index() 2070 2071 # exp.Properties.Location.POST_INDEX 2072 extend_props(self._parse_properties()) 2073 if not index: 2074 break 2075 else: 2076 self._match(TokenType.COMMA) 2077 indexes.append(index) 2078 elif create_token.token_type == TokenType.VIEW: 2079 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2080 no_schema_binding = True 2081 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2082 extend_props(self._parse_properties()) 2083 2084 shallow = self._match_text_seq("SHALLOW") 2085 2086 if self._match_texts(self.CLONE_KEYWORDS): 2087 copy = self._prev.text.lower() == "copy" 2088 clone = self.expression( 2089 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2090 ) 2091 2092 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2093 return self._parse_as_command(start) 2094 2095 create_kind_text = create_token.text.upper() 2096 return self.expression( 2097 exp.Create, 2098 this=this, 2099 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2100 replace=replace, 2101 refresh=refresh, 2102 unique=unique, 2103 expression=expression, 2104 exists=exists, 2105 properties=properties, 2106 indexes=indexes, 2107 no_schema_binding=no_schema_binding, 2108 begin=begin, 2109 end=end, 2110 clone=clone, 2111 concurrently=concurrently, 2112 clustered=clustered, 2113 ) 2114 2115 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2116 seq = exp.SequenceProperties() 2117 2118 options = [] 2119 index = self._index 2120 2121 while self._curr: 2122 self._match(TokenType.COMMA) 2123 if self._match_text_seq("INCREMENT"): 2124 self._match_text_seq("BY") 2125 self._match_text_seq("=") 2126 seq.set("increment", self._parse_term()) 2127 elif self._match_text_seq("MINVALUE"): 2128 seq.set("minvalue", self._parse_term()) 2129 elif self._match_text_seq("MAXVALUE"): 2130 seq.set("maxvalue", self._parse_term()) 2131 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2132 self._match_text_seq("=") 2133 seq.set("start", self._parse_term()) 2134 elif self._match_text_seq("CACHE"): 2135 # T-SQL allows empty CACHE which is initialized dynamically 2136 seq.set("cache", self._parse_number() or True) 2137 elif self._match_text_seq("OWNED", "BY"): 2138 # "OWNED BY NONE" is the default 2139 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2140 else: 2141 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2142 if opt: 2143 options.append(opt) 2144 else: 2145 break 2146 2147 seq.set("options", options if options else None) 2148 return None if self._index == index else seq 2149 2150 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2151 # only used for teradata currently 2152 self._match(TokenType.COMMA) 2153 2154 kwargs = { 2155 "no": self._match_text_seq("NO"), 2156 "dual": self._match_text_seq("DUAL"), 2157 "before": self._match_text_seq("BEFORE"), 2158 "default": self._match_text_seq("DEFAULT"), 2159 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2160 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2161 "after": self._match_text_seq("AFTER"), 2162 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2163 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2164 } 2165 2166 if self._match_texts(self.PROPERTY_PARSERS): 2167 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2168 try: 2169 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2170 except TypeError: 2171 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2172 2173 return None 2174 2175 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2176 return self._parse_wrapped_csv(self._parse_property) 2177 2178 def _parse_property(self) -> t.Optional[exp.Expression]: 2179 if self._match_texts(self.PROPERTY_PARSERS): 2180 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2181 2182 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2183 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2184 2185 if self._match_text_seq("COMPOUND", "SORTKEY"): 2186 return self._parse_sortkey(compound=True) 2187 2188 if self._match_text_seq("SQL", "SECURITY"): 2189 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2190 2191 index = self._index 2192 key = self._parse_column() 2193 2194 if not self._match(TokenType.EQ): 2195 self._retreat(index) 2196 return self._parse_sequence_properties() 2197 2198 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2199 if isinstance(key, exp.Column): 2200 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2201 2202 value = self._parse_bitwise() or self._parse_var(any_token=True) 2203 2204 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2205 if isinstance(value, exp.Column): 2206 value = exp.var(value.name) 2207 2208 return self.expression(exp.Property, this=key, value=value) 2209 2210 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2211 if self._match_text_seq("BY"): 2212 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2213 2214 self._match(TokenType.ALIAS) 2215 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2216 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2217 2218 return self.expression( 2219 exp.FileFormatProperty, 2220 this=( 2221 self.expression( 2222 exp.InputOutputFormat, 2223 input_format=input_format, 2224 output_format=output_format, 2225 ) 2226 if input_format or output_format 2227 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2228 ), 2229 ) 2230 2231 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2232 field = self._parse_field() 2233 if isinstance(field, exp.Identifier) and not field.quoted: 2234 field = exp.var(field) 2235 2236 return field 2237 2238 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2239 self._match(TokenType.EQ) 2240 self._match(TokenType.ALIAS) 2241 2242 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2243 2244 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2245 properties = [] 2246 while True: 2247 if before: 2248 prop = self._parse_property_before() 2249 else: 2250 prop = self._parse_property() 2251 if not prop: 2252 break 2253 for p in ensure_list(prop): 2254 properties.append(p) 2255 2256 if properties: 2257 return self.expression(exp.Properties, expressions=properties) 2258 2259 return None 2260 2261 
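    # Editorial sketch (not part of the original source): when no dedicated
    # PROPERTY_PARSERS entry matches, _parse_property above falls through to its
    # generic `key = value` branch, so a clause like format = 'PARQUET' becomes
    # exp.Property(this=exp.var("format"), value=exp.Literal.string("PARQUET")),
    # and _parse_properties collects consecutive matches into one exp.Properties node.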
def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2262 return self.expression( 2263 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2264 ) 2265 2266 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2267 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2268 security_specifier = self._prev.text.upper() 2269 return self.expression(exp.SecurityProperty, this=security_specifier) 2270 return None 2271 2272 def _parse_settings_property(self) -> exp.SettingsProperty: 2273 return self.expression( 2274 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2275 ) 2276 2277 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2278 if self._index >= 2: 2279 pre_volatile_token = self._tokens[self._index - 2] 2280 else: 2281 pre_volatile_token = None 2282 2283 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2284 return exp.VolatileProperty() 2285 2286 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2287 2288 def _parse_retention_period(self) -> exp.Var: 2289 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2290 number = self._parse_number() 2291 number_str = f"{number} " if number else "" 2292 unit = self._parse_var(any_token=True) 2293 return exp.var(f"{number_str}{unit}") 2294 2295 def _parse_system_versioning_property( 2296 self, with_: bool = False 2297 ) -> exp.WithSystemVersioningProperty: 2298 self._match(TokenType.EQ) 2299 prop = self.expression( 2300 exp.WithSystemVersioningProperty, 2301 **{ # type: ignore 2302 "on": True, 2303 "with": with_, 2304 }, 2305 ) 2306 2307 if self._match_text_seq("OFF"): 2308 prop.set("on", False) 2309 return prop 2310 2311 self._match(TokenType.ON) 2312 if self._match(TokenType.L_PAREN): 2313 while self._curr and not self._match(TokenType.R_PAREN): 2314 if self._match_text_seq("HISTORY_TABLE", "="): 2315 prop.set("this", self._parse_table_parts()) 2316 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2317 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2318 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2319 prop.set("retention_period", self._parse_retention_period()) 2320 2321 self._match(TokenType.COMMA) 2322 2323 return prop 2324 2325 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2326 self._match(TokenType.EQ) 2327 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2328 prop = self.expression(exp.DataDeletionProperty, on=on) 2329 2330 if self._match(TokenType.L_PAREN): 2331 while self._curr and not self._match(TokenType.R_PAREN): 2332 if self._match_text_seq("FILTER_COLUMN", "="): 2333 prop.set("filter_column", self._parse_column()) 2334 elif self._match_text_seq("RETENTION_PERIOD", "="): 2335 prop.set("retention_period", self._parse_retention_period()) 2336 2337 self._match(TokenType.COMMA) 2338 2339 return prop 2340 2341 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2342 kind = "HASH" 2343 expressions: t.Optional[t.List[exp.Expression]] = None 2344 if self._match_text_seq("BY", "HASH"): 2345 expressions = self._parse_wrapped_csv(self._parse_id_var) 2346 elif self._match_text_seq("BY", "RANDOM"): 2347 kind = "RANDOM" 2348 2349 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2350 buckets: t.Optional[exp.Expression] = None 2351 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2352 
buckets = self._parse_number() 2353 2354 return self.expression( 2355 exp.DistributedByProperty, 2356 expressions=expressions, 2357 kind=kind, 2358 buckets=buckets, 2359 order=self._parse_order(), 2360 ) 2361 2362 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2363 self._match_text_seq("KEY") 2364 expressions = self._parse_wrapped_id_vars() 2365 return self.expression(expr_type, expressions=expressions) 2366 2367 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2368 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2369 prop = self._parse_system_versioning_property(with_=True) 2370 self._match_r_paren() 2371 return prop 2372 2373 if self._match(TokenType.L_PAREN, advance=False): 2374 return self._parse_wrapped_properties() 2375 2376 if self._match_text_seq("JOURNAL"): 2377 return self._parse_withjournaltable() 2378 2379 if self._match_texts(self.VIEW_ATTRIBUTES): 2380 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2381 2382 if self._match_text_seq("DATA"): 2383 return self._parse_withdata(no=False) 2384 elif self._match_text_seq("NO", "DATA"): 2385 return self._parse_withdata(no=True) 2386 2387 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2388 return self._parse_serde_properties(with_=True) 2389 2390 if self._match(TokenType.SCHEMA): 2391 return self.expression( 2392 exp.WithSchemaBindingProperty, 2393 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2394 ) 2395 2396 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2397 return self.expression( 2398 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2399 ) 2400 2401 if not self._next: 2402 return None 2403 2404 return self._parse_withisolatedloading() 2405 2406 def _parse_procedure_option(self) -> exp.Expression | None: 2407 if self._match_text_seq("EXECUTE", "AS"): 2408 return self.expression( 2409 exp.ExecuteAsProperty, 2410 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2411 or self._parse_string(), 2412 ) 2413 2414 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2415 2416 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2417 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2418 self._match(TokenType.EQ) 2419 2420 user = self._parse_id_var() 2421 self._match(TokenType.PARAMETER) 2422 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2423 2424 if not user or not host: 2425 return None 2426 2427 return exp.DefinerProperty(this=f"{user}@{host}") 2428 2429 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2430 self._match(TokenType.TABLE) 2431 self._match(TokenType.EQ) 2432 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2433 2434 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2435 return self.expression(exp.LogProperty, no=no) 2436 2437 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2438 return self.expression(exp.JournalProperty, **kwargs) 2439 2440 def _parse_checksum(self) -> exp.ChecksumProperty: 2441 self._match(TokenType.EQ) 2442 2443 on = None 2444 if self._match(TokenType.ON): 2445 on = True 2446 elif self._match_text_seq("OFF"): 2447 on = False 2448 2449 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2450 2451 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2452 return self.expression( 2453 exp.Cluster, 2454 expressions=( 2455 
self._parse_wrapped_csv(self._parse_ordered) 2456 if wrapped 2457 else self._parse_csv(self._parse_ordered) 2458 ), 2459 ) 2460 2461 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2462 self._match_text_seq("BY") 2463 2464 self._match_l_paren() 2465 expressions = self._parse_csv(self._parse_column) 2466 self._match_r_paren() 2467 2468 if self._match_text_seq("SORTED", "BY"): 2469 self._match_l_paren() 2470 sorted_by = self._parse_csv(self._parse_ordered) 2471 self._match_r_paren() 2472 else: 2473 sorted_by = None 2474 2475 self._match(TokenType.INTO) 2476 buckets = self._parse_number() 2477 self._match_text_seq("BUCKETS") 2478 2479 return self.expression( 2480 exp.ClusteredByProperty, 2481 expressions=expressions, 2482 sorted_by=sorted_by, 2483 buckets=buckets, 2484 ) 2485 2486 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2487 if not self._match_text_seq("GRANTS"): 2488 self._retreat(self._index - 1) 2489 return None 2490 2491 return self.expression(exp.CopyGrantsProperty) 2492 2493 def _parse_freespace(self) -> exp.FreespaceProperty: 2494 self._match(TokenType.EQ) 2495 return self.expression( 2496 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2497 ) 2498 2499 def _parse_mergeblockratio( 2500 self, no: bool = False, default: bool = False 2501 ) -> exp.MergeBlockRatioProperty: 2502 if self._match(TokenType.EQ): 2503 return self.expression( 2504 exp.MergeBlockRatioProperty, 2505 this=self._parse_number(), 2506 percent=self._match(TokenType.PERCENT), 2507 ) 2508 2509 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2510 2511 def _parse_datablocksize( 2512 self, 2513 default: t.Optional[bool] = None, 2514 minimum: t.Optional[bool] = None, 2515 maximum: t.Optional[bool] = None, 2516 ) -> exp.DataBlocksizeProperty: 2517 self._match(TokenType.EQ) 2518 size = self._parse_number() 2519 2520 units = None 2521 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2522 units = self._prev.text 2523 2524 return self.expression( 2525 exp.DataBlocksizeProperty, 2526 size=size, 2527 units=units, 2528 default=default, 2529 minimum=minimum, 2530 maximum=maximum, 2531 ) 2532 2533 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2534 self._match(TokenType.EQ) 2535 always = self._match_text_seq("ALWAYS") 2536 manual = self._match_text_seq("MANUAL") 2537 never = self._match_text_seq("NEVER") 2538 default = self._match_text_seq("DEFAULT") 2539 2540 autotemp = None 2541 if self._match_text_seq("AUTOTEMP"): 2542 autotemp = self._parse_schema() 2543 2544 return self.expression( 2545 exp.BlockCompressionProperty, 2546 always=always, 2547 manual=manual, 2548 never=never, 2549 default=default, 2550 autotemp=autotemp, 2551 ) 2552 2553 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2554 index = self._index 2555 no = self._match_text_seq("NO") 2556 concurrent = self._match_text_seq("CONCURRENT") 2557 2558 if not self._match_text_seq("ISOLATED", "LOADING"): 2559 self._retreat(index) 2560 return None 2561 2562 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2563 return self.expression( 2564 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2565 ) 2566 2567 def _parse_locking(self) -> exp.LockingProperty: 2568 if self._match(TokenType.TABLE): 2569 kind = "TABLE" 2570 elif self._match(TokenType.VIEW): 2571 kind = "VIEW" 2572 elif self._match(TokenType.ROW): 2573 kind = "ROW" 2574 elif 
self._match_text_seq("DATABASE"): 2575 kind = "DATABASE" 2576 else: 2577 kind = None 2578 2579 if kind in ("DATABASE", "TABLE", "VIEW"): 2580 this = self._parse_table_parts() 2581 else: 2582 this = None 2583 2584 if self._match(TokenType.FOR): 2585 for_or_in = "FOR" 2586 elif self._match(TokenType.IN): 2587 for_or_in = "IN" 2588 else: 2589 for_or_in = None 2590 2591 if self._match_text_seq("ACCESS"): 2592 lock_type = "ACCESS" 2593 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2594 lock_type = "EXCLUSIVE" 2595 elif self._match_text_seq("SHARE"): 2596 lock_type = "SHARE" 2597 elif self._match_text_seq("READ"): 2598 lock_type = "READ" 2599 elif self._match_text_seq("WRITE"): 2600 lock_type = "WRITE" 2601 elif self._match_text_seq("CHECKSUM"): 2602 lock_type = "CHECKSUM" 2603 else: 2604 lock_type = None 2605 2606 override = self._match_text_seq("OVERRIDE") 2607 2608 return self.expression( 2609 exp.LockingProperty, 2610 this=this, 2611 kind=kind, 2612 for_or_in=for_or_in, 2613 lock_type=lock_type, 2614 override=override, 2615 ) 2616 2617 def _parse_partition_by(self) -> t.List[exp.Expression]: 2618 if self._match(TokenType.PARTITION_BY): 2619 return self._parse_csv(self._parse_assignment) 2620 return [] 2621 2622 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2623 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2624 if self._match_text_seq("MINVALUE"): 2625 return exp.var("MINVALUE") 2626 if self._match_text_seq("MAXVALUE"): 2627 return exp.var("MAXVALUE") 2628 return self._parse_bitwise() 2629 2630 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2631 expression = None 2632 from_expressions = None 2633 to_expressions = None 2634 2635 if self._match(TokenType.IN): 2636 this = self._parse_wrapped_csv(self._parse_bitwise) 2637 elif self._match(TokenType.FROM): 2638 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2639 self._match_text_seq("TO") 2640 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2641 elif self._match_text_seq("WITH", "(", "MODULUS"): 2642 this = self._parse_number() 2643 self._match_text_seq(",", "REMAINDER") 2644 expression = self._parse_number() 2645 self._match_r_paren() 2646 else: 2647 self.raise_error("Failed to parse partition bound spec.") 2648 2649 return self.expression( 2650 exp.PartitionBoundSpec, 2651 this=this, 2652 expression=expression, 2653 from_expressions=from_expressions, 2654 to_expressions=to_expressions, 2655 ) 2656 2657 # https://www.postgresql.org/docs/current/sql-createtable.html 2658 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2659 if not self._match_text_seq("OF"): 2660 self._retreat(self._index - 1) 2661 return None 2662 2663 this = self._parse_table(schema=True) 2664 2665 if self._match(TokenType.DEFAULT): 2666 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2667 elif self._match_text_seq("FOR", "VALUES"): 2668 expression = self._parse_partition_bound_spec() 2669 else: 2670 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2671 2672 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2673 2674 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2675 self._match(TokenType.EQ) 2676 return self.expression( 2677 exp.PartitionedByProperty, 2678 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2679 ) 2680 2681 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2682 if self._match_text_seq("AND", "STATISTICS"): 2683 
statistics = True 2684 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2685 statistics = False 2686 else: 2687 statistics = None 2688 2689 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2690 2691 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2692 if self._match_text_seq("SQL"): 2693 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2694 return None 2695 2696 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2697 if self._match_text_seq("SQL", "DATA"): 2698 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2699 return None 2700 2701 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2702 if self._match_text_seq("PRIMARY", "INDEX"): 2703 return exp.NoPrimaryIndexProperty() 2704 if self._match_text_seq("SQL"): 2705 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2706 return None 2707 2708 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2709 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2710 return exp.OnCommitProperty() 2711 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2712 return exp.OnCommitProperty(delete=True) 2713 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2714 2715 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2716 if self._match_text_seq("SQL", "DATA"): 2717 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2718 return None 2719 2720 def _parse_distkey(self) -> exp.DistKeyProperty: 2721 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2722 2723 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2724 table = self._parse_table(schema=True) 2725 2726 options = [] 2727 while self._match_texts(("INCLUDING", "EXCLUDING")): 2728 this = self._prev.text.upper() 2729 2730 id_var = self._parse_id_var() 2731 if not id_var: 2732 return None 2733 2734 options.append( 2735 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2736 ) 2737 2738 return self.expression(exp.LikeProperty, this=table, expressions=options) 2739 2740 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2741 return self.expression( 2742 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2743 ) 2744 2745 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2746 self._match(TokenType.EQ) 2747 return self.expression( 2748 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2749 ) 2750 2751 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2752 self._match_text_seq("WITH", "CONNECTION") 2753 return self.expression( 2754 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2755 ) 2756 2757 def _parse_returns(self) -> exp.ReturnsProperty: 2758 value: t.Optional[exp.Expression] 2759 null = None 2760 is_table = self._match(TokenType.TABLE) 2761 2762 if is_table: 2763 if self._match(TokenType.LT): 2764 value = self.expression( 2765 exp.Schema, 2766 this="TABLE", 2767 expressions=self._parse_csv(self._parse_struct_types), 2768 ) 2769 if not self._match(TokenType.GT): 2770 self.raise_error("Expecting >") 2771 else: 2772 value = self._parse_schema(exp.var("TABLE")) 2773 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2774 null = True 2775 value = None 2776 else: 2777 value = self._parse_types() 2778 2779 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2780 2781 def _parse_describe(self) -> exp.Describe: 2782 kind = self._match_set(self.CREATABLES) and self._prev.text 2783 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2784 if self._match(TokenType.DOT): 2785 style = None 2786 self._retreat(self._index - 2) 2787 2788 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2789 2790 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2791 this = self._parse_statement() 2792 else: 2793 this = self._parse_table(schema=True) 2794 2795 properties = self._parse_properties() 2796 expressions = properties.expressions if properties else None 2797 partition = self._parse_partition() 2798 return self.expression( 2799 exp.Describe, 2800 this=this, 2801 style=style, 2802 kind=kind, 2803 expressions=expressions, 2804 partition=partition, 2805 format=format, 2806 ) 2807 2808 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2809 kind = self._prev.text.upper() 2810 expressions = [] 2811 2812 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2813 if self._match(TokenType.WHEN): 2814 expression = self._parse_disjunction() 2815 self._match(TokenType.THEN) 2816 else: 2817 expression = None 2818 2819 else_ = self._match(TokenType.ELSE) 2820 2821 if not self._match(TokenType.INTO): 2822 return None 2823 2824 return self.expression( 2825 exp.ConditionalInsert, 2826 this=self.expression( 2827 exp.Insert, 2828 this=self._parse_table(schema=True), 2829 expression=self._parse_derived_table_values(), 2830 ), 2831 expression=expression, 2832 else_=else_, 2833 ) 2834 2835 expression = parse_conditional_insert() 2836 while expression is not None: 2837 expressions.append(expression) 2838 expression = parse_conditional_insert() 2839 2840 return self.expression( 2841 exp.MultitableInserts, 2842 kind=kind, 2843 comments=comments, 2844 expressions=expressions, 2845 source=self._parse_table(), 2846 ) 2847 2848 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2849 comments = [] 2850 hint = self._parse_hint() 2851 overwrite = self._match(TokenType.OVERWRITE) 2852 ignore = self._match(TokenType.IGNORE) 2853 local = self._match_text_seq("LOCAL") 2854 alternative = None 2855 is_function = None 2856 2857 if self._match_text_seq("DIRECTORY"): 2858 this: t.Optional[exp.Expression] = self.expression( 2859 exp.Directory, 2860 this=self._parse_var_or_string(), 2861 local=local, 2862 row_format=self._parse_row_format(match_row=True), 2863 ) 2864 else: 2865 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2866 comments += ensure_list(self._prev_comments) 2867 return self._parse_multitable_inserts(comments) 2868 2869 if self._match(TokenType.OR): 2870 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2871 2872 self._match(TokenType.INTO) 2873 comments += ensure_list(self._prev_comments) 2874 self._match(TokenType.TABLE) 2875 is_function = self._match(TokenType.FUNCTION) 2876 2877 this = ( 2878 self._parse_table(schema=True, parse_partition=True) 2879 if not is_function 2880 else self._parse_function() 2881 ) 2882 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2883 this.set("alias", self._parse_table_alias()) 2884 2885 returning = self._parse_returning() 2886 2887 return self.expression( 2888 exp.Insert, 2889 comments=comments, 2890 hint=hint, 2891 is_function=is_function, 2892 this=this, 
2893 stored=self._match_text_seq("STORED") and self._parse_stored(), 2894 by_name=self._match_text_seq("BY", "NAME"), 2895 exists=self._parse_exists(), 2896 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2897 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2898 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2899 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2900 conflict=self._parse_on_conflict(), 2901 returning=returning or self._parse_returning(), 2902 overwrite=overwrite, 2903 alternative=alternative, 2904 ignore=ignore, 2905 source=self._match(TokenType.TABLE) and self._parse_table(), 2906 ) 2907 2908 def _parse_kill(self) -> exp.Kill: 2909 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2910 2911 return self.expression( 2912 exp.Kill, 2913 this=self._parse_primary(), 2914 kind=kind, 2915 ) 2916 2917 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2918 conflict = self._match_text_seq("ON", "CONFLICT") 2919 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2920 2921 if not conflict and not duplicate: 2922 return None 2923 2924 conflict_keys = None 2925 constraint = None 2926 2927 if conflict: 2928 if self._match_text_seq("ON", "CONSTRAINT"): 2929 constraint = self._parse_id_var() 2930 elif self._match(TokenType.L_PAREN): 2931 conflict_keys = self._parse_csv(self._parse_id_var) 2932 self._match_r_paren() 2933 2934 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2935 if self._prev.token_type == TokenType.UPDATE: 2936 self._match(TokenType.SET) 2937 expressions = self._parse_csv(self._parse_equality) 2938 else: 2939 expressions = None 2940 2941 return self.expression( 2942 exp.OnConflict, 2943 duplicate=duplicate, 2944 expressions=expressions, 2945 action=action, 2946 conflict_keys=conflict_keys, 2947 constraint=constraint, 2948 where=self._parse_where(), 2949 ) 2950 2951 def _parse_returning(self) -> t.Optional[exp.Returning]: 2952 if not self._match(TokenType.RETURNING): 2953 return None 2954 return self.expression( 2955 exp.Returning, 2956 expressions=self._parse_csv(self._parse_expression), 2957 into=self._match(TokenType.INTO) and self._parse_table_part(), 2958 ) 2959 2960 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2961 if not self._match(TokenType.FORMAT): 2962 return None 2963 return self._parse_row_format() 2964 2965 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2966 index = self._index 2967 with_ = with_ or self._match_text_seq("WITH") 2968 2969 if not self._match(TokenType.SERDE_PROPERTIES): 2970 self._retreat(index) 2971 return None 2972 return self.expression( 2973 exp.SerdeProperties, 2974 **{ # type: ignore 2975 "expressions": self._parse_wrapped_properties(), 2976 "with": with_, 2977 }, 2978 ) 2979 2980 def _parse_row_format( 2981 self, match_row: bool = False 2982 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2983 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2984 return None 2985 2986 if self._match_text_seq("SERDE"): 2987 this = self._parse_string() 2988 2989 serde_properties = self._parse_serde_properties() 2990 2991 return self.expression( 2992 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2993 ) 2994 2995 self._match_text_seq("DELIMITED") 2996 2997 kwargs = {} 2998 2999 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3000 kwargs["fields"] = self._parse_string() 3001 if self._match_text_seq("ESCAPED", "BY"): 3002 kwargs["escaped"] = self._parse_string() 3003 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3004 kwargs["collection_items"] = self._parse_string() 3005 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3006 kwargs["map_keys"] = self._parse_string() 3007 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3008 kwargs["lines"] = self._parse_string() 3009 if self._match_text_seq("NULL", "DEFINED", "AS"): 3010 kwargs["null"] = self._parse_string() 3011 3012 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3013 3014 def _parse_load(self) -> exp.LoadData | exp.Command: 3015 if self._match_text_seq("DATA"): 3016 local = self._match_text_seq("LOCAL") 3017 self._match_text_seq("INPATH") 3018 inpath = self._parse_string() 3019 overwrite = self._match(TokenType.OVERWRITE) 3020 self._match_pair(TokenType.INTO, TokenType.TABLE) 3021 3022 return self.expression( 3023 exp.LoadData, 3024 this=self._parse_table(schema=True), 3025 local=local, 3026 overwrite=overwrite, 3027 inpath=inpath, 3028 partition=self._parse_partition(), 3029 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3030 serde=self._match_text_seq("SERDE") and self._parse_string(), 3031 ) 3032 return self._parse_as_command(self._prev) 3033 3034 def _parse_delete(self) -> exp.Delete: 3035 # This handles MySQL's "Multiple-Table Syntax" 3036 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3037 tables = None 3038 if not self._match(TokenType.FROM, advance=False): 3039 tables = self._parse_csv(self._parse_table) or None 3040 3041 returning = self._parse_returning() 3042 3043 return self.expression( 3044 exp.Delete, 3045 tables=tables, 3046 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3047 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3048 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3049 where=self._parse_where(), 3050 returning=returning or self._parse_returning(), 3051 limit=self._parse_limit(), 3052 ) 3053 3054 def _parse_update(self) -> exp.Update: 3055 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3056 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3057 returning = self._parse_returning() 3058 return self.expression( 3059 exp.Update, 3060 **{ # type: ignore 3061 "this": this, 3062 "expressions": expressions, 3063 "from": self._parse_from(joins=True), 3064 "where": self._parse_where(), 3065 "returning": returning or self._parse_returning(), 3066 "order": self._parse_order(), 3067 "limit": self._parse_limit(), 3068 }, 3069 ) 3070 3071 def _parse_use(self) -> exp.Use: 3072 return self.expression( 3073 exp.Use, 3074 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3075 this=self._parse_table(schema=False), 3076 ) 3077 3078 def _parse_uncache(self) -> exp.Uncache: 3079 if not self._match(TokenType.TABLE): 3080 self.raise_error("Expecting TABLE after UNCACHE") 3081 3082 return self.expression( 3083 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3084 ) 3085 3086 def _parse_cache(self) -> exp.Cache: 3087 lazy = self._match_text_seq("LAZY") 3088 self._match(TokenType.TABLE) 3089 table = self._parse_table(schema=True) 3090 3091 options = [] 3092 if self._match_text_seq("OPTIONS"): 3093 self._match_l_paren() 3094 k = 
self._parse_string() 3095 self._match(TokenType.EQ) 3096 v = self._parse_string() 3097 options = [k, v] 3098 self._match_r_paren() 3099 3100 self._match(TokenType.ALIAS) 3101 return self.expression( 3102 exp.Cache, 3103 this=table, 3104 lazy=lazy, 3105 options=options, 3106 expression=self._parse_select(nested=True), 3107 ) 3108 3109 def _parse_partition(self) -> t.Optional[exp.Partition]: 3110 if not self._match_texts(self.PARTITION_KEYWORDS): 3111 return None 3112 3113 return self.expression( 3114 exp.Partition, 3115 subpartition=self._prev.text.upper() == "SUBPARTITION", 3116 expressions=self._parse_wrapped_csv(self._parse_assignment), 3117 ) 3118 3119 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3120 def _parse_value_expression() -> t.Optional[exp.Expression]: 3121 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3122 return exp.var(self._prev.text.upper()) 3123 return self._parse_expression() 3124 3125 if self._match(TokenType.L_PAREN): 3126 expressions = self._parse_csv(_parse_value_expression) 3127 self._match_r_paren() 3128 return self.expression(exp.Tuple, expressions=expressions) 3129 3130 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3131 expression = self._parse_expression() 3132 if expression: 3133 return self.expression(exp.Tuple, expressions=[expression]) 3134 return None 3135 3136 def _parse_projections(self) -> t.List[exp.Expression]: 3137 return self._parse_expressions() 3138 3139 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3140 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3141 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3142 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3143 ) 3144 elif self._match(TokenType.FROM): 3145 from_ = self._parse_from(skip_from_token=True) 3146 # Support parentheses for duckdb FROM-first syntax 3147 select = self._parse_select() 3148 if select: 3149 select.set("from", from_) 3150 this = select 3151 else: 3152 this = exp.select("*").from_(t.cast(exp.From, from_)) 3153 else: 3154 this = ( 3155 self._parse_table() 3156 if table 3157 else self._parse_select(nested=True, parse_set_operation=False) 3158 ) 3159 3160 # Transform exp.Values into an exp.Table to pass through _parse_query_modifiers 3161 # in case a modifier (e.g.
join) follows 3162 if table and isinstance(this, exp.Values) and this.alias: 3163 alias = this.args["alias"].pop() 3164 this = exp.Table(this=this, alias=alias) 3165 3166 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3167 3168 return this 3169 3170 def _parse_select( 3171 self, 3172 nested: bool = False, 3173 table: bool = False, 3174 parse_subquery_alias: bool = True, 3175 parse_set_operation: bool = True, 3176 ) -> t.Optional[exp.Expression]: 3177 cte = self._parse_with() 3178 3179 if cte: 3180 this = self._parse_statement() 3181 3182 if not this: 3183 self.raise_error("Failed to parse any statement following CTE") 3184 return cte 3185 3186 if "with" in this.arg_types: 3187 this.set("with", cte) 3188 else: 3189 self.raise_error(f"{this.key} does not support CTE") 3190 this = cte 3191 3192 return this 3193 3194 # duckdb supports a leading FROM x 3195 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3196 3197 if self._match(TokenType.SELECT): 3198 comments = self._prev_comments 3199 3200 hint = self._parse_hint() 3201 3202 if self._next and not self._next.token_type == TokenType.DOT: 3203 all_ = self._match(TokenType.ALL) 3204 distinct = self._match_set(self.DISTINCT_TOKENS) 3205 else: 3206 all_, distinct = None, None 3207 3208 kind = ( 3209 self._match(TokenType.ALIAS) 3210 and self._match_texts(("STRUCT", "VALUE")) 3211 and self._prev.text.upper() 3212 ) 3213 3214 if distinct: 3215 distinct = self.expression( 3216 exp.Distinct, 3217 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3218 ) 3219 3220 if all_ and distinct: 3221 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3222 3223 operation_modifiers = [] 3224 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3225 operation_modifiers.append(exp.var(self._prev.text.upper())) 3226 3227 limit = self._parse_limit(top=True) 3228 projections = self._parse_projections() 3229 3230 this = self.expression( 3231 exp.Select, 3232 kind=kind, 3233 hint=hint, 3234 distinct=distinct, 3235 expressions=projections, 3236 limit=limit, 3237 operation_modifiers=operation_modifiers or None, 3238 ) 3239 this.comments = comments 3240 3241 into = self._parse_into() 3242 if into: 3243 this.set("into", into) 3244 3245 if not from_: 3246 from_ = self._parse_from() 3247 3248 if from_: 3249 this.set("from", from_) 3250 3251 this = self._parse_query_modifiers(this) 3252 elif (table or nested) and self._match(TokenType.L_PAREN): 3253 this = self._parse_wrapped_select(table=table) 3254 3255 # We return early here so that the UNION isn't attached to the subquery by the 3256 # following call to _parse_set_operations, but instead becomes the parent node 3257 self._match_r_paren() 3258 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3259 elif self._match(TokenType.VALUES, advance=False): 3260 this = self._parse_derived_table_values() 3261 elif from_: 3262 this = exp.select("*").from_(from_.this, copy=False) 3263 if self._match(TokenType.PIPE_GT, advance=False): 3264 return self._parse_pipe_syntax_query(this) 3265 elif self._match(TokenType.SUMMARIZE): 3266 table = self._match(TokenType.TABLE) 3267 this = self._parse_select() or self._parse_string() or self._parse_table() 3268 return self.expression(exp.Summarize, this=this, table=table) 3269 elif self._match(TokenType.DESCRIBE): 3270 this = self._parse_describe() 3271 elif self._match_text_seq("STREAM"): 3272 this = self._parse_function() 3273 if this: 3274 this =
self.expression(exp.Stream, this=this) 3275 else: 3276 self._retreat(self._index - 1) 3277 else: 3278 this = None 3279 3280 return self._parse_set_operations(this) if parse_set_operation else this 3281 3282 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3283 self._match_text_seq("SEARCH") 3284 3285 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3286 3287 if not kind: 3288 return None 3289 3290 self._match_text_seq("FIRST", "BY") 3291 3292 return self.expression( 3293 exp.RecursiveWithSearch, 3294 kind=kind, 3295 this=self._parse_id_var(), 3296 expression=self._match_text_seq("SET") and self._parse_id_var(), 3297 using=self._match_text_seq("USING") and self._parse_id_var(), 3298 ) 3299 3300 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3301 if not skip_with_token and not self._match(TokenType.WITH): 3302 return None 3303 3304 comments = self._prev_comments 3305 recursive = self._match(TokenType.RECURSIVE) 3306 3307 last_comments = None 3308 expressions = [] 3309 while True: 3310 cte = self._parse_cte() 3311 if isinstance(cte, exp.CTE): 3312 expressions.append(cte) 3313 if last_comments: 3314 cte.add_comments(last_comments) 3315 3316 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3317 break 3318 else: 3319 self._match(TokenType.WITH) 3320 3321 last_comments = self._prev_comments 3322 3323 return self.expression( 3324 exp.With, 3325 comments=comments, 3326 expressions=expressions, 3327 recursive=recursive, 3328 search=self._parse_recursive_with_search(), 3329 ) 3330 3331 def _parse_cte(self) -> t.Optional[exp.CTE]: 3332 index = self._index 3333 3334 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3335 if not alias or not alias.this: 3336 self.raise_error("Expected CTE to have alias") 3337 3338 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3339 self._retreat(index) 3340 return None 3341 3342 comments = self._prev_comments 3343 3344 if self._match_text_seq("NOT", "MATERIALIZED"): 3345 materialized = False 3346 elif self._match_text_seq("MATERIALIZED"): 3347 materialized = True 3348 else: 3349 materialized = None 3350 3351 cte = self.expression( 3352 exp.CTE, 3353 this=self._parse_wrapped(self._parse_statement), 3354 alias=alias, 3355 materialized=materialized, 3356 comments=comments, 3357 ) 3358 3359 if isinstance(cte.this, exp.Values): 3360 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3361 3362 return cte 3363 3364 def _parse_table_alias( 3365 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3366 ) -> t.Optional[exp.TableAlias]: 3367 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3368 # so this section tries to parse the clause version and if it fails, it treats the token 3369 # as an identifier (alias) 3370 if self._can_parse_limit_or_offset(): 3371 return None 3372 3373 any_token = self._match(TokenType.ALIAS) 3374 alias = ( 3375 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3376 or self._parse_string_as_identifier() 3377 ) 3378 3379 index = self._index 3380 if self._match(TokenType.L_PAREN): 3381 columns = self._parse_csv(self._parse_function_parameter) 3382 self._match_r_paren() if columns else self._retreat(index) 3383 else: 3384 columns = None 3385 3386 if not alias and not columns: 3387 return None 3388 3389 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3390 3391 # 

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if it fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)
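
    # Illustrative usage (not part of the original module): hints travel as comments
    # attached to the SELECT token, so _parse_hint only produces a node for dialects
    # whose tokenizer marks /*+ ... */ as a hint (e.g. Spark or Oracle). A hedged sketch:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one("SELECT /*+ REPARTITION(4) */ a FROM t", read="spark")
    #   >>> ast.args.get("hint")  # an exp.Hint when the dialect supports hints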

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
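
    # Illustrative usage (not part of the original module): the PATTERN body is
    # captured verbatim as a Var rather than being tokenized further. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one(
    #   ...     "SELECT * FROM t MATCH_RECOGNIZE (PARTITION BY a ORDER BY b "
    #   ...     "PATTERN (x y+) DEFINE x AS v > 0, y AS v <= 0) AS mr"
    #   ... )
    #   >>> ast.find(sqlglot.exp.MatchRecognize).args["pattern"].name
    #   'x y+'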

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            if table:
                return self.expression(exp.Join, this=table)
            return None

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
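
    # Illustrative usage (not part of the original module): the method/side/kind
    # tokens matched by _parse_join_parts end up as plain strings on the Join node.
    # A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").args["joins"][0]
    #   >>> join.side
    #   'LEFT'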

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
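
    # Illustrative usage (not part of the original module): _parse_index is reached
    # through CREATE INDEX, with column options collected into IndexParameters.
    # A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one("CREATE INDEX idx ON t (a, b)")
    #   >>> ast.find(sqlglot.exp.IndexParameters).args["columns"] is not None
    #   True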

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
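
    # Illustrative usage (not part of the original module): dotted names are split
    # into catalog/db/table parts. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #   >>> tbl.catalog, tbl.db, tbl.name
    #   ('c', 'd', 't')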

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
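
    # Illustrative usage (not part of the original module): in UNNEST_COLUMN_ONLY
    # dialects such as BigQuery the alias names the produced column, so it is moved
    # from TableAlias.this into TableAlias.columns. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> unnest = sqlglot.parse_one(
    #   ...     "SELECT x FROM UNNEST([1, 2]) AS x", read="bigquery"
    #   ... ).find(sqlglot.exp.Unnest)
    #   >>> [c.name for c in unnest.args["alias"].columns]
    #   ['x']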

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )
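
    # Illustrative usage (not part of the original module): DuckDB's statement-level
    # PIVOT goes through _parse_simplified_pivot; a hedged sketch using the example
    # from the DuckDB docs linked above:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one("PIVOT cities ON year USING sum(population)", read="duckdb")
    #   >>> ast.find(sqlglot.exp.Pivot) is not None
    #   True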

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()
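
    # Illustrative usage (not part of the original module): _parse_group buckets
    # GROUP BY elements into expressions/rollup/cube/grouping_sets on exp.Group.
    # A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> group = sqlglot.parse_one(
    #   ...     "SELECT a, SUM(b) FROM t GROUP BY GROUPING SETS ((a), ())"
    #   ... ).args["group"]
    #   >>> len(group.args["grouping_sets"])
    #   1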

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
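
    # Illustrative usage (not part of the original module): when NULLS FIRST/LAST is
    # not written out, nulls_first is inferred from the dialect's NULL_ORDERING
    # (the default "nulls_are_small" sorts NULLs first when ascending). A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a").find(sqlglot.exp.Ordered)
    #   >>> ordered.args["nulls_first"]
    #   True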

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
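
    # Illustrative usage (not part of the original module): MySQL's "LIMIT x, y"
    # treats x as the offset; _parse_query_modifiers then splits it into a separate
    # exp.Offset node on the SELECT. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one("SELECT a FROM t LIMIT 5, 10", read="mysql")
    #   >>> ast.args["offset"].expression.sql(), ast.args["limit"].expression.sql()
    #   ('5', '10')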
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4687 "LOCK", "IN", "SHARE", "MODE" 4688 ): 4689 update = False 4690 else: 4691 break 4692 4693 expressions = None 4694 if self._match_text_seq("OF"): 4695 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4696 4697 wait: t.Optional[bool | exp.Expression] = None 4698 if self._match_text_seq("NOWAIT"): 4699 wait = True 4700 elif self._match_text_seq("WAIT"): 4701 wait = self._parse_primary() 4702 elif self._match_text_seq("SKIP", "LOCKED"): 4703 wait = False 4704 4705 locks.append( 4706 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4707 ) 4708 4709 return locks 4710 4711 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4712 start = self._index 4713 _, side_token, kind_token = self._parse_join_parts() 4714 4715 side = side_token.text if side_token else None 4716 kind = kind_token.text if kind_token else None 4717 4718 if not self._match_set(self.SET_OPERATIONS): 4719 self._retreat(start) 4720 return None 4721 4722 token_type = self._prev.token_type 4723 4724 if token_type == TokenType.UNION: 4725 operation: t.Type[exp.SetOperation] = exp.Union 4726 elif token_type == TokenType.EXCEPT: 4727 operation = exp.Except 4728 else: 4729 operation = exp.Intersect 4730 4731 comments = self._prev.comments 4732 4733 if self._match(TokenType.DISTINCT): 4734 distinct: t.Optional[bool] = True 4735 elif self._match(TokenType.ALL): 4736 distinct = False 4737 else: 4738 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4739 if distinct is None: 4740 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4741 4742 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4743 "STRICT", "CORRESPONDING" 4744 ) 4745 if self._match_text_seq("CORRESPONDING"): 4746 by_name = True 4747 if not side and not kind: 4748 kind = "INNER" 4749 4750 on_column_list = None 4751 if by_name and self._match_texts(("ON", "BY")): 4752 on_column_list = self._parse_wrapped_csv(self._parse_column) 4753 4754 expression = self._parse_select(nested=True, parse_set_operation=False) 4755 4756 return self.expression( 4757 operation, 4758 comments=comments, 4759 this=this, 4760 distinct=distinct, 4761 by_name=by_name, 4762 expression=expression, 4763 side=side, 4764 kind=kind, 4765 on=on_column_list, 4766 ) 4767 4768 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4769 while this: 4770 setop = self.parse_set_operation(this) 4771 if not setop: 4772 break 4773 this = setop 4774 4775 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4776 expression = this.expression 4777 4778 if expression: 4779 for arg in self.SET_OP_MODIFIERS: 4780 expr = expression.args.get(arg) 4781 if expr: 4782 this.set(arg, expr.pop()) 4783 4784 return this 4785 4786 def _parse_expression(self) -> t.Optional[exp.Expression]: 4787 return self._parse_alias(self._parse_assignment()) 4788 4789 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4790 this = self._parse_disjunction() 4791 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4792 # This allows us to parse <non-identifier token> := <expr> 4793 this = exp.column( 4794 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4795 ) 4796 4797 while self._match_set(self.ASSIGNMENT): 4798 if isinstance(this, exp.Column) and len(this.parts) == 1: 4799 this = this.this 4800 4801 this = self.expression( 4802 

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(
                    exp.In, this=this, query=expressions[0].subquery(copy=False)
                )
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
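
    # Illustrative usage (not part of the original module): intervals are normalized
    # to the INTERVAL '<value>' <unit> form described in the comment above. A minimal
    # sketch:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT INTERVAL 5 day").find(sqlglot.exp.Interval).sql()
    #   "INTERVAL '5' DAY"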

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
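
    # Illustrative usage (not part of the original module): the
    # bitwise -> term -> factor -> unary chain encodes operator precedence, so
    # multiplication binds tighter than addition. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("SELECT 1 + 2 * 3").expressions[0]).__name__
    #   'Add'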

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BigQuery's inline constructor <type>(<values>),
        # e.g. STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
self._parse_user_defined_type(identifier) 5160 else: 5161 self._retreat(self._index - 1) 5162 return None 5163 else: 5164 return None 5165 5166 type_token = self._prev.token_type 5167 5168 if type_token == TokenType.PSEUDO_TYPE: 5169 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5170 5171 if type_token == TokenType.OBJECT_IDENTIFIER: 5172 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5173 5174 # https://materialize.com/docs/sql/types/map/ 5175 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5176 key_type = self._parse_types( 5177 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5178 ) 5179 if not self._match(TokenType.FARROW): 5180 self._retreat(index) 5181 return None 5182 5183 value_type = self._parse_types( 5184 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5185 ) 5186 if not self._match(TokenType.R_BRACKET): 5187 self._retreat(index) 5188 return None 5189 5190 return exp.DataType( 5191 this=exp.DataType.Type.MAP, 5192 expressions=[key_type, value_type], 5193 nested=True, 5194 prefix=prefix, 5195 ) 5196 5197 nested = type_token in self.NESTED_TYPE_TOKENS 5198 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5199 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5200 expressions = None 5201 maybe_func = False 5202 5203 if self._match(TokenType.L_PAREN): 5204 if is_struct: 5205 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5206 elif nested: 5207 expressions = self._parse_csv( 5208 lambda: self._parse_types( 5209 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5210 ) 5211 ) 5212 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5213 this = expressions[0] 5214 this.set("nullable", True) 5215 self._match_r_paren() 5216 return this 5217 elif type_token in self.ENUM_TYPE_TOKENS: 5218 expressions = self._parse_csv(self._parse_equality) 5219 elif is_aggregate: 5220 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5221 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5222 ) 5223 if not func_or_ident: 5224 return None 5225 expressions = [func_or_ident] 5226 if self._match(TokenType.COMMA): 5227 expressions.extend( 5228 self._parse_csv( 5229 lambda: self._parse_types( 5230 check_func=check_func, 5231 schema=schema, 5232 allow_identifiers=allow_identifiers, 5233 ) 5234 ) 5235 ) 5236 else: 5237 expressions = self._parse_csv(self._parse_type_size) 5238 5239 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5240 if type_token == TokenType.VECTOR and len(expressions) == 2: 5241 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5242 5243 if not expressions or not self._match(TokenType.R_PAREN): 5244 self._retreat(index) 5245 return None 5246 5247 maybe_func = True 5248 5249 values: t.Optional[t.List[exp.Expression]] = None 5250 5251 if nested and self._match(TokenType.LT): 5252 if is_struct: 5253 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5254 else: 5255 expressions = self._parse_csv( 5256 lambda: self._parse_types( 5257 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5258 ) 5259 ) 5260 5261 if not self._match(TokenType.GT): 5262 self.raise_error("Expecting >") 5263 5264 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5265 values = self._parse_csv(self._parse_assignment) 5266 if not values and is_struct: 5267 values = None 5268 
self._retreat(self._index - 1) 5269 else: 5270 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5271 5272 if type_token in self.TIMESTAMPS: 5273 if self._match_text_seq("WITH", "TIME", "ZONE"): 5274 maybe_func = False 5275 tz_type = ( 5276 exp.DataType.Type.TIMETZ 5277 if type_token in self.TIMES 5278 else exp.DataType.Type.TIMESTAMPTZ 5279 ) 5280 this = exp.DataType(this=tz_type, expressions=expressions) 5281 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5282 maybe_func = False 5283 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5284 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5285 maybe_func = False 5286 elif type_token == TokenType.INTERVAL: 5287 unit = self._parse_var(upper=True) 5288 if unit: 5289 if self._match_text_seq("TO"): 5290 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5291 5292 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5293 else: 5294 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5295 elif type_token == TokenType.VOID: 5296 this = exp.DataType(this=exp.DataType.Type.NULL) 5297 5298 if maybe_func and check_func: 5299 index2 = self._index 5300 peek = self._parse_string() 5301 5302 if not peek: 5303 self._retreat(index) 5304 return None 5305 5306 self._retreat(index2) 5307 5308 if not this: 5309 if self._match_text_seq("UNSIGNED"): 5310 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5311 if not unsigned_type_token: 5312 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5313 5314 type_token = unsigned_type_token or type_token 5315 5316 this = exp.DataType( 5317 this=exp.DataType.Type[type_token.value], 5318 expressions=expressions, 5319 nested=nested, 5320 prefix=prefix, 5321 ) 5322 5323 # Empty arrays/structs are allowed 5324 if values is not None: 5325 cls = exp.Struct if is_struct else exp.Array 5326 this = exp.cast(cls(expressions=values), this, copy=False) 5327 5328 elif expressions: 5329 this.set("expressions", expressions) 5330 5331 # https://materialize.com/docs/sql/types/list/#type-name 5332 while self._match(TokenType.LIST): 5333 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5334 5335 index = self._index 5336 5337 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5338 matched_array = self._match(TokenType.ARRAY) 5339 5340 while self._curr: 5341 datatype_token = self._prev.token_type 5342 matched_l_bracket = self._match(TokenType.L_BRACKET) 5343 5344 if (not matched_l_bracket and not matched_array) or ( 5345 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5346 ): 5347 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5348 # not to be confused with the fixed size array parsing 5349 break 5350 5351 matched_array = False 5352 values = self._parse_csv(self._parse_assignment) or None 5353 if ( 5354 values 5355 and not schema 5356 and ( 5357 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5358 ) 5359 ): 5360 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5361 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5362 self._retreat(index) 5363 break 5364 5365 this = exp.DataType( 5366 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5367 ) 5368 self._match(TokenType.R_BRACKET) 5369 5370 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5371 converter = self.TYPE_CONVERTERS.get(this.this) 5372 if converter: 5373 this = converter(t.cast(exp.DataType, this)) 5374 5375 return this 5376 5377 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5378 index = self._index 5379 5380 if ( 5381 self._curr 5382 and self._next 5383 and self._curr.token_type in self.TYPE_TOKENS 5384 and self._next.token_type in self.TYPE_TOKENS 5385 ): 5386 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5387 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5388 this = self._parse_id_var() 5389 else: 5390 this = ( 5391 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5392 or self._parse_id_var() 5393 ) 5394 5395 self._match(TokenType.COLON) 5396 5397 if ( 5398 type_required 5399 and not isinstance(this, exp.DataType) 5400 and not self._match_set(self.TYPE_TOKENS, advance=False) 5401 ): 5402 self._retreat(index) 5403 return self._parse_types() 5404 5405 return self._parse_column_def(this) 5406 5407 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5408 if not self._match_text_seq("AT", "TIME", "ZONE"): 5409 return this 5410 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5411 5412 def _parse_column(self) -> t.Optional[exp.Expression]: 5413 this = self._parse_column_reference() 5414 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5415 5416 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5417 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5418 5419 return column 5420 5421 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5422 this = self._parse_field() 5423 if ( 5424 not this 5425 and self._match(TokenType.VALUES, advance=False) 5426 and self.VALUES_FOLLOWED_BY_PAREN 5427 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5428 ): 5429 this = self._parse_id_var() 5430 5431 if isinstance(this, exp.Identifier): 5432 # We bubble up comments from the Identifier to the Column 5433 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5434 5435 return this 5436 5437 def _parse_colon_as_variant_extract( 5438 self, this: t.Optional[exp.Expression] 5439 ) -> t.Optional[exp.Expression]: 5440 casts = [] 5441 json_path = [] 5442 escape = None 5443 5444 while self._match(TokenType.COLON): 5445 start_index = self._index 5446 5447 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5448 path = self._parse_column_ops( 5449 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5450 ) 5451 5452 # The cast :: operator has a lower precedence than the extraction operator :, so 5453 # we rearrange the AST appropriately to avoid casting the JSON path 5454 while isinstance(path, exp.Cast): 5455 casts.append(path.to) 5456 path = path.this 5457 5458 if casts: 5459 dcolon_offset = next( 5460 i 5461 for i, t in enumerate(self._tokens[start_index:]) 5462 if t.token_type == TokenType.DCOLON 
5463 ) 5464 end_token = self._tokens[start_index + dcolon_offset - 1] 5465 else: 5466 end_token = self._prev 5467 5468 if path: 5469 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5470 # it'll roundtrip to a string literal in GET_PATH 5471 if isinstance(path, exp.Identifier) and path.quoted: 5472 escape = True 5473 5474 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5475 5476 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5477 # Databricks transforms it back to the colon/dot notation 5478 if json_path: 5479 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5480 5481 if json_path_expr: 5482 json_path_expr.set("escape", escape) 5483 5484 this = self.expression( 5485 exp.JSONExtract, 5486 this=this, 5487 expression=json_path_expr, 5488 variant_extract=True, 5489 ) 5490 5491 while casts: 5492 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5493 5494 return this 5495 5496 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5497 return self._parse_types() 5498 5499 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5500 this = self._parse_bracket(this) 5501 5502 while self._match_set(self.COLUMN_OPERATORS): 5503 op_token = self._prev.token_type 5504 op = self.COLUMN_OPERATORS.get(op_token) 5505 5506 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5507 field = self._parse_dcolon() 5508 if not field: 5509 self.raise_error("Expected type") 5510 elif op and self._curr: 5511 field = self._parse_column_reference() or self._parse_bracket() 5512 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5513 field = self._parse_column_ops(field) 5514 else: 5515 field = self._parse_field(any_token=True, anonymous_func=True) 5516 5517 # Function calls can be qualified, e.g., x.y.FOO() 5518 # This converts the final AST to a series of Dots leading to the function call 5519 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5520 if isinstance(field, (exp.Func, exp.Window)) and this: 5521 this = this.transform( 5522 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5523 ) 5524 5525 if op: 5526 this = op(self, this, field) 5527 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5528 this = self.expression( 5529 exp.Column, 5530 comments=this.comments, 5531 this=field, 5532 table=this.this, 5533 db=this.args.get("table"), 5534 catalog=this.args.get("db"), 5535 ) 5536 elif isinstance(field, exp.Window): 5537 # Move the exp.Dot's to the window's function 5538 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5539 field.set("this", window_func) 5540 this = field 5541 else: 5542 this = self.expression(exp.Dot, this=this, expression=field) 5543 5544 if field and field.comments: 5545 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5546 5547 this = self._parse_bracket(this) 5548 5549 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5550 5551 def _parse_paren(self) -> t.Optional[exp.Expression]: 5552 if not self._match(TokenType.L_PAREN): 5553 return None 5554 5555 comments = self._prev_comments 5556 query = self._parse_select() 5557 5558 if query: 5559 expressions = [query] 5560 else: 5561 expressions = self._parse_expressions() 5562 5563 this = self._parse_query_modifiers(seq_get(expressions, 0)) 
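        # Illustrative note (not part of the upstream source) on the branches below:
        #   ()         -> exp.Tuple (empty tuple)
        #   (SELECT 1) -> subquery handling, since SELECT is an unwrapped query
        #   (1, 2)     -> exp.Tuple
        #   (1)        -> exp.Paren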
5564 5565 if not this and self._match(TokenType.R_PAREN, advance=False): 5566 this = self.expression(exp.Tuple) 5567 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5568 this = self._parse_subquery(this=this, parse_alias=False) 5569 elif isinstance(this, exp.Subquery): 5570 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5571 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5572 this = self.expression(exp.Tuple, expressions=expressions) 5573 else: 5574 this = self.expression(exp.Paren, this=this) 5575 5576 if this: 5577 this.add_comments(comments) 5578 5579 self._match_r_paren(expression=this) 5580 return this 5581 5582 def _parse_primary(self) -> t.Optional[exp.Expression]: 5583 if self._match_set(self.PRIMARY_PARSERS): 5584 token_type = self._prev.token_type 5585 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5586 5587 if token_type == TokenType.STRING: 5588 expressions = [primary] 5589 while self._match(TokenType.STRING): 5590 expressions.append(exp.Literal.string(self._prev.text)) 5591 5592 if len(expressions) > 1: 5593 return self.expression(exp.Concat, expressions=expressions) 5594 5595 return primary 5596 5597 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5598 return exp.Literal.number(f"0.{self._prev.text}") 5599 5600 return self._parse_paren() 5601 5602 def _parse_field( 5603 self, 5604 any_token: bool = False, 5605 tokens: t.Optional[t.Collection[TokenType]] = None, 5606 anonymous_func: bool = False, 5607 ) -> t.Optional[exp.Expression]: 5608 if anonymous_func: 5609 field = ( 5610 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5611 or self._parse_primary() 5612 ) 5613 else: 5614 field = self._parse_primary() or self._parse_function( 5615 anonymous=anonymous_func, any_token=any_token 5616 ) 5617 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5618 5619 def _parse_function( 5620 self, 5621 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5622 anonymous: bool = False, 5623 optional_parens: bool = True, 5624 any_token: bool = False, 5625 ) -> t.Optional[exp.Expression]: 5626 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5627 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5628 fn_syntax = False 5629 if ( 5630 self._match(TokenType.L_BRACE, advance=False) 5631 and self._next 5632 and self._next.text.upper() == "FN" 5633 ): 5634 self._advance(2) 5635 fn_syntax = True 5636 5637 func = self._parse_function_call( 5638 functions=functions, 5639 anonymous=anonymous, 5640 optional_parens=optional_parens, 5641 any_token=any_token, 5642 ) 5643 5644 if fn_syntax: 5645 self._match(TokenType.R_BRACE) 5646 5647 return func 5648 5649 def _parse_function_call( 5650 self, 5651 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5652 anonymous: bool = False, 5653 optional_parens: bool = True, 5654 any_token: bool = False, 5655 ) -> t.Optional[exp.Expression]: 5656 if not self._curr: 5657 return None 5658 5659 comments = self._curr.comments 5660 token = self._curr 5661 token_type = self._curr.token_type 5662 this = self._curr.text 5663 upper = this.upper() 5664 5665 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5666 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5667 self._advance() 5668 return self._parse_window(parser(self)) 5669 5670 if not self._next or self._next.token_type != TokenType.L_PAREN: 5671 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5672 
self._advance() 5673 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5674 5675 return None 5676 5677 if any_token: 5678 if token_type in self.RESERVED_TOKENS: 5679 return None 5680 elif token_type not in self.FUNC_TOKENS: 5681 return None 5682 5683 self._advance(2) 5684 5685 parser = self.FUNCTION_PARSERS.get(upper) 5686 if parser and not anonymous: 5687 this = parser(self) 5688 else: 5689 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5690 5691 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5692 this = self.expression( 5693 subquery_predicate, comments=comments, this=self._parse_select() 5694 ) 5695 self._match_r_paren() 5696 return this 5697 5698 if functions is None: 5699 functions = self.FUNCTIONS 5700 5701 function = functions.get(upper) 5702 known_function = function and not anonymous 5703 5704 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5705 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5706 5707 post_func_comments = self._curr and self._curr.comments 5708 if known_function and post_func_comments: 5709 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5710 # call we'll construct it as exp.Anonymous, even if it's "known" 5711 if any( 5712 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5713 for comment in post_func_comments 5714 ): 5715 known_function = False 5716 5717 if alias and known_function: 5718 args = self._kv_to_prop_eq(args) 5719 5720 if known_function: 5721 func_builder = t.cast(t.Callable, function) 5722 5723 if "dialect" in func_builder.__code__.co_varnames: 5724 func = func_builder(args, dialect=self.dialect) 5725 else: 5726 func = func_builder(args) 5727 5728 func = self.validate_expression(func, args) 5729 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5730 func.meta["name"] = this 5731 5732 this = func 5733 else: 5734 if token_type == TokenType.IDENTIFIER: 5735 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5736 5737 this = self.expression(exp.Anonymous, this=this, expressions=args) 5738 this = this.update_positions(token) 5739 5740 if isinstance(this, exp.Expression): 5741 this.add_comments(comments) 5742 5743 self._match_r_paren(this) 5744 return self._parse_window(this) 5745 5746 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5747 return expression 5748 5749 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5750 transformed = [] 5751 5752 for index, e in enumerate(expressions): 5753 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5754 if isinstance(e, exp.Alias): 5755 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5756 5757 if not isinstance(e, exp.PropertyEQ): 5758 e = self.expression( 5759 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5760 ) 5761 5762 if isinstance(e.this, exp.Column): 5763 e.this.replace(e.this.this) 5764 else: 5765 e = self._to_prop_eq(e, index) 5766 5767 transformed.append(e) 5768 5769 return transformed 5770 5771 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5772 return self._parse_statement() 5773 5774 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5775 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5776 5777 def _parse_user_defined_function( 5778 self, kind: t.Optional[TokenType] = None 5779 ) -> t.Optional[exp.Expression]: 5780 this = 
self._parse_table_parts(schema=True) 5781 5782 if not self._match(TokenType.L_PAREN): 5783 return this 5784 5785 expressions = self._parse_csv(self._parse_function_parameter) 5786 self._match_r_paren() 5787 return self.expression( 5788 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5789 ) 5790 5791 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5792 literal = self._parse_primary() 5793 if literal: 5794 return self.expression(exp.Introducer, this=token.text, expression=literal) 5795 5796 return self._identifier_expression(token) 5797 5798 def _parse_session_parameter(self) -> exp.SessionParameter: 5799 kind = None 5800 this = self._parse_id_var() or self._parse_primary() 5801 5802 if this and self._match(TokenType.DOT): 5803 kind = this.name 5804 this = self._parse_var() or self._parse_primary() 5805 5806 return self.expression(exp.SessionParameter, this=this, kind=kind) 5807 5808 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5809 return self._parse_id_var() 5810 5811 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5812 index = self._index 5813 5814 if self._match(TokenType.L_PAREN): 5815 expressions = t.cast( 5816 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5817 ) 5818 5819 if not self._match(TokenType.R_PAREN): 5820 self._retreat(index) 5821 else: 5822 expressions = [self._parse_lambda_arg()] 5823 5824 if self._match_set(self.LAMBDAS): 5825 return self.LAMBDAS[self._prev.token_type](self, expressions) 5826 5827 self._retreat(index) 5828 5829 this: t.Optional[exp.Expression] 5830 5831 if self._match(TokenType.DISTINCT): 5832 this = self.expression( 5833 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5834 ) 5835 else: 5836 this = self._parse_select_or_expression(alias=alias) 5837 5838 return self._parse_limit( 5839 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5840 ) 5841 5842 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5843 index = self._index 5844 if not self._match(TokenType.L_PAREN): 5845 return this 5846 5847 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5848 # expr can be of both types 5849 if self._match_set(self.SELECT_START_TOKENS): 5850 self._retreat(index) 5851 return this 5852 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5853 self._match_r_paren() 5854 return self.expression(exp.Schema, this=this, expressions=args) 5855 5856 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5857 return self._parse_column_def(self._parse_field(any_token=True)) 5858 5859 def _parse_column_def( 5860 self, this: t.Optional[exp.Expression], computed_column: bool = True 5861 ) -> t.Optional[exp.Expression]: 5862 # column defs are not really columns, they're identifiers 5863 if isinstance(this, exp.Column): 5864 this = this.this 5865 5866 if not computed_column: 5867 self._match(TokenType.ALIAS) 5868 5869 kind = self._parse_types(schema=True) 5870 5871 if self._match_text_seq("FOR", "ORDINALITY"): 5872 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5873 5874 constraints: t.List[exp.Expression] = [] 5875 5876 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5877 ("ALIAS", "MATERIALIZED") 5878 ): 5879 persisted = self._prev.text.upper() == "MATERIALIZED" 5880 constraint_kind = exp.ComputedColumnConstraint( 5881 this=self._parse_assignment(), 5882 persisted=persisted or self._match_text_seq("PERSISTED"), 5883 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5884 ) 5885 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5886 elif ( 5887 kind 5888 and self._match(TokenType.ALIAS, advance=False) 5889 and ( 5890 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5891 or (self._next and self._next.token_type == TokenType.L_PAREN) 5892 ) 5893 ): 5894 self._advance() 5895 constraints.append( 5896 self.expression( 5897 exp.ColumnConstraint, 5898 kind=exp.ComputedColumnConstraint( 5899 this=self._parse_disjunction(), 5900 persisted=self._match_texts(("STORED", "VIRTUAL")) 5901 and self._prev.text.upper() == "STORED", 5902 ), 5903 ) 5904 ) 5905 5906 while True: 5907 constraint = self._parse_column_constraint() 5908 if not constraint: 5909 break 5910 constraints.append(constraint) 5911 5912 if not kind and not constraints: 5913 return this 5914 5915 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5916 5917 def _parse_auto_increment( 5918 self, 5919 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5920 start = None 5921 increment = None 5922 order = None 5923 5924 if self._match(TokenType.L_PAREN, advance=False): 5925 args = self._parse_wrapped_csv(self._parse_bitwise) 5926 start = seq_get(args, 0) 5927 increment = seq_get(args, 1) 5928 elif self._match_text_seq("START"): 5929 start = self._parse_bitwise() 5930 self._match_text_seq("INCREMENT") 5931 increment = self._parse_bitwise() 5932 if self._match_text_seq("ORDER"): 5933 order = True 5934 elif self._match_text_seq("NOORDER"): 5935 order = False 5936 5937 if start and increment: 5938 return exp.GeneratedAsIdentityColumnConstraint( 5939 start=start, increment=increment, this=False, order=order 5940 ) 5941 5942 return exp.AutoIncrementColumnConstraint() 5943 5944 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5945 if not self._match_text_seq("REFRESH"): 5946 self._retreat(self._index - 1) 5947 return None 5948 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5949 5950 def _parse_compress(self) -> exp.CompressColumnConstraint: 5951 if 
self._match(TokenType.L_PAREN, advance=False): 5952 return self.expression( 5953 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5954 ) 5955 5956 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5957 5958 def _parse_generated_as_identity( 5959 self, 5960 ) -> ( 5961 exp.GeneratedAsIdentityColumnConstraint 5962 | exp.ComputedColumnConstraint 5963 | exp.GeneratedAsRowColumnConstraint 5964 ): 5965 if self._match_text_seq("BY", "DEFAULT"): 5966 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5967 this = self.expression( 5968 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5969 ) 5970 else: 5971 self._match_text_seq("ALWAYS") 5972 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5973 5974 self._match(TokenType.ALIAS) 5975 5976 if self._match_text_seq("ROW"): 5977 start = self._match_text_seq("START") 5978 if not start: 5979 self._match(TokenType.END) 5980 hidden = self._match_text_seq("HIDDEN") 5981 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5982 5983 identity = self._match_text_seq("IDENTITY") 5984 5985 if self._match(TokenType.L_PAREN): 5986 if self._match(TokenType.START_WITH): 5987 this.set("start", self._parse_bitwise()) 5988 if self._match_text_seq("INCREMENT", "BY"): 5989 this.set("increment", self._parse_bitwise()) 5990 if self._match_text_seq("MINVALUE"): 5991 this.set("minvalue", self._parse_bitwise()) 5992 if self._match_text_seq("MAXVALUE"): 5993 this.set("maxvalue", self._parse_bitwise()) 5994 5995 if self._match_text_seq("CYCLE"): 5996 this.set("cycle", True) 5997 elif self._match_text_seq("NO", "CYCLE"): 5998 this.set("cycle", False) 5999 6000 if not identity: 6001 this.set("expression", self._parse_range()) 6002 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6003 args = self._parse_csv(self._parse_bitwise) 6004 this.set("start", seq_get(args, 0)) 6005 this.set("increment", seq_get(args, 1)) 6006 6007 self._match_r_paren() 6008 6009 return this 6010 6011 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6012 self._match_text_seq("LENGTH") 6013 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6014 6015 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6016 if self._match_text_seq("NULL"): 6017 return self.expression(exp.NotNullColumnConstraint) 6018 if self._match_text_seq("CASESPECIFIC"): 6019 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6020 if self._match_text_seq("FOR", "REPLICATION"): 6021 return self.expression(exp.NotForReplicationColumnConstraint) 6022 6023 # Unconsume the `NOT` token 6024 self._retreat(self._index - 1) 6025 return None 6026 6027 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6028 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6029 6030 procedure_option_follows = ( 6031 self._match(TokenType.WITH, advance=False) 6032 and self._next 6033 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6034 ) 6035 6036 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6037 return self.expression( 6038 exp.ColumnConstraint, 6039 this=this, 6040 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6041 ) 6042 6043 return this 6044 6045 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6046 if not self._match(TokenType.CONSTRAINT): 6047 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6048 6049 return self.expression( 6050 exp.Constraint, 6051 this=self._parse_id_var(), 6052 expressions=self._parse_unnamed_constraints(), 6053 ) 6054 6055 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6056 constraints = [] 6057 while True: 6058 constraint = self._parse_unnamed_constraint() or self._parse_function() 6059 if not constraint: 6060 break 6061 constraints.append(constraint) 6062 6063 return constraints 6064 6065 def _parse_unnamed_constraint( 6066 self, constraints: t.Optional[t.Collection[str]] = None 6067 ) -> t.Optional[exp.Expression]: 6068 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6069 constraints or self.CONSTRAINT_PARSERS 6070 ): 6071 return None 6072 6073 constraint = self._prev.text.upper() 6074 if constraint not in self.CONSTRAINT_PARSERS: 6075 self.raise_error(f"No parser found for schema constraint {constraint}.") 6076 6077 return self.CONSTRAINT_PARSERS[constraint](self) 6078 6079 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6080 return self._parse_id_var(any_token=False) 6081 6082 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6083 self._match_text_seq("KEY") 6084 return self.expression( 6085 exp.UniqueColumnConstraint, 6086 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6087 this=self._parse_schema(self._parse_unique_key()), 6088 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6089 on_conflict=self._parse_on_conflict(), 6090 options=self._parse_key_constraint_options(), 6091 ) 6092 6093 def _parse_key_constraint_options(self) -> t.List[str]: 6094 options = [] 6095 while True: 6096 if not self._curr: 6097 break 6098 6099 if self._match(TokenType.ON): 6100 action = None 6101 on = self._advance_any() and self._prev.text 6102 6103 if self._match_text_seq("NO", "ACTION"): 6104 action = "NO ACTION" 6105 elif self._match_text_seq("CASCADE"): 6106 action = "CASCADE" 6107 elif self._match_text_seq("RESTRICT"): 6108 action = "RESTRICT" 6109 elif self._match_pair(TokenType.SET, TokenType.NULL): 6110 action = "SET NULL" 6111 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6112 action = "SET DEFAULT" 6113 else: 6114 self.raise_error("Invalid key constraint") 6115 6116 options.append(f"ON {on} {action}") 6117 else: 6118 var = self._parse_var_from_options( 6119 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6120 ) 6121 if not var: 6122 break 6123 options.append(var.name) 6124 6125 return options 6126 6127 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6128 if match and not self._match(TokenType.REFERENCES): 6129 return None 6130 6131 expressions = None 6132 this = self._parse_table(schema=True) 6133 options = self._parse_key_constraint_options() 6134 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6135 6136 def _parse_foreign_key(self) -> exp.ForeignKey: 6137 expressions = ( 6138 self._parse_wrapped_id_vars() 6139 if not self._match(TokenType.REFERENCES, advance=False) 6140 else None 6141 ) 6142 reference = self._parse_references() 6143 on_options = {} 6144 6145 while self._match(TokenType.ON): 6146 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6147 self.raise_error("Expected DELETE or UPDATE") 6148 6149 kind = self._prev.text.lower() 6150 6151 if self._match_text_seq("NO", "ACTION"): 6152 action = "NO ACTION" 6153 elif self._match(TokenType.SET): 6154 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6155 action = "SET " + self._prev.text.upper() 6156 else: 6157 self._advance() 6158 action = self._prev.text.upper() 6159 6160 on_options[kind] = action 6161 6162 return self.expression( 6163 exp.ForeignKey, 6164 expressions=expressions, 6165 reference=reference, 6166 options=self._parse_key_constraint_options(), 6167 **on_options, # type: ignore 6168 ) 6169 6170 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6171 return self._parse_ordered() or self._parse_field() 6172 6173 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6174 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6175 self._retreat(self._index - 1) 6176 return None 6177 6178 id_vars = self._parse_wrapped_id_vars() 6179 return self.expression( 6180 exp.PeriodForSystemTimeConstraint, 6181 this=seq_get(id_vars, 0), 6182 expression=seq_get(id_vars, 1), 6183 ) 6184 6185 def _parse_primary_key( 6186 self, wrapped_optional: bool = False, in_props: bool = False 6187 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6188 desc = ( 6189 self._match_set((TokenType.ASC, TokenType.DESC)) 6190 and self._prev.token_type == TokenType.DESC 6191 ) 6192 6193 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6194 return self.expression( 6195 exp.PrimaryKeyColumnConstraint, 6196 desc=desc, 6197 options=self._parse_key_constraint_options(), 6198 ) 6199 6200 expressions = self._parse_wrapped_csv( 6201 self._parse_primary_key_part, optional=wrapped_optional 6202 ) 6203 options = self._parse_key_constraint_options() 6204 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6205 6206 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6207 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6208 6209 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6210 """ 6211 Parses a datetime column in ODBC format. We parse the column into the corresponding 6212 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6213 same as we did for `DATE('yyyy-mm-dd')`. 
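        As an illustrative sketch of the mapping (driven by ODBC_DATETIME_LITERALS):
        `{d'...'}` builds an exp.Date, `{t'...'}` an exp.Time and `{ts'...'}` an
        exp.Timestamp node.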
6214 6215 Reference: 6216 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6217 """ 6218 self._match(TokenType.VAR) 6219 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6220 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6221 if not self._match(TokenType.R_BRACE): 6222 self.raise_error("Expected }") 6223 return expression 6224 6225 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6226 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6227 return this 6228 6229 bracket_kind = self._prev.token_type 6230 if ( 6231 bracket_kind == TokenType.L_BRACE 6232 and self._curr 6233 and self._curr.token_type == TokenType.VAR 6234 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6235 ): 6236 return self._parse_odbc_datetime_literal() 6237 6238 expressions = self._parse_csv( 6239 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6240 ) 6241 6242 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6243 self.raise_error("Expected ]") 6244 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6245 self.raise_error("Expected }") 6246 6247 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6248 if bracket_kind == TokenType.L_BRACE: 6249 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6250 elif not this: 6251 this = build_array_constructor( 6252 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6253 ) 6254 else: 6255 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6256 if constructor_type: 6257 return build_array_constructor( 6258 constructor_type, 6259 args=expressions, 6260 bracket_kind=bracket_kind, 6261 dialect=self.dialect, 6262 ) 6263 6264 expressions = apply_index_offset( 6265 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6266 ) 6267 this = self.expression( 6268 exp.Bracket, 6269 this=this, 6270 expressions=expressions, 6271 comments=this.pop_comments(), 6272 ) 6273 6274 self._add_comments(this) 6275 return self._parse_bracket(this) 6276 6277 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6278 if self._match(TokenType.COLON): 6279 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6280 return this 6281 6282 def _parse_case(self) -> t.Optional[exp.Expression]: 6283 ifs = [] 6284 default = None 6285 6286 comments = self._prev_comments 6287 expression = self._parse_assignment() 6288 6289 while self._match(TokenType.WHEN): 6290 this = self._parse_assignment() 6291 self._match(TokenType.THEN) 6292 then = self._parse_assignment() 6293 ifs.append(self.expression(exp.If, this=this, true=then)) 6294 6295 if self._match(TokenType.ELSE): 6296 default = self._parse_assignment() 6297 6298 if not self._match(TokenType.END): 6299 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6300 default = exp.column("interval") 6301 else: 6302 self.raise_error("Expected END after CASE", self._prev) 6303 6304 return self.expression( 6305 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6306 ) 6307 6308 def _parse_if(self) -> t.Optional[exp.Expression]: 6309 if self._match(TokenType.L_PAREN): 6310 args = self._parse_csv( 6311 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6312 ) 6313 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6314 self._match_r_paren() 6315 else: 6316 index = self._index - 1 6317 6318 if self.NO_PAREN_IF_COMMANDS and index == 0: 6319 return self._parse_as_command(self._prev) 6320 6321 condition = self._parse_assignment() 6322 6323 if not condition: 6324 self._retreat(index) 6325 return None 6326 6327 self._match(TokenType.THEN) 6328 true = self._parse_assignment() 6329 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6330 self._match(TokenType.END) 6331 this = self.expression(exp.If, this=condition, true=true, false=false) 6332 6333 return this 6334 6335 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6336 if not self._match_text_seq("VALUE", "FOR"): 6337 self._retreat(self._index - 1) 6338 return None 6339 6340 return self.expression( 6341 exp.NextValueFor, 6342 this=self._parse_column(), 6343 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6344 ) 6345 6346 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6347 this = self._parse_function() or self._parse_var_or_string(upper=True) 6348 6349 if self._match(TokenType.FROM): 6350 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6351 6352 if not self._match(TokenType.COMMA): 6353 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6354 6355 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6356 6357 def _parse_gap_fill(self) -> exp.GapFill: 6358 self._match(TokenType.TABLE) 6359 this = self._parse_table() 6360 6361 self._match(TokenType.COMMA) 6362 args = [this, *self._parse_csv(self._parse_lambda)] 6363 6364 gap_fill = exp.GapFill.from_arg_list(args) 6365 return self.validate_expression(gap_fill, args) 6366 6367 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6368 this = self._parse_assignment() 6369 6370 if not self._match(TokenType.ALIAS): 6371 if self._match(TokenType.COMMA): 6372 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6373 6374 self.raise_error("Expected AS after CAST") 6375 6376 fmt = None 6377 to = self._parse_types() 6378 6379 default = self._match(TokenType.DEFAULT) 6380 if default: 6381 default = self._parse_bitwise() 6382 self._match_text_seq("ON", "CONVERSION", "ERROR") 6383 6384 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6385 fmt_string = self._parse_string() 6386 fmt = self._parse_at_time_zone(fmt_string) 6387 6388 if not to: 6389 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6390 if to.this in exp.DataType.TEMPORAL_TYPES: 6391 this = self.expression( 6392 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6393 this=this, 6394 format=exp.Literal.string( 6395 format_time( 6396 fmt_string.this if fmt_string else "", 6397 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6398 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6399 ) 6400 ), 6401 safe=safe, 6402 ) 6403 6404 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6405 this.set("zone", fmt.args["zone"]) 6406 return this 6407 elif not to: 6408 self.raise_error("Expected TYPE after CAST") 6409 elif isinstance(to, exp.Identifier): 6410 to = exp.DataType.build(to.name, udt=True) 6411 elif to.this == exp.DataType.Type.CHAR: 6412 if self._match(TokenType.CHARACTER_SET): 6413 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6414 6415 return self.expression( 6416 exp.Cast if strict else exp.TryCast, 6417 
this=this, 6418 to=to, 6419 format=fmt, 6420 safe=safe, 6421 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6422 default=default, 6423 ) 6424 6425 def _parse_string_agg(self) -> exp.GroupConcat: 6426 if self._match(TokenType.DISTINCT): 6427 args: t.List[t.Optional[exp.Expression]] = [ 6428 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6429 ] 6430 if self._match(TokenType.COMMA): 6431 args.extend(self._parse_csv(self._parse_assignment)) 6432 else: 6433 args = self._parse_csv(self._parse_assignment) # type: ignore 6434 6435 if self._match_text_seq("ON", "OVERFLOW"): 6436 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6437 if self._match_text_seq("ERROR"): 6438 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6439 else: 6440 self._match_text_seq("TRUNCATE") 6441 on_overflow = self.expression( 6442 exp.OverflowTruncateBehavior, 6443 this=self._parse_string(), 6444 with_count=( 6445 self._match_text_seq("WITH", "COUNT") 6446 or not self._match_text_seq("WITHOUT", "COUNT") 6447 ), 6448 ) 6449 else: 6450 on_overflow = None 6451 6452 index = self._index 6453 if not self._match(TokenType.R_PAREN) and args: 6454 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6455 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6456 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6457 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6458 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6459 6460 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6461 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6462 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
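        # Illustrative example (not part of the upstream source): assuming the corresponding
        # dialects, both of the following end up as the same exp.GroupConcat node, which is
        # what makes transpiling between them straightforward:
        #   Postgres: STRING_AGG(x, ',' ORDER BY y)
        #   Trino:    LISTAGG(x, ',') WITHIN GROUP (ORDER BY y)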
6463 if not self._match_text_seq("WITHIN", "GROUP"): 6464 self._retreat(index) 6465 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6466 6467 # The corresponding match_r_paren will be called in parse_function (caller) 6468 self._match_l_paren() 6469 6470 return self.expression( 6471 exp.GroupConcat, 6472 this=self._parse_order(this=seq_get(args, 0)), 6473 separator=seq_get(args, 1), 6474 on_overflow=on_overflow, 6475 ) 6476 6477 def _parse_convert( 6478 self, strict: bool, safe: t.Optional[bool] = None 6479 ) -> t.Optional[exp.Expression]: 6480 this = self._parse_bitwise() 6481 6482 if self._match(TokenType.USING): 6483 to: t.Optional[exp.Expression] = self.expression( 6484 exp.CharacterSet, this=self._parse_var() 6485 ) 6486 elif self._match(TokenType.COMMA): 6487 to = self._parse_types() 6488 else: 6489 to = None 6490 6491 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6492 6493 def _parse_xml_table(self) -> exp.XMLTable: 6494 namespaces = None 6495 passing = None 6496 columns = None 6497 6498 if self._match_text_seq("XMLNAMESPACES", "("): 6499 namespaces = self._parse_xml_namespace() 6500 self._match_text_seq(")", ",") 6501 6502 this = self._parse_string() 6503 6504 if self._match_text_seq("PASSING"): 6505 # The BY VALUE keywords are optional and are provided for semantic clarity 6506 self._match_text_seq("BY", "VALUE") 6507 passing = self._parse_csv(self._parse_column) 6508 6509 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6510 6511 if self._match_text_seq("COLUMNS"): 6512 columns = self._parse_csv(self._parse_field_def) 6513 6514 return self.expression( 6515 exp.XMLTable, 6516 this=this, 6517 namespaces=namespaces, 6518 passing=passing, 6519 columns=columns, 6520 by_ref=by_ref, 6521 ) 6522 6523 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6524 namespaces = [] 6525 6526 while True: 6527 if self._match(TokenType.DEFAULT): 6528 uri = self._parse_string() 6529 else: 6530 uri = self._parse_alias(self._parse_string()) 6531 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6532 if not self._match(TokenType.COMMA): 6533 break 6534 6535 return namespaces 6536 6537 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6538 """ 6539 There are generally two variants of the DECODE function: 6540 6541 - DECODE(bin, charset) 6542 - DECODE(expression, search, result [, search, result] ... [, default]) 6543 6544 The second variant will always be parsed into a CASE expression. Note that NULL 6545 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6546 instead of relying on pattern matching. 
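        For example (illustrative, not part of the upstream docstring),

            DECODE(x, 1, 'one', NULL, 'none', 'other')

        is parsed as

            CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END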
6547 """ 6548 args = self._parse_csv(self._parse_assignment) 6549 6550 if len(args) < 3: 6551 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6552 6553 expression, *expressions = args 6554 if not expression: 6555 return None 6556 6557 ifs = [] 6558 for search, result in zip(expressions[::2], expressions[1::2]): 6559 if not search or not result: 6560 return None 6561 6562 if isinstance(search, exp.Literal): 6563 ifs.append( 6564 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6565 ) 6566 elif isinstance(search, exp.Null): 6567 ifs.append( 6568 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6569 ) 6570 else: 6571 cond = exp.or_( 6572 exp.EQ(this=expression.copy(), expression=search), 6573 exp.and_( 6574 exp.Is(this=expression.copy(), expression=exp.Null()), 6575 exp.Is(this=search.copy(), expression=exp.Null()), 6576 copy=False, 6577 ), 6578 copy=False, 6579 ) 6580 ifs.append(exp.If(this=cond, true=result)) 6581 6582 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6583 6584 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6585 self._match_text_seq("KEY") 6586 key = self._parse_column() 6587 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6588 self._match_text_seq("VALUE") 6589 value = self._parse_bitwise() 6590 6591 if not key and not value: 6592 return None 6593 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6594 6595 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6596 if not this or not self._match_text_seq("FORMAT", "JSON"): 6597 return this 6598 6599 return self.expression(exp.FormatJson, this=this) 6600 6601 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6602 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6603 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6604 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6605 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6606 else: 6607 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6608 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6609 6610 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6611 6612 if not empty and not error and not null: 6613 return None 6614 6615 return self.expression( 6616 exp.OnCondition, 6617 empty=empty, 6618 error=error, 6619 null=null, 6620 ) 6621 6622 def _parse_on_handling( 6623 self, on: str, *values: str 6624 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6625 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6626 for value in values: 6627 if self._match_text_seq(value, "ON", on): 6628 return f"{value} ON {on}" 6629 6630 index = self._index 6631 if self._match(TokenType.DEFAULT): 6632 default_value = self._parse_bitwise() 6633 if self._match_text_seq("ON", on): 6634 return default_value 6635 6636 self._retreat(index) 6637 6638 return None 6639 6640 @t.overload 6641 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6642 6643 @t.overload 6644 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6645 6646 def _parse_json_object(self, agg=False): 6647 star = self._parse_star() 6648 expressions = ( 6649 [star] 6650 if star 6651 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6652 ) 6653 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6654 6655 unique_keys = None 6656 if self._match_text_seq("WITH", "UNIQUE"): 6657 unique_keys = True 6658 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6659 unique_keys = False 6660 6661 self._match_text_seq("KEYS") 6662 6663 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6664 self._parse_type() 6665 ) 6666 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6667 6668 return self.expression( 6669 exp.JSONObjectAgg if agg else exp.JSONObject, 6670 expressions=expressions, 6671 null_handling=null_handling, 6672 unique_keys=unique_keys, 6673 return_type=return_type, 6674 encoding=encoding, 6675 ) 6676 6677 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6678 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6679 if not self._match_text_seq("NESTED"): 6680 this = self._parse_id_var() 6681 kind = self._parse_types(allow_identifiers=False) 6682 nested = None 6683 else: 6684 this = None 6685 kind = None 6686 nested = True 6687 6688 path = self._match_text_seq("PATH") and self._parse_string() 6689 nested_schema = nested and self._parse_json_schema() 6690 6691 return self.expression( 6692 exp.JSONColumnDef, 6693 this=this, 6694 kind=kind, 6695 path=path, 6696 nested_schema=nested_schema, 6697 ) 6698 6699 def _parse_json_schema(self) -> exp.JSONSchema: 6700 self._match_text_seq("COLUMNS") 6701 return self.expression( 6702 exp.JSONSchema, 6703 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6704 ) 6705 6706 def _parse_json_table(self) -> exp.JSONTable: 6707 this = self._parse_format_json(self._parse_bitwise()) 6708 path = self._match(TokenType.COMMA) and self._parse_string() 6709 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6710 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6711 schema = self._parse_json_schema() 6712 6713 return exp.JSONTable( 6714 this=this, 6715 schema=schema, 6716 path=path, 6717 error_handling=error_handling, 6718 empty_handling=empty_handling, 6719 ) 6720 6721 def _parse_match_against(self) -> exp.MatchAgainst: 6722 expressions = self._parse_csv(self._parse_column) 6723 6724 self._match_text_seq(")", "AGAINST", "(") 6725 6726 this = self._parse_string() 6727 6728 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6729 modifier = "IN NATURAL LANGUAGE MODE" 6730 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6731 modifier = f"{modifier} WITH QUERY EXPANSION" 6732 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6733 modifier = "IN BOOLEAN MODE" 6734 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6735 modifier = "WITH QUERY EXPANSION" 6736 else: 6737 modifier = None 6738 6739 return self.expression( 6740 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6741 ) 6742 6743 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6744 def _parse_open_json(self) -> exp.OpenJSON: 6745 this = self._parse_bitwise() 6746 path = self._match(TokenType.COMMA) and self._parse_string() 6747 6748 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6749 this = self._parse_field(any_token=True) 6750 kind = self._parse_types() 6751 path = 
self._parse_string() 6752 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6753 6754 return self.expression( 6755 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6756 ) 6757 6758 expressions = None 6759 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6760 self._match_l_paren() 6761 expressions = self._parse_csv(_parse_open_json_column_def) 6762 6763 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6764 6765 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6766 args = self._parse_csv(self._parse_bitwise) 6767 6768 if self._match(TokenType.IN): 6769 return self.expression( 6770 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6771 ) 6772 6773 if haystack_first: 6774 haystack = seq_get(args, 0) 6775 needle = seq_get(args, 1) 6776 else: 6777 haystack = seq_get(args, 1) 6778 needle = seq_get(args, 0) 6779 6780 return self.expression( 6781 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6782 ) 6783 6784 def _parse_predict(self) -> exp.Predict: 6785 self._match_text_seq("MODEL") 6786 this = self._parse_table() 6787 6788 self._match(TokenType.COMMA) 6789 self._match_text_seq("TABLE") 6790 6791 return self.expression( 6792 exp.Predict, 6793 this=this, 6794 expression=self._parse_table(), 6795 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6796 ) 6797 6798 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6799 args = self._parse_csv(self._parse_table) 6800 return exp.JoinHint(this=func_name.upper(), expressions=args) 6801 6802 def _parse_substring(self) -> exp.Substring: 6803 # Postgres supports the form: substring(string [from int] [for int]) 6804 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6805 6806 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6807 6808 if self._match(TokenType.FROM): 6809 args.append(self._parse_bitwise()) 6810 if self._match(TokenType.FOR): 6811 if len(args) == 1: 6812 args.append(exp.Literal.number(1)) 6813 args.append(self._parse_bitwise()) 6814 6815 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6816 6817 def _parse_trim(self) -> exp.Trim: 6818 # https://www.w3resource.com/sql/character-functions/trim.php 6819 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6820 6821 position = None 6822 collation = None 6823 expression = None 6824 6825 if self._match_texts(self.TRIM_TYPES): 6826 position = self._prev.text.upper() 6827 6828 this = self._parse_bitwise() 6829 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6830 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6831 expression = self._parse_bitwise() 6832 6833 if invert_order: 6834 this, expression = expression, this 6835 6836 if self._match(TokenType.COLLATE): 6837 collation = self._parse_bitwise() 6838 6839 return self.expression( 6840 exp.Trim, this=this, position=position, expression=expression, collation=collation 6841 ) 6842 6843 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6844 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6845 6846 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6847 return self._parse_window(self._parse_id_var(), alias=True) 6848 6849 def _parse_respect_or_ignore_nulls( 6850 self, this: t.Optional[exp.Expression] 6851 ) -> t.Optional[exp.Expression]: 6852 if self._match_text_seq("IGNORE", "NULLS"): 
6853 return self.expression(exp.IgnoreNulls, this=this) 6854 if self._match_text_seq("RESPECT", "NULLS"): 6855 return self.expression(exp.RespectNulls, this=this) 6856 return this 6857 6858 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6859 if self._match(TokenType.HAVING): 6860 self._match_texts(("MAX", "MIN")) 6861 max = self._prev.text.upper() != "MIN" 6862 return self.expression( 6863 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6864 ) 6865 6866 return this 6867 6868 def _parse_window( 6869 self, this: t.Optional[exp.Expression], alias: bool = False 6870 ) -> t.Optional[exp.Expression]: 6871 func = this 6872 comments = func.comments if isinstance(func, exp.Expression) else None 6873 6874 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6875 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6876 if self._match_text_seq("WITHIN", "GROUP"): 6877 order = self._parse_wrapped(self._parse_order) 6878 this = self.expression(exp.WithinGroup, this=this, expression=order) 6879 6880 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6881 self._match(TokenType.WHERE) 6882 this = self.expression( 6883 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6884 ) 6885 self._match_r_paren() 6886 6887 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER. 6888 # Some dialects choose to implement it and some do not. 6889 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6890 6891 # There is some code above in _parse_lambda that handles 6892 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6893 6894 # The code below handles 6895 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6896 6897 # Oracle allows both formats 6898 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6899 # and Snowflake chose to do the same for familiarity 6900 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6901 if isinstance(this, exp.AggFunc): 6902 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6903 6904 if ignore_respect and ignore_respect is not this: 6905 ignore_respect.replace(ignore_respect.this) 6906 this = self.expression(ignore_respect.__class__, this=this) 6907 6908 this = self._parse_respect_or_ignore_nulls(this) 6909 6910 # BigQuery-style named windows: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
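        # With alias=True (via _parse_named_window) there is no OVER keyword;
        # e.g. in SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y),
        # the trailing "w AS (PARTITION BY y)" re-enters this same method.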
6911 if alias: 6912 over = None 6913 self._match(TokenType.ALIAS) 6914 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6915 return this 6916 else: 6917 over = self._prev.text.upper() 6918 6919 if comments and isinstance(func, exp.Expression): 6920 func.pop_comments() 6921 6922 if not self._match(TokenType.L_PAREN): 6923 return self.expression( 6924 exp.Window, 6925 comments=comments, 6926 this=this, 6927 alias=self._parse_id_var(False), 6928 over=over, 6929 ) 6930 6931 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6932 6933 first = self._match(TokenType.FIRST) 6934 if self._match_text_seq("LAST"): 6935 first = False 6936 6937 partition, order = self._parse_partition_and_order() 6938 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6939 6940 if kind: 6941 self._match(TokenType.BETWEEN) 6942 start = self._parse_window_spec() 6943 self._match(TokenType.AND) 6944 end = self._parse_window_spec() 6945 exclude = ( 6946 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6947 if self._match_text_seq("EXCLUDE") 6948 else None 6949 ) 6950 6951 spec = self.expression( 6952 exp.WindowSpec, 6953 kind=kind, 6954 start=start["value"], 6955 start_side=start["side"], 6956 end=end["value"], 6957 end_side=end["side"], 6958 exclude=exclude, 6959 ) 6960 else: 6961 spec = None 6962 6963 self._match_r_paren() 6964 6965 window = self.expression( 6966 exp.Window, 6967 comments=comments, 6968 this=this, 6969 partition_by=partition, 6970 order=order, 6971 spec=spec, 6972 alias=window_alias, 6973 over=over, 6974 first=first, 6975 ) 6976 6977 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6978 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6979 return self._parse_window(window, alias=alias) 6980 6981 return window 6982 6983 def _parse_partition_and_order( 6984 self, 6985 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6986 return self._parse_partition_by(), self._parse_order() 6987 6988 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6989 self._match(TokenType.BETWEEN) 6990 6991 return { 6992 "value": ( 6993 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6994 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6995 or self._parse_bitwise() 6996 ), 6997 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6998 } 6999 7000 def _parse_alias( 7001 self, this: t.Optional[exp.Expression], explicit: bool = False 7002 ) -> t.Optional[exp.Expression]: 7003 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7004 # so this section tries to parse the clause version and if it fails, it treats the token 7005 # as an identifier (alias) 7006 if self._can_parse_limit_or_offset(): 7007 return this 7008 7009 any_token = self._match(TokenType.ALIAS) 7010 comments = self._prev_comments or [] 7011 7012 if explicit and not any_token: 7013 return this 7014 7015 if self._match(TokenType.L_PAREN): 7016 aliases = self.expression( 7017 exp.Aliases, 7018 comments=comments, 7019 this=this, 7020 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7021 ) 7022 self._match_r_paren(aliases) 7023 return aliases 7024 7025 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7026 self.STRING_ALIASES and self._parse_string_as_identifier() 7027 ) 7028 7029 if alias: 7030 comments.extend(alias.pop_comments()) 7031 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7032 column = this.this 7033 7034 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7035 if not this.comments and column and column.comments: 7036 this.comments = column.pop_comments() 7037 7038 return this 7039 7040 def _parse_id_var( 7041 self, 7042 any_token: bool = True, 7043 tokens: t.Optional[t.Collection[TokenType]] = None, 7044 ) -> t.Optional[exp.Expression]: 7045 expression = self._parse_identifier() 7046 if not expression and ( 7047 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7048 ): 7049 quoted = self._prev.token_type == TokenType.STRING 7050 expression = self._identifier_expression(quoted=quoted) 7051 7052 return expression 7053 7054 def _parse_string(self) -> t.Optional[exp.Expression]: 7055 if self._match_set(self.STRING_PARSERS): 7056 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7057 return self._parse_placeholder() 7058 7059 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7060 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7061 if output: 7062 output.update_positions(self._prev) 7063 return output 7064 7065 def _parse_number(self) -> t.Optional[exp.Expression]: 7066 if self._match_set(self.NUMERIC_PARSERS): 7067 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7068 return self._parse_placeholder() 7069 7070 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7071 if self._match(TokenType.IDENTIFIER): 7072 return self._identifier_expression(quoted=True) 7073 return self._parse_placeholder() 7074 7075 def _parse_var( 7076 self, 7077 any_token: bool = False, 7078 tokens: t.Optional[t.Collection[TokenType]] = None, 7079 upper: bool = False, 7080 ) -> t.Optional[exp.Expression]: 7081 if ( 7082 (any_token and self._advance_any()) 7083 or self._match(TokenType.VAR) 7084 or (self._match_set(tokens) if tokens else False) 7085 ): 7086 return self.expression( 7087 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7088 ) 7089 return self._parse_placeholder() 7090 7091 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7092 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7093 self._advance() 7094 return self._prev 7095 return None 7096 7097 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7098 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7099 7100 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7101 return self._parse_primary() or self._parse_var(any_token=True) 7102 7103 def _parse_null(self) -> t.Optional[exp.Expression]: 7104 if self._match_set(self.NULL_TOKENS): 7105 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7106 return self._parse_placeholder() 7107 7108 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7109 if self._match(TokenType.TRUE): 7110 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7111 if self._match(TokenType.FALSE): 7112 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7113 return self._parse_placeholder() 7114 7115 def _parse_star(self) -> t.Optional[exp.Expression]: 7116 if self._match(TokenType.STAR): 7117 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7118 return self._parse_placeholder() 7119 7120 def _parse_parameter(self) -> exp.Parameter: 7121 this = self._parse_identifier() or self._parse_primary_or_var() 7122 return 
self.expression(exp.Parameter, this=this) 7123 7124 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7125 if self._match_set(self.PLACEHOLDER_PARSERS): 7126 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7127 if placeholder: 7128 return placeholder 7129 self._advance(-1) 7130 return None 7131 7132 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7133 if not self._match_texts(keywords): 7134 return None 7135 if self._match(TokenType.L_PAREN, advance=False): 7136 return self._parse_wrapped_csv(self._parse_expression) 7137 7138 expression = self._parse_expression() 7139 return [expression] if expression else None 7140 7141 def _parse_csv( 7142 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7143 ) -> t.List[exp.Expression]: 7144 parse_result = parse_method() 7145 items = [parse_result] if parse_result is not None else [] 7146 7147 while self._match(sep): 7148 self._add_comments(parse_result) 7149 parse_result = parse_method() 7150 if parse_result is not None: 7151 items.append(parse_result) 7152 7153 return items 7154 7155 def _parse_tokens( 7156 self, parse_method: t.Callable, expressions: t.Dict 7157 ) -> t.Optional[exp.Expression]: 7158 this = parse_method() 7159 7160 while self._match_set(expressions): 7161 this = self.expression( 7162 expressions[self._prev.token_type], 7163 this=this, 7164 comments=self._prev_comments, 7165 expression=parse_method(), 7166 ) 7167 7168 return this 7169 7170 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7171 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7172 7173 def _parse_wrapped_csv( 7174 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7175 ) -> t.List[exp.Expression]: 7176 return self._parse_wrapped( 7177 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7178 ) 7179 7180 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7181 wrapped = self._match(TokenType.L_PAREN) 7182 if not wrapped and not optional: 7183 self.raise_error("Expecting (") 7184 parse_result = parse_method() 7185 if wrapped: 7186 self._match_r_paren() 7187 return parse_result 7188 7189 def _parse_expressions(self) -> t.List[exp.Expression]: 7190 return self._parse_csv(self._parse_expression) 7191 7192 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7193 return self._parse_select() or self._parse_set_operations( 7194 self._parse_alias(self._parse_assignment(), explicit=True) 7195 if alias 7196 else self._parse_assignment() 7197 ) 7198 7199 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7200 return self._parse_query_modifiers( 7201 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7202 ) 7203 7204 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7205 this = None 7206 if self._match_texts(self.TRANSACTION_KIND): 7207 this = self._prev.text 7208 7209 self._match_texts(("TRANSACTION", "WORK")) 7210 7211 modes = [] 7212 while True: 7213 mode = [] 7214 while self._match(TokenType.VAR): 7215 mode.append(self._prev.text) 7216 7217 if mode: 7218 modes.append(" ".join(mode)) 7219 if not self._match(TokenType.COMMA): 7220 break 7221 7222 return self.expression(exp.Transaction, this=this, modes=modes) 7223 7224 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7225 chain = None 7226 savepoint = None 7227 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7228 7229 self._match_texts(("TRANSACTION", "WORK")) 7230 7231 if self._match_text_seq("TO"): 7232 self._match_text_seq("SAVEPOINT") 7233 savepoint = self._parse_id_var() 7234 7235 if self._match(TokenType.AND): 7236 chain = not self._match_text_seq("NO") 7237 self._match_text_seq("CHAIN") 7238 7239 if is_rollback: 7240 return self.expression(exp.Rollback, savepoint=savepoint) 7241 7242 return self.expression(exp.Commit, chain=chain) 7243 7244 def _parse_refresh(self) -> exp.Refresh: 7245 self._match(TokenType.TABLE) 7246 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7247 7248 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7249 if not self._prev.text.upper() == "ADD": 7250 return None 7251 7252 start = self._index 7253 self._match(TokenType.COLUMN) 7254 7255 exists_column = self._parse_exists(not_=True) 7256 expression = self._parse_field_def() 7257 7258 if not isinstance(expression, exp.ColumnDef): 7259 self._retreat(start) 7260 return None 7261 7262 expression.set("exists", exists_column) 7263 7264 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7265 if self._match_texts(("FIRST", "AFTER")): 7266 position = self._prev.text 7267 column_position = self.expression( 7268 exp.ColumnPosition, this=self._parse_column(), position=position 7269 ) 7270 expression.set("position", column_position) 7271 7272 return expression 7273 7274 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7275 drop = self._match(TokenType.DROP) and self._parse_drop() 7276 if drop and not isinstance(drop, exp.Command): 7277 drop.set("kind", drop.args.get("kind", "COLUMN")) 7278 return drop 7279 7280 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7281 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7282 return self.expression( 7283 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7284 ) 7285 7286 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7287 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7288 self._match_text_seq("ADD") 7289 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7290 return self.expression( 7291 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7292 ) 7293 7294 column_def = self._parse_add_column() 7295 if isinstance(column_def, exp.ColumnDef): 7296 return column_def 7297 7298 exists = self._parse_exists(not_=True) 7299 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7300 return self.expression( 7301 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7302 ) 7303 7304 return None 7305 7306 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7307 "COLUMNS" 7308 ): 7309 schema = self._parse_schema() 7310 7311 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7312 7313 return self._parse_csv(_parse_add_alteration) 7314 7315 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7316 if self._match_texts(self.ALTER_ALTER_PARSERS): 7317 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7318 7319 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7320 # keyword after ALTER we default to parsing this statement 7321 self._match(TokenType.COLUMN) 7322 column = self._parse_field(any_token=True) 7323 7324 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7325 
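            # e.g. ALTER TABLE t ALTER COLUMN c DROP DEFAULT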
return self.expression(exp.AlterColumn, this=column, drop=True) 7326 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7327 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7328 if self._match(TokenType.COMMENT): 7329 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7330 if self._match_text_seq("DROP", "NOT", "NULL"): 7331 return self.expression( 7332 exp.AlterColumn, 7333 this=column, 7334 drop=True, 7335 allow_null=True, 7336 ) 7337 if self._match_text_seq("SET", "NOT", "NULL"): 7338 return self.expression( 7339 exp.AlterColumn, 7340 this=column, 7341 allow_null=False, 7342 ) 7343 7344 if self._match_text_seq("SET", "VISIBLE"): 7345 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7346 if self._match_text_seq("SET", "INVISIBLE"): 7347 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7348 7349 self._match_text_seq("SET", "DATA") 7350 self._match_text_seq("TYPE") 7351 return self.expression( 7352 exp.AlterColumn, 7353 this=column, 7354 dtype=self._parse_types(), 7355 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7356 using=self._match(TokenType.USING) and self._parse_assignment(), 7357 ) 7358 7359 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7360 if self._match_texts(("ALL", "EVEN", "AUTO")): 7361 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7362 7363 self._match_text_seq("KEY", "DISTKEY") 7364 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7365 7366 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7367 if compound: 7368 self._match_text_seq("SORTKEY") 7369 7370 if self._match(TokenType.L_PAREN, advance=False): 7371 return self.expression( 7372 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7373 ) 7374 7375 self._match_texts(("AUTO", "NONE")) 7376 return self.expression( 7377 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7378 ) 7379 7380 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7381 index = self._index - 1 7382 7383 partition_exists = self._parse_exists() 7384 if self._match(TokenType.PARTITION, advance=False): 7385 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7386 7387 self._retreat(index) 7388 return self._parse_csv(self._parse_drop_column) 7389 7390 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7391 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7392 exists = self._parse_exists() 7393 old_column = self._parse_column() 7394 to = self._match_text_seq("TO") 7395 new_column = self._parse_column() 7396 7397 if old_column is None or to is None or new_column is None: 7398 return None 7399 7400 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7401 7402 self._match_text_seq("TO") 7403 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7404 7405 def _parse_alter_table_set(self) -> exp.AlterSet: 7406 alter_set = self.expression(exp.AlterSet) 7407 7408 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7409 "TABLE", "PROPERTIES" 7410 ): 7411 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7412 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7413 alter_set.set("expressions", [self._parse_assignment()]) 7414 elif self._match_texts(("LOGGED", 
"UNLOGGED")): 7415 alter_set.set("option", exp.var(self._prev.text.upper())) 7416 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7417 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7418 elif self._match_text_seq("LOCATION"): 7419 alter_set.set("location", self._parse_field()) 7420 elif self._match_text_seq("ACCESS", "METHOD"): 7421 alter_set.set("access_method", self._parse_field()) 7422 elif self._match_text_seq("TABLESPACE"): 7423 alter_set.set("tablespace", self._parse_field()) 7424 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7425 alter_set.set("file_format", [self._parse_field()]) 7426 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7427 alter_set.set("file_format", self._parse_wrapped_options()) 7428 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7429 alter_set.set("copy_options", self._parse_wrapped_options()) 7430 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7431 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7432 else: 7433 if self._match_text_seq("SERDE"): 7434 alter_set.set("serde", self._parse_field()) 7435 7436 properties = self._parse_wrapped(self._parse_properties, optional=True) 7437 alter_set.set("expressions", [properties]) 7438 7439 return alter_set 7440 7441 def _parse_alter(self) -> exp.Alter | exp.Command: 7442 start = self._prev 7443 7444 alter_token = self._match_set(self.ALTERABLES) and self._prev 7445 if not alter_token: 7446 return self._parse_as_command(start) 7447 7448 exists = self._parse_exists() 7449 only = self._match_text_seq("ONLY") 7450 this = self._parse_table(schema=True) 7451 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7452 7453 if self._next: 7454 self._advance() 7455 7456 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7457 if parser: 7458 actions = ensure_list(parser(self)) 7459 not_valid = self._match_text_seq("NOT", "VALID") 7460 options = self._parse_csv(self._parse_property) 7461 7462 if not self._curr and actions: 7463 return self.expression( 7464 exp.Alter, 7465 this=this, 7466 kind=alter_token.text.upper(), 7467 exists=exists, 7468 actions=actions, 7469 only=only, 7470 options=options, 7471 cluster=cluster, 7472 not_valid=not_valid, 7473 ) 7474 7475 return self._parse_as_command(start) 7476 7477 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7478 start = self._prev 7479 # https://duckdb.org/docs/sql/statements/analyze 7480 if not self._curr: 7481 return self.expression(exp.Analyze) 7482 7483 options = [] 7484 while self._match_texts(self.ANALYZE_STYLES): 7485 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7486 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7487 else: 7488 options.append(self._prev.text.upper()) 7489 7490 this: t.Optional[exp.Expression] = None 7491 inner_expression: t.Optional[exp.Expression] = None 7492 7493 kind = self._curr and self._curr.text.upper() 7494 7495 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7496 this = self._parse_table_parts() 7497 elif self._match_text_seq("TABLES"): 7498 if self._match_set((TokenType.FROM, TokenType.IN)): 7499 kind = f"{kind} {self._prev.text.upper()}" 7500 this = self._parse_table(schema=True, is_db_reference=True) 7501 elif self._match_text_seq("DATABASE"): 7502 this = self._parse_table(schema=True, is_db_reference=True) 7503 elif self._match_text_seq("CLUSTER"): 7504 this = self._parse_table() 7505 # Try matching inner expr keywords before 
falling back to parsing a table. 7506 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7507 kind = None 7508 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7509 else: 7510 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7511 kind = None 7512 this = self._parse_table_parts() 7513 7514 partition = self._try_parse(self._parse_partition) 7515 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7516 return self._parse_as_command(start) 7517 7518 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7519 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7520 "WITH", "ASYNC", "MODE" 7521 ): 7522 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7523 else: 7524 mode = None 7525 7526 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7527 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7528 7529 properties = self._parse_properties() 7530 return self.expression( 7531 exp.Analyze, 7532 kind=kind, 7533 this=this, 7534 mode=mode, 7535 partition=partition, 7536 properties=properties, 7537 expression=inner_expression, 7538 options=options, 7539 ) 7540 7541 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7542 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7543 this = None 7544 kind = self._prev.text.upper() 7545 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7546 expressions = [] 7547 7548 if not self._match_text_seq("STATISTICS"): 7549 self.raise_error("Expecting token STATISTICS") 7550 7551 if self._match_text_seq("NOSCAN"): 7552 this = "NOSCAN" 7553 elif self._match(TokenType.FOR): 7554 if self._match_text_seq("ALL", "COLUMNS"): 7555 this = "FOR ALL COLUMNS" 7556 if self._match_texts(("COLUMNS",)): 7557 this = "FOR COLUMNS" 7558 expressions = self._parse_csv(self._parse_column_reference) 7559 elif self._match_text_seq("SAMPLE"): 7560 sample = self._parse_number() 7561 expressions = [ 7562 self.expression( 7563 exp.AnalyzeSample, 7564 sample=sample, 7565 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7566 ) 7567 ] 7568 7569 return self.expression( 7570 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7571 ) 7572 7573 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7574 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7575 kind = None 7576 this = None 7577 expression: t.Optional[exp.Expression] = None 7578 if self._match_text_seq("REF", "UPDATE"): 7579 kind = "REF" 7580 this = "UPDATE" 7581 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7582 this = "UPDATE SET DANGLING TO NULL" 7583 elif self._match_text_seq("STRUCTURE"): 7584 kind = "STRUCTURE" 7585 if self._match_text_seq("CASCADE", "FAST"): 7586 this = "CASCADE FAST" 7587 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7588 ("ONLINE", "OFFLINE") 7589 ): 7590 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7591 expression = self._parse_into() 7592 7593 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7594 7595 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7596 this = self._prev.text.upper() 7597 if self._match_text_seq("COLUMNS"): 7598 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7599 return None 7600 7601 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 
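        # e.g. Oracle: ANALYZE TABLE t DELETE [SYSTEM] STATISTICS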
7602 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7603 if self._match_text_seq("STATISTICS"): 7604 return self.expression(exp.AnalyzeDelete, kind=kind) 7605 return None 7606 7607 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7608 if self._match_text_seq("CHAINED", "ROWS"): 7609 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7610 return None 7611 7612 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7613 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7614 this = self._prev.text.upper() 7615 expression: t.Optional[exp.Expression] = None 7616 expressions = [] 7617 update_options = None 7618 7619 if self._match_text_seq("HISTOGRAM", "ON"): 7620 expressions = self._parse_csv(self._parse_column_reference) 7621 with_expressions = [] 7622 while self._match(TokenType.WITH): 7623 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7624 if self._match_texts(("SYNC", "ASYNC")): 7625 if self._match_text_seq("MODE", advance=False): 7626 with_expressions.append(f"{self._prev.text.upper()} MODE") 7627 self._advance() 7628 else: 7629 buckets = self._parse_number() 7630 if self._match_text_seq("BUCKETS"): 7631 with_expressions.append(f"{buckets} BUCKETS") 7632 if with_expressions: 7633 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7634 7635 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7636 TokenType.UPDATE, advance=False 7637 ): 7638 update_options = self._prev.text.upper() 7639 self._advance() 7640 elif self._match_text_seq("USING", "DATA"): 7641 expression = self.expression(exp.UsingData, this=self._parse_string()) 7642 7643 return self.expression( 7644 exp.AnalyzeHistogram, 7645 this=this, 7646 expressions=expressions, 7647 expression=expression, 7648 update_options=update_options, 7649 ) 7650 7651 def _parse_merge(self) -> exp.Merge: 7652 self._match(TokenType.INTO) 7653 target = self._parse_table() 7654 7655 if target and self._match(TokenType.ALIAS, advance=False): 7656 target.set("alias", self._parse_table_alias()) 7657 7658 self._match(TokenType.USING) 7659 using = self._parse_table() 7660 7661 self._match(TokenType.ON) 7662 on = self._parse_assignment() 7663 7664 return self.expression( 7665 exp.Merge, 7666 this=target, 7667 using=using, 7668 on=on, 7669 whens=self._parse_when_matched(), 7670 returning=self._parse_returning(), 7671 ) 7672 7673 def _parse_when_matched(self) -> exp.Whens: 7674 whens = [] 7675 7676 while self._match(TokenType.WHEN): 7677 matched = not self._match(TokenType.NOT) 7678 self._match_text_seq("MATCHED") 7679 source = ( 7680 False 7681 if self._match_text_seq("BY", "TARGET") 7682 else self._match_text_seq("BY", "SOURCE") 7683 ) 7684 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7685 7686 self._match(TokenType.THEN) 7687 7688 if self._match(TokenType.INSERT): 7689 this = self._parse_star() 7690 if this: 7691 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7692 else: 7693 then = self.expression( 7694 exp.Insert, 7695 this=exp.var("ROW") 7696 if self._match_text_seq("ROW") 7697 else self._parse_value(values=False), 7698 expression=self._match_text_seq("VALUES") and self._parse_value(), 7699 ) 7700 elif self._match(TokenType.UPDATE): 7701 expressions = self._parse_star() 7702 if expressions: 7703 then = self.expression(exp.Update, expressions=expressions) 7704 else: 7705 then = self.expression( 7706 exp.Update, 7707 
expressions=self._match(TokenType.SET) 7708 and self._parse_csv(self._parse_equality), 7709 ) 7710 elif self._match(TokenType.DELETE): 7711 then = self.expression(exp.Var, this=self._prev.text) 7712 else: 7713 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7714 7715 whens.append( 7716 self.expression( 7717 exp.When, 7718 matched=matched, 7719 source=source, 7720 condition=condition, 7721 then=then, 7722 ) 7723 ) 7724 return self.expression(exp.Whens, expressions=whens) 7725 7726 def _parse_show(self) -> t.Optional[exp.Expression]: 7727 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7728 if parser: 7729 return parser(self) 7730 return self._parse_as_command(self._prev) 7731 7732 def _parse_set_item_assignment( 7733 self, kind: t.Optional[str] = None 7734 ) -> t.Optional[exp.Expression]: 7735 index = self._index 7736 7737 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7738 return self._parse_set_transaction(global_=kind == "GLOBAL") 7739 7740 left = self._parse_primary() or self._parse_column() 7741 assignment_delimiter = self._match_texts(("=", "TO")) 7742 7743 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7744 self._retreat(index) 7745 return None 7746 7747 right = self._parse_statement() or self._parse_id_var() 7748 if isinstance(right, (exp.Column, exp.Identifier)): 7749 right = exp.var(right.name) 7750 7751 this = self.expression(exp.EQ, this=left, expression=right) 7752 return self.expression(exp.SetItem, this=this, kind=kind) 7753 7754 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7755 self._match_text_seq("TRANSACTION") 7756 characteristics = self._parse_csv( 7757 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7758 ) 7759 return self.expression( 7760 exp.SetItem, 7761 expressions=characteristics, 7762 kind="TRANSACTION", 7763 **{"global": global_}, # type: ignore 7764 ) 7765 7766 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7767 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7768 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7769 7770 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7771 index = self._index 7772 set_ = self.expression( 7773 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7774 ) 7775 7776 if self._curr: 7777 self._retreat(index) 7778 return self._parse_as_command(self._prev) 7779 7780 return set_ 7781 7782 def _parse_var_from_options( 7783 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7784 ) -> t.Optional[exp.Var]: 7785 start = self._curr 7786 if not start: 7787 return None 7788 7789 option = start.text.upper() 7790 continuations = options.get(option) 7791 7792 index = self._index 7793 self._advance() 7794 for keywords in continuations or []: 7795 if isinstance(keywords, str): 7796 keywords = (keywords,) 7797 7798 if self._match_text_seq(*keywords): 7799 option = f"{option} {' '.join(keywords)}" 7800 break 7801 else: 7802 if continuations or continuations is None: 7803 if raise_unmatched: 7804 self.raise_error(f"Unknown option {option}") 7805 7806 self._retreat(index) 7807 return None 7808 7809 return exp.var(option) 7810 7811 def _parse_as_command(self, start: Token) -> exp.Command: 7812 while self._curr: 7813 self._advance() 7814 text = self._find_sql(start, self._prev) 7815 size = len(start.text) 7816 self._warn_unsupported() 7817 return exp.Command(this=text[:size], 
expression=text[size:]) 7818 7819 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7820 settings = [] 7821 7822 self._match_l_paren() 7823 kind = self._parse_id_var() 7824 7825 if self._match(TokenType.L_PAREN): 7826 while True: 7827 key = self._parse_id_var() 7828 value = self._parse_primary() 7829 if not key and value is None: 7830 break 7831 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7832 self._match(TokenType.R_PAREN) 7833 7834 self._match_r_paren() 7835 7836 return self.expression( 7837 exp.DictProperty, 7838 this=this, 7839 kind=kind.this if kind else None, 7840 settings=settings, 7841 ) 7842 7843 def _parse_dict_range(self, this: str) -> exp.DictRange: 7844 self._match_l_paren() 7845 has_min = self._match_text_seq("MIN") 7846 if has_min: 7847 min = self._parse_var() or self._parse_primary() 7848 self._match_text_seq("MAX") 7849 max = self._parse_var() or self._parse_primary() 7850 else: 7851 max = self._parse_var() or self._parse_primary() 7852 min = exp.Literal.number(0) 7853 self._match_r_paren() 7854 return self.expression(exp.DictRange, this=this, min=min, max=max) 7855 7856 def _parse_comprehension( 7857 self, this: t.Optional[exp.Expression] 7858 ) -> t.Optional[exp.Comprehension]: 7859 index = self._index 7860 expression = self._parse_column() 7861 if not self._match(TokenType.IN): 7862 self._retreat(index - 1) 7863 return None 7864 iterator = self._parse_column() 7865 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7866 return self.expression( 7867 exp.Comprehension, 7868 this=this, 7869 expression=expression, 7870 iterator=iterator, 7871 condition=condition, 7872 ) 7873 7874 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7875 if self._match(TokenType.HEREDOC_STRING): 7876 return self.expression(exp.Heredoc, this=self._prev.text) 7877 7878 if not self._match_text_seq("$"): 7879 return None 7880 7881 tags = ["$"] 7882 tag_text = None 7883 7884 if self._is_connected(): 7885 self._advance() 7886 tags.append(self._prev.text.upper()) 7887 else: 7888 self.raise_error("No closing $ found") 7889 7890 if tags[-1] != "$": 7891 if self._is_connected() and self._match_text_seq("$"): 7892 tag_text = tags[-1] 7893 tags.append("$") 7894 else: 7895 self.raise_error("No closing $ found") 7896 7897 heredoc_start = self._curr 7898 7899 while self._curr: 7900 if self._match_text_seq(*tags, advance=False): 7901 this = self._find_sql(heredoc_start, self._prev) 7902 self._advance(len(tags)) 7903 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7904 7905 self._advance() 7906 7907 self.raise_error(f"No closing {''.join(tags)} found") 7908 return None 7909 7910 def _find_parser( 7911 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7912 ) -> t.Optional[t.Callable]: 7913 if not self._curr: 7914 return None 7915 7916 index = self._index 7917 this = [] 7918 while True: 7919 # The current token might be multiple words 7920 curr = self._curr.text.upper() 7921 key = curr.split(" ") 7922 this.append(curr) 7923 7924 self._advance() 7925 result, trie = in_trie(trie, key) 7926 if result == TrieResult.FAILED: 7927 break 7928 7929 if result == TrieResult.EXISTS: 7930 subparser = parsers[" ".join(this)] 7931 return subparser 7932 7933 self._retreat(index) 7934 return None 7935 7936 def _match(self, token_type, advance=True, expression=None): 7937 if not self._curr: 7938 return None 7939 7940 if self._curr.token_type == token_type: 7941 if advance: 7942 self._advance() 7943 self._add_comments(expression) 7944 return 
True 7945 7946 return None 7947 7948 def _match_set(self, types, advance=True): 7949 if not self._curr: 7950 return None 7951 7952 if self._curr.token_type in types: 7953 if advance: 7954 self._advance() 7955 return True 7956 7957 return None 7958 7959 def _match_pair(self, token_type_a, token_type_b, advance=True): 7960 if not self._curr or not self._next: 7961 return None 7962 7963 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7964 if advance: 7965 self._advance(2) 7966 return True 7967 7968 return None 7969 7970 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7971 if not self._match(TokenType.L_PAREN, expression=expression): 7972 self.raise_error("Expecting (") 7973 7974 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7975 if not self._match(TokenType.R_PAREN, expression=expression): 7976 self.raise_error("Expecting )") 7977 7978 def _match_texts(self, texts, advance=True): 7979 if ( 7980 self._curr 7981 and self._curr.token_type != TokenType.STRING 7982 and self._curr.text.upper() in texts 7983 ): 7984 if advance: 7985 self._advance() 7986 return True 7987 return None 7988 7989 def _match_text_seq(self, *texts, advance=True): 7990 index = self._index 7991 for text in texts: 7992 if ( 7993 self._curr 7994 and self._curr.token_type != TokenType.STRING 7995 and self._curr.text.upper() == text 7996 ): 7997 self._advance() 7998 else: 7999 self._retreat(index) 8000 return None 8001 8002 if not advance: 8003 self._retreat(index) 8004 8005 return True 8006 8007 def _replace_lambda( 8008 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8009 ) -> t.Optional[exp.Expression]: 8010 if not node: 8011 return node 8012 8013 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8014 8015 for column in node.find_all(exp.Column): 8016 typ = lambda_types.get(column.parts[0].name) 8017 if typ is not None: 8018 dot_or_id = column.to_dot() if column.table else column.this 8019 8020 if typ: 8021 dot_or_id = self.expression( 8022 exp.Cast, 8023 this=dot_or_id, 8024 to=typ, 8025 ) 8026 8027 parent = column.parent 8028 8029 while isinstance(parent, exp.Dot): 8030 if not isinstance(parent.parent, exp.Dot): 8031 parent.replace(dot_or_id) 8032 break 8033 parent = parent.parent 8034 else: 8035 if column is node: 8036 node = dot_or_id 8037 else: 8038 column.replace(dot_or_id) 8039 return node 8040 8041 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8042 start = self._prev 8043 8044 # Not to be confused with TRUNCATE(number, decimals) function call 8045 if self._match(TokenType.L_PAREN): 8046 self._retreat(self._index - 2) 8047 return self._parse_function() 8048 8049 # Clickhouse supports TRUNCATE DATABASE as well 8050 is_database = self._match(TokenType.DATABASE) 8051 8052 self._match(TokenType.TABLE) 8053 8054 exists = self._parse_exists(not_=False) 8055 8056 expressions = self._parse_csv( 8057 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8058 ) 8059 8060 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8061 8062 if self._match_text_seq("RESTART", "IDENTITY"): 8063 identity = "RESTART" 8064 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8065 identity = "CONTINUE" 8066 else: 8067 identity = None 8068 8069 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8070 option = self._prev.text 8071 else: 8072 option = None 8073 8074 partition = self._parse_partition() 
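        # Illustrative examples of inputs consumed above (options span dialects):
        #   TRUNCATE TABLE IF EXISTS t1, t2 RESTART IDENTITY CASCADE
        #   TRUNCATE TABLE t PARTITION (ds = '2024-01-01')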
8075 8076 # Fallback case 8077 if self._curr: 8078 return self._parse_as_command(start) 8079 8080 return self.expression( 8081 exp.TruncateTable, 8082 expressions=expressions, 8083 is_database=is_database, 8084 exists=exists, 8085 cluster=cluster, 8086 identity=identity, 8087 option=option, 8088 partition=partition, 8089 ) 8090 8091 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8092 this = self._parse_ordered(self._parse_opclass) 8093 8094 if not self._match(TokenType.WITH): 8095 return this 8096 8097 op = self._parse_var(any_token=True) 8098 8099 return self.expression(exp.WithOperator, this=this, op=op) 8100 8101 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8102 self._match(TokenType.EQ) 8103 self._match(TokenType.L_PAREN) 8104 8105 opts: t.List[t.Optional[exp.Expression]] = [] 8106 option: exp.Expression | None 8107 while self._curr and not self._match(TokenType.R_PAREN): 8108 if self._match_text_seq("FORMAT_NAME", "="): 8109 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8110 option = self._parse_format_name() 8111 else: 8112 option = self._parse_property() 8113 8114 if option is None: 8115 self.raise_error("Unable to parse option") 8116 break 8117 8118 opts.append(option) 8119 8120 return opts 8121 8122 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8123 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8124 8125 options = [] 8126 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8127 option = self._parse_var(any_token=True) 8128 prev = self._prev.text.upper() 8129 8130 # Different dialects might separate options and values by white space, "=" and "AS" 8131 self._match(TokenType.EQ) 8132 self._match(TokenType.ALIAS) 8133 8134 param = self.expression(exp.CopyParameter, this=option) 8135 8136 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8137 TokenType.L_PAREN, advance=False 8138 ): 8139 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8140 param.set("expressions", self._parse_wrapped_options()) 8141 elif prev == "FILE_FORMAT": 8142 # T-SQL's external file format case 8143 param.set("expression", self._parse_field()) 8144 else: 8145 param.set("expression", self._parse_unquoted_field()) 8146 8147 options.append(param) 8148 self._match(sep) 8149 8150 return options 8151 8152 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8153 expr = self.expression(exp.Credentials) 8154 8155 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8156 expr.set("storage", self._parse_field()) 8157 if self._match_text_seq("CREDENTIALS"): 8158 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8159 creds = ( 8160 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8161 ) 8162 expr.set("credentials", creds) 8163 if self._match_text_seq("ENCRYPTION"): 8164 expr.set("encryption", self._parse_wrapped_options()) 8165 if self._match_text_seq("IAM_ROLE"): 8166 expr.set("iam_role", self._parse_field()) 8167 if self._match_text_seq("REGION"): 8168 expr.set("region", self._parse_field()) 8169 8170 return expr 8171 8172 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8173 return self._parse_field() 8174 8175 def _parse_copy(self) -> exp.Copy | exp.Command: 8176 start = self._prev 8177 8178 self._match(TokenType.INTO) 8179 8180 this = ( 8181 self._parse_select(nested=True, parse_subquery_alias=False) 8182 if self._match(TokenType.L_PAREN, advance=False) 8183 else self._parse_table(schema=True) 
8184 ) 8185 8186 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8187 8188 files = self._parse_csv(self._parse_file_location) 8189 credentials = self._parse_credentials() 8190 8191 self._match_text_seq("WITH") 8192 8193 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8194 8195 # Fallback case 8196 if self._curr: 8197 return self._parse_as_command(start) 8198 8199 return self.expression( 8200 exp.Copy, 8201 this=this, 8202 kind=kind, 8203 credentials=credentials, 8204 files=files, 8205 params=params, 8206 ) 8207 8208 def _parse_normalize(self) -> exp.Normalize: 8209 return self.expression( 8210 exp.Normalize, 8211 this=self._parse_bitwise(), 8212 form=self._match(TokenType.COMMA) and self._parse_var(), 8213 ) 8214 8215 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8216 args = self._parse_csv(lambda: self._parse_lambda()) 8217 8218 this = seq_get(args, 0) 8219 decimals = seq_get(args, 1) 8220 8221 return expr_type( 8222 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8223 ) 8224 8225 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8226 star_token = self._prev 8227 8228 if self._match_text_seq("COLUMNS", "(", advance=False): 8229 this = self._parse_function() 8230 if isinstance(this, exp.Columns): 8231 this.set("unpack", True) 8232 return this 8233 8234 return self.expression( 8235 exp.Star, 8236 **{ # type: ignore 8237 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8238 "replace": self._parse_star_op("REPLACE"), 8239 "rename": self._parse_star_op("RENAME"), 8240 }, 8241 ).update_positions(star_token) 8242 8243 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8244 privilege_parts = [] 8245 8246 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8247 # (end of privilege list) or L_PAREN (start of column list) are met 8248 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8249 privilege_parts.append(self._curr.text.upper()) 8250 self._advance() 8251 8252 this = exp.var(" ".join(privilege_parts)) 8253 expressions = ( 8254 self._parse_wrapped_csv(self._parse_column) 8255 if self._match(TokenType.L_PAREN, advance=False) 8256 else None 8257 ) 8258 8259 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8260 8261 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8262 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8263 principal = self._parse_id_var() 8264 8265 if not principal: 8266 return None 8267 8268 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8269 8270 def _parse_grant(self) -> exp.Grant | exp.Command: 8271 start = self._prev 8272 8273 privileges = self._parse_csv(self._parse_grant_privilege) 8274 8275 self._match(TokenType.ON) 8276 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8277 8278 # Attempt to parse the securable e.g. 
MySQL allows names 8279 # such as "foo.*", "*.*" which are not easily parseable yet 8280 securable = self._try_parse(self._parse_table_parts) 8281 8282 if not securable or not self._match_text_seq("TO"): 8283 return self._parse_as_command(start) 8284 8285 principals = self._parse_csv(self._parse_grant_principal) 8286 8287 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8288 8289 if self._curr: 8290 return self._parse_as_command(start) 8291 8292 return self.expression( 8293 exp.Grant, 8294 privileges=privileges, 8295 kind=kind, 8296 securable=securable, 8297 principals=principals, 8298 grant_option=grant_option, 8299 ) 8300 8301 def _parse_overlay(self) -> exp.Overlay: 8302 return self.expression( 8303 exp.Overlay, 8304 **{ # type: ignore 8305 "this": self._parse_bitwise(), 8306 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8307 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8308 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8309 }, 8310 ) 8311 8312 def _parse_format_name(self) -> exp.Property: 8313 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8314 # for FILE_FORMAT = <format_name> 8315 return self.expression( 8316 exp.Property, 8317 this=exp.var("FORMAT_NAME"), 8318 value=self._parse_string() or self._parse_table_parts(), 8319 ) 8320 8321 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8322 args: t.List[exp.Expression] = [] 8323 8324 if self._match(TokenType.DISTINCT): 8325 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8326 self._match(TokenType.COMMA) 8327 8328 args.extend(self._parse_csv(self._parse_assignment)) 8329 8330 return self.expression( 8331 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8332 ) 8333 8334 def _identifier_expression( 8335 self, token: t.Optional[Token] = None, **kwargs: t.Any 8336 ) -> exp.Identifier: 8337 token = token or self._prev 8338 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8339 expression.update_positions(token) 8340 return expression 8341 8342 def _build_pipe_cte( 8343 self, 8344 query: exp.Query, 8345 expressions: t.List[exp.Expression], 8346 alias_cte: t.Optional[exp.TableAlias] = None, 8347 ) -> exp.Select: 8348 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8349 if alias_cte: 8350 new_cte = alias_cte 8351 else: 8352 self._pipe_cte_counter += 1 8353 new_cte = f"__tmp{self._pipe_cte_counter}" 8354 8355 with_ = query.args.get("with") 8356 ctes = with_.pop() if with_ else None 8357 8358 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8359 if ctes: 8360 new_select.set("with", ctes) 8361 8362 return new_select.with_(new_cte, as_=query, copy=False) 8363 8364 def _build_pipe_ctes( 8365 self, 8366 query: exp.Select, 8367 expressions: t.List[exp.Expression], 8368 alias_cte: t.Optional[exp.TableAlias] = None, 8369 ) -> exp.Select: 8370 select = query.selects[0].assert_is(exp.Star) 8371 if select.args.get("except") or select.args.get("replace"): 8372 query = self._build_pipe_cte( 8373 query=query.select( 8374 *[expr for expr in expressions if not expr.is_star and expr.args.get("alias")], 8375 copy=False, 8376 ), 8377 expressions=[ 8378 projection.args.get("alias", projection) for projection in expressions 8379 ], 8380 ) 8381 else: 8382 query.select(*expressions, append=False, copy=False) 8383 8384 return self._build_pipe_cte(query=query, expressions=[exp.Star()], alias_cte=alias_cte) 
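    # Illustrative note on the two helpers above: pipe syntax operators are
    # lowered onto ordinary SELECTs by stacking CTEs, so a query such as
    #   FROM t |> WHERE a > 1 |> SELECT a AS b
    # becomes, roughly,
    #   WITH __tmp1 AS (SELECT a AS b FROM t WHERE a > 1) SELECT * FROM __tmp1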
8385 8386 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8387 select = self._parse_select() 8388 if not select: 8389 return query 8390 8391 return self._build_pipe_ctes(query=query, expressions=select.expressions) 8392 8393 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8394 limit = self._parse_limit() 8395 offset = self._parse_offset() 8396 if limit: 8397 curr_limit = query.args.get("limit", limit) 8398 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8399 query.limit(limit, copy=False) 8400 if offset: 8401 curr_offset = query.args.get("offset") 8402 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8403 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8404 8405 return query 8406 8407 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8408 this = self._parse_assignment() 8409 if self._match_text_seq("GROUP", "AND", advance=False): 8410 return this 8411 8412 this = self._parse_alias(this) 8413 8414 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8415 return self._parse_ordered(lambda: this) 8416 8417 return this 8418 8419 def _parse_pipe_syntax_aggregate_group_order_by( 8420 self, query: exp.Select, group_by_exists: bool = True 8421 ) -> exp.Select: 8422 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8423 aggregates_or_groups, orders = [], [] 8424 for element in expr: 8425 if isinstance(element, exp.Ordered): 8426 this = element.this 8427 if isinstance(this, exp.Alias): 8428 element.set("this", this.args["alias"]) 8429 orders.append(element) 8430 else: 8431 this = element 8432 aggregates_or_groups.append(this) 8433 8434 if group_by_exists: 8435 query.select(*aggregates_or_groups, copy=False).group_by( 8436 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8437 copy=False, 8438 ) 8439 else: 8440 query.select(*aggregates_or_groups, copy=False) 8441 8442 if orders: 8443 return query.order_by(*orders, append=False, copy=False) 8444 8445 return query 8446 8447 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8448 self._match_text_seq("AGGREGATE") 8449 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8450 8451 if self._match(TokenType.GROUP_BY) or ( 8452 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8453 ): 8454 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8455 8456 return self._build_pipe_ctes( 8457 query=query, expressions=[expr for expr in query.selects if not expr.is_star] 8458 ) 8459 8460 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Select]: 8461 first_setop = self.parse_set_operation(this=query) 8462 if not first_setop: 8463 return None 8464 8465 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8466 expr = self._parse_paren() 8467 return expr.assert_is(exp.Subquery).unnest() if expr else None 8468 8469 first_setop.this.pop() 8470 8471 setops = [ 8472 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8473 *self._parse_csv(_parse_and_unwrap_query), 8474 ] 8475 8476 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8477 with_ = query.args.get("with") 8478 ctes = with_.pop() if with_ else None 8479 8480 if isinstance(first_setop, exp.Union): 8481 query = query.union(*setops, copy=False, **first_setop.args) 8482 elif isinstance(first_setop, exp.Except): 8483 query = query.except_(*setops, copy=False, 
**first_setop.args) 8484 else: 8485 query = query.intersect(*setops, copy=False, **first_setop.args) 8486 8487 query.set("with", ctes) 8488 8489 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8490 8491 def _parse_pipe_syntax_join(self, query: exp.Select) -> t.Optional[exp.Select]: 8492 join = self._parse_join() 8493 if not join: 8494 return None 8495 8496 return query.join(join, copy=False) 8497 8498 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8499 pivots = self._parse_pivots() 8500 if not pivots: 8501 return query 8502 8503 from_ = query.args.get("from") 8504 if from_: 8505 from_.this.set("pivots", pivots) 8506 8507 return self._build_pipe_ctes(query=query, expressions=[exp.Star()]) 8508 8509 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8510 self._match_text_seq("EXTEND") 8511 return self._build_pipe_ctes( 8512 query=query, 8513 expressions=[query.selects[0].assert_is(exp.Star), *self._parse_expressions()], 8514 ) 8515 8516 def _parse_pipe_syntax_drop(self, query: exp.Select) -> exp.Select: 8517 self._match_text_seq("DROP") 8518 dropped_columns = self._parse_csv(self._parse_assignment) 8519 8520 select = query.selects[0].assert_is(exp.Star) 8521 except_ = select.args.get("except") or [] 8522 select.set("except", [*except_, *dropped_columns]) 8523 8524 return query 8525 8526 def _parse_pipe_syntax_set(self, query: exp.Select) -> exp.Select: 8527 self._match_text_seq("SET") 8528 replaced_columns = [ 8529 self.expression(exp.Alias, this=expr.expression, alias=expr.this) 8530 for expr in self._parse_csv(self._parse_assignment) 8531 ] 8532 8533 select = query.selects[0].assert_is(exp.Star) 8534 replace_ = select.args.get("replace") or [] 8535 select.set("replace", [*replace_, *replaced_columns]) 8536 8537 return query 8538 8539 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8540 sample = self._parse_table_sample() 8541 8542 with_ = query.args.get("with") 8543 if with_: 8544 with_.expressions[-1].this.set("sample", sample) 8545 else: 8546 query.set("sample", sample) 8547 8548 return query 8549 8550 def _parse_pipe_syntax_query(self, query: exp.Select) -> t.Optional[exp.Select]: 8551 while self._match(TokenType.PIPE_GT): 8552 start = self._curr 8553 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8554 if not parser: 8555 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8556 # keywords, making it tricky to disambiguate them without lookahead. The approach 8557 # here is to try and parse a set operation and if that fails, then try to parse a 8558 # join operator. If that fails as well, then the operator is not supported. 8559 parsed_query = self._parse_pipe_syntax_set_operator(query) 8560 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8561 if not parsed_query: 8562 self._retreat(start) 8563 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8564 break 8565 query = parsed_query 8566 else: 8567 query = parser(self, query) 8568 8569 return query
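
# Illustrative usage sketch (added for exposition; _example_parser_usage is a
# hypothetical helper, not part of the module): the Parser above is normally
# driven through sqlglot's public API rather than instantiated by hand.
def _example_parser_usage() -> None:
    import sqlglot
    from sqlglot.dialects.dialect import Dialect

    # One-shot helper: tokenize and parse a single statement into an AST.
    ast = sqlglot.parse_one("SELECT a FROM t WHERE b > 1")
    assert ast.sql() == "SELECT a FROM t WHERE b > 1"

    # Driving a dialect's Parser directly, which is roughly what parse_one
    # does under the hood: tokenize first, then parse the token stream.
    sql = "SELECT a FROM t"
    dialect = Dialect.get_or_raise("duckdb")
    expressions = dialect.parser().parse(dialect.tokenize(sql), sql)
    assert expressions[0] is not None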
28def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 29 if len(args) == 1 and args[0].is_star: 30 return exp.StarMap(this=args[0]) 31 32 keys = [] 33 values = [] 34 for i in range(0, len(args), 2): 35 keys.append(args[i]) 36 values.append(args[i + 1]) 37 38 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
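Since build_var_map pairs alternating key/value arguments (or short-circuits on a lone star), its behavior is easy to check directly; a small sketch using only names defined in this module:

from sqlglot import exp
from sqlglot.parser import build_var_map

node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
assert isinstance(node, exp.VarMap)  # keys=["a"], values=[1]
assert isinstance(build_var_map([exp.Star()]), exp.StarMap)  # i.e. VAR_MAP(*)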
46def binary_range_parser( 47 expr_type: t.Type[exp.Expression], reverse_args: bool = False 48) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 49 def _parse_binary_range( 50 self: Parser, this: t.Optional[exp.Expression] 51 ) -> t.Optional[exp.Expression]: 52 expression = self._parse_bitwise() 53 if reverse_args: 54 this, expression = expression, this 55 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 56 57 return _parse_binary_range
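binary_range_parser only manufactures the callbacks that Parser.RANGE_PARSERS (further below) maps range tokens to, so its effect is best seen in the resulting tree. A sketch with the default dialect, where GLOB is wired through it:

import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("x GLOB 'a*'")
assert isinstance(node, exp.Glob)

The reverse_args=True variant swaps the two operands, which is how `<@` is parsed as ArrayContainsAll with its sides flipped relative to `@>`.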
60def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 61 # Default argument order is base, expression 62 this = seq_get(args, 0) 63 expression = seq_get(args, 1) 64 65 if expression: 66 if not dialect.LOG_BASE_FIRST: 67 this, expression = expression, this 68 return exp.Log(this=this, expression=expression) 69 70 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
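Both the operand order and the single-argument fallback are dialect-driven, so the same call text can produce different trees. A hedged sketch (the MySQL half assumes its parser sets LOG_DEFAULTS_TO_LN, as recent sqlglot versions do):

import sqlglot

# Two arguments: a Log node; LOG_BASE_FIRST decides which operand is the base.
log = sqlglot.parse_one("SELECT LOG(2, 8)").selects[0]

# One argument: dialects with LOG_DEFAULTS_TO_LN parse LOG(x) as Ln(x).
ln = sqlglot.parse_one("SELECT LOG(8)", read="mysql").selects[0]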
90def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 91 def _builder(args: t.List, dialect: Dialect) -> E: 92 expression = expr_type( 93 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 94 ) 95 if len(args) > 2 and expr_type is exp.JSONExtract: 96 expression.set("expressions", args[2:]) 97 98 return expression 99 100 return _builder
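Because the second argument is run through dialect.to_json_path, the resulting node carries a structured JSON path rather than a raw string. A sketch with the default dialect, whose FUNCTIONS table (below) wires JSON_EXTRACT through this builder:

import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a')").selects[0]
assert isinstance(node, exp.JSONExtract)
print(node.expression)  # the parsed JSON path, not the original string literal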
103def build_mod(args: t.List) -> exp.Mod: 104 this = seq_get(args, 0) 105 expression = seq_get(args, 1) 106 107 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 108 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 109 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 110 111 return exp.Mod(this=this, expression=expression)
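The parenthesization matters because exp.Mod is ultimately rendered as the infix `%` operator, which binds tighter than `+`. A quick sketch in the default dialect:

import sqlglot

node = sqlglot.parse_one("SELECT MOD(a + 1, 7)").selects[0]
# Without the Paren wrap, the output would re-parse as a + (1 % 7):
assert node.sql() == "(a + 1) % 7"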
123def build_array_constructor( 124 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 125) -> exp.Expression: 126 array_exp = exp_class(expressions=args) 127 128 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 129 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 130 131 return array_exp
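A hedged sketch of the call shape only; whether bracket_notation gets recorded depends on the dialect's HAS_DISTINCT_ARRAY_CONSTRUCTORS flag, and the dialect below is just an illustrative choice:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

node = build_array_constructor(
    exp.Array,
    args=[exp.Literal.number(1), exp.Literal.number(2)],
    bracket_kind=TokenType.L_BRACKET,  # i.e. the ARRAY[1, 2] spelling
    dialect=Dialect.get_or_raise("duckdb"),
)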
134def build_convert_timezone( 135 args: t.List, default_source_tz: t.Optional[str] = None 136) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 137 if len(args) == 2: 138 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 139 return exp.ConvertTimezone( 140 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 141 ) 142 143 return exp.ConvertTimezone.from_arg_list(args)
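The two-argument form lets a dialect supply an implicit source timezone (or leave it unset). A sketch using the default dialect, whose FUNCTIONS table below routes CONVERT_TIMEZONE through this builder:

import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("SELECT CONVERT_TIMEZONE('UTC', 'US/Pacific', ts)").selects[0]
assert isinstance(node, exp.ConvertTimezone)  # three args take the from_arg_list path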
178class Parser(metaclass=_Parser): 179 """ 180 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 181 182 Args: 183 error_level: The desired error level. 184 Default: ErrorLevel.IMMEDIATE 185 error_message_context: The amount of context to capture from a query string when displaying 186 the error message (in number of characters). 187 Default: 100 188 max_errors: Maximum number of error messages to include in a raised ParseError. 189 This is only relevant if error_level is ErrorLevel.RAISE. 190 Default: 3 191 """ 192 193 FUNCTIONS: t.Dict[str, t.Callable] = { 194 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 195 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 196 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 197 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "CHAR": lambda args: exp.Chr(expressions=args), 204 "CHR": lambda args: exp.Chr(expressions=args), 205 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 206 "CONCAT": lambda args, dialect: exp.Concat( 207 expressions=args, 208 safe=not dialect.STRICT_STRING_CONCAT, 209 coalesce=dialect.CONCAT_COALESCE, 210 ), 211 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 212 expressions=args, 213 safe=not dialect.STRICT_STRING_CONCAT, 214 coalesce=dialect.CONCAT_COALESCE, 215 ), 216 "CONVERT_TIMEZONE": build_convert_timezone, 217 "DATE_TO_DATE_STR": lambda args: exp.Cast( 218 this=seq_get(args, 0), 219 to=exp.DataType(this=exp.DataType.Type.TEXT), 220 ), 221 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 222 start=seq_get(args, 0), 223 end=seq_get(args, 1), 224 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 225 ), 226 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 227 "HEX": build_hex, 228 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 229 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 230 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 231 "LIKE": build_like, 232 "LOG": build_logarithm, 233 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 234 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 235 "LOWER": build_lower, 236 "LPAD": lambda args: build_pad(args), 237 "LEFTPAD": lambda args: build_pad(args), 238 "LTRIM": lambda args: build_trim(args), 239 "MOD": build_mod, 240 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 241 "RPAD": lambda args: build_pad(args, is_left=False), 242 "RTRIM": lambda args: build_trim(args, is_left=False), 243 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 244 if len(args) != 2 245 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 246 "STRPOS": exp.StrPosition.from_arg_list, 247 "CHARINDEX": lambda args: build_locate_strposition(args), 248 "INSTR": exp.StrPosition.from_arg_list, 249 "LOCATE": lambda args: build_locate_strposition(args), 250 "TIME_TO_TIME_STR": lambda args: exp.Cast( 251 this=seq_get(args, 0), 252 to=exp.DataType(this=exp.DataType.Type.TEXT), 253 ), 254 "TO_HEX": build_hex, 255 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 256 this=exp.Cast( 257 this=seq_get(args, 0), 258 to=exp.DataType(this=exp.DataType.Type.TEXT), 259 ), 260 start=exp.Literal.number(1), 261 length=exp.Literal.number(10), 262 ), 263 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 264 "UPPER": build_upper, 265 "VAR_MAP": build_var_map, 266 } 267 268 NO_PAREN_FUNCTIONS = { 269 TokenType.CURRENT_DATE: exp.CurrentDate, 270 TokenType.CURRENT_DATETIME: exp.CurrentDate, 271 TokenType.CURRENT_TIME: exp.CurrentTime, 272 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 273 TokenType.CURRENT_USER: exp.CurrentUser, 274 } 275 276 STRUCT_TYPE_TOKENS = { 277 TokenType.NESTED, 278 TokenType.OBJECT, 279 TokenType.STRUCT, 280 TokenType.UNION, 281 } 282 283 NESTED_TYPE_TOKENS = { 284 TokenType.ARRAY, 285 TokenType.LIST, 286 TokenType.LOWCARDINALITY, 287 TokenType.MAP, 288 TokenType.NULLABLE, 289 TokenType.RANGE, 290 *STRUCT_TYPE_TOKENS, 291 } 292 293 ENUM_TYPE_TOKENS = { 294 TokenType.DYNAMIC, 295 TokenType.ENUM, 296 TokenType.ENUM8, 297 TokenType.ENUM16, 298 } 299 300 AGGREGATE_TYPE_TOKENS = { 301 TokenType.AGGREGATEFUNCTION, 302 TokenType.SIMPLEAGGREGATEFUNCTION, 303 } 304 305 TYPE_TOKENS = { 306 TokenType.BIT, 307 TokenType.BOOLEAN, 308 TokenType.TINYINT, 309 TokenType.UTINYINT, 310 TokenType.SMALLINT, 311 TokenType.USMALLINT, 312 TokenType.INT, 313 TokenType.UINT, 314 TokenType.BIGINT, 315 TokenType.UBIGINT, 316 TokenType.INT128, 317 TokenType.UINT128, 318 TokenType.INT256, 319 TokenType.UINT256, 320 TokenType.MEDIUMINT, 321 TokenType.UMEDIUMINT, 322 TokenType.FIXEDSTRING, 323 TokenType.FLOAT, 324 TokenType.DOUBLE, 325 TokenType.UDOUBLE, 326 TokenType.CHAR, 327 TokenType.NCHAR, 328 TokenType.VARCHAR, 329 TokenType.NVARCHAR, 330 TokenType.BPCHAR, 331 TokenType.TEXT, 332 TokenType.MEDIUMTEXT, 333 TokenType.LONGTEXT, 334 TokenType.BLOB, 335 TokenType.MEDIUMBLOB, 336 TokenType.LONGBLOB, 337 TokenType.BINARY, 338 TokenType.VARBINARY, 339 TokenType.JSON, 340 TokenType.JSONB, 341 TokenType.INTERVAL, 342 TokenType.TINYBLOB, 343 TokenType.TINYTEXT, 344 TokenType.TIME, 345 TokenType.TIMETZ, 346 TokenType.TIMESTAMP, 347 TokenType.TIMESTAMP_S, 348 TokenType.TIMESTAMP_MS, 349 TokenType.TIMESTAMP_NS, 350 TokenType.TIMESTAMPTZ, 351 TokenType.TIMESTAMPLTZ, 352 TokenType.TIMESTAMPNTZ, 353 TokenType.DATETIME, 354 TokenType.DATETIME2, 355 TokenType.DATETIME64, 356 TokenType.SMALLDATETIME, 357 TokenType.DATE, 358 TokenType.DATE32, 359 TokenType.INT4RANGE, 360 TokenType.INT4MULTIRANGE, 361 TokenType.INT8RANGE, 362 TokenType.INT8MULTIRANGE, 363 TokenType.NUMRANGE, 364 TokenType.NUMMULTIRANGE, 365 TokenType.TSRANGE, 366 TokenType.TSMULTIRANGE, 367 TokenType.TSTZRANGE, 368 TokenType.TSTZMULTIRANGE, 369 TokenType.DATERANGE, 370 TokenType.DATEMULTIRANGE, 371 TokenType.DECIMAL, 372 TokenType.DECIMAL32, 373 TokenType.DECIMAL64, 374 TokenType.DECIMAL128, 375 TokenType.DECIMAL256, 376 TokenType.UDECIMAL, 377 TokenType.BIGDECIMAL, 378 TokenType.UUID, 379 TokenType.GEOGRAPHY, 380 TokenType.GEOMETRY, 381 TokenType.POINT, 382 TokenType.RING, 383 TokenType.LINESTRING, 384 TokenType.MULTILINESTRING, 385 TokenType.POLYGON, 386 TokenType.MULTIPOLYGON, 387 TokenType.HLLSKETCH, 388 TokenType.HSTORE, 389 TokenType.PSEUDO_TYPE, 390 TokenType.SUPER, 391 TokenType.SERIAL, 392 TokenType.SMALLSERIAL, 393 TokenType.BIGSERIAL, 394 TokenType.XML, 395 TokenType.YEAR, 396 TokenType.USERDEFINED, 397 TokenType.MONEY, 398 TokenType.SMALLMONEY, 399 TokenType.ROWVERSION, 400 TokenType.IMAGE, 401 TokenType.VARIANT, 402 TokenType.VECTOR, 403 
TokenType.VOID, 404 TokenType.OBJECT, 405 TokenType.OBJECT_IDENTIFIER, 406 TokenType.INET, 407 TokenType.IPADDRESS, 408 TokenType.IPPREFIX, 409 TokenType.IPV4, 410 TokenType.IPV6, 411 TokenType.UNKNOWN, 412 TokenType.NOTHING, 413 TokenType.NULL, 414 TokenType.NAME, 415 TokenType.TDIGEST, 416 TokenType.DYNAMIC, 417 *ENUM_TYPE_TOKENS, 418 *NESTED_TYPE_TOKENS, 419 *AGGREGATE_TYPE_TOKENS, 420 } 421 422 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 423 TokenType.BIGINT: TokenType.UBIGINT, 424 TokenType.INT: TokenType.UINT, 425 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 426 TokenType.SMALLINT: TokenType.USMALLINT, 427 TokenType.TINYINT: TokenType.UTINYINT, 428 TokenType.DECIMAL: TokenType.UDECIMAL, 429 TokenType.DOUBLE: TokenType.UDOUBLE, 430 } 431 432 SUBQUERY_PREDICATES = { 433 TokenType.ANY: exp.Any, 434 TokenType.ALL: exp.All, 435 TokenType.EXISTS: exp.Exists, 436 TokenType.SOME: exp.Any, 437 } 438 439 RESERVED_TOKENS = { 440 *Tokenizer.SINGLE_TOKENS.values(), 441 TokenType.SELECT, 442 } - {TokenType.IDENTIFIER} 443 444 DB_CREATABLES = { 445 TokenType.DATABASE, 446 TokenType.DICTIONARY, 447 TokenType.FILE_FORMAT, 448 TokenType.MODEL, 449 TokenType.NAMESPACE, 450 TokenType.SCHEMA, 451 TokenType.SEQUENCE, 452 TokenType.SINK, 453 TokenType.SOURCE, 454 TokenType.STAGE, 455 TokenType.STORAGE_INTEGRATION, 456 TokenType.STREAMLIT, 457 TokenType.TABLE, 458 TokenType.TAG, 459 TokenType.VIEW, 460 TokenType.WAREHOUSE, 461 } 462 463 CREATABLES = { 464 TokenType.COLUMN, 465 TokenType.CONSTRAINT, 466 TokenType.FOREIGN_KEY, 467 TokenType.FUNCTION, 468 TokenType.INDEX, 469 TokenType.PROCEDURE, 470 *DB_CREATABLES, 471 } 472 473 ALTERABLES = { 474 TokenType.INDEX, 475 TokenType.TABLE, 476 TokenType.VIEW, 477 } 478 479 # Tokens that can represent identifiers 480 ID_VAR_TOKENS = { 481 TokenType.ALL, 482 TokenType.ATTACH, 483 TokenType.VAR, 484 TokenType.ANTI, 485 TokenType.APPLY, 486 TokenType.ASC, 487 TokenType.ASOF, 488 TokenType.AUTO_INCREMENT, 489 TokenType.BEGIN, 490 TokenType.BPCHAR, 491 TokenType.CACHE, 492 TokenType.CASE, 493 TokenType.COLLATE, 494 TokenType.COMMAND, 495 TokenType.COMMENT, 496 TokenType.COMMIT, 497 TokenType.CONSTRAINT, 498 TokenType.COPY, 499 TokenType.CUBE, 500 TokenType.CURRENT_SCHEMA, 501 TokenType.DEFAULT, 502 TokenType.DELETE, 503 TokenType.DESC, 504 TokenType.DESCRIBE, 505 TokenType.DETACH, 506 TokenType.DICTIONARY, 507 TokenType.DIV, 508 TokenType.END, 509 TokenType.EXECUTE, 510 TokenType.EXPORT, 511 TokenType.ESCAPE, 512 TokenType.FALSE, 513 TokenType.FIRST, 514 TokenType.FILTER, 515 TokenType.FINAL, 516 TokenType.FORMAT, 517 TokenType.FULL, 518 TokenType.GET, 519 TokenType.IDENTIFIER, 520 TokenType.IS, 521 TokenType.ISNULL, 522 TokenType.INTERVAL, 523 TokenType.KEEP, 524 TokenType.KILL, 525 TokenType.LEFT, 526 TokenType.LIMIT, 527 TokenType.LOAD, 528 TokenType.MERGE, 529 TokenType.NATURAL, 530 TokenType.NEXT, 531 TokenType.OFFSET, 532 TokenType.OPERATOR, 533 TokenType.ORDINALITY, 534 TokenType.OVERLAPS, 535 TokenType.OVERWRITE, 536 TokenType.PARTITION, 537 TokenType.PERCENT, 538 TokenType.PIVOT, 539 TokenType.PRAGMA, 540 TokenType.PUT, 541 TokenType.RANGE, 542 TokenType.RECURSIVE, 543 TokenType.REFERENCES, 544 TokenType.REFRESH, 545 TokenType.RENAME, 546 TokenType.REPLACE, 547 TokenType.RIGHT, 548 TokenType.ROLLUP, 549 TokenType.ROW, 550 TokenType.ROWS, 551 TokenType.SEMI, 552 TokenType.SET, 553 TokenType.SETTINGS, 554 TokenType.SHOW, 555 TokenType.TEMPORARY, 556 TokenType.TOP, 557 TokenType.TRUE, 558 TokenType.TRUNCATE, 559 TokenType.UNIQUE, 560 TokenType.UNNEST, 561 TokenType.UNPIVOT, 
562 TokenType.UPDATE, 563 TokenType.USE, 564 TokenType.VOLATILE, 565 TokenType.WINDOW, 566 *CREATABLES, 567 *SUBQUERY_PREDICATES, 568 *TYPE_TOKENS, 569 *NO_PAREN_FUNCTIONS, 570 } 571 ID_VAR_TOKENS.remove(TokenType.UNION) 572 573 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 574 TokenType.ANTI, 575 TokenType.APPLY, 576 TokenType.ASOF, 577 TokenType.FULL, 578 TokenType.LEFT, 579 TokenType.LOCK, 580 TokenType.NATURAL, 581 TokenType.RIGHT, 582 TokenType.SEMI, 583 TokenType.WINDOW, 584 } 585 586 ALIAS_TOKENS = ID_VAR_TOKENS 587 588 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 589 590 ARRAY_CONSTRUCTORS = { 591 "ARRAY": exp.Array, 592 "LIST": exp.List, 593 } 594 595 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 596 597 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 598 599 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 600 601 FUNC_TOKENS = { 602 TokenType.COLLATE, 603 TokenType.COMMAND, 604 TokenType.CURRENT_DATE, 605 TokenType.CURRENT_DATETIME, 606 TokenType.CURRENT_SCHEMA, 607 TokenType.CURRENT_TIMESTAMP, 608 TokenType.CURRENT_TIME, 609 TokenType.CURRENT_USER, 610 TokenType.FILTER, 611 TokenType.FIRST, 612 TokenType.FORMAT, 613 TokenType.GET, 614 TokenType.GLOB, 615 TokenType.IDENTIFIER, 616 TokenType.INDEX, 617 TokenType.ISNULL, 618 TokenType.ILIKE, 619 TokenType.INSERT, 620 TokenType.LIKE, 621 TokenType.MERGE, 622 TokenType.NEXT, 623 TokenType.OFFSET, 624 TokenType.PRIMARY_KEY, 625 TokenType.RANGE, 626 TokenType.REPLACE, 627 TokenType.RLIKE, 628 TokenType.ROW, 629 TokenType.UNNEST, 630 TokenType.VAR, 631 TokenType.LEFT, 632 TokenType.RIGHT, 633 TokenType.SEQUENCE, 634 TokenType.DATE, 635 TokenType.DATETIME, 636 TokenType.TABLE, 637 TokenType.TIMESTAMP, 638 TokenType.TIMESTAMPTZ, 639 TokenType.TRUNCATE, 640 TokenType.WINDOW, 641 TokenType.XOR, 642 *TYPE_TOKENS, 643 *SUBQUERY_PREDICATES, 644 } 645 646 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.AND: exp.And, 648 } 649 650 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.COLON_EQ: exp.PropertyEQ, 652 } 653 654 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.OR: exp.Or, 656 } 657 658 EQUALITY = { 659 TokenType.EQ: exp.EQ, 660 TokenType.NEQ: exp.NEQ, 661 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 662 } 663 664 COMPARISON = { 665 TokenType.GT: exp.GT, 666 TokenType.GTE: exp.GTE, 667 TokenType.LT: exp.LT, 668 TokenType.LTE: exp.LTE, 669 } 670 671 BITWISE = { 672 TokenType.AMP: exp.BitwiseAnd, 673 TokenType.CARET: exp.BitwiseXor, 674 TokenType.PIPE: exp.BitwiseOr, 675 } 676 677 TERM = { 678 TokenType.DASH: exp.Sub, 679 TokenType.PLUS: exp.Add, 680 TokenType.MOD: exp.Mod, 681 TokenType.COLLATE: exp.Collate, 682 } 683 684 FACTOR = { 685 TokenType.DIV: exp.IntDiv, 686 TokenType.LR_ARROW: exp.Distance, 687 TokenType.SLASH: exp.Div, 688 TokenType.STAR: exp.Mul, 689 } 690 691 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 692 693 TIMES = { 694 TokenType.TIME, 695 TokenType.TIMETZ, 696 } 697 698 TIMESTAMPS = { 699 TokenType.TIMESTAMP, 700 TokenType.TIMESTAMPNTZ, 701 TokenType.TIMESTAMPTZ, 702 TokenType.TIMESTAMPLTZ, 703 *TIMES, 704 } 705 706 SET_OPERATIONS = { 707 TokenType.UNION, 708 TokenType.INTERSECT, 709 TokenType.EXCEPT, 710 } 711 712 JOIN_METHODS = { 713 TokenType.ASOF, 714 TokenType.NATURAL, 715 TokenType.POSITIONAL, 716 } 717 718 JOIN_SIDES = { 719 TokenType.LEFT, 720 TokenType.RIGHT, 721 TokenType.FULL, 722 } 723 724 JOIN_KINDS = { 725 TokenType.ANTI, 726 TokenType.CROSS, 727 TokenType.INNER, 728 TokenType.OUTER, 729 TokenType.SEMI, 730 
TokenType.STRAIGHT_JOIN, 731 } 732 733 JOIN_HINTS: t.Set[str] = set() 734 735 LAMBDAS = { 736 TokenType.ARROW: lambda self, expressions: self.expression( 737 exp.Lambda, 738 this=self._replace_lambda( 739 self._parse_assignment(), 740 expressions, 741 ), 742 expressions=expressions, 743 ), 744 TokenType.FARROW: lambda self, expressions: self.expression( 745 exp.Kwarg, 746 this=exp.var(expressions[0].name), 747 expression=self._parse_assignment(), 748 ), 749 } 750 751 COLUMN_OPERATORS = { 752 TokenType.DOT: None, 753 TokenType.DOTCOLON: lambda self, this, to: self.expression( 754 exp.JSONCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.DCOLON: lambda self, this, to: self.expression( 759 exp.Cast if self.STRICT_CAST else exp.TryCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.ARROW: lambda self, this, path: self.expression( 764 exp.JSONExtract, 765 this=this, 766 expression=self.dialect.to_json_path(path), 767 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 768 ), 769 TokenType.DARROW: lambda self, this, path: self.expression( 770 exp.JSONExtractScalar, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtract, 777 this=this, 778 expression=path, 779 ), 780 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtractScalar, 782 this=this, 783 expression=path, 784 ), 785 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 786 exp.JSONBContains, 787 this=this, 788 expression=key, 789 ), 790 } 791 792 EXPRESSION_PARSERS = { 793 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 exp.Column: lambda self: self._parse_column(), 795 exp.Condition: lambda self: self._parse_assignment(), 796 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 797 exp.Expression: lambda self: self._parse_expression(), 798 exp.From: lambda self: self._parse_from(joins=True), 799 exp.Group: lambda self: self._parse_group(), 800 exp.Having: lambda self: self._parse_having(), 801 exp.Hint: lambda self: self._parse_hint_body(), 802 exp.Identifier: lambda self: self._parse_id_var(), 803 exp.Join: lambda self: self._parse_join(), 804 exp.Lambda: lambda self: self._parse_lambda(), 805 exp.Lateral: lambda self: self._parse_lateral(), 806 exp.Limit: lambda self: self._parse_limit(), 807 exp.Offset: lambda self: self._parse_offset(), 808 exp.Order: lambda self: self._parse_order(), 809 exp.Ordered: lambda self: self._parse_ordered(), 810 exp.Properties: lambda self: self._parse_properties(), 811 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 812 exp.Qualify: lambda self: self._parse_qualify(), 813 exp.Returning: lambda self: self._parse_returning(), 814 exp.Select: lambda self: self._parse_select(), 815 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 816 exp.Table: lambda self: self._parse_table_parts(), 817 exp.TableAlias: lambda self: self._parse_table_alias(), 818 exp.Tuple: lambda self: self._parse_value(values=False), 819 exp.Whens: lambda self: self._parse_when_matched(), 820 exp.Where: lambda self: self._parse_where(), 821 exp.Window: lambda self: self._parse_named_window(), 822 exp.With: lambda self: self._parse_with(), 823 "JOIN_TYPE": lambda self: self._parse_join_parts(), 824 } 825 826 STATEMENT_PARSERS = { 827 TokenType.ALTER: lambda self: self._parse_alter(), 828 TokenType.ANALYZE: lambda self: 
self._parse_analyze(), 829 TokenType.BEGIN: lambda self: self._parse_transaction(), 830 TokenType.CACHE: lambda self: self._parse_cache(), 831 TokenType.COMMENT: lambda self: self._parse_comment(), 832 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 833 TokenType.COPY: lambda self: self._parse_copy(), 834 TokenType.CREATE: lambda self: self._parse_create(), 835 TokenType.DELETE: lambda self: self._parse_delete(), 836 TokenType.DESC: lambda self: self._parse_describe(), 837 TokenType.DESCRIBE: lambda self: self._parse_describe(), 838 TokenType.DROP: lambda self: self._parse_drop(), 839 TokenType.GRANT: lambda self: self._parse_grant(), 840 TokenType.INSERT: lambda self: self._parse_insert(), 841 TokenType.KILL: lambda self: self._parse_kill(), 842 TokenType.LOAD: lambda self: self._parse_load(), 843 TokenType.MERGE: lambda self: self._parse_merge(), 844 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 845 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 846 TokenType.REFRESH: lambda self: self._parse_refresh(), 847 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 848 TokenType.SET: lambda self: self._parse_set(), 849 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 850 TokenType.UNCACHE: lambda self: self._parse_uncache(), 851 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 852 TokenType.UPDATE: lambda self: self._parse_update(), 853 TokenType.USE: lambda self: self._parse_use(), 854 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 855 } 856 857 UNARY_PARSERS = { 858 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 859 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 860 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 861 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 862 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 863 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 864 } 865 866 STRING_PARSERS = { 867 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 868 exp.RawString, this=token.text 869 ), 870 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 871 exp.National, this=token.text 872 ), 873 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 874 TokenType.STRING: lambda self, token: self.expression( 875 exp.Literal, this=token.text, is_string=True 876 ), 877 TokenType.UNICODE_STRING: lambda self, token: self.expression( 878 exp.UnicodeString, 879 this=token.text, 880 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 881 ), 882 } 883 884 NUMERIC_PARSERS = { 885 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 886 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 887 TokenType.HEX_STRING: lambda self, token: self.expression( 888 exp.HexString, 889 this=token.text, 890 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 891 ), 892 TokenType.NUMBER: lambda self, token: self.expression( 893 exp.Literal, this=token.text, is_string=False 894 ), 895 } 896 897 PRIMARY_PARSERS = { 898 **STRING_PARSERS, 899 **NUMERIC_PARSERS, 900 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 901 TokenType.NULL: lambda self, _: self.expression(exp.Null), 902 
TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 903 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 904 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 905 TokenType.STAR: lambda self, _: self._parse_star_ops(), 906 } 907 908 PLACEHOLDER_PARSERS = { 909 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 910 TokenType.PARAMETER: lambda self: self._parse_parameter(), 911 TokenType.COLON: lambda self: ( 912 self.expression(exp.Placeholder, this=self._prev.text) 913 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 914 else None 915 ), 916 } 917 918 RANGE_PARSERS = { 919 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 920 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 921 TokenType.GLOB: binary_range_parser(exp.Glob), 922 TokenType.ILIKE: binary_range_parser(exp.ILike), 923 TokenType.IN: lambda self, this: self._parse_in(this), 924 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 925 TokenType.IS: lambda self, this: self._parse_is(this), 926 TokenType.LIKE: binary_range_parser(exp.Like), 927 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 928 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 929 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 930 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 931 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 932 } 933 934 PIPE_SYNTAX_TRANSFORM_PARSERS = { 935 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 936 "AS": lambda self, query: self._build_pipe_cte( 937 query, [exp.Star()], self._parse_table_alias() 938 ), 939 "DROP": lambda self, query: self._parse_pipe_syntax_drop(query), 940 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 941 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 942 "ORDER BY": lambda self, query: query.order_by( 943 self._parse_order(), append=False, copy=False 944 ), 945 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 946 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 947 "SET": lambda self, query: self._parse_pipe_syntax_set(query), 948 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 949 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 950 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 951 } 952 953 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 954 "ALLOWED_VALUES": lambda self: self.expression( 955 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 956 ), 957 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 958 "AUTO": lambda self: self._parse_auto_property(), 959 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 960 "BACKUP": lambda self: self.expression( 961 exp.BackupProperty, this=self._parse_var(any_token=True) 962 ), 963 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 964 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 965 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 966 "CHECKSUM": lambda self: self._parse_checksum(), 967 "CLUSTER BY": lambda self: self._parse_cluster(), 968 "CLUSTERED": lambda self: self._parse_clustered_by(), 969 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 970 exp.CollateProperty, **kwargs 971 ), 972 "COMMENT": lambda self: 
self._parse_property_assignment(exp.SchemaCommentProperty), 973 "CONTAINS": lambda self: self._parse_contains_property(), 974 "COPY": lambda self: self._parse_copy_property(), 975 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 976 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 977 "DEFINER": lambda self: self._parse_definer(), 978 "DETERMINISTIC": lambda self: self.expression( 979 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 980 ), 981 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 982 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 983 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 984 "DISTKEY": lambda self: self._parse_distkey(), 985 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 986 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 987 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 988 "ENVIRONMENT": lambda self: self.expression( 989 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 990 ), 991 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 992 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 993 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 994 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 995 "FREESPACE": lambda self: self._parse_freespace(), 996 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 997 "HEAP": lambda self: self.expression(exp.HeapProperty), 998 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 999 "IMMUTABLE": lambda self: self.expression( 1000 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1001 ), 1002 "INHERITS": lambda self: self.expression( 1003 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1004 ), 1005 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1006 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1007 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1008 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1009 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1010 "LIKE": lambda self: self._parse_create_like(), 1011 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1012 "LOCK": lambda self: self._parse_locking(), 1013 "LOCKING": lambda self: self._parse_locking(), 1014 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1015 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1016 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1017 "MODIFIES": lambda self: self._parse_modifies_property(), 1018 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1019 "NO": lambda self: self._parse_no_property(), 1020 "ON": lambda self: self._parse_on_property(), 1021 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1022 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1023 "PARTITION": lambda self: self._parse_partitioned_of(), 1024 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1025 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1026 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1027 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 
1028 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1029 "READS": lambda self: self._parse_reads_property(), 1030 "REMOTE": lambda self: self._parse_remote_with_connection(), 1031 "RETURNS": lambda self: self._parse_returns(), 1032 "STRICT": lambda self: self.expression(exp.StrictProperty), 1033 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1034 "ROW": lambda self: self._parse_row(), 1035 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1036 "SAMPLE": lambda self: self.expression( 1037 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1038 ), 1039 "SECURE": lambda self: self.expression(exp.SecureProperty), 1040 "SECURITY": lambda self: self._parse_security(), 1041 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1042 "SETTINGS": lambda self: self._parse_settings_property(), 1043 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1044 "SORTKEY": lambda self: self._parse_sortkey(), 1045 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1046 "STABLE": lambda self: self.expression( 1047 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1048 ), 1049 "STORED": lambda self: self._parse_stored(), 1050 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1051 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1052 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1053 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1054 "TO": lambda self: self._parse_to_table(), 1055 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1056 "TRANSFORM": lambda self: self.expression( 1057 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1058 ), 1059 "TTL": lambda self: self._parse_ttl(), 1060 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1061 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1062 "VOLATILE": lambda self: self._parse_volatile_property(), 1063 "WITH": lambda self: self._parse_with_property(), 1064 } 1065 1066 CONSTRAINT_PARSERS = { 1067 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1068 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1069 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1070 "CHARACTER SET": lambda self: self.expression( 1071 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1072 ), 1073 "CHECK": lambda self: self.expression( 1074 exp.CheckColumnConstraint, 1075 this=self._parse_wrapped(self._parse_assignment), 1076 enforced=self._match_text_seq("ENFORCED"), 1077 ), 1078 "COLLATE": lambda self: self.expression( 1079 exp.CollateColumnConstraint, 1080 this=self._parse_identifier() or self._parse_column(), 1081 ), 1082 "COMMENT": lambda self: self.expression( 1083 exp.CommentColumnConstraint, this=self._parse_string() 1084 ), 1085 "COMPRESS": lambda self: self._parse_compress(), 1086 "CLUSTERED": lambda self: self.expression( 1087 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1088 ), 1089 "NONCLUSTERED": lambda self: self.expression( 1090 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1091 ), 1092 "DEFAULT": lambda self: self.expression( 1093 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1094 ), 1095 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1096 
"EPHEMERAL": lambda self: self.expression( 1097 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1098 ), 1099 "EXCLUDE": lambda self: self.expression( 1100 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1101 ), 1102 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1103 "FORMAT": lambda self: self.expression( 1104 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1105 ), 1106 "GENERATED": lambda self: self._parse_generated_as_identity(), 1107 "IDENTITY": lambda self: self._parse_auto_increment(), 1108 "INLINE": lambda self: self._parse_inline(), 1109 "LIKE": lambda self: self._parse_create_like(), 1110 "NOT": lambda self: self._parse_not_constraint(), 1111 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1112 "ON": lambda self: ( 1113 self._match(TokenType.UPDATE) 1114 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1115 ) 1116 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1117 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1118 "PERIOD": lambda self: self._parse_period_for_system_time(), 1119 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1120 "REFERENCES": lambda self: self._parse_references(match=False), 1121 "TITLE": lambda self: self.expression( 1122 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1123 ), 1124 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1125 "UNIQUE": lambda self: self._parse_unique(), 1126 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1127 "WATERMARK": lambda self: self.expression( 1128 exp.WatermarkColumnConstraint, 1129 this=self._match(TokenType.FOR) and self._parse_column(), 1130 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1131 ), 1132 "WITH": lambda self: self.expression( 1133 exp.Properties, expressions=self._parse_wrapped_properties() 1134 ), 1135 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1136 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1137 } 1138 1139 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1140 klass = ( 1141 exp.PartitionedByBucket 1142 if self._prev.text.upper() == "BUCKET" 1143 else exp.PartitionByTruncate 1144 ) 1145 1146 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1147 this, expression = seq_get(args, 0), seq_get(args, 1) 1148 1149 if isinstance(this, exp.Literal): 1150 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1151 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1152 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1153 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1154 # 1155 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1156 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1157 this, expression = expression, this 1158 1159 return self.expression(klass, this=this, expression=expression) 1160 1161 ALTER_PARSERS = { 1162 "ADD": lambda self: self._parse_alter_table_add(), 1163 "AS": lambda self: self._parse_select(), 1164 "ALTER": lambda self: self._parse_alter_table_alter(), 1165 "CLUSTER BY": 
lambda self: self._parse_cluster(wrapped=True), 1166 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1167 "DROP": lambda self: self._parse_alter_table_drop(), 1168 "RENAME": lambda self: self._parse_alter_table_rename(), 1169 "SET": lambda self: self._parse_alter_table_set(), 1170 "SWAP": lambda self: self.expression( 1171 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1172 ), 1173 } 1174 1175 ALTER_ALTER_PARSERS = { 1176 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1177 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1178 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1179 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1180 } 1181 1182 SCHEMA_UNNAMED_CONSTRAINTS = { 1183 "CHECK", 1184 "EXCLUDE", 1185 "FOREIGN KEY", 1186 "LIKE", 1187 "PERIOD", 1188 "PRIMARY KEY", 1189 "UNIQUE", 1190 "WATERMARK", 1191 "BUCKET", 1192 "TRUNCATE", 1193 } 1194 1195 NO_PAREN_FUNCTION_PARSERS = { 1196 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1197 "CASE": lambda self: self._parse_case(), 1198 "CONNECT_BY_ROOT": lambda self: self.expression( 1199 exp.ConnectByRoot, this=self._parse_column() 1200 ), 1201 "IF": lambda self: self._parse_if(), 1202 } 1203 1204 INVALID_FUNC_NAME_TOKENS = { 1205 TokenType.IDENTIFIER, 1206 TokenType.STRING, 1207 } 1208 1209 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1210 1211 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1212 1213 FUNCTION_PARSERS = { 1214 **{ 1215 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1216 }, 1217 **{ 1218 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1219 }, 1220 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1221 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1222 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1223 "DECODE": lambda self: self._parse_decode(), 1224 "EXTRACT": lambda self: self._parse_extract(), 1225 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1226 "GAP_FILL": lambda self: self._parse_gap_fill(), 1227 "JSON_OBJECT": lambda self: self._parse_json_object(), 1228 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1229 "JSON_TABLE": lambda self: self._parse_json_table(), 1230 "MATCH": lambda self: self._parse_match_against(), 1231 "NORMALIZE": lambda self: self._parse_normalize(), 1232 "OPENJSON": lambda self: self._parse_open_json(), 1233 "OVERLAY": lambda self: self._parse_overlay(), 1234 "POSITION": lambda self: self._parse_position(), 1235 "PREDICT": lambda self: self._parse_predict(), 1236 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1237 "STRING_AGG": lambda self: self._parse_string_agg(), 1238 "SUBSTRING": lambda self: self._parse_substring(), 1239 "TRIM": lambda self: self._parse_trim(), 1240 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1241 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1242 "XMLELEMENT": lambda self: self.expression( 1243 exp.XMLElement, 1244 this=self._match_text_seq("NAME") and self._parse_id_var(), 1245 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1246 ), 1247 "XMLTABLE": lambda self: self._parse_xml_table(), 1248 } 1249 1250 QUERY_MODIFIER_PARSERS = { 1251 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1252 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1253 
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS:
OPTIONS_TYPE = { 1347 "TYPE": ("EVOLUTION",), 1348 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1349 } 1350 1351 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1352 1353 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1354 1355 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1356 "NOT": ("ENFORCED",), 1357 "MATCH": ( 1358 "FULL", 1359 "PARTIAL", 1360 "SIMPLE", 1361 ), 1362 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1363 "USING": ( 1364 "BTREE", 1365 "HASH", 1366 ), 1367 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1368 } 1369 1370 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1371 "NO": ("OTHERS",), 1372 "CURRENT": ("ROW",), 1373 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1374 } 1375 1376 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1377 1378 CLONE_KEYWORDS = {"CLONE", "COPY"} 1379 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1380 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1381 1382 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1383 1384 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1385 1386 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1387 1388 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1389 1390 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1391 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1392 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1393 1394 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1395 1396 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1397 1398 ADD_CONSTRAINT_TOKENS = { 1399 TokenType.CONSTRAINT, 1400 TokenType.FOREIGN_KEY, 1401 TokenType.INDEX, 1402 TokenType.KEY, 1403 TokenType.PRIMARY_KEY, 1404 TokenType.UNIQUE, 1405 } 1406 1407 DISTINCT_TOKENS = {TokenType.DISTINCT} 1408 1409 NULL_TOKENS = {TokenType.NULL} 1410 1411 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1412 1413 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1414 1415 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1416 1417 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1418 1419 ODBC_DATETIME_LITERALS = { 1420 "d": exp.Date, 1421 "t": exp.Time, 1422 "ts": exp.Timestamp, 1423 } 1424 1425 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1426 1427 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1428 1429 # The style options for the DESCRIBE statement 1430 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1431 1432 # The style options for the ANALYZE statement 1433 ANALYZE_STYLES = { 1434 "BUFFER_USAGE_LIMIT", 1435 "FULL", 1436 "LOCAL", 1437 "NO_WRITE_TO_BINLOG", 1438 "SAMPLE", 1439 "SKIP_LOCKED", 1440 "VERBOSE", 1441 } 1442 1443 ANALYZE_EXPRESSION_PARSERS = { 1444 "ALL": lambda self: self._parse_analyze_columns(), 1445 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1446 "DELETE": lambda self: self._parse_analyze_delete(), 1447 "DROP": lambda self: self._parse_analyze_histogram(), 1448 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1449 "LIST": lambda self: self._parse_analyze_list(), 1450 "PREDICATE": lambda self: self._parse_analyze_columns(), 1451 "UPDATE": lambda self: self._parse_analyze_histogram(), 1452 "VALIDATE": lambda self: self._parse_analyze_validate(), 1453 } 1454 1455 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1456 
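    # Editor's note (illustrative, not part of the source): OPTIONS_TYPE values map a
    # leading keyword to the words or word sequences allowed to follow it, and an empty
    # tuple marks a keyword that stands alone. Reading TRANSACTION_CHARACTERISTICS above,
    # the parser accepts, keyword by keyword:
    #
    #     SET TRANSACTION ISOLATION LEVEL READ COMMITTED
    #     SET TRANSACTION READ ONLY
    #
    # since "ISOLATION" allows the ("LEVEL", "READ", "COMMITTED") tail and "READ" allows
    # the single followers "WRITE" and "ONLY".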
1457 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1458 1459 OPERATION_MODIFIERS: t.Set[str] = set() 1460 1461 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1462 1463 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1464 1465 STRICT_CAST = True 1466 1467 PREFIXED_PIVOT_COLUMNS = False 1468 IDENTIFY_PIVOT_STRINGS = False 1469 1470 LOG_DEFAULTS_TO_LN = False 1471 1472 # Whether the table sample clause expects CSV syntax 1473 TABLESAMPLE_CSV = False 1474 1475 # The default method used for table sampling 1476 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1477 1478 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1479 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1480 1481 # Whether the TRIM function expects the characters to trim as its first argument 1482 TRIM_PATTERN_FIRST = False 1483 1484 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1485 STRING_ALIASES = False 1486 1487 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1488 MODIFIERS_ATTACHED_TO_SET_OP = True 1489 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1490 1491 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1492 NO_PAREN_IF_COMMANDS = True 1493 1494 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1495 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1496 1497 # Whether the `:` operator is used to extract a value from a VARIANT column 1498 COLON_IS_VARIANT_EXTRACT = False 1499 1500 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1501 # If this is True and '(' is not found, the keyword will be treated as an identifier 1502 VALUES_FOLLOWED_BY_PAREN = True 1503 1504 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1505 SUPPORTS_IMPLICIT_UNNEST = False 1506 1507 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1508 INTERVAL_SPANS = True 1509 1510 # Whether a PARTITION clause can follow a table reference 1511 SUPPORTS_PARTITION_SELECTION = False 1512 1513 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1514 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1515 1516 # Whether the 'AS' keyword is optional in the CTE definition syntax 1517 OPTIONAL_ALIAS_TOKEN_CTE = True 1518 1519 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1520 ALTER_RENAME_REQUIRES_COLUMN = True 1521 1522 __slots__ = ( 1523 "error_level", 1524 "error_message_context", 1525 "max_errors", 1526 "dialect", 1527 "sql", 1528 "errors", 1529 "_tokens", 1530 "_index", 1531 "_curr", 1532 "_next", 1533 "_prev", 1534 "_prev_comments", 1535 "_pipe_cte_counter", 1536 ) 1537 1538 # Autofilled 1539 SHOW_TRIE: t.Dict = {} 1540 SET_TRIE: t.Dict = {} 1541 1542 def __init__( 1543 self, 1544 error_level: t.Optional[ErrorLevel] = None, 1545 error_message_context: int = 100, 1546 max_errors: int = 3, 1547 dialect: DialectType = None, 1548 ): 1549 from sqlglot.dialects import Dialect 1550 1551 self.error_level = error_level or ErrorLevel.IMMEDIATE 1552 self.error_message_context = error_message_context 1553 self.max_errors = max_errors 1554 self.dialect = Dialect.get_or_raise(dialect) 1555 self.reset() 1556 1557 def reset(self): 1558 self.sql = "" 1559 self.errors = [] 1560 self._tokens = [] 1561 self._index = 0 1562 self._curr = None 1563 self._next = None 1564 self._prev = None 1565 self._prev_comments = None 1566 self._pipe_cte_counter = 0 1567 1568 def parse( 1569 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1570 ) -> t.List[t.Optional[exp.Expression]]: 1571 """ 1572 Parses a list of tokens and returns a list of syntax trees, one tree 1573 per parsed SQL statement. 1574 1575 Args: 1576 raw_tokens: The list of tokens. 1577 sql: The original SQL string, used to produce helpful debug messages. 1578 1579 Returns: 1580 The list of the produced syntax trees. 1581 """ 1582 return self._parse( 1583 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1584 ) 1585 1586 def parse_into( 1587 self, 1588 expression_types: exp.IntoType, 1589 raw_tokens: t.List[Token], 1590 sql: t.Optional[str] = None, 1591 ) -> t.List[t.Optional[exp.Expression]]: 1592 """ 1593 Parses a list of tokens into a given Expression type. If a collection of Expression 1594 types is given instead, this method will try to parse the token list into each one 1595 of them, stopping at the first for which the parsing succeeds. 1596 1597 Args: 1598 expression_types: The expression type(s) to try and parse the token list into. 1599 raw_tokens: The list of tokens. 1600 sql: The original SQL string, used to produce helpful debug messages. 1601 1602 Returns: 1603 The target Expression. 
1604 """ 1605 errors = [] 1606 for expression_type in ensure_list(expression_types): 1607 parser = self.EXPRESSION_PARSERS.get(expression_type) 1608 if not parser: 1609 raise TypeError(f"No parser registered for {expression_type}") 1610 1611 try: 1612 return self._parse(parser, raw_tokens, sql) 1613 except ParseError as e: 1614 e.errors[0]["into_expression"] = expression_type 1615 errors.append(e) 1616 1617 raise ParseError( 1618 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1619 errors=merge_errors(errors), 1620 ) from errors[-1] 1621 1622 def _parse( 1623 self, 1624 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1625 raw_tokens: t.List[Token], 1626 sql: t.Optional[str] = None, 1627 ) -> t.List[t.Optional[exp.Expression]]: 1628 self.reset() 1629 self.sql = sql or "" 1630 1631 total = len(raw_tokens) 1632 chunks: t.List[t.List[Token]] = [[]] 1633 1634 for i, token in enumerate(raw_tokens): 1635 if token.token_type == TokenType.SEMICOLON: 1636 if token.comments: 1637 chunks.append([token]) 1638 1639 if i < total - 1: 1640 chunks.append([]) 1641 else: 1642 chunks[-1].append(token) 1643 1644 expressions = [] 1645 1646 for tokens in chunks: 1647 self._index = -1 1648 self._tokens = tokens 1649 self._advance() 1650 1651 expressions.append(parse_method(self)) 1652 1653 if self._index < len(self._tokens): 1654 self.raise_error("Invalid expression / Unexpected token") 1655 1656 self.check_errors() 1657 1658 return expressions 1659 1660 def check_errors(self) -> None: 1661 """Logs or raises any found errors, depending on the chosen error level setting.""" 1662 if self.error_level == ErrorLevel.WARN: 1663 for error in self.errors: 1664 logger.error(str(error)) 1665 elif self.error_level == ErrorLevel.RAISE and self.errors: 1666 raise ParseError( 1667 concat_messages(self.errors, self.max_errors), 1668 errors=merge_errors(self.errors), 1669 ) 1670 1671 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1672 """ 1673 Appends an error in the list of recorded errors or raises it, depending on the chosen 1674 error level setting. 1675 """ 1676 token = token or self._curr or self._prev or Token.string("") 1677 start = token.start 1678 end = token.end + 1 1679 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1680 highlight = self.sql[start:end] 1681 end_context = self.sql[end : end + self.error_message_context] 1682 1683 error = ParseError.new( 1684 f"{message}. Line {token.line}, Col: {token.col}.\n" 1685 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1686 description=message, 1687 line=token.line, 1688 col=token.col, 1689 start_context=start_context, 1690 highlight=highlight, 1691 end_context=end_context, 1692 ) 1693 1694 if self.error_level == ErrorLevel.IMMEDIATE: 1695 raise error 1696 1697 self.errors.append(error) 1698 1699 def expression( 1700 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1701 ) -> E: 1702 """ 1703 Creates a new, validated Expression. 1704 1705 Args: 1706 exp_class: The expression class to instantiate. 1707 comments: An optional list of comments to attach to the expression. 1708 kwargs: The arguments to set for the expression along with their respective values. 1709 1710 Returns: 1711 The target expression. 
    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
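    # Illustrative sketch (not part of the original module): _try_parse packages
    # the save/rewind idiom used throughout this class for optional syntax, e.g.
    # the pattern in _parse_withisolatedloading further below:
    #
    #   index = self._index
    #   if not self._match_text_seq("ISOLATED", "LOADING"):
    #       self._retreat(index)  # rewind so another parser can re-read the tokens
    #       return None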
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
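    # Illustrative sketch (not part of the original module): STATEMENT_PARSERS,
    # defined earlier in this class, drives the dispatch above, so a dialect can
    # add or override statements declaratively (hypothetical subclass):
    #
    #   class MyParser(Parser):
    #       STATEMENT_PARSERS = {
    #           **Parser.STATEMENT_PARSERS,
    #           TokenType.SHOW: lambda self: self._parse_as_command(self._prev),
    #       }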
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fall back to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
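    # Illustrative sketch (not part of the original module; the exact tree shape
    # may differ across versions): _parse_create and _parse_sequence_properties
    # cooperate on statements such as:
    #
    #   >>> import sqlglot
    #   >>> ddl = sqlglot.parse_one("CREATE SEQUENCE seq START WITH 10 INCREMENT BY 5")
    #   >>> ddl.args["kind"], ddl.this.name
    #   ('SEQUENCE', 'seq')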
self._match_texts(("MIN", "MINIMUM")), 2164 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2165 } 2166 2167 if self._match_texts(self.PROPERTY_PARSERS): 2168 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2169 try: 2170 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2171 except TypeError: 2172 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2173 2174 return None 2175 2176 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2177 return self._parse_wrapped_csv(self._parse_property) 2178 2179 def _parse_property(self) -> t.Optional[exp.Expression]: 2180 if self._match_texts(self.PROPERTY_PARSERS): 2181 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2182 2183 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2184 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2185 2186 if self._match_text_seq("COMPOUND", "SORTKEY"): 2187 return self._parse_sortkey(compound=True) 2188 2189 if self._match_text_seq("SQL", "SECURITY"): 2190 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2191 2192 index = self._index 2193 key = self._parse_column() 2194 2195 if not self._match(TokenType.EQ): 2196 self._retreat(index) 2197 return self._parse_sequence_properties() 2198 2199 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2200 if isinstance(key, exp.Column): 2201 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2202 2203 value = self._parse_bitwise() or self._parse_var(any_token=True) 2204 2205 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2206 if isinstance(value, exp.Column): 2207 value = exp.var(value.name) 2208 2209 return self.expression(exp.Property, this=key, value=value) 2210 2211 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2212 if self._match_text_seq("BY"): 2213 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2214 2215 self._match(TokenType.ALIAS) 2216 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2217 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2218 2219 return self.expression( 2220 exp.FileFormatProperty, 2221 this=( 2222 self.expression( 2223 exp.InputOutputFormat, 2224 input_format=input_format, 2225 output_format=output_format, 2226 ) 2227 if input_format or output_format 2228 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2229 ), 2230 ) 2231 2232 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2233 field = self._parse_field() 2234 if isinstance(field, exp.Identifier) and not field.quoted: 2235 field = exp.var(field) 2236 2237 return field 2238 2239 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2240 self._match(TokenType.EQ) 2241 self._match(TokenType.ALIAS) 2242 2243 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2244 2245 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2246 properties = [] 2247 while True: 2248 if before: 2249 prop = self._parse_property_before() 2250 else: 2251 prop = self._parse_property() 2252 if not prop: 2253 break 2254 for p in ensure_list(prop): 2255 properties.append(p) 2256 2257 if properties: 2258 return self.expression(exp.Properties, expressions=properties) 2259 2260 return None 2261 2262 
    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop
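    # Illustrative sketch (not part of the original module): the T-SQL clause
    # targeted by _parse_system_versioning_property, shown as input only:
    #
    #   CREATE TABLE t (...) WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.h))
    #
    # Parsed with read="tsql", it should yield an exp.WithSystemVersioningProperty
    # with on=True and this set to the history table.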
    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )
    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )
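    # Illustrative sketch (not part of the original module): the Oracle multi-table
    # form handled by _parse_multitable_inserts, shown as input only:
    #
    #   INSERT ALL
    #     WHEN c > 10 THEN INTO big VALUES (c)
    #     ELSE INTO small VALUES (c)
    #   SELECT c FROM src
    #
    # Each WHEN/ELSE branch becomes an exp.ConditionalInsert; the trailing source
    # query is stored in the "source" arg.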
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
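    # Illustrative sketch (not part of the original module): both upsert spellings
    # (Postgres ON CONFLICT, MySQL ON DUPLICATE KEY) funnel into exp.OnConflict:
    #
    #   >>> import sqlglot
    #   >>> stmt = sqlglot.parse_one(
    #   ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO UPDATE SET a = 2",
    #   ...     read="postgres",
    #   ... )
    #   >>> stmt.args["conflict"]  # exp.OnConflict with conflict_keys=[a]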
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3001 kwargs["fields"] = self._parse_string() 3002 if self._match_text_seq("ESCAPED", "BY"): 3003 kwargs["escaped"] = self._parse_string() 3004 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3005 kwargs["collection_items"] = self._parse_string() 3006 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3007 kwargs["map_keys"] = self._parse_string() 3008 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3009 kwargs["lines"] = self._parse_string() 3010 if self._match_text_seq("NULL", "DEFINED", "AS"): 3011 kwargs["null"] = self._parse_string() 3012 3013 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3014 3015 def _parse_load(self) -> exp.LoadData | exp.Command: 3016 if self._match_text_seq("DATA"): 3017 local = self._match_text_seq("LOCAL") 3018 self._match_text_seq("INPATH") 3019 inpath = self._parse_string() 3020 overwrite = self._match(TokenType.OVERWRITE) 3021 self._match_pair(TokenType.INTO, TokenType.TABLE) 3022 3023 return self.expression( 3024 exp.LoadData, 3025 this=self._parse_table(schema=True), 3026 local=local, 3027 overwrite=overwrite, 3028 inpath=inpath, 3029 partition=self._parse_partition(), 3030 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3031 serde=self._match_text_seq("SERDE") and self._parse_string(), 3032 ) 3033 return self._parse_as_command(self._prev) 3034 3035 def _parse_delete(self) -> exp.Delete: 3036 # This handles MySQL's "Multiple-Table Syntax" 3037 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3038 tables = None 3039 if not self._match(TokenType.FROM, advance=False): 3040 tables = self._parse_csv(self._parse_table) or None 3041 3042 returning = self._parse_returning() 3043 3044 return self.expression( 3045 exp.Delete, 3046 tables=tables, 3047 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3048 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3049 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3050 where=self._parse_where(), 3051 returning=returning or self._parse_returning(), 3052 limit=self._parse_limit(), 3053 ) 3054 3055 def _parse_update(self) -> exp.Update: 3056 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3057 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3058 returning = self._parse_returning() 3059 return self.expression( 3060 exp.Update, 3061 **{ # type: ignore 3062 "this": this, 3063 "expressions": expressions, 3064 "from": self._parse_from(joins=True), 3065 "where": self._parse_where(), 3066 "returning": returning or self._parse_returning(), 3067 "order": self._parse_order(), 3068 "limit": self._parse_limit(), 3069 }, 3070 ) 3071 3072 def _parse_use(self) -> exp.Use: 3073 return self.expression( 3074 exp.Use, 3075 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3076 this=self._parse_table(schema=False), 3077 ) 3078 3079 def _parse_uncache(self) -> exp.Uncache: 3080 if not self._match(TokenType.TABLE): 3081 self.raise_error("Expecting TABLE after UNCACHE") 3082 3083 return self.expression( 3084 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3085 ) 3086 3087 def _parse_cache(self) -> exp.Cache: 3088 lazy = self._match_text_seq("LAZY") 3089 self._match(TokenType.TABLE) 3090 table = self._parse_table(schema=True) 3091 3092 options = [] 3093 if self._match_text_seq("OPTIONS"): 3094 self._match_l_paren() 3095 k = 
self._parse_string() 3096 self._match(TokenType.EQ) 3097 v = self._parse_string() 3098 options = [k, v] 3099 self._match_r_paren() 3100 3101 self._match(TokenType.ALIAS) 3102 return self.expression( 3103 exp.Cache, 3104 this=table, 3105 lazy=lazy, 3106 options=options, 3107 expression=self._parse_select(nested=True), 3108 ) 3109 3110 def _parse_partition(self) -> t.Optional[exp.Partition]: 3111 if not self._match_texts(self.PARTITION_KEYWORDS): 3112 return None 3113 3114 return self.expression( 3115 exp.Partition, 3116 subpartition=self._prev.text.upper() == "SUBPARTITION", 3117 expressions=self._parse_wrapped_csv(self._parse_assignment), 3118 ) 3119 3120 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3121 def _parse_value_expression() -> t.Optional[exp.Expression]: 3122 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3123 return exp.var(self._prev.text.upper()) 3124 return self._parse_expression() 3125 3126 if self._match(TokenType.L_PAREN): 3127 expressions = self._parse_csv(_parse_value_expression) 3128 self._match_r_paren() 3129 return self.expression(exp.Tuple, expressions=expressions) 3130 3131 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3132 expression = self._parse_expression() 3133 if expression: 3134 return self.expression(exp.Tuple, expressions=[expression]) 3135 return None 3136 3137 def _parse_projections(self) -> t.List[exp.Expression]: 3138 return self._parse_expressions() 3139 3140 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3141 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3142 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3143 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3144 ) 3145 elif self._match(TokenType.FROM): 3146 from_ = self._parse_from(skip_from_token=True) 3147 # Support parentheses for duckdb FROM-first syntax 3148 select = self._parse_select() 3149 if select: 3150 select.set("from", from_) 3151 this = select 3152 else: 3153 this = exp.select("*").from_(t.cast(exp.From, from_)) 3154 else: 3155 this = ( 3156 self._parse_table() 3157 if table 3158 else self._parse_select(nested=True, parse_set_operation=False) 3159 ) 3160 3161 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3162 # in case a modifier (e.g. 
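    # Illustrative sketch (not part of the original module): each parenthesized
    # row of a VALUES clause is parsed into an exp.Tuple by _parse_value:
    #
    #   >>> import sqlglot
    #   >>> from sqlglot import exp
    #   >>> values = sqlglot.parse_one("SELECT * FROM (VALUES (1), (2)) AS v(a)").find(exp.Values)
    #   >>> len(values.expressions)  # two exp.Tuple rows
    #   2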
    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table()
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
            if self._match(TokenType.PIPE_GT, advance=False):
                return self._parse_pipe_syntax_query(this)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        if isinstance(cte.this, exp.Values):
            cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True)))

        return cte
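    # Illustrative sketch (not part of the original module): _parse_with collects
    # one exp.CTE per comma-separated definition and the caller attaches the
    # resulting exp.With to the statement that follows:
    #
    #   >>> import sqlglot
    #   >>> q = sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #   >>> [cte.alias for cte in q.args["with"].expressions]
    #   ['x']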
We bubble up comments from the Identifier to the TableAlias 3393 if isinstance(alias, exp.Identifier): 3394 table_alias.add_comments(alias.pop_comments()) 3395 3396 return table_alias 3397 3398 def _parse_subquery( 3399 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3400 ) -> t.Optional[exp.Subquery]: 3401 if not this: 3402 return None 3403 3404 return self.expression( 3405 exp.Subquery, 3406 this=this, 3407 pivots=self._parse_pivots(), 3408 alias=self._parse_table_alias() if parse_alias else None, 3409 sample=self._parse_table_sample(), 3410 ) 3411 3412 def _implicit_unnests_to_explicit(self, this: E) -> E: 3413 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3414 3415 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3416 for i, join in enumerate(this.args.get("joins") or []): 3417 table = join.this 3418 normalized_table = table.copy() 3419 normalized_table.meta["maybe_column"] = True 3420 normalized_table = _norm(normalized_table, dialect=self.dialect) 3421 3422 if isinstance(table, exp.Table) and not join.args.get("on"): 3423 if normalized_table.parts[0].name in refs: 3424 table_as_column = table.to_column() 3425 unnest = exp.Unnest(expressions=[table_as_column]) 3426 3427 # Table.to_column creates a parent Alias node that we want to convert to 3428 # a TableAlias and attach to the Unnest, so it matches the parser's output 3429 if isinstance(table.args.get("alias"), exp.TableAlias): 3430 table_as_column.replace(table_as_column.this) 3431 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3432 3433 table.replace(unnest) 3434 3435 refs.add(normalized_table.alias_or_name) 3436 3437 return this 3438 3439 def _parse_query_modifiers( 3440 self, this: t.Optional[exp.Expression] 3441 ) -> t.Optional[exp.Expression]: 3442 if isinstance(this, self.MODIFIABLES): 3443 for join in self._parse_joins(): 3444 this.append("joins", join) 3445 for lateral in iter(self._parse_lateral, None): 3446 this.append("laterals", lateral) 3447 3448 while True: 3449 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3450 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3451 key, expression = parser(self) 3452 3453 if expression: 3454 this.set(key, expression) 3455 if key == "limit": 3456 offset = expression.args.pop("offset", None) 3457 3458 if offset: 3459 offset = exp.Offset(expression=offset) 3460 this.set("offset", offset) 3461 3462 limit_by_expressions = expression.expressions 3463 expression.set("expressions", None) 3464 offset.set("expressions", limit_by_expressions) 3465 continue 3466 break 3467 3468 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3469 this = self._implicit_unnests_to_explicit(this) 3470 3471 return this 3472 3473 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3474 start = self._curr 3475 while self._curr: 3476 self._advance() 3477 3478 end = self._tokens[self._index - 1] 3479 return exp.Hint(expressions=[self._find_sql(start, end)]) 3480 3481 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3482 return self._parse_function_call() 3483 3484 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3485 start_index = self._index 3486 should_fallback_to_string = False 3487 3488 hints = [] 3489 try: 3490 for hint in iter( 3491 lambda: self._parse_csv( 3492 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3493 ), 3494 [], 3495 ): 3496 hints.extend(hint) 3497 except ParseError: 3498 
should_fallback_to_string = True 3499 3500 if should_fallback_to_string or self._curr: 3501 self._retreat(start_index) 3502 return self._parse_hint_fallback_to_string() 3503 3504 return self.expression(exp.Hint, expressions=hints) 3505 3506 def _parse_hint(self) -> t.Optional[exp.Hint]: 3507 if self._match(TokenType.HINT) and self._prev_comments: 3508 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3509 3510 return None 3511 3512 def _parse_into(self) -> t.Optional[exp.Into]: 3513 if not self._match(TokenType.INTO): 3514 return None 3515 3516 temp = self._match(TokenType.TEMPORARY) 3517 unlogged = self._match_text_seq("UNLOGGED") 3518 self._match(TokenType.TABLE) 3519 3520 return self.expression( 3521 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3522 ) 3523 3524 def _parse_from( 3525 self, joins: bool = False, skip_from_token: bool = False 3526 ) -> t.Optional[exp.From]: 3527 if not skip_from_token and not self._match(TokenType.FROM): 3528 return None 3529 3530 return self.expression( 3531 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3532 ) 3533 3534 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3535 return self.expression( 3536 exp.MatchRecognizeMeasure, 3537 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3538 this=self._parse_expression(), 3539 ) 3540 3541 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3542 if not self._match(TokenType.MATCH_RECOGNIZE): 3543 return None 3544 3545 self._match_l_paren() 3546 3547 partition = self._parse_partition_by() 3548 order = self._parse_order() 3549 3550 measures = ( 3551 self._parse_csv(self._parse_match_recognize_measure) 3552 if self._match_text_seq("MEASURES") 3553 else None 3554 ) 3555 3556 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3557 rows = exp.var("ONE ROW PER MATCH") 3558 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3559 text = "ALL ROWS PER MATCH" 3560 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3561 text += " SHOW EMPTY MATCHES" 3562 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3563 text += " OMIT EMPTY MATCHES" 3564 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3565 text += " WITH UNMATCHED ROWS" 3566 rows = exp.var(text) 3567 else: 3568 rows = None 3569 3570 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3571 text = "AFTER MATCH SKIP" 3572 if self._match_text_seq("PAST", "LAST", "ROW"): 3573 text += " PAST LAST ROW" 3574 elif self._match_text_seq("TO", "NEXT", "ROW"): 3575 text += " TO NEXT ROW" 3576 elif self._match_text_seq("TO", "FIRST"): 3577 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3578 elif self._match_text_seq("TO", "LAST"): 3579 text += f" TO LAST {self._advance_any().text}" # type: ignore 3580 after = exp.var(text) 3581 else: 3582 after = None 3583 3584 if self._match_text_seq("PATTERN"): 3585 self._match_l_paren() 3586 3587 if not self._curr: 3588 self.raise_error("Expecting )", self._curr) 3589 3590 paren = 1 3591 start = self._curr 3592 3593 while self._curr and paren > 0: 3594 if self._curr.token_type == TokenType.L_PAREN: 3595 paren += 1 3596 if self._curr.token_type == TokenType.R_PAREN: 3597 paren -= 1 3598 3599 end = self._prev 3600 self._advance() 3601 3602 if paren > 0: 3603 self.raise_error("Expecting )", self._curr) 3604 3605 pattern = exp.var(self._find_sql(start, end)) 3606 else: 3607 pattern = None 3608 3609 define = ( 3610 
self._parse_csv(self._parse_name_as_expression) 3611 if self._match_text_seq("DEFINE") 3612 else None 3613 ) 3614 3615 self._match_r_paren() 3616 3617 return self.expression( 3618 exp.MatchRecognize, 3619 partition_by=partition, 3620 order=order, 3621 measures=measures, 3622 rows=rows, 3623 after=after, 3624 pattern=pattern, 3625 define=define, 3626 alias=self._parse_table_alias(), 3627 ) 3628 3629 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3630 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3631 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3632 cross_apply = False 3633 3634 if cross_apply is not None: 3635 this = self._parse_select(table=True) 3636 view = None 3637 outer = None 3638 elif self._match(TokenType.LATERAL): 3639 this = self._parse_select(table=True) 3640 view = self._match(TokenType.VIEW) 3641 outer = self._match(TokenType.OUTER) 3642 else: 3643 return None 3644 3645 if not this: 3646 this = ( 3647 self._parse_unnest() 3648 or self._parse_function() 3649 or self._parse_id_var(any_token=False) 3650 ) 3651 3652 while self._match(TokenType.DOT): 3653 this = exp.Dot( 3654 this=this, 3655 expression=self._parse_function() or self._parse_id_var(any_token=False), 3656 ) 3657 3658 ordinality: t.Optional[bool] = None 3659 3660 if view: 3661 table = self._parse_id_var(any_token=False) 3662 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3663 table_alias: t.Optional[exp.TableAlias] = self.expression( 3664 exp.TableAlias, this=table, columns=columns 3665 ) 3666 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3667 # We move the alias from the lateral's child node to the lateral itself 3668 table_alias = this.args["alias"].pop() 3669 else: 3670 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3671 table_alias = self._parse_table_alias() 3672 3673 return self.expression( 3674 exp.Lateral, 3675 this=this, 3676 view=view, 3677 outer=outer, 3678 alias=table_alias, 3679 cross_apply=cross_apply, 3680 ordinality=ordinality, 3681 ) 3682 3683 def _parse_join_parts( 3684 self, 3685 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3686 return ( 3687 self._match_set(self.JOIN_METHODS) and self._prev, 3688 self._match_set(self.JOIN_SIDES) and self._prev, 3689 self._match_set(self.JOIN_KINDS) and self._prev, 3690 ) 3691 3692 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3693 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3694 this = self._parse_column() 3695 if isinstance(this, exp.Column): 3696 return this.this 3697 return this 3698 3699 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3700 3701 def _parse_join( 3702 self, skip_join_token: bool = False, parse_bracket: bool = False 3703 ) -> t.Optional[exp.Join]: 3704 if self._match(TokenType.COMMA): 3705 table = self._try_parse(self._parse_table) 3706 if table: 3707 return self.expression(exp.Join, this=table) 3708 return None 3709 3710 index = self._index 3711 method, side, kind = self._parse_join_parts() 3712 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3713 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3714 3715 if not skip_join_token and not join: 3716 self._retreat(index) 3717 kind = None 3718 method = None 3719 side = None 3720 3721 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3722 cross_apply = self._match_pair(TokenType.CROSS, 
TokenType.APPLY, False) 3723 3724 if not skip_join_token and not join and not outer_apply and not cross_apply: 3725 return None 3726 3727 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3728 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3729 kwargs["expressions"] = self._parse_csv( 3730 lambda: self._parse_table(parse_bracket=parse_bracket) 3731 ) 3732 3733 if method: 3734 kwargs["method"] = method.text 3735 if side: 3736 kwargs["side"] = side.text 3737 if kind: 3738 kwargs["kind"] = kind.text 3739 if hint: 3740 kwargs["hint"] = hint 3741 3742 if self._match(TokenType.MATCH_CONDITION): 3743 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3744 3745 if self._match(TokenType.ON): 3746 kwargs["on"] = self._parse_assignment() 3747 elif self._match(TokenType.USING): 3748 kwargs["using"] = self._parse_using_identifiers() 3749 elif ( 3750 not (outer_apply or cross_apply) 3751 and not isinstance(kwargs["this"], exp.Unnest) 3752 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3753 ): 3754 index = self._index 3755 joins: t.Optional[list] = list(self._parse_joins()) 3756 3757 if joins and self._match(TokenType.ON): 3758 kwargs["on"] = self._parse_assignment() 3759 elif joins and self._match(TokenType.USING): 3760 kwargs["using"] = self._parse_using_identifiers() 3761 else: 3762 joins = None 3763 self._retreat(index) 3764 3765 kwargs["this"].set("joins", joins if joins else None) 3766 3767 kwargs["pivots"] = self._parse_pivots() 3768 3769 comments = [c for token in (method, side, kind) if token for c in token.comments] 3770 return self.expression(exp.Join, comments=comments, **kwargs) 3771 3772 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3773 this = self._parse_assignment() 3774 3775 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3776 return this 3777 3778 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3779 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3780 3781 return this 3782 3783 def _parse_index_params(self) -> exp.IndexParameters: 3784 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3785 3786 if self._match(TokenType.L_PAREN, advance=False): 3787 columns = self._parse_wrapped_csv(self._parse_with_operator) 3788 else: 3789 columns = None 3790 3791 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3792 partition_by = self._parse_partition_by() 3793 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3794 tablespace = ( 3795 self._parse_var(any_token=True) 3796 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3797 else None 3798 ) 3799 where = self._parse_where() 3800 3801 on = self._parse_field() if self._match(TokenType.ON) else None 3802 3803 return self.expression( 3804 exp.IndexParameters, 3805 using=using, 3806 columns=columns, 3807 include=include, 3808 partition_by=partition_by, 3809 where=where, 3810 with_storage=with_storage, 3811 tablespace=tablespace, 3812 on=on, 3813 ) 3814 3815 def _parse_index( 3816 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3817 ) -> t.Optional[exp.Index]: 3818 if index or anonymous: 3819 unique = None 3820 primary = None 3821 amp = None 3822 3823 self._match(TokenType.ON) 3824 self._match(TokenType.TABLE) # hive 3825 table = self._parse_table_parts(schema=True) 3826 else: 3827 unique = self._match(TokenType.UNIQUE) 3828 primary 
= self._match_text_seq("PRIMARY") 3829 amp = self._match_text_seq("AMP") 3830 3831 if not self._match(TokenType.INDEX): 3832 return None 3833 3834 index = self._parse_id_var() 3835 table = None 3836 3837 params = self._parse_index_params() 3838 3839 return self.expression( 3840 exp.Index, 3841 this=index, 3842 table=table, 3843 unique=unique, 3844 primary=primary, 3845 amp=amp, 3846 params=params, 3847 ) 3848 3849 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3850 hints: t.List[exp.Expression] = [] 3851 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3852 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3853 hints.append( 3854 self.expression( 3855 exp.WithTableHint, 3856 expressions=self._parse_csv( 3857 lambda: self._parse_function() or self._parse_var(any_token=True) 3858 ), 3859 ) 3860 ) 3861 self._match_r_paren() 3862 else: 3863 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3864 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3865 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3866 3867 self._match_set((TokenType.INDEX, TokenType.KEY)) 3868 if self._match(TokenType.FOR): 3869 hint.set("target", self._advance_any() and self._prev.text.upper()) 3870 3871 hint.set("expressions", self._parse_wrapped_id_vars()) 3872 hints.append(hint) 3873 3874 return hints or None 3875 3876 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3877 return ( 3878 (not schema and self._parse_function(optional_parens=False)) 3879 or self._parse_id_var(any_token=False) 3880 or self._parse_string_as_identifier() 3881 or self._parse_placeholder() 3882 ) 3883 3884 def _parse_table_parts( 3885 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3886 ) -> exp.Table: 3887 catalog = None 3888 db = None 3889 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3890 3891 while self._match(TokenType.DOT): 3892 if catalog: 3893 # This allows nesting the table in arbitrarily many dot expressions if needed 3894 table = self.expression( 3895 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3896 ) 3897 else: 3898 catalog = db 3899 db = table 3900 # "" used for tsql FROM a..b case 3901 table = self._parse_table_part(schema=schema) or "" 3902 3903 if ( 3904 wildcard 3905 and self._is_connected() 3906 and (isinstance(table, exp.Identifier) or not table) 3907 and self._match(TokenType.STAR) 3908 ): 3909 if isinstance(table, exp.Identifier): 3910 table.args["this"] += "*" 3911 else: 3912 table = exp.Identifier(this="*") 3913 3914 # We bubble up comments from the Identifier to the Table 3915 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3916 3917 if is_db_reference: 3918 catalog = db 3919 db = table 3920 table = None 3921 3922 if not table and not is_db_reference: 3923 self.raise_error(f"Expected table name but got {self._curr}") 3924 if not db and is_db_reference: 3925 self.raise_error(f"Expected database name but got {self._curr}") 3926 3927 table = self.expression( 3928 exp.Table, 3929 comments=comments, 3930 this=table, 3931 db=db, 3932 catalog=catalog, 3933 ) 3934 3935 changes = self._parse_changes() 3936 if changes: 3937 table.set("changes", changes) 3938 3939 at_before = self._parse_historical_data() 3940 if at_before: 3941 table.set("when", at_before) 3942 3943 pivots = self._parse_pivots() 3944 if pivots: 3945 table.set("pivots", pivots) 3946 3947 return table 3948 3949 def 
_parse_table( 3950 self, 3951 schema: bool = False, 3952 joins: bool = False, 3953 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3954 parse_bracket: bool = False, 3955 is_db_reference: bool = False, 3956 parse_partition: bool = False, 3957 ) -> t.Optional[exp.Expression]: 3958 lateral = self._parse_lateral() 3959 if lateral: 3960 return lateral 3961 3962 unnest = self._parse_unnest() 3963 if unnest: 3964 return unnest 3965 3966 values = self._parse_derived_table_values() 3967 if values: 3968 return values 3969 3970 subquery = self._parse_select(table=True) 3971 if subquery: 3972 if not subquery.args.get("pivots"): 3973 subquery.set("pivots", self._parse_pivots()) 3974 return subquery 3975 3976 bracket = parse_bracket and self._parse_bracket(None) 3977 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3978 3979 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3980 self._parse_table 3981 ) 3982 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3983 3984 only = self._match(TokenType.ONLY) 3985 3986 this = t.cast( 3987 exp.Expression, 3988 bracket 3989 or rows_from 3990 or self._parse_bracket( 3991 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3992 ), 3993 ) 3994 3995 if only: 3996 this.set("only", only) 3997 3998 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3999 self._match_text_seq("*") 4000 4001 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4002 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4003 this.set("partition", self._parse_partition()) 4004 4005 if schema: 4006 return self._parse_schema(this=this) 4007 4008 version = self._parse_version() 4009 4010 if version: 4011 this.set("version", version) 4012 4013 if self.dialect.ALIAS_POST_TABLESAMPLE: 4014 this.set("sample", self._parse_table_sample()) 4015 4016 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4017 if alias: 4018 this.set("alias", alias) 4019 4020 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4021 return self.expression( 4022 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4023 ) 4024 4025 this.set("hints", self._parse_table_hints()) 4026 4027 if not this.args.get("pivots"): 4028 this.set("pivots", self._parse_pivots()) 4029 4030 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4031 this.set("sample", self._parse_table_sample()) 4032 4033 if joins: 4034 for join in self._parse_joins(): 4035 this.append("joins", join) 4036 4037 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4038 this.set("ordinality", True) 4039 this.set("alias", self._parse_table_alias()) 4040 4041 return this 4042 4043 def _parse_version(self) -> t.Optional[exp.Version]: 4044 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4045 this = "TIMESTAMP" 4046 elif self._match(TokenType.VERSION_SNAPSHOT): 4047 this = "VERSION" 4048 else: 4049 return None 4050 4051 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4052 kind = self._prev.text.upper() 4053 start = self._parse_bitwise() 4054 self._match_texts(("TO", "AND")) 4055 end = self._parse_bitwise() 4056 expression: t.Optional[exp.Expression] = self.expression( 4057 exp.Tuple, expressions=[start, end] 4058 ) 4059 elif self._match_text_seq("CONTAINED", "IN"): 4060 kind = "CONTAINED IN" 4061 expression = self.expression( 4062 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4063 ) 4064 elif 
self._match(TokenType.ALL): 4065 kind = "ALL" 4066 expression = None 4067 else: 4068 self._match_text_seq("AS", "OF") 4069 kind = "AS OF" 4070 expression = self._parse_type() 4071 4072 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4073 4074 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4075 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4076 index = self._index 4077 historical_data = None 4078 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4079 this = self._prev.text.upper() 4080 kind = ( 4081 self._match(TokenType.L_PAREN) 4082 and self._match_texts(self.HISTORICAL_DATA_KIND) 4083 and self._prev.text.upper() 4084 ) 4085 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4086 4087 if expression: 4088 self._match_r_paren() 4089 historical_data = self.expression( 4090 exp.HistoricalData, this=this, kind=kind, expression=expression 4091 ) 4092 else: 4093 self._retreat(index) 4094 4095 return historical_data 4096 4097 def _parse_changes(self) -> t.Optional[exp.Changes]: 4098 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4099 return None 4100 4101 information = self._parse_var(any_token=True) 4102 self._match_r_paren() 4103 4104 return self.expression( 4105 exp.Changes, 4106 information=information, 4107 at_before=self._parse_historical_data(), 4108 end=self._parse_historical_data(), 4109 ) 4110 4111 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4112 if not self._match(TokenType.UNNEST): 4113 return None 4114 4115 expressions = self._parse_wrapped_csv(self._parse_equality) 4116 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4117 4118 alias = self._parse_table_alias() if with_alias else None 4119 4120 if alias: 4121 if self.dialect.UNNEST_COLUMN_ONLY: 4122 if alias.args.get("columns"): 4123 self.raise_error("Unexpected extra column alias in unnest.") 4124 4125 alias.set("columns", [alias.this]) 4126 alias.set("this", None) 4127 4128 columns = alias.args.get("columns") or [] 4129 if offset and len(expressions) < len(columns): 4130 offset = columns.pop() 4131 4132 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4133 self._match(TokenType.ALIAS) 4134 offset = self._parse_id_var( 4135 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4136 ) or exp.to_identifier("offset") 4137 4138 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4139 4140 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4141 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4142 if not is_derived and not ( 4143 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4144 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4145 ): 4146 return None 4147 4148 expressions = self._parse_csv(self._parse_value) 4149 alias = self._parse_table_alias() 4150 4151 if is_derived: 4152 self._match_r_paren() 4153 4154 return self.expression( 4155 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4156 ) 4157 4158 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4159 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4160 as_modifier and self._match_text_seq("USING", "SAMPLE") 4161 ): 4162 return None 4163 4164 bucket_numerator = None 4165 bucket_denominator = None 4166 bucket_field = None 4167 percent = None 4168 size = None 4169 seed = None 4170 4171 method = 
self._parse_var(tokens=(TokenType.ROW,), upper=True) 4172 matched_l_paren = self._match(TokenType.L_PAREN) 4173 4174 if self.TABLESAMPLE_CSV: 4175 num = None 4176 expressions = self._parse_csv(self._parse_primary) 4177 else: 4178 expressions = None 4179 num = ( 4180 self._parse_factor() 4181 if self._match(TokenType.NUMBER, advance=False) 4182 else self._parse_primary() or self._parse_placeholder() 4183 ) 4184 4185 if self._match_text_seq("BUCKET"): 4186 bucket_numerator = self._parse_number() 4187 self._match_text_seq("OUT", "OF") 4188 bucket_denominator = self._parse_number() 4189 self._match(TokenType.ON) 4190 bucket_field = self._parse_field() 4191 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4192 percent = num 4193 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4194 size = num 4195 else: 4196 percent = num 4197 4198 if matched_l_paren: 4199 self._match_r_paren() 4200 4201 if self._match(TokenType.L_PAREN): 4202 method = self._parse_var(upper=True) 4203 seed = self._match(TokenType.COMMA) and self._parse_number() 4204 self._match_r_paren() 4205 elif self._match_texts(("SEED", "REPEATABLE")): 4206 seed = self._parse_wrapped(self._parse_number) 4207 4208 if not method and self.DEFAULT_SAMPLING_METHOD: 4209 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4210 4211 return self.expression( 4212 exp.TableSample, 4213 expressions=expressions, 4214 method=method, 4215 bucket_numerator=bucket_numerator, 4216 bucket_denominator=bucket_denominator, 4217 bucket_field=bucket_field, 4218 percent=percent, 4219 size=size, 4220 seed=seed, 4221 ) 4222 4223 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4224 return list(iter(self._parse_pivot, None)) or None 4225 4226 def _parse_joins(self) -> t.Iterator[exp.Join]: 4227 return iter(self._parse_join, None) 4228 4229 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4230 if not self._match(TokenType.INTO): 4231 return None 4232 4233 return self.expression( 4234 exp.UnpivotColumns, 4235 this=self._match_text_seq("NAME") and self._parse_column(), 4236 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4237 ) 4238 4239 # https://duckdb.org/docs/sql/statements/pivot 4240 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4241 def _parse_on() -> t.Optional[exp.Expression]: 4242 this = self._parse_bitwise() 4243 4244 if self._match(TokenType.IN): 4245 # PIVOT ... ON col IN (row_val1, row_val2) 4246 return self._parse_in(this) 4247 if self._match(TokenType.ALIAS, advance=False): 4248 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4249 return self._parse_alias(this) 4250 4251 return this 4252 4253 this = self._parse_table() 4254 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4255 into = self._parse_unpivot_columns() 4256 using = self._match(TokenType.USING) and self._parse_csv( 4257 lambda: self._parse_alias(self._parse_function()) 4258 ) 4259 group = self._parse_group() 4260 4261 return self.expression( 4262 exp.Pivot, 4263 this=this, 4264 expressions=expressions, 4265 using=using, 4266 group=group, 4267 unpivot=is_unpivot, 4268 into=into, 4269 ) 4270 4271 def _parse_pivot_in(self) -> exp.In: 4272 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4273 this = self._parse_select_or_expression() 4274 4275 self._match(TokenType.ALIAS) 4276 alias = self._parse_bitwise() 4277 if alias: 4278 if isinstance(alias, exp.Column) and not alias.db: 4279 alias = alias.this 4280 return self.expression(exp.PivotAlias, this=this, alias=alias) 4281 4282 return this 4283 4284 value = self._parse_column() 4285 4286 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4287 self.raise_error("Expecting IN (") 4288 4289 if self._match(TokenType.ANY): 4290 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4291 else: 4292 exprs = self._parse_csv(_parse_aliased_expression) 4293 4294 self._match_r_paren() 4295 return self.expression(exp.In, this=value, expressions=exprs) 4296 4297 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4298 index = self._index 4299 include_nulls = None 4300 4301 if self._match(TokenType.PIVOT): 4302 unpivot = False 4303 elif self._match(TokenType.UNPIVOT): 4304 unpivot = True 4305 4306 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4307 if self._match_text_seq("INCLUDE", "NULLS"): 4308 include_nulls = True 4309 elif self._match_text_seq("EXCLUDE", "NULLS"): 4310 include_nulls = False 4311 else: 4312 return None 4313 4314 expressions = [] 4315 4316 if not self._match(TokenType.L_PAREN): 4317 self._retreat(index) 4318 return None 4319 4320 if unpivot: 4321 expressions = self._parse_csv(self._parse_column) 4322 else: 4323 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4324 4325 if not expressions: 4326 self.raise_error("Failed to parse PIVOT's aggregation list") 4327 4328 if not self._match(TokenType.FOR): 4329 self.raise_error("Expecting FOR") 4330 4331 fields = [] 4332 while True: 4333 field = self._try_parse(self._parse_pivot_in) 4334 if not field: 4335 break 4336 fields.append(field) 4337 4338 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4339 self._parse_bitwise 4340 ) 4341 4342 group = self._parse_group() 4343 4344 self._match_r_paren() 4345 4346 pivot = self.expression( 4347 exp.Pivot, 4348 expressions=expressions, 4349 fields=fields, 4350 unpivot=unpivot, 4351 include_nulls=include_nulls, 4352 default_on_null=default_on_null, 4353 group=group, 4354 ) 4355 4356 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4357 pivot.set("alias", self._parse_table_alias()) 4358 4359 if not unpivot: 4360 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4361 4362 columns: t.List[exp.Expression] = [] 4363 all_fields = [] 4364 for pivot_field in pivot.fields: 4365 pivot_field_expressions = pivot_field.expressions 4366 4367 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
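# For example (a sketch; Snowflake-style dynamic pivot syntax assumed):
#
#   SELECT * FROM t PIVOT(SUM(amount) FOR status IN (ANY ORDER BY status))
#
# parses the IN list into a single PivotAny node, so the set of output columns
# is only known at execution time and no identifiers can be generated for it.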
4368 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4369 continue 4370 4371 all_fields.append( 4372 [ 4373 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4374 for fld in pivot_field_expressions 4375 ] 4376 ) 4377 4378 if all_fields: 4379 if names: 4380 all_fields.append(names) 4381 4382 # Generate all possible combinations of the pivot columns 4383 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4384 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4385 for fld_parts_tuple in itertools.product(*all_fields): 4386 fld_parts = list(fld_parts_tuple) 4387 4388 if names and self.PREFIXED_PIVOT_COLUMNS: 4389 # Move the "name" to the front of the list 4390 fld_parts.insert(0, fld_parts.pop(-1)) 4391 4392 columns.append(exp.to_identifier("_".join(fld_parts))) 4393 4394 pivot.set("columns", columns) 4395 4396 return pivot 4397 4398 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4399 return [agg.alias for agg in aggregations if agg.alias] 4400 4401 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4402 if not skip_where_token and not self._match(TokenType.PREWHERE): 4403 return None 4404 4405 return self.expression( 4406 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4407 ) 4408 4409 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4410 if not skip_where_token and not self._match(TokenType.WHERE): 4411 return None 4412 4413 return self.expression( 4414 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4415 ) 4416 4417 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4418 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4419 return None 4420 4421 elements: t.Dict[str, t.Any] = defaultdict(list) 4422 4423 if self._match(TokenType.ALL): 4424 elements["all"] = True 4425 elif self._match(TokenType.DISTINCT): 4426 elements["all"] = False 4427 4428 while True: 4429 index = self._index 4430 4431 elements["expressions"].extend( 4432 self._parse_csv( 4433 lambda: None 4434 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4435 else self._parse_assignment() 4436 ) 4437 ) 4438 4439 before_with_index = self._index 4440 with_prefix = self._match(TokenType.WITH) 4441 4442 if self._match(TokenType.ROLLUP): 4443 elements["rollup"].append( 4444 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4445 ) 4446 elif self._match(TokenType.CUBE): 4447 elements["cube"].append( 4448 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4449 ) 4450 elif self._match(TokenType.GROUPING_SETS): 4451 elements["grouping_sets"].append( 4452 self.expression( 4453 exp.GroupingSets, 4454 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4455 ) 4456 ) 4457 elif self._match_text_seq("TOTALS"): 4458 elements["totals"] = True # type: ignore 4459 4460 if before_with_index <= self._index <= before_with_index + 1: 4461 self._retreat(before_with_index) 4462 break 4463 4464 if index == self._index: 4465 break 4466 4467 return self.expression(exp.Group, **elements) # type: ignore 4468 4469 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4470 return self.expression( 4471 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4472 ) 4473 4474 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4475 if 
self._match(TokenType.L_PAREN): 4476 grouping_set = self._parse_csv(self._parse_column) 4477 self._match_r_paren() 4478 return self.expression(exp.Tuple, expressions=grouping_set) 4479 4480 return self._parse_column() 4481 4482 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4483 if not skip_having_token and not self._match(TokenType.HAVING): 4484 return None 4485 return self.expression(exp.Having, this=self._parse_assignment()) 4486 4487 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4488 if not self._match(TokenType.QUALIFY): 4489 return None 4490 return self.expression(exp.Qualify, this=self._parse_assignment()) 4491 4492 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4493 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4494 exp.Prior, this=self._parse_bitwise() 4495 ) 4496 connect = self._parse_assignment() 4497 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4498 return connect 4499 4500 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4501 if skip_start_token: 4502 start = None 4503 elif self._match(TokenType.START_WITH): 4504 start = self._parse_assignment() 4505 else: 4506 return None 4507 4508 self._match(TokenType.CONNECT_BY) 4509 nocycle = self._match_text_seq("NOCYCLE") 4510 connect = self._parse_connect_with_prior() 4511 4512 if not start and self._match(TokenType.START_WITH): 4513 start = self._parse_assignment() 4514 4515 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4516 4517 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4518 this = self._parse_id_var(any_token=True) 4519 if self._match(TokenType.ALIAS): 4520 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4521 return this 4522 4523 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4524 if self._match_text_seq("INTERPOLATE"): 4525 return self._parse_wrapped_csv(self._parse_name_as_expression) 4526 return None 4527 4528 def _parse_order( 4529 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4530 ) -> t.Optional[exp.Expression]: 4531 siblings = None 4532 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4533 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4534 return this 4535 4536 siblings = True 4537 4538 return self.expression( 4539 exp.Order, 4540 this=this, 4541 expressions=self._parse_csv(self._parse_ordered), 4542 siblings=siblings, 4543 ) 4544 4545 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4546 if not self._match(token): 4547 return None 4548 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4549 4550 def _parse_ordered( 4551 self, parse_method: t.Optional[t.Callable] = None 4552 ) -> t.Optional[exp.Ordered]: 4553 this = parse_method() if parse_method else self._parse_assignment() 4554 if not this: 4555 return None 4556 4557 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4558 this = exp.var("ALL") 4559 4560 asc = self._match(TokenType.ASC) 4561 desc = self._match(TokenType.DESC) or (asc and False) 4562 4563 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4564 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4565 4566 nulls_first = is_nulls_first or False 4567 explicitly_null_ordered = is_nulls_first or is_nulls_last 4568 4569 if ( 4570 not explicitly_null_ordered 4571 and ( 4572 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4573 or 
(desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4574 ) 4575 and self.dialect.NULL_ORDERING != "nulls_are_last" 4576 ): 4577 nulls_first = True 4578 4579 if self._match_text_seq("WITH", "FILL"): 4580 with_fill = self.expression( 4581 exp.WithFill, 4582 **{ # type: ignore 4583 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4584 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4585 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4586 "interpolate": self._parse_interpolate(), 4587 }, 4588 ) 4589 else: 4590 with_fill = None 4591 4592 return self.expression( 4593 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4594 ) 4595 4596 def _parse_limit_options(self) -> exp.LimitOptions: 4597 percent = self._match(TokenType.PERCENT) 4598 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4599 self._match_text_seq("ONLY") 4600 with_ties = self._match_text_seq("WITH", "TIES") 4601 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4602 4603 def _parse_limit( 4604 self, 4605 this: t.Optional[exp.Expression] = None, 4606 top: bool = False, 4607 skip_limit_token: bool = False, 4608 ) -> t.Optional[exp.Expression]: 4609 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4610 comments = self._prev_comments 4611 if top: 4612 limit_paren = self._match(TokenType.L_PAREN) 4613 expression = self._parse_term() if limit_paren else self._parse_number() 4614 4615 if limit_paren: 4616 self._match_r_paren() 4617 4618 limit_options = self._parse_limit_options() 4619 else: 4620 limit_options = None 4621 expression = self._parse_term() 4622 4623 if self._match(TokenType.COMMA): 4624 offset = expression 4625 expression = self._parse_term() 4626 else: 4627 offset = None 4628 4629 limit_exp = self.expression( 4630 exp.Limit, 4631 this=this, 4632 expression=expression, 4633 offset=offset, 4634 comments=comments, 4635 limit_options=limit_options, 4636 expressions=self._parse_limit_by(), 4637 ) 4638 4639 return limit_exp 4640 4641 if self._match(TokenType.FETCH): 4642 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4643 direction = self._prev.text.upper() if direction else "FIRST" 4644 4645 count = self._parse_field(tokens=self.FETCH_TOKENS) 4646 4647 return self.expression( 4648 exp.Fetch, 4649 direction=direction, 4650 count=count, 4651 limit_options=self._parse_limit_options(), 4652 ) 4653 4654 return this 4655 4656 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4657 if not self._match(TokenType.OFFSET): 4658 return this 4659 4660 count = self._parse_term() 4661 self._match_set((TokenType.ROW, TokenType.ROWS)) 4662 4663 return self.expression( 4664 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4665 ) 4666 4667 def _can_parse_limit_or_offset(self) -> bool: 4668 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4669 return False 4670 4671 index = self._index 4672 result = bool( 4673 self._try_parse(self._parse_limit, retreat=True) 4674 or self._try_parse(self._parse_offset, retreat=True) 4675 ) 4676 self._retreat(index) 4677 return result 4678 4679 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4680 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4681 4682 def _parse_locks(self) -> t.List[exp.Lock]: 4683 locks = [] 4684 while True: 4685 if self._match_text_seq("FOR", "UPDATE"): 4686 update = True 4687 elif 
self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4688 "LOCK", "IN", "SHARE", "MODE" 4689 ): 4690 update = False 4691 else: 4692 break 4693 4694 expressions = None 4695 if self._match_text_seq("OF"): 4696 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4697 4698 wait: t.Optional[bool | exp.Expression] = None 4699 if self._match_text_seq("NOWAIT"): 4700 wait = True 4701 elif self._match_text_seq("WAIT"): 4702 wait = self._parse_primary() 4703 elif self._match_text_seq("SKIP", "LOCKED"): 4704 wait = False 4705 4706 locks.append( 4707 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4708 ) 4709 4710 return locks 4711 4712 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4713 start = self._index 4714 _, side_token, kind_token = self._parse_join_parts() 4715 4716 side = side_token.text if side_token else None 4717 kind = kind_token.text if kind_token else None 4718 4719 if not self._match_set(self.SET_OPERATIONS): 4720 self._retreat(start) 4721 return None 4722 4723 token_type = self._prev.token_type 4724 4725 if token_type == TokenType.UNION: 4726 operation: t.Type[exp.SetOperation] = exp.Union 4727 elif token_type == TokenType.EXCEPT: 4728 operation = exp.Except 4729 else: 4730 operation = exp.Intersect 4731 4732 comments = self._prev.comments 4733 4734 if self._match(TokenType.DISTINCT): 4735 distinct: t.Optional[bool] = True 4736 elif self._match(TokenType.ALL): 4737 distinct = False 4738 else: 4739 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4740 if distinct is None: 4741 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4742 4743 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4744 "STRICT", "CORRESPONDING" 4745 ) 4746 if self._match_text_seq("CORRESPONDING"): 4747 by_name = True 4748 if not side and not kind: 4749 kind = "INNER" 4750 4751 on_column_list = None 4752 if by_name and self._match_texts(("ON", "BY")): 4753 on_column_list = self._parse_wrapped_csv(self._parse_column) 4754 4755 expression = self._parse_select(nested=True, parse_set_operation=False) 4756 4757 return self.expression( 4758 operation, 4759 comments=comments, 4760 this=this, 4761 distinct=distinct, 4762 by_name=by_name, 4763 expression=expression, 4764 side=side, 4765 kind=kind, 4766 on=on_column_list, 4767 ) 4768 4769 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4770 while this: 4771 setop = self.parse_set_operation(this) 4772 if not setop: 4773 break 4774 this = setop 4775 4776 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4777 expression = this.expression 4778 4779 if expression: 4780 for arg in self.SET_OP_MODIFIERS: 4781 expr = expression.args.get(arg) 4782 if expr: 4783 this.set(arg, expr.pop()) 4784 4785 return this 4786 4787 def _parse_expression(self) -> t.Optional[exp.Expression]: 4788 return self._parse_alias(self._parse_assignment()) 4789 4790 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4791 this = self._parse_disjunction() 4792 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4793 # This allows us to parse <non-identifier token> := <expr> 4794 this = exp.column( 4795 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4796 ) 4797 4798 while self._match_set(self.ASSIGNMENT): 4799 if isinstance(this, exp.Column) and len(this.parts) == 1: 4800 this = this.this 4801 4802 this = self.expression( 4803 
self.ASSIGNMENT[self._prev.token_type], 4804 this=this, 4805 comments=self._prev_comments, 4806 expression=self._parse_assignment(), 4807 ) 4808 4809 return this 4810 4811 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4812 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4813 4814 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4815 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4816 4817 def _parse_equality(self) -> t.Optional[exp.Expression]: 4818 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4819 4820 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4821 return self._parse_tokens(self._parse_range, self.COMPARISON) 4822 4823 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4824 this = this or self._parse_bitwise() 4825 negate = self._match(TokenType.NOT) 4826 4827 if self._match_set(self.RANGE_PARSERS): 4828 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4829 if not expression: 4830 return this 4831 4832 this = expression 4833 elif self._match(TokenType.ISNULL): 4834 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4835 4836 # Postgres supports ISNULL and NOTNULL for conditions. 4837 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4838 if self._match(TokenType.NOTNULL): 4839 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4840 this = self.expression(exp.Not, this=this) 4841 4842 if negate: 4843 this = self._negate_range(this) 4844 4845 if self._match(TokenType.IS): 4846 this = self._parse_is(this) 4847 4848 return this 4849 4850 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4851 if not this: 4852 return this 4853 4854 return self.expression(exp.Not, this=this) 4855 4856 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4857 index = self._index - 1 4858 negate = self._match(TokenType.NOT) 4859 4860 if self._match_text_seq("DISTINCT", "FROM"): 4861 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4862 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4863 4864 if self._match(TokenType.JSON): 4865 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4866 4867 if self._match_text_seq("WITH"): 4868 _with = True 4869 elif self._match_text_seq("WITHOUT"): 4870 _with = False 4871 else: 4872 _with = None 4873 4874 unique = self._match(TokenType.UNIQUE) 4875 self._match_text_seq("KEYS") 4876 expression: t.Optional[exp.Expression] = self.expression( 4877 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4878 ) 4879 else: 4880 expression = self._parse_primary() or self._parse_null() 4881 if not expression: 4882 self._retreat(index) 4883 return None 4884 4885 this = self.expression(exp.Is, this=this, expression=expression) 4886 return self.expression(exp.Not, this=this) if negate else this 4887 4888 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4889 unnest = self._parse_unnest(with_alias=False) 4890 if unnest: 4891 this = self.expression(exp.In, this=this, unnest=unnest) 4892 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4893 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4894 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4895 4896 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4897 this = self.expression(exp.In, 
this=this, query=expressions[0].subquery(copy=False)) 4898 else: 4899 this = self.expression(exp.In, this=this, expressions=expressions) 4900 4901 if matched_l_paren: 4902 self._match_r_paren(this) 4903 elif not self._match(TokenType.R_BRACKET, expression=this): 4904 self.raise_error("Expecting ]") 4905 else: 4906 this = self.expression(exp.In, this=this, field=self._parse_column()) 4907 4908 return this 4909 4910 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4911 low = self._parse_bitwise() 4912 self._match(TokenType.AND) 4913 high = self._parse_bitwise() 4914 return self.expression(exp.Between, this=this, low=low, high=high) 4915 4916 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4917 if not self._match(TokenType.ESCAPE): 4918 return this 4919 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4920 4921 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4922 index = self._index 4923 4924 if not self._match(TokenType.INTERVAL) and match_interval: 4925 return None 4926 4927 if self._match(TokenType.STRING, advance=False): 4928 this = self._parse_primary() 4929 else: 4930 this = self._parse_term() 4931 4932 if not this or ( 4933 isinstance(this, exp.Column) 4934 and not this.table 4935 and not this.this.quoted 4936 and this.name.upper() == "IS" 4937 ): 4938 self._retreat(index) 4939 return None 4940 4941 unit = self._parse_function() or ( 4942 not self._match(TokenType.ALIAS, advance=False) 4943 and self._parse_var(any_token=True, upper=True) 4944 ) 4945 4946 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4947 # each INTERVAL expression into this canonical form so it's easy to transpile 4948 if this and this.is_number: 4949 this = exp.Literal.string(this.to_py()) 4950 elif this and this.is_string: 4951 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4952 if parts and unit: 4953 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4954 unit = None 4955 self._retreat(self._index - 1) 4956 4957 if len(parts) == 1: 4958 this = exp.Literal.string(parts[0][0]) 4959 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4960 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4961 unit = self.expression( 4962 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4963 ) 4964 4965 interval = self.expression(exp.Interval, this=this, unit=unit) 4966 4967 index = self._index 4968 self._match(TokenType.PLUS) 4969 4970 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4971 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4972 return self.expression( 4973 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4974 ) 4975 4976 self._retreat(index) 4977 return interval 4978 4979 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4980 this = self._parse_term() 4981 4982 while True: 4983 if self._match_set(self.BITWISE): 4984 this = self.expression( 4985 self.BITWISE[self._prev.token_type], 4986 this=this, 4987 expression=self._parse_term(), 4988 ) 4989 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4990 this = self.expression( 4991 exp.DPipe, 4992 this=this, 4993 expression=self._parse_term(), 4994 safe=not self.dialect.STRICT_STRING_CONCAT, 4995 ) 4996 elif self._match(TokenType.DQMARK): 4997 this = self.expression( 4998 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4999 ) 5000 elif self._match_pair(TokenType.LT, TokenType.LT): 5001 this = self.expression( 5002 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5003 ) 5004 elif self._match_pair(TokenType.GT, TokenType.GT): 5005 this = self.expression( 5006 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5007 ) 5008 else: 5009 break 5010 5011 return this 5012 5013 def _parse_term(self) -> t.Optional[exp.Expression]: 5014 this = self._parse_factor() 5015 5016 while self._match_set(self.TERM): 5017 klass = self.TERM[self._prev.token_type] 5018 comments = self._prev_comments 5019 expression = self._parse_factor() 5020 5021 this = self.expression(klass, this=this, comments=comments, expression=expression) 5022 5023 if isinstance(this, exp.Collate): 5024 expr = this.expression 5025 5026 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5027 # fallback to Identifier / Var 5028 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5029 ident = expr.this 5030 if isinstance(ident, exp.Identifier): 5031 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5032 5033 return this 5034 5035 def _parse_factor(self) -> t.Optional[exp.Expression]: 5036 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5037 this = parse_method() 5038 5039 while self._match_set(self.FACTOR): 5040 klass = self.FACTOR[self._prev.token_type] 5041 comments = self._prev_comments 5042 expression = parse_method() 5043 5044 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5045 self._retreat(self._index - 1) 5046 return this 5047 5048 this = self.expression(klass, this=this, comments=comments, expression=expression) 5049 5050 if isinstance(this, exp.Div): 5051 this.args["typed"] = self.dialect.TYPED_DIVISION 5052 this.args["safe"] = self.dialect.SAFE_DIVISION 5053 5054 return this 5055 5056 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5057 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5058 5059 def _parse_unary(self) -> t.Optional[exp.Expression]: 5060 if self._match_set(self.UNARY_PARSERS): 5061 return self.UNARY_PARSERS[self._prev.token_type](self) 5062 return self._parse_at_time_zone(self._parse_type()) 5063 5064 def _parse_type( 5065 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5066 ) -> t.Optional[exp.Expression]: 5067 interval = parse_interval and self._parse_interval() 5068 if interval: 5069 return interval 5070 5071 index = self._index 5072 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5073 
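# A minimal usage sketch of the data_type paths below (assuming the default
# dialect and the public sqlglot API):
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT DATE '2020-01-01'").sql()
#   "SELECT CAST('2020-01-01' AS DATE)"
#
# i.e. a type keyword followed by a literal is canonicalized into a Cast node.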
5074 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5075 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5076 if isinstance(data_type, exp.Cast): 5077 # This constructor can contain ops directly after it, for instance struct unnesting: 5078 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5079 return self._parse_column_ops(data_type) 5080 5081 if data_type: 5082 index2 = self._index 5083 this = self._parse_primary() 5084 5085 if isinstance(this, exp.Literal): 5086 this = self._parse_column_ops(this) 5087 5088 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5089 if parser: 5090 return parser(self, this, data_type) 5091 5092 return self.expression(exp.Cast, this=this, to=data_type) 5093 5094 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5095 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5096 # 5097 # If the index difference here is greater than 1, that means the parser itself must have 5098 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5099 # 5100 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5101 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5102 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5103 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5104 # 5105 # In these cases, we don't really want to return the converted type, but instead retreat 5106 # and try to parse a Column or Identifier in the section below. 5107 if data_type.expressions and index2 - index > 1: 5108 self._retreat(index2) 5109 return self._parse_column_ops(data_type) 5110 5111 self._retreat(index) 5112 5113 if fallback_to_identifier: 5114 return self._parse_id_var() 5115 5116 this = self._parse_column() 5117 return this and self._parse_column_ops(this) 5118 5119 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5120 this = self._parse_type() 5121 if not this: 5122 return None 5123 5124 if isinstance(this, exp.Column) and not this.table: 5125 this = exp.var(this.name.upper()) 5126 5127 return self.expression( 5128 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5129 ) 5130 5131 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5132 type_name = identifier.name 5133 5134 while self._match(TokenType.DOT): 5135 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5136 5137 return exp.DataType.build(type_name, udt=True) 5138 5139 def _parse_types( 5140 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5141 ) -> t.Optional[exp.Expression]: 5142 index = self._index 5143 5144 this: t.Optional[exp.Expression] = None 5145 prefix = self._match_text_seq("SYSUDTLIB", ".") 5146 5147 if not self._match_set(self.TYPE_TOKENS): 5148 identifier = allow_identifiers and self._parse_id_var( 5149 any_token=False, tokens=(TokenType.VAR,) 5150 ) 5151 if isinstance(identifier, exp.Identifier): 5152 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5153 5154 if len(tokens) != 1: 5155 self.raise_error("Unexpected identifier", self._prev) 5156 5157 if tokens[0].token_type in self.TYPE_TOKENS: 5158 self._prev = tokens[0] 5159 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5160 this = 
self._parse_user_defined_type(identifier) 5161 else: 5162 self._retreat(self._index - 1) 5163 return None 5164 else: 5165 return None 5166 5167 type_token = self._prev.token_type 5168 5169 if type_token == TokenType.PSEUDO_TYPE: 5170 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5171 5172 if type_token == TokenType.OBJECT_IDENTIFIER: 5173 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5174 5175 # https://materialize.com/docs/sql/types/map/ 5176 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5177 key_type = self._parse_types( 5178 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5179 ) 5180 if not self._match(TokenType.FARROW): 5181 self._retreat(index) 5182 return None 5183 5184 value_type = self._parse_types( 5185 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5186 ) 5187 if not self._match(TokenType.R_BRACKET): 5188 self._retreat(index) 5189 return None 5190 5191 return exp.DataType( 5192 this=exp.DataType.Type.MAP, 5193 expressions=[key_type, value_type], 5194 nested=True, 5195 prefix=prefix, 5196 ) 5197 5198 nested = type_token in self.NESTED_TYPE_TOKENS 5199 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5200 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5201 expressions = None 5202 maybe_func = False 5203 5204 if self._match(TokenType.L_PAREN): 5205 if is_struct: 5206 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5207 elif nested: 5208 expressions = self._parse_csv( 5209 lambda: self._parse_types( 5210 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5211 ) 5212 ) 5213 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5214 this = expressions[0] 5215 this.set("nullable", True) 5216 self._match_r_paren() 5217 return this 5218 elif type_token in self.ENUM_TYPE_TOKENS: 5219 expressions = self._parse_csv(self._parse_equality) 5220 elif is_aggregate: 5221 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5222 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5223 ) 5224 if not func_or_ident: 5225 return None 5226 expressions = [func_or_ident] 5227 if self._match(TokenType.COMMA): 5228 expressions.extend( 5229 self._parse_csv( 5230 lambda: self._parse_types( 5231 check_func=check_func, 5232 schema=schema, 5233 allow_identifiers=allow_identifiers, 5234 ) 5235 ) 5236 ) 5237 else: 5238 expressions = self._parse_csv(self._parse_type_size) 5239 5240 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5241 if type_token == TokenType.VECTOR and len(expressions) == 2: 5242 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5243 5244 if not expressions or not self._match(TokenType.R_PAREN): 5245 self._retreat(index) 5246 return None 5247 5248 maybe_func = True 5249 5250 values: t.Optional[t.List[exp.Expression]] = None 5251 5252 if nested and self._match(TokenType.LT): 5253 if is_struct: 5254 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5255 else: 5256 expressions = self._parse_csv( 5257 lambda: self._parse_types( 5258 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5259 ) 5260 ) 5261 5262 if not self._match(TokenType.GT): 5263 self.raise_error("Expecting >") 5264 5265 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5266 values = self._parse_csv(self._parse_assignment) 5267 if not values and is_struct: 5268 values = None 5269 
self._retreat(self._index - 1) 5270 else: 5271 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5272 5273 if type_token in self.TIMESTAMPS: 5274 if self._match_text_seq("WITH", "TIME", "ZONE"): 5275 maybe_func = False 5276 tz_type = ( 5277 exp.DataType.Type.TIMETZ 5278 if type_token in self.TIMES 5279 else exp.DataType.Type.TIMESTAMPTZ 5280 ) 5281 this = exp.DataType(this=tz_type, expressions=expressions) 5282 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5283 maybe_func = False 5284 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5285 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5286 maybe_func = False 5287 elif type_token == TokenType.INTERVAL: 5288 unit = self._parse_var(upper=True) 5289 if unit: 5290 if self._match_text_seq("TO"): 5291 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5292 5293 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5294 else: 5295 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5296 elif type_token == TokenType.VOID: 5297 this = exp.DataType(this=exp.DataType.Type.NULL) 5298 5299 if maybe_func and check_func: 5300 index2 = self._index 5301 peek = self._parse_string() 5302 5303 if not peek: 5304 self._retreat(index) 5305 return None 5306 5307 self._retreat(index2) 5308 5309 if not this: 5310 if self._match_text_seq("UNSIGNED"): 5311 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5312 if not unsigned_type_token: 5313 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5314 5315 type_token = unsigned_type_token or type_token 5316 5317 this = exp.DataType( 5318 this=exp.DataType.Type[type_token.value], 5319 expressions=expressions, 5320 nested=nested, 5321 prefix=prefix, 5322 ) 5323 5324 # Empty arrays/structs are allowed 5325 if values is not None: 5326 cls = exp.Struct if is_struct else exp.Array 5327 this = exp.cast(cls(expressions=values), this, copy=False) 5328 5329 elif expressions: 5330 this.set("expressions", expressions) 5331 5332 # https://materialize.com/docs/sql/types/list/#type-name 5333 while self._match(TokenType.LIST): 5334 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5335 5336 index = self._index 5337 5338 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5339 matched_array = self._match(TokenType.ARRAY) 5340 5341 while self._curr: 5342 datatype_token = self._prev.token_type 5343 matched_l_bracket = self._match(TokenType.L_BRACKET) 5344 5345 if (not matched_l_bracket and not matched_array) or ( 5346 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5347 ): 5348 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5349 # not to be confused with the fixed size array parsing 5350 break 5351 5352 matched_array = False 5353 values = self._parse_csv(self._parse_assignment) or None 5354 if ( 5355 values 5356 and not schema 5357 and ( 5358 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5359 ) 5360 ): 5361 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
in DuckDB 5362 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5363 self._retreat(index) 5364 break 5365 5366 this = exp.DataType( 5367 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5368 ) 5369 self._match(TokenType.R_BRACKET) 5370 5371 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5372 converter = self.TYPE_CONVERTERS.get(this.this) 5373 if converter: 5374 this = converter(t.cast(exp.DataType, this)) 5375 5376 return this 5377 5378 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5379 index = self._index 5380 5381 if ( 5382 self._curr 5383 and self._next 5384 and self._curr.token_type in self.TYPE_TOKENS 5385 and self._next.token_type in self.TYPE_TOKENS 5386 ): 5387 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5388 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5389 this = self._parse_id_var() 5390 else: 5391 this = ( 5392 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5393 or self._parse_id_var() 5394 ) 5395 5396 self._match(TokenType.COLON) 5397 5398 if ( 5399 type_required 5400 and not isinstance(this, exp.DataType) 5401 and not self._match_set(self.TYPE_TOKENS, advance=False) 5402 ): 5403 self._retreat(index) 5404 return self._parse_types() 5405 5406 return self._parse_column_def(this) 5407 5408 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5409 if not self._match_text_seq("AT", "TIME", "ZONE"): 5410 return this 5411 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5412 5413 def _parse_column(self) -> t.Optional[exp.Expression]: 5414 this = self._parse_column_reference() 5415 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5416 5417 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5418 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5419 5420 return column 5421 5422 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5423 this = self._parse_field() 5424 if ( 5425 not this 5426 and self._match(TokenType.VALUES, advance=False) 5427 and self.VALUES_FOLLOWED_BY_PAREN 5428 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5429 ): 5430 this = self._parse_id_var() 5431 5432 if isinstance(this, exp.Identifier): 5433 # We bubble up comments from the Identifier to the Column 5434 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5435 5436 return this 5437 5438 def _parse_colon_as_variant_extract( 5439 self, this: t.Optional[exp.Expression] 5440 ) -> t.Optional[exp.Expression]: 5441 casts = [] 5442 json_path = [] 5443 escape = None 5444 5445 while self._match(TokenType.COLON): 5446 start_index = self._index 5447 5448 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5449 path = self._parse_column_ops( 5450 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5451 ) 5452 5453 # The cast :: operator has a lower precedence than the extraction operator :, so 5454 # we rearrange the AST appropriately to avoid casting the JSON path 5455 while isinstance(path, exp.Cast): 5456 casts.append(path.to) 5457 path = path.this 5458 5459 if casts: 5460 dcolon_offset = next( 5461 i 5462 for i, t in enumerate(self._tokens[start_index:]) 5463 if t.token_type == TokenType.DCOLON 
5464 ) 5465 end_token = self._tokens[start_index + dcolon_offset - 1] 5466 else: 5467 end_token = self._prev 5468 5469 if path: 5470 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5471 # it'll roundtrip to a string literal in GET_PATH 5472 if isinstance(path, exp.Identifier) and path.quoted: 5473 escape = True 5474 5475 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5476 5477 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5478 # Databricks transforms it back to the colon/dot notation 5479 if json_path: 5480 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5481 5482 if json_path_expr: 5483 json_path_expr.set("escape", escape) 5484 5485 this = self.expression( 5486 exp.JSONExtract, 5487 this=this, 5488 expression=json_path_expr, 5489 variant_extract=True, 5490 ) 5491 5492 while casts: 5493 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5494 5495 return this 5496 5497 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5498 return self._parse_types() 5499 5500 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5501 this = self._parse_bracket(this) 5502 5503 while self._match_set(self.COLUMN_OPERATORS): 5504 op_token = self._prev.token_type 5505 op = self.COLUMN_OPERATORS.get(op_token) 5506 5507 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5508 field = self._parse_dcolon() 5509 if not field: 5510 self.raise_error("Expected type") 5511 elif op and self._curr: 5512 field = self._parse_column_reference() or self._parse_bracket() 5513 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5514 field = self._parse_column_ops(field) 5515 else: 5516 field = self._parse_field(any_token=True, anonymous_func=True) 5517 5518 # Function calls can be qualified, e.g., x.y.FOO() 5519 # This converts the final AST to a series of Dots leading to the function call 5520 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5521 if isinstance(field, (exp.Func, exp.Window)) and this: 5522 this = this.transform( 5523 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5524 ) 5525 5526 if op: 5527 this = op(self, this, field) 5528 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5529 this = self.expression( 5530 exp.Column, 5531 comments=this.comments, 5532 this=field, 5533 table=this.this, 5534 db=this.args.get("table"), 5535 catalog=this.args.get("db"), 5536 ) 5537 elif isinstance(field, exp.Window): 5538 # Move the exp.Dot's to the window's function 5539 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5540 field.set("this", window_func) 5541 this = field 5542 else: 5543 this = self.expression(exp.Dot, this=this, expression=field) 5544 5545 if field and field.comments: 5546 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5547 5548 this = self._parse_bracket(this) 5549 5550 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5551 5552 def _parse_paren(self) -> t.Optional[exp.Expression]: 5553 if not self._match(TokenType.L_PAREN): 5554 return None 5555 5556 comments = self._prev_comments 5557 query = self._parse_select() 5558 5559 if query: 5560 expressions = [query] 5561 else: 5562 expressions = self._parse_expressions() 5563 5564 this = self._parse_query_modifiers(seq_get(expressions, 0)) 
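        # Illustrative sketch (not part of the parser) of what the branches below
        # produce, using the public API with the default dialect:
        #
        #   >>> import sqlglot
        #   >>> type(sqlglot.parse_one("(1, 2)")).__name__       # multiple expressions
        #   'Tuple'
        #   >>> type(sqlglot.parse_one("(SELECT 1)")).__name__   # wrapped query
        #   'Subquery'
        #   >>> type(sqlglot.parse_one("(1)")).__name__          # plain grouping
        #   'Paren'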
5565 5566 if not this and self._match(TokenType.R_PAREN, advance=False): 5567 this = self.expression(exp.Tuple) 5568 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5569 this = self._parse_subquery(this=this, parse_alias=False) 5570 elif isinstance(this, exp.Subquery): 5571 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5572 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5573 this = self.expression(exp.Tuple, expressions=expressions) 5574 else: 5575 this = self.expression(exp.Paren, this=this) 5576 5577 if this: 5578 this.add_comments(comments) 5579 5580 self._match_r_paren(expression=this) 5581 return this 5582 5583 def _parse_primary(self) -> t.Optional[exp.Expression]: 5584 if self._match_set(self.PRIMARY_PARSERS): 5585 token_type = self._prev.token_type 5586 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5587 5588 if token_type == TokenType.STRING: 5589 expressions = [primary] 5590 while self._match(TokenType.STRING): 5591 expressions.append(exp.Literal.string(self._prev.text)) 5592 5593 if len(expressions) > 1: 5594 return self.expression(exp.Concat, expressions=expressions) 5595 5596 return primary 5597 5598 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5599 return exp.Literal.number(f"0.{self._prev.text}") 5600 5601 return self._parse_paren() 5602 5603 def _parse_field( 5604 self, 5605 any_token: bool = False, 5606 tokens: t.Optional[t.Collection[TokenType]] = None, 5607 anonymous_func: bool = False, 5608 ) -> t.Optional[exp.Expression]: 5609 if anonymous_func: 5610 field = ( 5611 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5612 or self._parse_primary() 5613 ) 5614 else: 5615 field = self._parse_primary() or self._parse_function( 5616 anonymous=anonymous_func, any_token=any_token 5617 ) 5618 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5619 5620 def _parse_function( 5621 self, 5622 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5623 anonymous: bool = False, 5624 optional_parens: bool = True, 5625 any_token: bool = False, 5626 ) -> t.Optional[exp.Expression]: 5627 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5628 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5629 fn_syntax = False 5630 if ( 5631 self._match(TokenType.L_BRACE, advance=False) 5632 and self._next 5633 and self._next.text.upper() == "FN" 5634 ): 5635 self._advance(2) 5636 fn_syntax = True 5637 5638 func = self._parse_function_call( 5639 functions=functions, 5640 anonymous=anonymous, 5641 optional_parens=optional_parens, 5642 any_token=any_token, 5643 ) 5644 5645 if fn_syntax: 5646 self._match(TokenType.R_BRACE) 5647 5648 return func 5649 5650 def _parse_function_call( 5651 self, 5652 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5653 anonymous: bool = False, 5654 optional_parens: bool = True, 5655 any_token: bool = False, 5656 ) -> t.Optional[exp.Expression]: 5657 if not self._curr: 5658 return None 5659 5660 comments = self._curr.comments 5661 token = self._curr 5662 token_type = self._curr.token_type 5663 this = self._curr.text 5664 upper = this.upper() 5665 5666 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5667 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5668 self._advance() 5669 return self._parse_window(parser(self)) 5670 5671 if not self._next or self._next.token_type != TokenType.L_PAREN: 5672 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5673 
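                # Illustrative (not part of the parser): NO_PAREN_FUNCTIONS covers
                # keywords such as CURRENT_DATE that parse as function expressions
                # even without parentheses, e.g. via the public API:
                #
                #   >>> import sqlglot
                #   >>> isinstance(sqlglot.parse_one("SELECT CURRENT_DATE").selects[0], sqlglot.exp.CurrentDate)
                #   True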
self._advance() 5674 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5675 5676 return None 5677 5678 if any_token: 5679 if token_type in self.RESERVED_TOKENS: 5680 return None 5681 elif token_type not in self.FUNC_TOKENS: 5682 return None 5683 5684 self._advance(2) 5685 5686 parser = self.FUNCTION_PARSERS.get(upper) 5687 if parser and not anonymous: 5688 this = parser(self) 5689 else: 5690 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5691 5692 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5693 this = self.expression( 5694 subquery_predicate, comments=comments, this=self._parse_select() 5695 ) 5696 self._match_r_paren() 5697 return this 5698 5699 if functions is None: 5700 functions = self.FUNCTIONS 5701 5702 function = functions.get(upper) 5703 known_function = function and not anonymous 5704 5705 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5706 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5707 5708 post_func_comments = self._curr and self._curr.comments 5709 if known_function and post_func_comments: 5710 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5711 # call we'll construct it as exp.Anonymous, even if it's "known" 5712 if any( 5713 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5714 for comment in post_func_comments 5715 ): 5716 known_function = False 5717 5718 if alias and known_function: 5719 args = self._kv_to_prop_eq(args) 5720 5721 if known_function: 5722 func_builder = t.cast(t.Callable, function) 5723 5724 if "dialect" in func_builder.__code__.co_varnames: 5725 func = func_builder(args, dialect=self.dialect) 5726 else: 5727 func = func_builder(args) 5728 5729 func = self.validate_expression(func, args) 5730 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5731 func.meta["name"] = this 5732 5733 this = func 5734 else: 5735 if token_type == TokenType.IDENTIFIER: 5736 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5737 5738 this = self.expression(exp.Anonymous, this=this, expressions=args) 5739 this = this.update_positions(token) 5740 5741 if isinstance(this, exp.Expression): 5742 this.add_comments(comments) 5743 5744 self._match_r_paren(this) 5745 return self._parse_window(this) 5746 5747 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5748 return expression 5749 5750 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5751 transformed = [] 5752 5753 for index, e in enumerate(expressions): 5754 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5755 if isinstance(e, exp.Alias): 5756 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5757 5758 if not isinstance(e, exp.PropertyEQ): 5759 e = self.expression( 5760 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5761 ) 5762 5763 if isinstance(e.this, exp.Column): 5764 e.this.replace(e.this.this) 5765 else: 5766 e = self._to_prop_eq(e, index) 5767 5768 transformed.append(e) 5769 5770 return transformed 5771 5772 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5773 return self._parse_statement() 5774 5775 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5776 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5777 5778 def _parse_user_defined_function( 5779 self, kind: t.Optional[TokenType] = None 5780 ) -> t.Optional[exp.Expression]: 5781 this = 
self._parse_table_parts(schema=True) 5782 5783 if not self._match(TokenType.L_PAREN): 5784 return this 5785 5786 expressions = self._parse_csv(self._parse_function_parameter) 5787 self._match_r_paren() 5788 return self.expression( 5789 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5790 ) 5791 5792 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5793 literal = self._parse_primary() 5794 if literal: 5795 return self.expression(exp.Introducer, this=token.text, expression=literal) 5796 5797 return self._identifier_expression(token) 5798 5799 def _parse_session_parameter(self) -> exp.SessionParameter: 5800 kind = None 5801 this = self._parse_id_var() or self._parse_primary() 5802 5803 if this and self._match(TokenType.DOT): 5804 kind = this.name 5805 this = self._parse_var() or self._parse_primary() 5806 5807 return self.expression(exp.SessionParameter, this=this, kind=kind) 5808 5809 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5810 return self._parse_id_var() 5811 5812 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5813 index = self._index 5814 5815 if self._match(TokenType.L_PAREN): 5816 expressions = t.cast( 5817 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5818 ) 5819 5820 if not self._match(TokenType.R_PAREN): 5821 self._retreat(index) 5822 else: 5823 expressions = [self._parse_lambda_arg()] 5824 5825 if self._match_set(self.LAMBDAS): 5826 return self.LAMBDAS[self._prev.token_type](self, expressions) 5827 5828 self._retreat(index) 5829 5830 this: t.Optional[exp.Expression] 5831 5832 if self._match(TokenType.DISTINCT): 5833 this = self.expression( 5834 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5835 ) 5836 else: 5837 this = self._parse_select_or_expression(alias=alias) 5838 5839 return self._parse_limit( 5840 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5841 ) 5842 5843 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5844 index = self._index 5845 if not self._match(TokenType.L_PAREN): 5846 return this 5847 5848 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5849 # expr can be of both types 5850 if self._match_set(self.SELECT_START_TOKENS): 5851 self._retreat(index) 5852 return this 5853 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5854 self._match_r_paren() 5855 return self.expression(exp.Schema, this=this, expressions=args) 5856 5857 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5858 return self._parse_column_def(self._parse_field(any_token=True)) 5859 5860 def _parse_column_def( 5861 self, this: t.Optional[exp.Expression], computed_column: bool = True 5862 ) -> t.Optional[exp.Expression]: 5863 # column defs are not really columns, they're identifiers 5864 if isinstance(this, exp.Column): 5865 this = this.this 5866 5867 if not computed_column: 5868 self._match(TokenType.ALIAS) 5869 5870 kind = self._parse_types(schema=True) 5871 5872 if self._match_text_seq("FOR", "ORDINALITY"): 5873 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5874 5875 constraints: t.List[exp.Expression] = [] 5876 5877 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5878 ("ALIAS", "MATERIALIZED") 5879 ): 5880 persisted = self._prev.text.upper() == "MATERIALIZED" 5881 constraint_kind = exp.ComputedColumnConstraint( 5882 this=self._parse_assignment(), 5883 persisted=persisted or self._match_text_seq("PERSISTED"), 5884 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5885 ) 5886 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5887 elif ( 5888 kind 5889 and self._match(TokenType.ALIAS, advance=False) 5890 and ( 5891 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5892 or (self._next and self._next.token_type == TokenType.L_PAREN) 5893 ) 5894 ): 5895 self._advance() 5896 constraints.append( 5897 self.expression( 5898 exp.ColumnConstraint, 5899 kind=exp.ComputedColumnConstraint( 5900 this=self._parse_disjunction(), 5901 persisted=self._match_texts(("STORED", "VIRTUAL")) 5902 and self._prev.text.upper() == "STORED", 5903 ), 5904 ) 5905 ) 5906 5907 while True: 5908 constraint = self._parse_column_constraint() 5909 if not constraint: 5910 break 5911 constraints.append(constraint) 5912 5913 if not kind and not constraints: 5914 return this 5915 5916 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5917 5918 def _parse_auto_increment( 5919 self, 5920 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5921 start = None 5922 increment = None 5923 order = None 5924 5925 if self._match(TokenType.L_PAREN, advance=False): 5926 args = self._parse_wrapped_csv(self._parse_bitwise) 5927 start = seq_get(args, 0) 5928 increment = seq_get(args, 1) 5929 elif self._match_text_seq("START"): 5930 start = self._parse_bitwise() 5931 self._match_text_seq("INCREMENT") 5932 increment = self._parse_bitwise() 5933 if self._match_text_seq("ORDER"): 5934 order = True 5935 elif self._match_text_seq("NOORDER"): 5936 order = False 5937 5938 if start and increment: 5939 return exp.GeneratedAsIdentityColumnConstraint( 5940 start=start, increment=increment, this=False, order=order 5941 ) 5942 5943 return exp.AutoIncrementColumnConstraint() 5944 5945 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5946 if not self._match_text_seq("REFRESH"): 5947 self._retreat(self._index - 1) 5948 return None 5949 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5950 5951 def _parse_compress(self) -> exp.CompressColumnConstraint: 5952 if 
self._match(TokenType.L_PAREN, advance=False): 5953 return self.expression( 5954 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5955 ) 5956 5957 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5958 5959 def _parse_generated_as_identity( 5960 self, 5961 ) -> ( 5962 exp.GeneratedAsIdentityColumnConstraint 5963 | exp.ComputedColumnConstraint 5964 | exp.GeneratedAsRowColumnConstraint 5965 ): 5966 if self._match_text_seq("BY", "DEFAULT"): 5967 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5968 this = self.expression( 5969 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5970 ) 5971 else: 5972 self._match_text_seq("ALWAYS") 5973 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5974 5975 self._match(TokenType.ALIAS) 5976 5977 if self._match_text_seq("ROW"): 5978 start = self._match_text_seq("START") 5979 if not start: 5980 self._match(TokenType.END) 5981 hidden = self._match_text_seq("HIDDEN") 5982 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5983 5984 identity = self._match_text_seq("IDENTITY") 5985 5986 if self._match(TokenType.L_PAREN): 5987 if self._match(TokenType.START_WITH): 5988 this.set("start", self._parse_bitwise()) 5989 if self._match_text_seq("INCREMENT", "BY"): 5990 this.set("increment", self._parse_bitwise()) 5991 if self._match_text_seq("MINVALUE"): 5992 this.set("minvalue", self._parse_bitwise()) 5993 if self._match_text_seq("MAXVALUE"): 5994 this.set("maxvalue", self._parse_bitwise()) 5995 5996 if self._match_text_seq("CYCLE"): 5997 this.set("cycle", True) 5998 elif self._match_text_seq("NO", "CYCLE"): 5999 this.set("cycle", False) 6000 6001 if not identity: 6002 this.set("expression", self._parse_range()) 6003 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6004 args = self._parse_csv(self._parse_bitwise) 6005 this.set("start", seq_get(args, 0)) 6006 this.set("increment", seq_get(args, 1)) 6007 6008 self._match_r_paren() 6009 6010 return this 6011 6012 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6013 self._match_text_seq("LENGTH") 6014 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6015 6016 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6017 if self._match_text_seq("NULL"): 6018 return self.expression(exp.NotNullColumnConstraint) 6019 if self._match_text_seq("CASESPECIFIC"): 6020 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6021 if self._match_text_seq("FOR", "REPLICATION"): 6022 return self.expression(exp.NotForReplicationColumnConstraint) 6023 6024 # Unconsume the `NOT` token 6025 self._retreat(self._index - 1) 6026 return None 6027 6028 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6029 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6030 6031 procedure_option_follows = ( 6032 self._match(TokenType.WITH, advance=False) 6033 and self._next 6034 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6035 ) 6036 6037 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6038 return self.expression( 6039 exp.ColumnConstraint, 6040 this=this, 6041 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6042 ) 6043 6044 return this 6045 6046 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6047 if not self._match(TokenType.CONSTRAINT): 6048 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6049 6050 return self.expression( 6051 exp.Constraint, 6052 this=self._parse_id_var(), 6053 expressions=self._parse_unnamed_constraints(), 6054 ) 6055 6056 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6057 constraints = [] 6058 while True: 6059 constraint = self._parse_unnamed_constraint() or self._parse_function() 6060 if not constraint: 6061 break 6062 constraints.append(constraint) 6063 6064 return constraints 6065 6066 def _parse_unnamed_constraint( 6067 self, constraints: t.Optional[t.Collection[str]] = None 6068 ) -> t.Optional[exp.Expression]: 6069 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6070 constraints or self.CONSTRAINT_PARSERS 6071 ): 6072 return None 6073 6074 constraint = self._prev.text.upper() 6075 if constraint not in self.CONSTRAINT_PARSERS: 6076 self.raise_error(f"No parser found for schema constraint {constraint}.") 6077 6078 return self.CONSTRAINT_PARSERS[constraint](self) 6079 6080 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6081 return self._parse_id_var(any_token=False) 6082 6083 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6084 self._match_text_seq("KEY") 6085 return self.expression( 6086 exp.UniqueColumnConstraint, 6087 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6088 this=self._parse_schema(self._parse_unique_key()), 6089 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6090 on_conflict=self._parse_on_conflict(), 6091 options=self._parse_key_constraint_options(), 6092 ) 6093 6094 def _parse_key_constraint_options(self) -> t.List[str]: 6095 options = [] 6096 while True: 6097 if not self._curr: 6098 break 6099 6100 if self._match(TokenType.ON): 6101 action = None 6102 on = self._advance_any() and self._prev.text 6103 6104 if self._match_text_seq("NO", "ACTION"): 6105 action = "NO ACTION" 6106 elif self._match_text_seq("CASCADE"): 6107 action = "CASCADE" 6108 elif self._match_text_seq("RESTRICT"): 6109 action = "RESTRICT" 6110 elif self._match_pair(TokenType.SET, TokenType.NULL): 6111 action = "SET NULL" 6112 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6113 action = "SET DEFAULT" 6114 else: 6115 self.raise_error("Invalid key constraint") 6116 6117 options.append(f"ON {on} {action}") 6118 else: 6119 var = self._parse_var_from_options( 6120 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6121 ) 6122 if not var: 6123 break 6124 options.append(var.name) 6125 6126 return options 6127 6128 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6129 if match and not self._match(TokenType.REFERENCES): 6130 return None 6131 6132 expressions = None 6133 this = self._parse_table(schema=True) 6134 options = self._parse_key_constraint_options() 6135 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6136 6137 def _parse_foreign_key(self) -> exp.ForeignKey: 6138 expressions = ( 6139 self._parse_wrapped_id_vars() 6140 if not self._match(TokenType.REFERENCES, advance=False) 6141 else None 6142 ) 6143 reference = self._parse_references() 6144 on_options = {} 6145 6146 while self._match(TokenType.ON): 6147 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6148 self.raise_error("Expected DELETE or UPDATE") 6149 6150 kind = self._prev.text.lower() 6151 6152 if self._match_text_seq("NO", "ACTION"): 6153 action = "NO ACTION" 6154 elif self._match(TokenType.SET): 6155 
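                # Illustrative: this branch covers actions spelled with SET, e.g.
                # FOREIGN KEY (a) REFERENCES p (id) ON DELETE SET NULL or
                # ON UPDATE SET DEFAULT.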
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6156 action = "SET " + self._prev.text.upper() 6157 else: 6158 self._advance() 6159 action = self._prev.text.upper() 6160 6161 on_options[kind] = action 6162 6163 return self.expression( 6164 exp.ForeignKey, 6165 expressions=expressions, 6166 reference=reference, 6167 options=self._parse_key_constraint_options(), 6168 **on_options, # type: ignore 6169 ) 6170 6171 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6172 return self._parse_ordered() or self._parse_field() 6173 6174 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6175 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6176 self._retreat(self._index - 1) 6177 return None 6178 6179 id_vars = self._parse_wrapped_id_vars() 6180 return self.expression( 6181 exp.PeriodForSystemTimeConstraint, 6182 this=seq_get(id_vars, 0), 6183 expression=seq_get(id_vars, 1), 6184 ) 6185 6186 def _parse_primary_key( 6187 self, wrapped_optional: bool = False, in_props: bool = False 6188 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6189 desc = ( 6190 self._match_set((TokenType.ASC, TokenType.DESC)) 6191 and self._prev.token_type == TokenType.DESC 6192 ) 6193 6194 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6195 return self.expression( 6196 exp.PrimaryKeyColumnConstraint, 6197 desc=desc, 6198 options=self._parse_key_constraint_options(), 6199 ) 6200 6201 expressions = self._parse_wrapped_csv( 6202 self._parse_primary_key_part, optional=wrapped_optional 6203 ) 6204 options = self._parse_key_constraint_options() 6205 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6206 6207 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6208 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6209 6210 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6211 """ 6212 Parses a datetime column in ODBC format. We parse the column into the corresponding 6213 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6214 same as we did for `DATE('yyyy-mm-dd')`. 
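        Illustrative example (assuming a dialect that accepts ODBC literals):
        `SELECT {d '2024-01-01'}` should produce the same AST as
        `SELECT DATE('2024-01-01')`.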
6215 6216 Reference: 6217 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6218 """ 6219 self._match(TokenType.VAR) 6220 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6221 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6222 if not self._match(TokenType.R_BRACE): 6223 self.raise_error("Expected }") 6224 return expression 6225 6226 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6227 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6228 return this 6229 6230 bracket_kind = self._prev.token_type 6231 if ( 6232 bracket_kind == TokenType.L_BRACE 6233 and self._curr 6234 and self._curr.token_type == TokenType.VAR 6235 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6236 ): 6237 return self._parse_odbc_datetime_literal() 6238 6239 expressions = self._parse_csv( 6240 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6241 ) 6242 6243 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6244 self.raise_error("Expected ]") 6245 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6246 self.raise_error("Expected }") 6247 6248 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6249 if bracket_kind == TokenType.L_BRACE: 6250 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6251 elif not this: 6252 this = build_array_constructor( 6253 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6254 ) 6255 else: 6256 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6257 if constructor_type: 6258 return build_array_constructor( 6259 constructor_type, 6260 args=expressions, 6261 bracket_kind=bracket_kind, 6262 dialect=self.dialect, 6263 ) 6264 6265 expressions = apply_index_offset( 6266 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6267 ) 6268 this = self.expression( 6269 exp.Bracket, 6270 this=this, 6271 expressions=expressions, 6272 comments=this.pop_comments(), 6273 ) 6274 6275 self._add_comments(this) 6276 return self._parse_bracket(this) 6277 6278 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6279 if self._match(TokenType.COLON): 6280 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6281 return this 6282 6283 def _parse_case(self) -> t.Optional[exp.Expression]: 6284 ifs = [] 6285 default = None 6286 6287 comments = self._prev_comments 6288 expression = self._parse_assignment() 6289 6290 while self._match(TokenType.WHEN): 6291 this = self._parse_assignment() 6292 self._match(TokenType.THEN) 6293 then = self._parse_assignment() 6294 ifs.append(self.expression(exp.If, this=this, true=then)) 6295 6296 if self._match(TokenType.ELSE): 6297 default = self._parse_assignment() 6298 6299 if not self._match(TokenType.END): 6300 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6301 default = exp.column("interval") 6302 else: 6303 self.raise_error("Expected END after CASE", self._prev) 6304 6305 return self.expression( 6306 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6307 ) 6308 6309 def _parse_if(self) -> t.Optional[exp.Expression]: 6310 if self._match(TokenType.L_PAREN): 6311 args = self._parse_csv( 6312 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6313 ) 6314 this = 
self.validate_expression(exp.If.from_arg_list(args), args) 6315 self._match_r_paren() 6316 else: 6317 index = self._index - 1 6318 6319 if self.NO_PAREN_IF_COMMANDS and index == 0: 6320 return self._parse_as_command(self._prev) 6321 6322 condition = self._parse_assignment() 6323 6324 if not condition: 6325 self._retreat(index) 6326 return None 6327 6328 self._match(TokenType.THEN) 6329 true = self._parse_assignment() 6330 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6331 self._match(TokenType.END) 6332 this = self.expression(exp.If, this=condition, true=true, false=false) 6333 6334 return this 6335 6336 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6337 if not self._match_text_seq("VALUE", "FOR"): 6338 self._retreat(self._index - 1) 6339 return None 6340 6341 return self.expression( 6342 exp.NextValueFor, 6343 this=self._parse_column(), 6344 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6345 ) 6346 6347 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6348 this = self._parse_function() or self._parse_var_or_string(upper=True) 6349 6350 if self._match(TokenType.FROM): 6351 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6352 6353 if not self._match(TokenType.COMMA): 6354 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6355 6356 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6357 6358 def _parse_gap_fill(self) -> exp.GapFill: 6359 self._match(TokenType.TABLE) 6360 this = self._parse_table() 6361 6362 self._match(TokenType.COMMA) 6363 args = [this, *self._parse_csv(self._parse_lambda)] 6364 6365 gap_fill = exp.GapFill.from_arg_list(args) 6366 return self.validate_expression(gap_fill, args) 6367 6368 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6369 this = self._parse_assignment() 6370 6371 if not self._match(TokenType.ALIAS): 6372 if self._match(TokenType.COMMA): 6373 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6374 6375 self.raise_error("Expected AS after CAST") 6376 6377 fmt = None 6378 to = self._parse_types() 6379 6380 default = self._match(TokenType.DEFAULT) 6381 if default: 6382 default = self._parse_bitwise() 6383 self._match_text_seq("ON", "CONVERSION", "ERROR") 6384 6385 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6386 fmt_string = self._parse_string() 6387 fmt = self._parse_at_time_zone(fmt_string) 6388 6389 if not to: 6390 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6391 if to.this in exp.DataType.TEMPORAL_TYPES: 6392 this = self.expression( 6393 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6394 this=this, 6395 format=exp.Literal.string( 6396 format_time( 6397 fmt_string.this if fmt_string else "", 6398 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6399 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6400 ) 6401 ), 6402 safe=safe, 6403 ) 6404 6405 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6406 this.set("zone", fmt.args["zone"]) 6407 return this 6408 elif not to: 6409 self.raise_error("Expected TYPE after CAST") 6410 elif isinstance(to, exp.Identifier): 6411 to = exp.DataType.build(to.name, udt=True) 6412 elif to.this == exp.DataType.Type.CHAR: 6413 if self._match(TokenType.CHARACTER_SET): 6414 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6415 6416 return self.expression( 6417 exp.Cast if strict else exp.TryCast, 6418 
this=this, 6419 to=to, 6420 format=fmt, 6421 safe=safe, 6422 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6423 default=default, 6424 ) 6425 6426 def _parse_string_agg(self) -> exp.GroupConcat: 6427 if self._match(TokenType.DISTINCT): 6428 args: t.List[t.Optional[exp.Expression]] = [ 6429 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6430 ] 6431 if self._match(TokenType.COMMA): 6432 args.extend(self._parse_csv(self._parse_assignment)) 6433 else: 6434 args = self._parse_csv(self._parse_assignment) # type: ignore 6435 6436 if self._match_text_seq("ON", "OVERFLOW"): 6437 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6438 if self._match_text_seq("ERROR"): 6439 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6440 else: 6441 self._match_text_seq("TRUNCATE") 6442 on_overflow = self.expression( 6443 exp.OverflowTruncateBehavior, 6444 this=self._parse_string(), 6445 with_count=( 6446 self._match_text_seq("WITH", "COUNT") 6447 or not self._match_text_seq("WITHOUT", "COUNT") 6448 ), 6449 ) 6450 else: 6451 on_overflow = None 6452 6453 index = self._index 6454 if not self._match(TokenType.R_PAREN) and args: 6455 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6456 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6457 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6458 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6459 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6460 6461 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6462 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6463 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
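        # Illustrative sketch (not part of the parser): since both shapes land in
        # exp.GroupConcat, transpilation to MySQL's GROUP_CONCAT follows naturally,
        # e.g. via the public API (exact output may vary across sqlglot versions):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")[0]
        #   "SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t"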
6464 if not self._match_text_seq("WITHIN", "GROUP"): 6465 self._retreat(index) 6466 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6467 6468 # The corresponding match_r_paren will be called in parse_function (caller) 6469 self._match_l_paren() 6470 6471 return self.expression( 6472 exp.GroupConcat, 6473 this=self._parse_order(this=seq_get(args, 0)), 6474 separator=seq_get(args, 1), 6475 on_overflow=on_overflow, 6476 ) 6477 6478 def _parse_convert( 6479 self, strict: bool, safe: t.Optional[bool] = None 6480 ) -> t.Optional[exp.Expression]: 6481 this = self._parse_bitwise() 6482 6483 if self._match(TokenType.USING): 6484 to: t.Optional[exp.Expression] = self.expression( 6485 exp.CharacterSet, this=self._parse_var() 6486 ) 6487 elif self._match(TokenType.COMMA): 6488 to = self._parse_types() 6489 else: 6490 to = None 6491 6492 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6493 6494 def _parse_xml_table(self) -> exp.XMLTable: 6495 namespaces = None 6496 passing = None 6497 columns = None 6498 6499 if self._match_text_seq("XMLNAMESPACES", "("): 6500 namespaces = self._parse_xml_namespace() 6501 self._match_text_seq(")", ",") 6502 6503 this = self._parse_string() 6504 6505 if self._match_text_seq("PASSING"): 6506 # The BY VALUE keywords are optional and are provided for semantic clarity 6507 self._match_text_seq("BY", "VALUE") 6508 passing = self._parse_csv(self._parse_column) 6509 6510 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6511 6512 if self._match_text_seq("COLUMNS"): 6513 columns = self._parse_csv(self._parse_field_def) 6514 6515 return self.expression( 6516 exp.XMLTable, 6517 this=this, 6518 namespaces=namespaces, 6519 passing=passing, 6520 columns=columns, 6521 by_ref=by_ref, 6522 ) 6523 6524 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6525 namespaces = [] 6526 6527 while True: 6528 if self._match(TokenType.DEFAULT): 6529 uri = self._parse_string() 6530 else: 6531 uri = self._parse_alias(self._parse_string()) 6532 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6533 if not self._match(TokenType.COMMA): 6534 break 6535 6536 return namespaces 6537 6538 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6539 """ 6540 There are generally two variants of the DECODE function: 6541 6542 - DECODE(bin, charset) 6543 - DECODE(expression, search, result [, search, result] ... [, default]) 6544 6545 The second variant will always be parsed into a CASE expression. Note that NULL 6546 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6547 instead of relying on pattern matching. 
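        Illustrative example: DECODE(a, 1, 'one', 'other') is parsed roughly as
        CASE WHEN a = 1 THEN 'one' ELSE 'other' END, and a NULL search value is
        turned into an explicit `a IS NULL` condition.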
6548 """ 6549 args = self._parse_csv(self._parse_assignment) 6550 6551 if len(args) < 3: 6552 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6553 6554 expression, *expressions = args 6555 if not expression: 6556 return None 6557 6558 ifs = [] 6559 for search, result in zip(expressions[::2], expressions[1::2]): 6560 if not search or not result: 6561 return None 6562 6563 if isinstance(search, exp.Literal): 6564 ifs.append( 6565 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6566 ) 6567 elif isinstance(search, exp.Null): 6568 ifs.append( 6569 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6570 ) 6571 else: 6572 cond = exp.or_( 6573 exp.EQ(this=expression.copy(), expression=search), 6574 exp.and_( 6575 exp.Is(this=expression.copy(), expression=exp.Null()), 6576 exp.Is(this=search.copy(), expression=exp.Null()), 6577 copy=False, 6578 ), 6579 copy=False, 6580 ) 6581 ifs.append(exp.If(this=cond, true=result)) 6582 6583 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6584 6585 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6586 self._match_text_seq("KEY") 6587 key = self._parse_column() 6588 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6589 self._match_text_seq("VALUE") 6590 value = self._parse_bitwise() 6591 6592 if not key and not value: 6593 return None 6594 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6595 6596 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6597 if not this or not self._match_text_seq("FORMAT", "JSON"): 6598 return this 6599 6600 return self.expression(exp.FormatJson, this=this) 6601 6602 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6603 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6604 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6605 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6606 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6607 else: 6608 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6609 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6610 6611 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6612 6613 if not empty and not error and not null: 6614 return None 6615 6616 return self.expression( 6617 exp.OnCondition, 6618 empty=empty, 6619 error=error, 6620 null=null, 6621 ) 6622 6623 def _parse_on_handling( 6624 self, on: str, *values: str 6625 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6626 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6627 for value in values: 6628 if self._match_text_seq(value, "ON", on): 6629 return f"{value} ON {on}" 6630 6631 index = self._index 6632 if self._match(TokenType.DEFAULT): 6633 default_value = self._parse_bitwise() 6634 if self._match_text_seq("ON", on): 6635 return default_value 6636 6637 self._retreat(index) 6638 6639 return None 6640 6641 @t.overload 6642 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6643 6644 @t.overload 6645 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6646 6647 def _parse_json_object(self, agg=False): 6648 star = self._parse_star() 6649 expressions = ( 6650 [star] 6651 if star 6652 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6653 ) 6654 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6655 6656 unique_keys = None 6657 if self._match_text_seq("WITH", "UNIQUE"): 6658 unique_keys = True 6659 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6660 unique_keys = False 6661 6662 self._match_text_seq("KEYS") 6663 6664 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6665 self._parse_type() 6666 ) 6667 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6668 6669 return self.expression( 6670 exp.JSONObjectAgg if agg else exp.JSONObject, 6671 expressions=expressions, 6672 null_handling=null_handling, 6673 unique_keys=unique_keys, 6674 return_type=return_type, 6675 encoding=encoding, 6676 ) 6677 6678 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6679 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6680 if not self._match_text_seq("NESTED"): 6681 this = self._parse_id_var() 6682 kind = self._parse_types(allow_identifiers=False) 6683 nested = None 6684 else: 6685 this = None 6686 kind = None 6687 nested = True 6688 6689 path = self._match_text_seq("PATH") and self._parse_string() 6690 nested_schema = nested and self._parse_json_schema() 6691 6692 return self.expression( 6693 exp.JSONColumnDef, 6694 this=this, 6695 kind=kind, 6696 path=path, 6697 nested_schema=nested_schema, 6698 ) 6699 6700 def _parse_json_schema(self) -> exp.JSONSchema: 6701 self._match_text_seq("COLUMNS") 6702 return self.expression( 6703 exp.JSONSchema, 6704 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6705 ) 6706 6707 def _parse_json_table(self) -> exp.JSONTable: 6708 this = self._parse_format_json(self._parse_bitwise()) 6709 path = self._match(TokenType.COMMA) and self._parse_string() 6710 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6711 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6712 schema = self._parse_json_schema() 6713 6714 return exp.JSONTable( 6715 this=this, 6716 schema=schema, 6717 path=path, 6718 error_handling=error_handling, 6719 empty_handling=empty_handling, 6720 ) 6721 6722 def _parse_match_against(self) -> exp.MatchAgainst: 6723 expressions = self._parse_csv(self._parse_column) 6724 6725 self._match_text_seq(")", "AGAINST", "(") 6726 6727 this = self._parse_string() 6728 6729 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6730 modifier = "IN NATURAL LANGUAGE MODE" 6731 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6732 modifier = f"{modifier} WITH QUERY EXPANSION" 6733 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6734 modifier = "IN BOOLEAN MODE" 6735 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6736 modifier = "WITH QUERY EXPANSION" 6737 else: 6738 modifier = None 6739 6740 return self.expression( 6741 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6742 ) 6743 6744 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6745 def _parse_open_json(self) -> exp.OpenJSON: 6746 this = self._parse_bitwise() 6747 path = self._match(TokenType.COMMA) and self._parse_string() 6748 6749 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6750 this = self._parse_field(any_token=True) 6751 kind = self._parse_types() 6752 path = 
self._parse_string() 6753 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6754 6755 return self.expression( 6756 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6757 ) 6758 6759 expressions = None 6760 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6761 self._match_l_paren() 6762 expressions = self._parse_csv(_parse_open_json_column_def) 6763 6764 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6765 6766 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6767 args = self._parse_csv(self._parse_bitwise) 6768 6769 if self._match(TokenType.IN): 6770 return self.expression( 6771 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6772 ) 6773 6774 if haystack_first: 6775 haystack = seq_get(args, 0) 6776 needle = seq_get(args, 1) 6777 else: 6778 haystack = seq_get(args, 1) 6779 needle = seq_get(args, 0) 6780 6781 return self.expression( 6782 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6783 ) 6784 6785 def _parse_predict(self) -> exp.Predict: 6786 self._match_text_seq("MODEL") 6787 this = self._parse_table() 6788 6789 self._match(TokenType.COMMA) 6790 self._match_text_seq("TABLE") 6791 6792 return self.expression( 6793 exp.Predict, 6794 this=this, 6795 expression=self._parse_table(), 6796 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6797 ) 6798 6799 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6800 args = self._parse_csv(self._parse_table) 6801 return exp.JoinHint(this=func_name.upper(), expressions=args) 6802 6803 def _parse_substring(self) -> exp.Substring: 6804 # Postgres supports the form: substring(string [from int] [for int]) 6805 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6806 6807 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6808 6809 if self._match(TokenType.FROM): 6810 args.append(self._parse_bitwise()) 6811 if self._match(TokenType.FOR): 6812 if len(args) == 1: 6813 args.append(exp.Literal.number(1)) 6814 args.append(self._parse_bitwise()) 6815 6816 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6817 6818 def _parse_trim(self) -> exp.Trim: 6819 # https://www.w3resource.com/sql/character-functions/trim.php 6820 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6821 6822 position = None 6823 collation = None 6824 expression = None 6825 6826 if self._match_texts(self.TRIM_TYPES): 6827 position = self._prev.text.upper() 6828 6829 this = self._parse_bitwise() 6830 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6831 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6832 expression = self._parse_bitwise() 6833 6834 if invert_order: 6835 this, expression = expression, this 6836 6837 if self._match(TokenType.COLLATE): 6838 collation = self._parse_bitwise() 6839 6840 return self.expression( 6841 exp.Trim, this=this, position=position, expression=expression, collation=collation 6842 ) 6843 6844 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6845 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6846 6847 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6848 return self._parse_window(self._parse_id_var(), alias=True) 6849 6850 def _parse_respect_or_ignore_nulls( 6851 self, this: t.Optional[exp.Expression] 6852 ) -> t.Optional[exp.Expression]: 6853 if self._match_text_seq("IGNORE", "NULLS"): 
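            # Illustrative: e.g. the trailing modifier in FIRST_VALUE(x IGNORE NULLS)
            # wraps the parsed argument in exp.IgnoreNulls.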
6854 return self.expression(exp.IgnoreNulls, this=this) 6855 if self._match_text_seq("RESPECT", "NULLS"): 6856 return self.expression(exp.RespectNulls, this=this) 6857 return this 6858 6859 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6860 if self._match(TokenType.HAVING): 6861 self._match_texts(("MAX", "MIN")) 6862 max = self._prev.text.upper() != "MIN" 6863 return self.expression( 6864 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6865 ) 6866 6867 return this 6868 6869 def _parse_window( 6870 self, this: t.Optional[exp.Expression], alias: bool = False 6871 ) -> t.Optional[exp.Expression]: 6872 func = this 6873 comments = func.comments if isinstance(func, exp.Expression) else None 6874 6875 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6876 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6877 if self._match_text_seq("WITHIN", "GROUP"): 6878 order = self._parse_wrapped(self._parse_order) 6879 this = self.expression(exp.WithinGroup, this=this, expression=order) 6880 6881 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6882 self._match(TokenType.WHERE) 6883 this = self.expression( 6884 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6885 ) 6886 self._match_r_paren() 6887 6888 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6889 # Some dialects choose to implement and some do not. 6890 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6891 6892 # There is some code above in _parse_lambda that handles 6893 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6894 6895 # The below changes handle 6896 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6897 6898 # Oracle allows both formats 6899 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6900 # and Snowflake chose to do the same for familiarity 6901 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6902 if isinstance(this, exp.AggFunc): 6903 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6904 6905 if ignore_respect and ignore_respect is not this: 6906 ignore_respect.replace(ignore_respect.this) 6907 this = self.expression(ignore_respect.__class__, this=this) 6908 6909 this = self._parse_respect_or_ignore_nulls(this) 6910 6911 # bigquery select from window x AS (partition by ...) 
6912 if alias: 6913 over = None 6914 self._match(TokenType.ALIAS) 6915 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6916 return this 6917 else: 6918 over = self._prev.text.upper() 6919 6920 if comments and isinstance(func, exp.Expression): 6921 func.pop_comments() 6922 6923 if not self._match(TokenType.L_PAREN): 6924 return self.expression( 6925 exp.Window, 6926 comments=comments, 6927 this=this, 6928 alias=self._parse_id_var(False), 6929 over=over, 6930 ) 6931 6932 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6933 6934 first = self._match(TokenType.FIRST) 6935 if self._match_text_seq("LAST"): 6936 first = False 6937 6938 partition, order = self._parse_partition_and_order() 6939 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6940 6941 if kind: 6942 self._match(TokenType.BETWEEN) 6943 start = self._parse_window_spec() 6944 self._match(TokenType.AND) 6945 end = self._parse_window_spec() 6946 exclude = ( 6947 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6948 if self._match_text_seq("EXCLUDE") 6949 else None 6950 ) 6951 6952 spec = self.expression( 6953 exp.WindowSpec, 6954 kind=kind, 6955 start=start["value"], 6956 start_side=start["side"], 6957 end=end["value"], 6958 end_side=end["side"], 6959 exclude=exclude, 6960 ) 6961 else: 6962 spec = None 6963 6964 self._match_r_paren() 6965 6966 window = self.expression( 6967 exp.Window, 6968 comments=comments, 6969 this=this, 6970 partition_by=partition, 6971 order=order, 6972 spec=spec, 6973 alias=window_alias, 6974 over=over, 6975 first=first, 6976 ) 6977 6978 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6979 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6980 return self._parse_window(window, alias=alias) 6981 6982 return window 6983 6984 def _parse_partition_and_order( 6985 self, 6986 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6987 return self._parse_partition_by(), self._parse_order() 6988 6989 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6990 self._match(TokenType.BETWEEN) 6991 6992 return { 6993 "value": ( 6994 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6995 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6996 or self._parse_bitwise() 6997 ), 6998 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6999 } 7000 7001 def _parse_alias( 7002 self, this: t.Optional[exp.Expression], explicit: bool = False 7003 ) -> t.Optional[exp.Expression]: 7004 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7005 # so this section tries to parse the clause version and if it fails, it treats the token 7006 # as an identifier (alias) 7007 if self._can_parse_limit_or_offset(): 7008 return this 7009 7010 any_token = self._match(TokenType.ALIAS) 7011 comments = self._prev_comments or [] 7012 7013 if explicit and not any_token: 7014 return this 7015 7016 if self._match(TokenType.L_PAREN): 7017 aliases = self.expression( 7018 exp.Aliases, 7019 comments=comments, 7020 this=this, 7021 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7022 ) 7023 self._match_r_paren(aliases) 7024 return aliases 7025 7026 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7027 self.STRING_ALIASES and self._parse_string_as_identifier() 7028 ) 7029 7030 if alias: 7031 comments.extend(alias.pop_comments()) 7032 this = self.expression(exp.Alias, comments=comments, this=this, 
alias=alias) 7033 column = this.this 7034 7035 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7036 if not this.comments and column and column.comments: 7037 this.comments = column.pop_comments() 7038 7039 return this 7040 7041 def _parse_id_var( 7042 self, 7043 any_token: bool = True, 7044 tokens: t.Optional[t.Collection[TokenType]] = None, 7045 ) -> t.Optional[exp.Expression]: 7046 expression = self._parse_identifier() 7047 if not expression and ( 7048 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7049 ): 7050 quoted = self._prev.token_type == TokenType.STRING 7051 expression = self._identifier_expression(quoted=quoted) 7052 7053 return expression 7054 7055 def _parse_string(self) -> t.Optional[exp.Expression]: 7056 if self._match_set(self.STRING_PARSERS): 7057 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7058 return self._parse_placeholder() 7059 7060 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7061 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7062 if output: 7063 output.update_positions(self._prev) 7064 return output 7065 7066 def _parse_number(self) -> t.Optional[exp.Expression]: 7067 if self._match_set(self.NUMERIC_PARSERS): 7068 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7069 return self._parse_placeholder() 7070 7071 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7072 if self._match(TokenType.IDENTIFIER): 7073 return self._identifier_expression(quoted=True) 7074 return self._parse_placeholder() 7075 7076 def _parse_var( 7077 self, 7078 any_token: bool = False, 7079 tokens: t.Optional[t.Collection[TokenType]] = None, 7080 upper: bool = False, 7081 ) -> t.Optional[exp.Expression]: 7082 if ( 7083 (any_token and self._advance_any()) 7084 or self._match(TokenType.VAR) 7085 or (self._match_set(tokens) if tokens else False) 7086 ): 7087 return self.expression( 7088 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7089 ) 7090 return self._parse_placeholder() 7091 7092 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7093 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7094 self._advance() 7095 return self._prev 7096 return None 7097 7098 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7099 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7100 7101 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7102 return self._parse_primary() or self._parse_var(any_token=True) 7103 7104 def _parse_null(self) -> t.Optional[exp.Expression]: 7105 if self._match_set(self.NULL_TOKENS): 7106 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7107 return self._parse_placeholder() 7108 7109 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7110 if self._match(TokenType.TRUE): 7111 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7112 if self._match(TokenType.FALSE): 7113 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7114 return self._parse_placeholder() 7115 7116 def _parse_star(self) -> t.Optional[exp.Expression]: 7117 if self._match(TokenType.STAR): 7118 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7119 return self._parse_placeholder() 7120 7121 def _parse_parameter(self) -> exp.Parameter: 7122 this = self._parse_identifier() or self._parse_primary_or_var() 7123 return 
self.expression(exp.Parameter, this=this) 7124 7125 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7126 if self._match_set(self.PLACEHOLDER_PARSERS): 7127 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7128 if placeholder: 7129 return placeholder 7130 self._advance(-1) 7131 return None 7132 7133 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7134 if not self._match_texts(keywords): 7135 return None 7136 if self._match(TokenType.L_PAREN, advance=False): 7137 return self._parse_wrapped_csv(self._parse_expression) 7138 7139 expression = self._parse_expression() 7140 return [expression] if expression else None 7141 7142 def _parse_csv( 7143 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7144 ) -> t.List[exp.Expression]: 7145 parse_result = parse_method() 7146 items = [parse_result] if parse_result is not None else [] 7147 7148 while self._match(sep): 7149 self._add_comments(parse_result) 7150 parse_result = parse_method() 7151 if parse_result is not None: 7152 items.append(parse_result) 7153 7154 return items 7155 7156 def _parse_tokens( 7157 self, parse_method: t.Callable, expressions: t.Dict 7158 ) -> t.Optional[exp.Expression]: 7159 this = parse_method() 7160 7161 while self._match_set(expressions): 7162 this = self.expression( 7163 expressions[self._prev.token_type], 7164 this=this, 7165 comments=self._prev_comments, 7166 expression=parse_method(), 7167 ) 7168 7169 return this 7170 7171 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7172 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7173 7174 def _parse_wrapped_csv( 7175 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7176 ) -> t.List[exp.Expression]: 7177 return self._parse_wrapped( 7178 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7179 ) 7180 7181 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7182 wrapped = self._match(TokenType.L_PAREN) 7183 if not wrapped and not optional: 7184 self.raise_error("Expecting (") 7185 parse_result = parse_method() 7186 if wrapped: 7187 self._match_r_paren() 7188 return parse_result 7189 7190 def _parse_expressions(self) -> t.List[exp.Expression]: 7191 return self._parse_csv(self._parse_expression) 7192 7193 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7194 return self._parse_select() or self._parse_set_operations( 7195 self._parse_alias(self._parse_assignment(), explicit=True) 7196 if alias 7197 else self._parse_assignment() 7198 ) 7199 7200 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7201 return self._parse_query_modifiers( 7202 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7203 ) 7204 7205 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7206 this = None 7207 if self._match_texts(self.TRANSACTION_KIND): 7208 this = self._prev.text 7209 7210 self._match_texts(("TRANSACTION", "WORK")) 7211 7212 modes = [] 7213 while True: 7214 mode = [] 7215 while self._match(TokenType.VAR): 7216 mode.append(self._prev.text) 7217 7218 if mode: 7219 modes.append(" ".join(mode)) 7220 if not self._match(TokenType.COMMA): 7221 break 7222 7223 return self.expression(exp.Transaction, this=this, modes=modes) 7224 7225 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7226 chain = None 7227 savepoint = None 7228 is_rollback = self._prev.token_type == 
TokenType.ROLLBACK 7229 7230 self._match_texts(("TRANSACTION", "WORK")) 7231 7232 if self._match_text_seq("TO"): 7233 self._match_text_seq("SAVEPOINT") 7234 savepoint = self._parse_id_var() 7235 7236 if self._match(TokenType.AND): 7237 chain = not self._match_text_seq("NO") 7238 self._match_text_seq("CHAIN") 7239 7240 if is_rollback: 7241 return self.expression(exp.Rollback, savepoint=savepoint) 7242 7243 return self.expression(exp.Commit, chain=chain) 7244 7245 def _parse_refresh(self) -> exp.Refresh: 7246 self._match(TokenType.TABLE) 7247 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7248 7249 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7250 if not self._prev.text.upper() == "ADD": 7251 return None 7252 7253 start = self._index 7254 self._match(TokenType.COLUMN) 7255 7256 exists_column = self._parse_exists(not_=True) 7257 expression = self._parse_field_def() 7258 7259 if not isinstance(expression, exp.ColumnDef): 7260 self._retreat(start) 7261 return None 7262 7263 expression.set("exists", exists_column) 7264 7265 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7266 if self._match_texts(("FIRST", "AFTER")): 7267 position = self._prev.text 7268 column_position = self.expression( 7269 exp.ColumnPosition, this=self._parse_column(), position=position 7270 ) 7271 expression.set("position", column_position) 7272 7273 return expression 7274 7275 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7276 drop = self._match(TokenType.DROP) and self._parse_drop() 7277 if drop and not isinstance(drop, exp.Command): 7278 drop.set("kind", drop.args.get("kind", "COLUMN")) 7279 return drop 7280 7281 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7282 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7283 return self.expression( 7284 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7285 ) 7286 7287 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7288 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7289 self._match_text_seq("ADD") 7290 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7291 return self.expression( 7292 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7293 ) 7294 7295 column_def = self._parse_add_column() 7296 if isinstance(column_def, exp.ColumnDef): 7297 return column_def 7298 7299 exists = self._parse_exists(not_=True) 7300 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7301 return self.expression( 7302 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7303 ) 7304 7305 return None 7306 7307 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7308 "COLUMNS" 7309 ): 7310 schema = self._parse_schema() 7311 7312 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7313 7314 return self._parse_csv(_parse_add_alteration) 7315 7316 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7317 if self._match_texts(self.ALTER_ALTER_PARSERS): 7318 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7319 7320 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7321 # keyword after ALTER we default to parsing this statement 7322 self._match(TokenType.COLUMN) 7323 column = self._parse_field(any_token=True) 7324 7325 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7326 
return self.expression(exp.AlterColumn, this=column, drop=True) 7327 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7328 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7329 if self._match(TokenType.COMMENT): 7330 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7331 if self._match_text_seq("DROP", "NOT", "NULL"): 7332 return self.expression( 7333 exp.AlterColumn, 7334 this=column, 7335 drop=True, 7336 allow_null=True, 7337 ) 7338 if self._match_text_seq("SET", "NOT", "NULL"): 7339 return self.expression( 7340 exp.AlterColumn, 7341 this=column, 7342 allow_null=False, 7343 ) 7344 7345 if self._match_text_seq("SET", "VISIBLE"): 7346 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7347 if self._match_text_seq("SET", "INVISIBLE"): 7348 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7349 7350 self._match_text_seq("SET", "DATA") 7351 self._match_text_seq("TYPE") 7352 return self.expression( 7353 exp.AlterColumn, 7354 this=column, 7355 dtype=self._parse_types(), 7356 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7357 using=self._match(TokenType.USING) and self._parse_assignment(), 7358 ) 7359 7360 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7361 if self._match_texts(("ALL", "EVEN", "AUTO")): 7362 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7363 7364 self._match_text_seq("KEY", "DISTKEY") 7365 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7366 7367 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7368 if compound: 7369 self._match_text_seq("SORTKEY") 7370 7371 if self._match(TokenType.L_PAREN, advance=False): 7372 return self.expression( 7373 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7374 ) 7375 7376 self._match_texts(("AUTO", "NONE")) 7377 return self.expression( 7378 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7379 ) 7380 7381 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7382 index = self._index - 1 7383 7384 partition_exists = self._parse_exists() 7385 if self._match(TokenType.PARTITION, advance=False): 7386 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7387 7388 self._retreat(index) 7389 return self._parse_csv(self._parse_drop_column) 7390 7391 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7392 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7393 exists = self._parse_exists() 7394 old_column = self._parse_column() 7395 to = self._match_text_seq("TO") 7396 new_column = self._parse_column() 7397 7398 if old_column is None or to is None or new_column is None: 7399 return None 7400 7401 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7402 7403 self._match_text_seq("TO") 7404 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7405 7406 def _parse_alter_table_set(self) -> exp.AlterSet: 7407 alter_set = self.expression(exp.AlterSet) 7408 7409 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7410 "TABLE", "PROPERTIES" 7411 ): 7412 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7413 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7414 alter_set.set("expressions", [self._parse_assignment()]) 7415 elif self._match_texts(("LOGGED", 
"UNLOGGED")): 7416 alter_set.set("option", exp.var(self._prev.text.upper())) 7417 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7418 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7419 elif self._match_text_seq("LOCATION"): 7420 alter_set.set("location", self._parse_field()) 7421 elif self._match_text_seq("ACCESS", "METHOD"): 7422 alter_set.set("access_method", self._parse_field()) 7423 elif self._match_text_seq("TABLESPACE"): 7424 alter_set.set("tablespace", self._parse_field()) 7425 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7426 alter_set.set("file_format", [self._parse_field()]) 7427 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7428 alter_set.set("file_format", self._parse_wrapped_options()) 7429 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7430 alter_set.set("copy_options", self._parse_wrapped_options()) 7431 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7432 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7433 else: 7434 if self._match_text_seq("SERDE"): 7435 alter_set.set("serde", self._parse_field()) 7436 7437 properties = self._parse_wrapped(self._parse_properties, optional=True) 7438 alter_set.set("expressions", [properties]) 7439 7440 return alter_set 7441 7442 def _parse_alter(self) -> exp.Alter | exp.Command: 7443 start = self._prev 7444 7445 alter_token = self._match_set(self.ALTERABLES) and self._prev 7446 if not alter_token: 7447 return self._parse_as_command(start) 7448 7449 exists = self._parse_exists() 7450 only = self._match_text_seq("ONLY") 7451 this = self._parse_table(schema=True) 7452 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7453 7454 if self._next: 7455 self._advance() 7456 7457 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7458 if parser: 7459 actions = ensure_list(parser(self)) 7460 not_valid = self._match_text_seq("NOT", "VALID") 7461 options = self._parse_csv(self._parse_property) 7462 7463 if not self._curr and actions: 7464 return self.expression( 7465 exp.Alter, 7466 this=this, 7467 kind=alter_token.text.upper(), 7468 exists=exists, 7469 actions=actions, 7470 only=only, 7471 options=options, 7472 cluster=cluster, 7473 not_valid=not_valid, 7474 ) 7475 7476 return self._parse_as_command(start) 7477 7478 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7479 start = self._prev 7480 # https://duckdb.org/docs/sql/statements/analyze 7481 if not self._curr: 7482 return self.expression(exp.Analyze) 7483 7484 options = [] 7485 while self._match_texts(self.ANALYZE_STYLES): 7486 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7487 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7488 else: 7489 options.append(self._prev.text.upper()) 7490 7491 this: t.Optional[exp.Expression] = None 7492 inner_expression: t.Optional[exp.Expression] = None 7493 7494 kind = self._curr and self._curr.text.upper() 7495 7496 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7497 this = self._parse_table_parts() 7498 elif self._match_text_seq("TABLES"): 7499 if self._match_set((TokenType.FROM, TokenType.IN)): 7500 kind = f"{kind} {self._prev.text.upper()}" 7501 this = self._parse_table(schema=True, is_db_reference=True) 7502 elif self._match_text_seq("DATABASE"): 7503 this = self._parse_table(schema=True, is_db_reference=True) 7504 elif self._match_text_seq("CLUSTER"): 7505 this = self._parse_table() 7506 # Try matching inner expr keywords before 
fallback to parse table. 7507 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7508 kind = None 7509 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7510 else: 7511 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7512 kind = None 7513 this = self._parse_table_parts() 7514 7515 partition = self._try_parse(self._parse_partition) 7516 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7517 return self._parse_as_command(start) 7518 7519 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7520 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7521 "WITH", "ASYNC", "MODE" 7522 ): 7523 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7524 else: 7525 mode = None 7526 7527 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7528 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7529 7530 properties = self._parse_properties() 7531 return self.expression( 7532 exp.Analyze, 7533 kind=kind, 7534 this=this, 7535 mode=mode, 7536 partition=partition, 7537 properties=properties, 7538 expression=inner_expression, 7539 options=options, 7540 ) 7541 7542 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7543 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7544 this = None 7545 kind = self._prev.text.upper() 7546 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7547 expressions = [] 7548 7549 if not self._match_text_seq("STATISTICS"): 7550 self.raise_error("Expecting token STATISTICS") 7551 7552 if self._match_text_seq("NOSCAN"): 7553 this = "NOSCAN" 7554 elif self._match(TokenType.FOR): 7555 if self._match_text_seq("ALL", "COLUMNS"): 7556 this = "FOR ALL COLUMNS" 7557 if self._match_texts("COLUMNS"): 7558 this = "FOR COLUMNS" 7559 expressions = self._parse_csv(self._parse_column_reference) 7560 elif self._match_text_seq("SAMPLE"): 7561 sample = self._parse_number() 7562 expressions = [ 7563 self.expression( 7564 exp.AnalyzeSample, 7565 sample=sample, 7566 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7567 ) 7568 ] 7569 7570 return self.expression( 7571 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7572 ) 7573 7574 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7575 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7576 kind = None 7577 this = None 7578 expression: t.Optional[exp.Expression] = None 7579 if self._match_text_seq("REF", "UPDATE"): 7580 kind = "REF" 7581 this = "UPDATE" 7582 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7583 this = "UPDATE SET DANGLING TO NULL" 7584 elif self._match_text_seq("STRUCTURE"): 7585 kind = "STRUCTURE" 7586 if self._match_text_seq("CASCADE", "FAST"): 7587 this = "CASCADE FAST" 7588 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7589 ("ONLINE", "OFFLINE") 7590 ): 7591 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7592 expression = self._parse_into() 7593 7594 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7595 7596 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7597 this = self._prev.text.upper() 7598 if self._match_text_seq("COLUMNS"): 7599 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7600 return None 7601 7602 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 
7603 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7604 if self._match_text_seq("STATISTICS"): 7605 return self.expression(exp.AnalyzeDelete, kind=kind) 7606 return None 7607 7608 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7609 if self._match_text_seq("CHAINED", "ROWS"): 7610 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7611 return None 7612 7613 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7614 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7615 this = self._prev.text.upper() 7616 expression: t.Optional[exp.Expression] = None 7617 expressions = [] 7618 update_options = None 7619 7620 if self._match_text_seq("HISTOGRAM", "ON"): 7621 expressions = self._parse_csv(self._parse_column_reference) 7622 with_expressions = [] 7623 while self._match(TokenType.WITH): 7624 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7625 if self._match_texts(("SYNC", "ASYNC")): 7626 if self._match_text_seq("MODE", advance=False): 7627 with_expressions.append(f"{self._prev.text.upper()} MODE") 7628 self._advance() 7629 else: 7630 buckets = self._parse_number() 7631 if self._match_text_seq("BUCKETS"): 7632 with_expressions.append(f"{buckets} BUCKETS") 7633 if with_expressions: 7634 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7635 7636 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7637 TokenType.UPDATE, advance=False 7638 ): 7639 update_options = self._prev.text.upper() 7640 self._advance() 7641 elif self._match_text_seq("USING", "DATA"): 7642 expression = self.expression(exp.UsingData, this=self._parse_string()) 7643 7644 return self.expression( 7645 exp.AnalyzeHistogram, 7646 this=this, 7647 expressions=expressions, 7648 expression=expression, 7649 update_options=update_options, 7650 ) 7651 7652 def _parse_merge(self) -> exp.Merge: 7653 self._match(TokenType.INTO) 7654 target = self._parse_table() 7655 7656 if target and self._match(TokenType.ALIAS, advance=False): 7657 target.set("alias", self._parse_table_alias()) 7658 7659 self._match(TokenType.USING) 7660 using = self._parse_table() 7661 7662 self._match(TokenType.ON) 7663 on = self._parse_assignment() 7664 7665 return self.expression( 7666 exp.Merge, 7667 this=target, 7668 using=using, 7669 on=on, 7670 whens=self._parse_when_matched(), 7671 returning=self._parse_returning(), 7672 ) 7673 7674 def _parse_when_matched(self) -> exp.Whens: 7675 whens = [] 7676 7677 while self._match(TokenType.WHEN): 7678 matched = not self._match(TokenType.NOT) 7679 self._match_text_seq("MATCHED") 7680 source = ( 7681 False 7682 if self._match_text_seq("BY", "TARGET") 7683 else self._match_text_seq("BY", "SOURCE") 7684 ) 7685 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7686 7687 self._match(TokenType.THEN) 7688 7689 if self._match(TokenType.INSERT): 7690 this = self._parse_star() 7691 if this: 7692 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7693 else: 7694 then = self.expression( 7695 exp.Insert, 7696 this=exp.var("ROW") 7697 if self._match_text_seq("ROW") 7698 else self._parse_value(values=False), 7699 expression=self._match_text_seq("VALUES") and self._parse_value(), 7700 ) 7701 elif self._match(TokenType.UPDATE): 7702 expressions = self._parse_star() 7703 if expressions: 7704 then = self.expression(exp.Update, expressions=expressions) 7705 else: 7706 then = self.expression( 7707 exp.Update, 7708 
expressions=self._match(TokenType.SET) 7709 and self._parse_csv(self._parse_equality), 7710 ) 7711 elif self._match(TokenType.DELETE): 7712 then = self.expression(exp.Var, this=self._prev.text) 7713 else: 7714 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7715 7716 whens.append( 7717 self.expression( 7718 exp.When, 7719 matched=matched, 7720 source=source, 7721 condition=condition, 7722 then=then, 7723 ) 7724 ) 7725 return self.expression(exp.Whens, expressions=whens) 7726 7727 def _parse_show(self) -> t.Optional[exp.Expression]: 7728 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7729 if parser: 7730 return parser(self) 7731 return self._parse_as_command(self._prev) 7732 7733 def _parse_set_item_assignment( 7734 self, kind: t.Optional[str] = None 7735 ) -> t.Optional[exp.Expression]: 7736 index = self._index 7737 7738 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7739 return self._parse_set_transaction(global_=kind == "GLOBAL") 7740 7741 left = self._parse_primary() or self._parse_column() 7742 assignment_delimiter = self._match_texts(("=", "TO")) 7743 7744 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7745 self._retreat(index) 7746 return None 7747 7748 right = self._parse_statement() or self._parse_id_var() 7749 if isinstance(right, (exp.Column, exp.Identifier)): 7750 right = exp.var(right.name) 7751 7752 this = self.expression(exp.EQ, this=left, expression=right) 7753 return self.expression(exp.SetItem, this=this, kind=kind) 7754 7755 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7756 self._match_text_seq("TRANSACTION") 7757 characteristics = self._parse_csv( 7758 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7759 ) 7760 return self.expression( 7761 exp.SetItem, 7762 expressions=characteristics, 7763 kind="TRANSACTION", 7764 **{"global": global_}, # type: ignore 7765 ) 7766 7767 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7768 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7769 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7770 7771 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7772 index = self._index 7773 set_ = self.expression( 7774 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7775 ) 7776 7777 if self._curr: 7778 self._retreat(index) 7779 return self._parse_as_command(self._prev) 7780 7781 return set_ 7782 7783 def _parse_var_from_options( 7784 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7785 ) -> t.Optional[exp.Var]: 7786 start = self._curr 7787 if not start: 7788 return None 7789 7790 option = start.text.upper() 7791 continuations = options.get(option) 7792 7793 index = self._index 7794 self._advance() 7795 for keywords in continuations or []: 7796 if isinstance(keywords, str): 7797 keywords = (keywords,) 7798 7799 if self._match_text_seq(*keywords): 7800 option = f"{option} {' '.join(keywords)}" 7801 break 7802 else: 7803 if continuations or continuations is None: 7804 if raise_unmatched: 7805 self.raise_error(f"Unknown option {option}") 7806 7807 self._retreat(index) 7808 return None 7809 7810 return exp.var(option) 7811 7812 def _parse_as_command(self, start: Token) -> exp.Command: 7813 while self._curr: 7814 self._advance() 7815 text = self._find_sql(start, self._prev) 7816 size = len(start.text) 7817 self._warn_unsupported() 7818 return exp.Command(this=text[:size], 
expression=text[size:]) 7819 7820 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7821 settings = [] 7822 7823 self._match_l_paren() 7824 kind = self._parse_id_var() 7825 7826 if self._match(TokenType.L_PAREN): 7827 while True: 7828 key = self._parse_id_var() 7829 value = self._parse_primary() 7830 if not key and value is None: 7831 break 7832 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7833 self._match(TokenType.R_PAREN) 7834 7835 self._match_r_paren() 7836 7837 return self.expression( 7838 exp.DictProperty, 7839 this=this, 7840 kind=kind.this if kind else None, 7841 settings=settings, 7842 ) 7843 7844 def _parse_dict_range(self, this: str) -> exp.DictRange: 7845 self._match_l_paren() 7846 has_min = self._match_text_seq("MIN") 7847 if has_min: 7848 min = self._parse_var() or self._parse_primary() 7849 self._match_text_seq("MAX") 7850 max = self._parse_var() or self._parse_primary() 7851 else: 7852 max = self._parse_var() or self._parse_primary() 7853 min = exp.Literal.number(0) 7854 self._match_r_paren() 7855 return self.expression(exp.DictRange, this=this, min=min, max=max) 7856 7857 def _parse_comprehension( 7858 self, this: t.Optional[exp.Expression] 7859 ) -> t.Optional[exp.Comprehension]: 7860 index = self._index 7861 expression = self._parse_column() 7862 if not self._match(TokenType.IN): 7863 self._retreat(index - 1) 7864 return None 7865 iterator = self._parse_column() 7866 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7867 return self.expression( 7868 exp.Comprehension, 7869 this=this, 7870 expression=expression, 7871 iterator=iterator, 7872 condition=condition, 7873 ) 7874 7875 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7876 if self._match(TokenType.HEREDOC_STRING): 7877 return self.expression(exp.Heredoc, this=self._prev.text) 7878 7879 if not self._match_text_seq("$"): 7880 return None 7881 7882 tags = ["$"] 7883 tag_text = None 7884 7885 if self._is_connected(): 7886 self._advance() 7887 tags.append(self._prev.text.upper()) 7888 else: 7889 self.raise_error("No closing $ found") 7890 7891 if tags[-1] != "$": 7892 if self._is_connected() and self._match_text_seq("$"): 7893 tag_text = tags[-1] 7894 tags.append("$") 7895 else: 7896 self.raise_error("No closing $ found") 7897 7898 heredoc_start = self._curr 7899 7900 while self._curr: 7901 if self._match_text_seq(*tags, advance=False): 7902 this = self._find_sql(heredoc_start, self._prev) 7903 self._advance(len(tags)) 7904 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7905 7906 self._advance() 7907 7908 self.raise_error(f"No closing {''.join(tags)} found") 7909 return None 7910 7911 def _find_parser( 7912 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7913 ) -> t.Optional[t.Callable]: 7914 if not self._curr: 7915 return None 7916 7917 index = self._index 7918 this = [] 7919 while True: 7920 # The current token might be multiple words 7921 curr = self._curr.text.upper() 7922 key = curr.split(" ") 7923 this.append(curr) 7924 7925 self._advance() 7926 result, trie = in_trie(trie, key) 7927 if result == TrieResult.FAILED: 7928 break 7929 7930 if result == TrieResult.EXISTS: 7931 subparser = parsers[" ".join(this)] 7932 return subparser 7933 7934 self._retreat(index) 7935 return None 7936 7937 def _match(self, token_type, advance=True, expression=None): 7938 if not self._curr: 7939 return None 7940 7941 if self._curr.token_type == token_type: 7942 if advance: 7943 self._advance() 7944 self._add_comments(expression) 7945 return 
True 7946 7947 return None 7948 7949 def _match_set(self, types, advance=True): 7950 if not self._curr: 7951 return None 7952 7953 if self._curr.token_type in types: 7954 if advance: 7955 self._advance() 7956 return True 7957 7958 return None 7959 7960 def _match_pair(self, token_type_a, token_type_b, advance=True): 7961 if not self._curr or not self._next: 7962 return None 7963 7964 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7965 if advance: 7966 self._advance(2) 7967 return True 7968 7969 return None 7970 7971 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7972 if not self._match(TokenType.L_PAREN, expression=expression): 7973 self.raise_error("Expecting (") 7974 7975 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7976 if not self._match(TokenType.R_PAREN, expression=expression): 7977 self.raise_error("Expecting )") 7978 7979 def _match_texts(self, texts, advance=True): 7980 if ( 7981 self._curr 7982 and self._curr.token_type != TokenType.STRING 7983 and self._curr.text.upper() in texts 7984 ): 7985 if advance: 7986 self._advance() 7987 return True 7988 return None 7989 7990 def _match_text_seq(self, *texts, advance=True): 7991 index = self._index 7992 for text in texts: 7993 if ( 7994 self._curr 7995 and self._curr.token_type != TokenType.STRING 7996 and self._curr.text.upper() == text 7997 ): 7998 self._advance() 7999 else: 8000 self._retreat(index) 8001 return None 8002 8003 if not advance: 8004 self._retreat(index) 8005 8006 return True 8007 8008 def _replace_lambda( 8009 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8010 ) -> t.Optional[exp.Expression]: 8011 if not node: 8012 return node 8013 8014 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8015 8016 for column in node.find_all(exp.Column): 8017 typ = lambda_types.get(column.parts[0].name) 8018 if typ is not None: 8019 dot_or_id = column.to_dot() if column.table else column.this 8020 8021 if typ: 8022 dot_or_id = self.expression( 8023 exp.Cast, 8024 this=dot_or_id, 8025 to=typ, 8026 ) 8027 8028 parent = column.parent 8029 8030 while isinstance(parent, exp.Dot): 8031 if not isinstance(parent.parent, exp.Dot): 8032 parent.replace(dot_or_id) 8033 break 8034 parent = parent.parent 8035 else: 8036 if column is node: 8037 node = dot_or_id 8038 else: 8039 column.replace(dot_or_id) 8040 return node 8041 8042 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8043 start = self._prev 8044 8045 # Not to be confused with TRUNCATE(number, decimals) function call 8046 if self._match(TokenType.L_PAREN): 8047 self._retreat(self._index - 2) 8048 return self._parse_function() 8049 8050 # Clickhouse supports TRUNCATE DATABASE as well 8051 is_database = self._match(TokenType.DATABASE) 8052 8053 self._match(TokenType.TABLE) 8054 8055 exists = self._parse_exists(not_=False) 8056 8057 expressions = self._parse_csv( 8058 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8059 ) 8060 8061 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8062 8063 if self._match_text_seq("RESTART", "IDENTITY"): 8064 identity = "RESTART" 8065 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8066 identity = "CONTINUE" 8067 else: 8068 identity = None 8069 8070 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8071 option = self._prev.text 8072 else: 8073 option = None 8074 8075 partition = self._parse_partition() 
8076 8077 # Fallback case 8078 if self._curr: 8079 return self._parse_as_command(start) 8080 8081 return self.expression( 8082 exp.TruncateTable, 8083 expressions=expressions, 8084 is_database=is_database, 8085 exists=exists, 8086 cluster=cluster, 8087 identity=identity, 8088 option=option, 8089 partition=partition, 8090 ) 8091 8092 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8093 this = self._parse_ordered(self._parse_opclass) 8094 8095 if not self._match(TokenType.WITH): 8096 return this 8097 8098 op = self._parse_var(any_token=True) 8099 8100 return self.expression(exp.WithOperator, this=this, op=op) 8101 8102 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8103 self._match(TokenType.EQ) 8104 self._match(TokenType.L_PAREN) 8105 8106 opts: t.List[t.Optional[exp.Expression]] = [] 8107 option: exp.Expression | None 8108 while self._curr and not self._match(TokenType.R_PAREN): 8109 if self._match_text_seq("FORMAT_NAME", "="): 8110 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8111 option = self._parse_format_name() 8112 else: 8113 option = self._parse_property() 8114 8115 if option is None: 8116 self.raise_error("Unable to parse option") 8117 break 8118 8119 opts.append(option) 8120 8121 return opts 8122 8123 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8124 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8125 8126 options = [] 8127 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8128 option = self._parse_var(any_token=True) 8129 prev = self._prev.text.upper() 8130 8131 # Different dialects might separate options and values by white space, "=" and "AS" 8132 self._match(TokenType.EQ) 8133 self._match(TokenType.ALIAS) 8134 8135 param = self.expression(exp.CopyParameter, this=option) 8136 8137 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8138 TokenType.L_PAREN, advance=False 8139 ): 8140 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8141 param.set("expressions", self._parse_wrapped_options()) 8142 elif prev == "FILE_FORMAT": 8143 # T-SQL's external file format case 8144 param.set("expression", self._parse_field()) 8145 else: 8146 param.set("expression", self._parse_unquoted_field()) 8147 8148 options.append(param) 8149 self._match(sep) 8150 8151 return options 8152 8153 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8154 expr = self.expression(exp.Credentials) 8155 8156 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8157 expr.set("storage", self._parse_field()) 8158 if self._match_text_seq("CREDENTIALS"): 8159 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8160 creds = ( 8161 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8162 ) 8163 expr.set("credentials", creds) 8164 if self._match_text_seq("ENCRYPTION"): 8165 expr.set("encryption", self._parse_wrapped_options()) 8166 if self._match_text_seq("IAM_ROLE"): 8167 expr.set("iam_role", self._parse_field()) 8168 if self._match_text_seq("REGION"): 8169 expr.set("region", self._parse_field()) 8170 8171 return expr 8172 8173 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8174 return self._parse_field() 8175 8176 def _parse_copy(self) -> exp.Copy | exp.Command: 8177 start = self._prev 8178 8179 self._match(TokenType.INTO) 8180 8181 this = ( 8182 self._parse_select(nested=True, parse_subquery_alias=False) 8183 if self._match(TokenType.L_PAREN, advance=False) 8184 else self._parse_table(schema=True) 
8185 ) 8186 8187 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8188 8189 files = self._parse_csv(self._parse_file_location) 8190 credentials = self._parse_credentials() 8191 8192 self._match_text_seq("WITH") 8193 8194 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8195 8196 # Fallback case 8197 if self._curr: 8198 return self._parse_as_command(start) 8199 8200 return self.expression( 8201 exp.Copy, 8202 this=this, 8203 kind=kind, 8204 credentials=credentials, 8205 files=files, 8206 params=params, 8207 ) 8208 8209 def _parse_normalize(self) -> exp.Normalize: 8210 return self.expression( 8211 exp.Normalize, 8212 this=self._parse_bitwise(), 8213 form=self._match(TokenType.COMMA) and self._parse_var(), 8214 ) 8215 8216 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8217 args = self._parse_csv(lambda: self._parse_lambda()) 8218 8219 this = seq_get(args, 0) 8220 decimals = seq_get(args, 1) 8221 8222 return expr_type( 8223 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8224 ) 8225 8226 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8227 star_token = self._prev 8228 8229 if self._match_text_seq("COLUMNS", "(", advance=False): 8230 this = self._parse_function() 8231 if isinstance(this, exp.Columns): 8232 this.set("unpack", True) 8233 return this 8234 8235 return self.expression( 8236 exp.Star, 8237 **{ # type: ignore 8238 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8239 "replace": self._parse_star_op("REPLACE"), 8240 "rename": self._parse_star_op("RENAME"), 8241 }, 8242 ).update_positions(star_token) 8243 8244 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8245 privilege_parts = [] 8246 8247 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8248 # (end of privilege list) or L_PAREN (start of column list) are met 8249 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8250 privilege_parts.append(self._curr.text.upper()) 8251 self._advance() 8252 8253 this = exp.var(" ".join(privilege_parts)) 8254 expressions = ( 8255 self._parse_wrapped_csv(self._parse_column) 8256 if self._match(TokenType.L_PAREN, advance=False) 8257 else None 8258 ) 8259 8260 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8261 8262 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8263 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8264 principal = self._parse_id_var() 8265 8266 if not principal: 8267 return None 8268 8269 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8270 8271 def _parse_grant(self) -> exp.Grant | exp.Command: 8272 start = self._prev 8273 8274 privileges = self._parse_csv(self._parse_grant_privilege) 8275 8276 self._match(TokenType.ON) 8277 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8278 8279 # Attempt to parse the securable e.g. 
MySQL allows names 8280 # such as "foo.*", "*.*" which are not easily parseable yet 8281 securable = self._try_parse(self._parse_table_parts) 8282 8283 if not securable or not self._match_text_seq("TO"): 8284 return self._parse_as_command(start) 8285 8286 principals = self._parse_csv(self._parse_grant_principal) 8287 8288 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8289 8290 if self._curr: 8291 return self._parse_as_command(start) 8292 8293 return self.expression( 8294 exp.Grant, 8295 privileges=privileges, 8296 kind=kind, 8297 securable=securable, 8298 principals=principals, 8299 grant_option=grant_option, 8300 ) 8301 8302 def _parse_overlay(self) -> exp.Overlay: 8303 return self.expression( 8304 exp.Overlay, 8305 **{ # type: ignore 8306 "this": self._parse_bitwise(), 8307 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8308 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8309 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8310 }, 8311 ) 8312 8313 def _parse_format_name(self) -> exp.Property: 8314 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8315 # for FILE_FORMAT = <format_name> 8316 return self.expression( 8317 exp.Property, 8318 this=exp.var("FORMAT_NAME"), 8319 value=self._parse_string() or self._parse_table_parts(), 8320 ) 8321 8322 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8323 args: t.List[exp.Expression] = [] 8324 8325 if self._match(TokenType.DISTINCT): 8326 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8327 self._match(TokenType.COMMA) 8328 8329 args.extend(self._parse_csv(self._parse_assignment)) 8330 8331 return self.expression( 8332 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8333 ) 8334 8335 def _identifier_expression( 8336 self, token: t.Optional[Token] = None, **kwargs: t.Any 8337 ) -> exp.Identifier: 8338 token = token or self._prev 8339 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8340 expression.update_positions(token) 8341 return expression 8342 8343 def _build_pipe_cte( 8344 self, 8345 query: exp.Query, 8346 expressions: t.List[exp.Expression], 8347 alias_cte: t.Optional[exp.TableAlias] = None, 8348 ) -> exp.Select: 8349 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8350 if alias_cte: 8351 new_cte = alias_cte 8352 else: 8353 self._pipe_cte_counter += 1 8354 new_cte = f"__tmp{self._pipe_cte_counter}" 8355 8356 with_ = query.args.get("with") 8357 ctes = with_.pop() if with_ else None 8358 8359 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8360 if ctes: 8361 new_select.set("with", ctes) 8362 8363 return new_select.with_(new_cte, as_=query, copy=False) 8364 8365 def _build_pipe_ctes( 8366 self, 8367 query: exp.Select, 8368 expressions: t.List[exp.Expression], 8369 alias_cte: t.Optional[exp.TableAlias] = None, 8370 ) -> exp.Select: 8371 select = query.selects[0].assert_is(exp.Star) 8372 if select.args.get("except") or select.args.get("replace"): 8373 query = self._build_pipe_cte( 8374 query=query.select( 8375 *[expr for expr in expressions if not expr.is_star and expr.args.get("alias")], 8376 copy=False, 8377 ), 8378 expressions=[ 8379 projection.args.get("alias", projection) for projection in expressions 8380 ], 8381 ) 8382 else: 8383 query.select(*expressions, append=False, copy=False) 8384 8385 return self._build_pipe_cte(query=query, expressions=[exp.Star()], alias_cte=alias_cte) 
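A quick illustration of the star-modifier parsing implemented by _parse_star_ops earlier in this section: in dialects that support the syntax (BigQuery is assumed here), the EXCEPT/EXCLUDE, REPLACE and RENAME lists end up as args on the exp.Star node. A minimal sketch, not part of the module source:

    import sqlglot
    from sqlglot import exp

    # The list parsed by _parse_star_op("EXCEPT", "EXCLUDE") is stored
    # under the "except" arg of the Star expression.
    tree = sqlglot.parse_one("SELECT * EXCEPT (a, b) FROM t", read="bigquery")
    star = tree.selects[0]
    assert isinstance(star, exp.Star)
    print(star.args.get("except"))  # the two excluded columns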
8386 8387 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8388 select = self._parse_select() 8389 if not select: 8390 return query 8391 8392 return self._build_pipe_ctes(query=query, expressions=select.expressions) 8393 8394 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8395 limit = self._parse_limit() 8396 offset = self._parse_offset() 8397 if limit: 8398 curr_limit = query.args.get("limit", limit) 8399 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8400 query.limit(limit, copy=False) 8401 if offset: 8402 curr_offset = query.args.get("offset") 8403 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8404 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8405 8406 return query 8407 8408 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8409 this = self._parse_assignment() 8410 if self._match_text_seq("GROUP", "AND", advance=False): 8411 return this 8412 8413 this = self._parse_alias(this) 8414 8415 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8416 return self._parse_ordered(lambda: this) 8417 8418 return this 8419 8420 def _parse_pipe_syntax_aggregate_group_order_by( 8421 self, query: exp.Select, group_by_exists: bool = True 8422 ) -> exp.Select: 8423 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8424 aggregates_or_groups, orders = [], [] 8425 for element in expr: 8426 if isinstance(element, exp.Ordered): 8427 this = element.this 8428 if isinstance(this, exp.Alias): 8429 element.set("this", this.args["alias"]) 8430 orders.append(element) 8431 else: 8432 this = element 8433 aggregates_or_groups.append(this) 8434 8435 if group_by_exists: 8436 query.select(*aggregates_or_groups, copy=False).group_by( 8437 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8438 copy=False, 8439 ) 8440 else: 8441 query.select(*aggregates_or_groups, copy=False) 8442 8443 if orders: 8444 return query.order_by(*orders, append=False, copy=False) 8445 8446 return query 8447 8448 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8449 self._match_text_seq("AGGREGATE") 8450 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8451 8452 if self._match(TokenType.GROUP_BY) or ( 8453 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8454 ): 8455 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8456 8457 return self._build_pipe_ctes( 8458 query=query, expressions=[expr for expr in query.selects if not expr.is_star] 8459 ) 8460 8461 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Select]: 8462 first_setop = self.parse_set_operation(this=query) 8463 if not first_setop: 8464 return None 8465 8466 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8467 expr = self._parse_paren() 8468 return expr.assert_is(exp.Subquery).unnest() if expr else None 8469 8470 first_setop.this.pop() 8471 8472 setops = [ 8473 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8474 *self._parse_csv(_parse_and_unwrap_query), 8475 ] 8476 8477 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8478 with_ = query.args.get("with") 8479 ctes = with_.pop() if with_ else None 8480 8481 if isinstance(first_setop, exp.Union): 8482 query = query.union(*setops, copy=False, **first_setop.args) 8483 elif isinstance(first_setop, exp.Except): 8484 query = query.except_(*setops, copy=False, 
**first_setop.args) 8485 else: 8486 query = query.intersect(*setops, copy=False, **first_setop.args) 8487 8488 query.set("with", ctes) 8489 8490 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8491 8492 def _parse_pipe_syntax_join(self, query: exp.Select) -> t.Optional[exp.Select]: 8493 join = self._parse_join() 8494 if not join: 8495 return None 8496 8497 return query.join(join, copy=False) 8498 8499 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8500 pivots = self._parse_pivots() 8501 if not pivots: 8502 return query 8503 8504 from_ = query.args.get("from") 8505 if from_: 8506 from_.this.set("pivots", pivots) 8507 8508 return self._build_pipe_ctes(query=query, expressions=[exp.Star()]) 8509 8510 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8511 self._match_text_seq("EXTEND") 8512 return self._build_pipe_ctes( 8513 query=query, 8514 expressions=[query.selects[0].assert_is(exp.Star), *self._parse_expressions()], 8515 ) 8516 8517 def _parse_pipe_syntax_drop(self, query: exp.Select) -> exp.Select: 8518 self._match_text_seq("DROP") 8519 dropped_columns = self._parse_csv(self._parse_assignment) 8520 8521 select = query.selects[0].assert_is(exp.Star) 8522 except_ = select.args.get("except") or [] 8523 select.set("except", [*except_, *dropped_columns]) 8524 8525 return query 8526 8527 def _parse_pipe_syntax_set(self, query: exp.Select) -> exp.Select: 8528 self._match_text_seq("SET") 8529 replaced_columns = [ 8530 self.expression(exp.Alias, this=expr.expression, alias=expr.this) 8531 for expr in self._parse_csv(self._parse_assignment) 8532 ] 8533 8534 select = query.selects[0].assert_is(exp.Star) 8535 replace_ = select.args.get("replace") or [] 8536 select.set("replace", [*replace_, *replaced_columns]) 8537 8538 return query 8539 8540 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8541 sample = self._parse_table_sample() 8542 8543 with_ = query.args.get("with") 8544 if with_: 8545 with_.expressions[-1].this.set("sample", sample) 8546 else: 8547 query.set("sample", sample) 8548 8549 return query 8550 8551 def _parse_pipe_syntax_query(self, query: exp.Select) -> t.Optional[exp.Select]: 8552 while self._match(TokenType.PIPE_GT): 8553 start = self._curr 8554 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8555 if not parser: 8556 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8557 # keywords, making it tricky to disambiguate them without lookahead. The approach 8558 # here is to try and parse a set operation and if that fails, then try to parse a 8559 # join operator. If that fails as well, then the operator is not supported. 8560 parsed_query = self._parse_pipe_syntax_set_operator(query) 8561 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8562 if not parsed_query: 8563 self._retreat(start) 8564 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8565 break 8566 query = parsed_query 8567 else: 8568 query = parser(self, query) 8569 8570 return query
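The pipe-syntax machinery above (_parse_pipe_syntax_query dispatching on PIPE_SYNTAX_TRANSFORM_PARSERS, with _build_pipe_cte folding each step into a chained CTE) can be exercised through the public API. A hedged sketch, assuming a sqlglot version whose BigQuery dialect tokenizes |> and enables pipe syntax:

    import sqlglot

    # Each |> segment either hits a registered transform parser or falls back
    # to set-operation/join parsing; anything else raises
    # "Unsupported pipe syntax operator: ...".
    sql = "FROM t |> WHERE x > 1 |> SELECT x"
    tree = sqlglot.parse_one(sql, read="bigquery")
    print(tree.sql("bigquery"))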
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The dialect to parse with, resolved through Dialect.get_or_raise. Default: None (the base dialect)
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()
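A minimal sketch of driving the tokenizer and parser by hand with these constructor options; in practice, sqlglot.parse_one wires the dialect's Tokenizer and Parser subclasses together for you:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM b"
tokens = Tokenizer().tokenize(sql)

# Raise a single ParseError that aggregates up to five error messages.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
ast = parser.parse(tokens, sql=sql)[0]
print(ast.sql())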
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
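For example, a multi-statement string yields one tree per statement (a sketch using the base dialect):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # one syntax tree per parsed statement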
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
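A sketch of targeting a specific expression type, assuming exp.Select is registered in EXPRESSION_PARSERS (as it is in recent versions):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM b"
tokens = Tokenizer().tokenize(sql)

# Parse the token list directly into a SELECT statement.
select = Parser().parse_into(exp.Select, tokens, sql=sql)[0]
assert isinstance(select, exp.Select)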
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
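Under ErrorLevel.WARN, errors are routed to the "sqlglot" logger and the (possibly partial) trees are still returned. A sketch via the high-level API, assuming parser options are forwarded through sqlglot.parse:

import logging

import sqlglot
from sqlglot.errors import ErrorLevel

logging.basicConfig(level=logging.ERROR)

# The malformed call is logged instead of raised; parsing still returns.
trees = sqlglot.parse("SELECT foo( FROM bar", error_level=ErrorLevel.WARN)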
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
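The structured fields set here surface on the raised ParseError, for example:

import sqlglot
from sqlglot.errors import ParseError

try:
    sqlglot.parse_one("SELECT foo( FROM bar")
except ParseError as e:
    # Each entry carries description, line, col, start_context,
    # highlight and end_context, as populated by raise_error.
    print(e.errors)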
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
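A minimal sketch of building a validated node directly; exp.to_identifier is just a convenient constructor here:

from sqlglot import exp
from sqlglot.parser import Parser

# The node is validated on creation, so missing mandatory arguments
# would be reported (see validate_expression below).
node = Parser().expression(exp.Column, this=exp.to_identifier("a"))
assert node.sql() == "a"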
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
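Since error_level defaults to ErrorLevel.IMMEDIATE, a missing mandatory argument raises right away. A minimal sketch, using the fact that exp.Not requires a 'this' argument:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

try:
    Parser().validate_expression(exp.Not())
except ParseError as e:
    print(e)  # reports the required keyword 'this' as missing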
def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    start = self._index
    _, side_token, kind_token = self._parse_join_parts()

    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    token_type = self._prev.token_type

    if token_type == TokenType.UNION:
        operation: t.Type[exp.SetOperation] = exp.Union
    elif token_type == TokenType.EXCEPT:
        operation = exp.Except
    else:
        operation = exp.Intersect

    comments = self._prev.comments

    if self._match(TokenType.DISTINCT):
        distinct: t.Optional[bool] = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
        "STRICT", "CORRESPONDING"
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        if not side and not kind:
            kind = "INNER"

    on_column_list = None
    if by_name and self._match_texts(("ON", "BY")):
        on_column_list = self._parse_wrapped_csv(self._parse_column)

    expression = self._parse_select(nested=True, parse_set_operation=False)

    return self.expression(
        operation,
        comments=comments,
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
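The distinct flag defaults per dialect through SET_OP_DISTINCT_BY_DEFAULT; in the base dialect a bare UNION is treated as UNION DISTINCT. A sketch:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
assert isinstance(union, exp.Union)
print(union.args["distinct"])  # True in the base dialect

union_all = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
print(union_all.args["distinct"])  # False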