sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, jsonpath, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 build_timetostr_or_tochar, 10 binary_from_function, 11 build_default_decimal_type, 12 build_timestamp_from_parts, 13 date_delta_sql, 14 date_trunc_to_time, 15 datestrtodate_sql, 16 build_formatted_time, 17 if_sql, 18 inline_array_sql, 19 max_or_greatest, 20 min_or_least, 21 rename_func, 22 timestamptrunc_sql, 23 timestrtotime_sql, 24 var_map_sql, 25 map_date_part, 26 no_timestamp_sql, 27 strposition_sql, 28 timestampdiff_sql, 29 no_make_interval_sql, 30 groupconcat_sql, 31) 32from sqlglot.generator import unsupported_args 33from sqlglot.helper import flatten, is_float, is_int, seq_get 34from sqlglot.optimizer.scope import find_all_in_scope 35from sqlglot.tokens import TokenType 36 37if t.TYPE_CHECKING: 38 from sqlglot._typing import E, B 39 40 41# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 42def _build_datetime( 43 name: str, kind: exp.DataType.Type, safe: bool = False 44) -> t.Callable[[t.List], exp.Func]: 45 def _builder(args: t.List) -> exp.Func: 46 value = seq_get(args, 0) 47 scale_or_fmt = seq_get(args, 1) 48 49 int_value = value is not None and is_int(value.name) 50 int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int 51 52 if isinstance(value, exp.Literal) or (value and scale_or_fmt): 53 # Converts calls like `TO_TIME('01:02:03')` into casts 54 if len(args) == 1 and value.is_string and not int_value: 55 return ( 56 exp.TryCast(this=value, to=exp.DataType.build(kind)) 57 if safe 58 else exp.cast(value, kind) 59 ) 60 61 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 62 # cases so we can transpile them, since they're relatively common 63 if kind == exp.DataType.Type.TIMESTAMP: 64 if not safe and (int_value or int_scale_or_fmt): 65 # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as 66 # it's not easily transpilable 67 return exp.UnixToTime(this=value, scale=scale_or_fmt) 68 if not int_scale_or_fmt and not is_float(value.name): 69 expr = build_formatted_time(exp.StrToTime, "snowflake")(args) 70 expr.set("safe", safe) 71 return expr 72 73 if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value: 74 klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime 75 formatted_exp = build_formatted_time(klass, "snowflake")(args) 76 formatted_exp.set("safe", safe) 77 return formatted_exp 78 79 return exp.Anonymous(this=name, expressions=args) 80 81 return _builder 82 83 84def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 85 expression = parser.build_var_map(args) 86 87 if isinstance(expression, exp.StarMap): 88 return expression 89 90 return exp.Struct( 91 expressions=[ 92 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 93 ] 94 ) 95 96 97def _build_datediff(args: t.List) -> exp.DateDiff: 98 return exp.DateDiff( 99 this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)) 100 ) 101 102 103def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 104 def _builder(args: t.List) -> E: 105 return expr_type( 106 this=seq_get(args, 2), 107 expression=seq_get(args, 1), 108 unit=map_date_part(seq_get(args, 0)), 109 ) 110 111 return _builder 112 113 114def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]: 115 def _builder(args: t.List) -> B | exp.Anonymous: 116 if len(args) == 3: 117 return exp.Anonymous(this=name, expressions=args) 118 119 return binary_from_function(expr_type)(args) 120 121 return _builder 122 123 124# https://docs.snowflake.com/en/sql-reference/functions/div0 125def _build_if_from_div0(args: t.List) -> exp.If: 126 lhs = exp._wrap(seq_get(args, 0), exp.Binary) 127 rhs = exp._wrap(seq_get(args, 1), exp.Binary) 128 129 cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_( 130 exp.Is(this=lhs, expression=exp.null()).not_() 131 ) 132 true = exp.Literal.number(0) 133 false = exp.Div(this=lhs, expression=rhs) 134 return exp.If(this=cond, true=true, false=false) 135 136 137# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 138def _build_if_from_zeroifnull(args: t.List) -> exp.If: 139 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 140 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 141 142 143# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 144def _build_if_from_nullifzero(args: t.List) -> exp.If: 145 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 146 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 147 148 149def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 150 flag = expression.text("flag") 151 152 if "i" not in flag: 153 flag += "i" 154 155 return self.func( 156 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 157 ) 158 159 160def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 161 regexp_replace = exp.RegexpReplace.from_arg_list(args) 162 163 if not regexp_replace.args.get("replacement"): 164 regexp_replace.set("replacement", exp.Literal.string("")) 165 166 return regexp_replace 167 168 169def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 170 def _parse(self: Snowflake.Parser) -> exp.Show: 171 return self._parse_show_snowflake(*args, **kwargs) 172 173 return _parse 174 175 176def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 177 trunc = date_trunc_to_time(args) 178 trunc.set("unit", map_date_part(trunc.args["unit"])) 179 return trunc 180 181 182def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression: 183 """ 184 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 185 so we need to unqualify them. Same goes for ANY ORDER BY <column>. 186 187 Example: 188 >>> from sqlglot import parse_one 189 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 190 >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake")) 191 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 192 """ 193 if isinstance(expression, exp.Pivot): 194 if expression.unpivot: 195 expression = transforms.unqualify_columns(expression) 196 else: 197 for field in expression.fields: 198 field_expr = seq_get(field.expressions if field else [], 0) 199 200 if isinstance(field_expr, exp.PivotAny): 201 unqualified_field_expr = transforms.unqualify_columns(field_expr) 202 t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0) 203 204 return expression 205 206 207def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 208 assert isinstance(expression, exp.Create) 209 210 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 211 if expression.this in exp.DataType.NESTED_TYPES: 212 expression.set("expressions", None) 213 return expression 214 215 props = expression.args.get("properties") 216 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 217 for schema_expression in expression.this.expressions: 218 if isinstance(schema_expression, exp.ColumnDef): 219 column_type = schema_expression.kind 220 if isinstance(column_type, exp.DataType): 221 column_type.transform(_flatten_structured_type, copy=False) 222 223 return expression 224 225 226def _unnest_generate_date_array(unnest: exp.Unnest) -> None: 227 generate_date_array = unnest.expressions[0] 228 start = generate_date_array.args.get("start") 229 end = generate_date_array.args.get("end") 230 step = generate_date_array.args.get("step") 231 232 if not start or not end or not isinstance(step, exp.Interval) or step.name != "1": 233 return 234 235 unit = step.args.get("unit") 236 237 unnest_alias = unnest.args.get("alias") 238 if unnest_alias: 239 unnest_alias = unnest_alias.copy() 240 sequence_value_name = seq_get(unnest_alias.columns, 0) or "value" 241 else: 242 sequence_value_name = "value" 243 244 # We'll add the next sequence value to the starting date and project the result 245 date_add = _build_date_time_add(exp.DateAdd)( 246 [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")] 247 ).as_(sequence_value_name) 248 249 # We use DATEDIFF to compute the number of sequence values needed 250 number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"]( 251 [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1] 252 ) 253 254 unnest.set("expressions", [number_sequence]) 255 unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias)) 256 257 258def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression: 259 if isinstance(expression, exp.Select): 260 for generate_date_array in expression.find_all(exp.GenerateDateArray): 261 parent = generate_date_array.parent 262 263 # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake 264 # query is the following (it'll be unnested properly on the next iteration due to copy): 265 # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...)))) 266 if not isinstance(parent, exp.Unnest): 267 unnest = exp.Unnest(expressions=[generate_date_array.copy()]) 268 generate_date_array.replace( 269 exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery() 270 ) 271 272 if ( 273 isinstance(parent, exp.Unnest) 274 and isinstance(parent.parent, (exp.From, exp.Join)) 275 and len(parent.expressions) == 1 276 ): 277 _unnest_generate_date_array(parent) 278 279 return expression 280 281 282def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 283 def _builder(args: t.List) -> E: 284 return expr_type( 285 this=seq_get(args, 0), 286 expression=seq_get(args, 1), 287 position=seq_get(args, 2), 288 occurrence=seq_get(args, 3), 289 parameters=seq_get(args, 4), 290 group=seq_get(args, 5) or exp.Literal.number(0), 291 ) 292 293 return _builder 294 295 296def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str: 297 # Other dialects don't support all of the following parameters, so we need to 298 # generate default values as necessary to ensure the transpilation is correct 299 group = expression.args.get("group") 300 301 # To avoid generating all these default values, we set group to None if 302 # it's 0 (also default value) which doesn't trigger the following chain 303 if group and group.name == "0": 304 group = None 305 306 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 307 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 308 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 309 310 return self.func( 311 "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL", 312 expression.this, 313 expression.expression, 314 position, 315 occurrence, 316 parameters, 317 group, 318 ) 319 320 321def _json_extract_value_array_sql( 322 self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 323) -> str: 324 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 325 ident = exp.to_identifier("x") 326 327 if isinstance(expression, exp.JSONValueArray): 328 this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR) 329 else: 330 this = exp.ParseJSON(this=f"TO_JSON({ident})") 331 332 transform_lambda = exp.Lambda(expressions=[ident], this=this) 333 334 return self.func("TRANSFORM", json_extract, transform_lambda) 335 336 337def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression: 338 if isinstance(expression, exp.Select): 339 # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations 340 # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced 341 # by Snowflake's parser. 342 # 343 # Additionally, at the time of writing this, BigQuery is the only dialect that produces a 344 # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`. 345 unnest_aliases = set() 346 for unnest in find_all_in_scope(expression, exp.Unnest): 347 unnest_alias = unnest.args.get("alias") 348 if ( 349 isinstance(unnest_alias, exp.TableAlias) 350 and not unnest_alias.this 351 and len(unnest_alias.columns) == 1 352 ): 353 unnest_aliases.add(unnest_alias.columns[0].name) 354 355 if unnest_aliases: 356 for c in find_all_in_scope(expression, exp.Column): 357 if c.table in unnest_aliases: 358 bracket_lhs = c.args["table"] 359 bracket_rhs = exp.Literal.string(c.name) 360 c.replace(exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])) 361 362 return expression 363 364 365class Snowflake(Dialect): 366 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 367 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 368 NULL_ORDERING = "nulls_are_large" 369 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 370 SUPPORTS_USER_DEFINED_TYPES = False 371 SUPPORTS_SEMI_ANTI_JOIN = False 372 PREFER_CTE_ALIAS_COLUMN = True 373 TABLESAMPLE_SIZE_IS_PERCENT = True 374 COPY_PARAMS_ARE_CSV = False 375 ARRAY_AGG_INCLUDES_NULLS = None 376 377 TIME_MAPPING = { 378 "YYYY": "%Y", 379 "yyyy": "%Y", 380 "YY": "%y", 381 "yy": "%y", 382 "MMMM": "%B", 383 "mmmm": "%B", 384 "MON": "%b", 385 "mon": "%b", 386 "MM": "%m", 387 "mm": "%m", 388 "DD": "%d", 389 "dd": "%-d", 390 "DY": "%a", 391 "dy": "%w", 392 "HH24": "%H", 393 "hh24": "%H", 394 "HH12": "%I", 395 "hh12": "%I", 396 "MI": "%M", 397 "mi": "%M", 398 "SS": "%S", 399 "ss": "%S", 400 "FF6": "%f", 401 "ff6": "%f", 402 } 403 404 DATE_PART_MAPPING = { 405 **Dialect.DATE_PART_MAPPING, 406 "ISOWEEK": "WEEKISO", 407 } 408 409 def quote_identifier(self, expression: E, identify: bool = True) -> E: 410 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 411 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 412 if ( 413 isinstance(expression, exp.Identifier) 414 and isinstance(expression.parent, exp.Table) 415 and expression.name.lower() == "dual" 416 ): 417 return expression # type: ignore 418 419 return super().quote_identifier(expression, identify=identify) 420 421 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 422 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 423 SINGLE_TOKENS.pop("$") 424 425 class Parser(parser.Parser): 426 IDENTIFY_PIVOT_STRINGS = True 427 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 428 COLON_IS_VARIANT_EXTRACT = True 429 430 ID_VAR_TOKENS = { 431 *parser.Parser.ID_VAR_TOKENS, 432 TokenType.MATCH_CONDITION, 433 } 434 435 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 436 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 437 438 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 439 440 FUNCTIONS = { 441 **parser.Parser.FUNCTIONS, 442 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 443 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 444 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 445 this=seq_get(args, 1), expression=seq_get(args, 0) 446 ), 447 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 448 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 449 start=seq_get(args, 0), 450 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 451 step=seq_get(args, 2), 452 ), 453 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 454 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 455 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 456 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 457 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 458 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 459 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 460 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 461 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 462 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 463 "DATE_TRUNC": _date_trunc_to_time, 464 "DATEADD": _build_date_time_add(exp.DateAdd), 465 "DATEDIFF": _build_datediff, 466 "DIV0": _build_if_from_div0, 467 "EDITDISTANCE": lambda args: exp.Levenshtein( 468 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 469 ), 470 "FLATTEN": exp.Explode.from_arg_list, 471 "GET_PATH": lambda args, dialect: exp.JSONExtract( 472 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 473 ), 474 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 475 "IFF": exp.If.from_arg_list, 476 "LAST_DAY": lambda args: exp.LastDay( 477 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 478 ), 479 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 480 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 481 "NULLIFZERO": _build_if_from_nullifzero, 482 "OBJECT_CONSTRUCT": _build_object_construct, 483 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 484 "REGEXP_REPLACE": _build_regexp_replace, 485 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 486 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 487 "RLIKE": exp.RegexpLike.from_arg_list, 488 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 489 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 490 "TIMEADD": _build_date_time_add(exp.TimeAdd), 491 "TIMEDIFF": _build_datediff, 492 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 493 "TIMESTAMPDIFF": _build_datediff, 494 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 495 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 496 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 497 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 498 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 499 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 500 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 501 "TRY_TO_TIMESTAMP": _build_datetime( 502 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 503 ), 504 "TO_CHAR": build_timetostr_or_tochar, 505 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 506 "TO_NUMBER": lambda args: exp.ToNumber( 507 this=seq_get(args, 0), 508 format=seq_get(args, 1), 509 precision=seq_get(args, 2), 510 scale=seq_get(args, 3), 511 ), 512 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 513 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 514 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 515 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 516 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 517 "TO_VARCHAR": exp.ToChar.from_arg_list, 518 "ZEROIFNULL": _build_if_from_zeroifnull, 519 } 520 521 FUNCTION_PARSERS = { 522 **parser.Parser.FUNCTION_PARSERS, 523 "DATE_PART": lambda self: self._parse_date_part(), 524 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 525 "LISTAGG": lambda self: self._parse_string_agg(), 526 } 527 FUNCTION_PARSERS.pop("TRIM") 528 529 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 530 531 RANGE_PARSERS = { 532 **parser.Parser.RANGE_PARSERS, 533 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 534 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 535 } 536 537 ALTER_PARSERS = { 538 **parser.Parser.ALTER_PARSERS, 539 "UNSET": lambda self: self.expression( 540 exp.Set, 541 tag=self._match_text_seq("TAG"), 542 expressions=self._parse_csv(self._parse_id_var), 543 unset=True, 544 ), 545 } 546 547 STATEMENT_PARSERS = { 548 **parser.Parser.STATEMENT_PARSERS, 549 TokenType.GET: lambda self: self._parse_get(), 550 TokenType.PUT: lambda self: self._parse_put(), 551 TokenType.SHOW: lambda self: self._parse_show(), 552 } 553 554 PROPERTY_PARSERS = { 555 **parser.Parser.PROPERTY_PARSERS, 556 "CREDENTIALS": lambda self: self._parse_credentials_property(), 557 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 558 "LOCATION": lambda self: self._parse_location_property(), 559 "TAG": lambda self: self._parse_tag(), 560 "USING": lambda self: self._match_text_seq("TEMPLATE") 561 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 562 } 563 564 TYPE_CONVERTERS = { 565 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 566 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 567 } 568 569 SHOW_PARSERS = { 570 "DATABASES": _show_parser("DATABASES"), 571 "TERSE DATABASES": _show_parser("DATABASES"), 572 "SCHEMAS": _show_parser("SCHEMAS"), 573 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 574 "OBJECTS": _show_parser("OBJECTS"), 575 "TERSE OBJECTS": _show_parser("OBJECTS"), 576 "TABLES": _show_parser("TABLES"), 577 "TERSE TABLES": _show_parser("TABLES"), 578 "VIEWS": _show_parser("VIEWS"), 579 "TERSE VIEWS": _show_parser("VIEWS"), 580 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 581 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 582 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 583 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 584 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 585 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 586 "SEQUENCES": _show_parser("SEQUENCES"), 587 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 588 "STAGES": _show_parser("STAGES"), 589 "COLUMNS": _show_parser("COLUMNS"), 590 "USERS": _show_parser("USERS"), 591 "TERSE USERS": _show_parser("USERS"), 592 "FILE FORMATS": _show_parser("FILE FORMATS"), 593 "FUNCTIONS": _show_parser("FUNCTIONS"), 594 "PROCEDURES": _show_parser("PROCEDURES"), 595 "WAREHOUSES": _show_parser("WAREHOUSES"), 596 } 597 598 CONSTRAINT_PARSERS = { 599 **parser.Parser.CONSTRAINT_PARSERS, 600 "WITH": lambda self: self._parse_with_constraint(), 601 "MASKING": lambda self: self._parse_with_constraint(), 602 "PROJECTION": lambda self: self._parse_with_constraint(), 603 "TAG": lambda self: self._parse_with_constraint(), 604 } 605 606 STAGED_FILE_SINGLE_TOKENS = { 607 TokenType.DOT, 608 TokenType.MOD, 609 TokenType.SLASH, 610 } 611 612 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 613 614 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 615 616 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 617 618 LAMBDAS = { 619 **parser.Parser.LAMBDAS, 620 TokenType.ARROW: lambda self, expressions: self.expression( 621 exp.Lambda, 622 this=self._replace_lambda( 623 self._parse_assignment(), 624 expressions, 625 ), 626 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 627 ), 628 } 629 630 def _parse_use(self) -> exp.Use: 631 if self._match_text_seq("SECONDARY", "ROLES"): 632 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 633 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 634 return self.expression( 635 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 636 ) 637 638 return super()._parse_use() 639 640 def _negate_range( 641 self, this: t.Optional[exp.Expression] = None 642 ) -> t.Optional[exp.Expression]: 643 if not this: 644 return this 645 646 query = this.args.get("query") 647 if isinstance(this, exp.In) and isinstance(query, exp.Query): 648 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 649 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 650 # which can produce different results (most likely a SnowFlake bug). 651 # 652 # https://docs.snowflake.com/en/sql-reference/functions/in 653 # Context: https://github.com/tobymao/sqlglot/issues/3890 654 return self.expression( 655 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 656 ) 657 658 return self.expression(exp.Not, this=this) 659 660 def _parse_tag(self) -> exp.Tags: 661 return self.expression( 662 exp.Tags, 663 expressions=self._parse_wrapped_csv(self._parse_property), 664 ) 665 666 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 667 if self._prev.token_type != TokenType.WITH: 668 self._retreat(self._index - 1) 669 670 if self._match_text_seq("MASKING", "POLICY"): 671 policy = self._parse_column() 672 return self.expression( 673 exp.MaskingPolicyColumnConstraint, 674 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 675 expressions=self._match(TokenType.USING) 676 and self._parse_wrapped_csv(self._parse_id_var), 677 ) 678 if self._match_text_seq("PROJECTION", "POLICY"): 679 policy = self._parse_column() 680 return self.expression( 681 exp.ProjectionPolicyColumnConstraint, 682 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 683 ) 684 if self._match(TokenType.TAG): 685 return self._parse_tag() 686 687 return None 688 689 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 690 if self._match(TokenType.TAG): 691 return self._parse_tag() 692 693 return super()._parse_with_property() 694 695 def _parse_create(self) -> exp.Create | exp.Command: 696 expression = super()._parse_create() 697 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 698 # Replace the Table node with the enclosed Identifier 699 expression.this.replace(expression.this.this) 700 701 return expression 702 703 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 704 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 705 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 706 this = self._parse_var() or self._parse_type() 707 708 if not this: 709 return None 710 711 self._match(TokenType.COMMA) 712 expression = self._parse_bitwise() 713 this = map_date_part(this) 714 name = this.name.upper() 715 716 if name.startswith("EPOCH"): 717 if name == "EPOCH_MILLISECOND": 718 scale = 10**3 719 elif name == "EPOCH_MICROSECOND": 720 scale = 10**6 721 elif name == "EPOCH_NANOSECOND": 722 scale = 10**9 723 else: 724 scale = None 725 726 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 727 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 728 729 if scale: 730 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 731 732 return to_unix 733 734 return self.expression(exp.Extract, this=this, expression=expression) 735 736 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 737 if is_map: 738 # Keys are strings in Snowflake's objects, see also: 739 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 740 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 741 return self._parse_slice(self._parse_string()) 742 743 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 744 745 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 746 lateral = super()._parse_lateral() 747 if not lateral: 748 return lateral 749 750 if isinstance(lateral.this, exp.Explode): 751 table_alias = lateral.args.get("alias") 752 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 753 if table_alias and not table_alias.args.get("columns"): 754 table_alias.set("columns", columns) 755 elif not table_alias: 756 exp.alias_(lateral, "_flattened", table=columns, copy=False) 757 758 return lateral 759 760 def _parse_table_parts( 761 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 762 ) -> exp.Table: 763 # https://docs.snowflake.com/en/user-guide/querying-stage 764 if self._match(TokenType.STRING, advance=False): 765 table = self._parse_string() 766 elif self._match_text_seq("@", advance=False): 767 table = self._parse_location_path() 768 else: 769 table = None 770 771 if table: 772 file_format = None 773 pattern = None 774 775 wrapped = self._match(TokenType.L_PAREN) 776 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 777 if self._match_text_seq("FILE_FORMAT", "=>"): 778 file_format = self._parse_string() or super()._parse_table_parts( 779 is_db_reference=is_db_reference 780 ) 781 elif self._match_text_seq("PATTERN", "=>"): 782 pattern = self._parse_string() 783 else: 784 break 785 786 self._match(TokenType.COMMA) 787 788 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 789 else: 790 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 791 792 return table 793 794 def _parse_table( 795 self, 796 schema: bool = False, 797 joins: bool = False, 798 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 799 parse_bracket: bool = False, 800 is_db_reference: bool = False, 801 parse_partition: bool = False, 802 ) -> t.Optional[exp.Expression]: 803 table = super()._parse_table( 804 schema=schema, 805 joins=joins, 806 alias_tokens=alias_tokens, 807 parse_bracket=parse_bracket, 808 is_db_reference=is_db_reference, 809 parse_partition=parse_partition, 810 ) 811 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 812 table_from_rows = table.this 813 for arg in exp.TableFromRows.arg_types: 814 if arg != "this": 815 table_from_rows.set(arg, table.args.get(arg)) 816 817 table = table_from_rows 818 819 return table 820 821 def _parse_id_var( 822 self, 823 any_token: bool = True, 824 tokens: t.Optional[t.Collection[TokenType]] = None, 825 ) -> t.Optional[exp.Expression]: 826 if self._match_text_seq("IDENTIFIER", "("): 827 identifier = ( 828 super()._parse_id_var(any_token=any_token, tokens=tokens) 829 or self._parse_string() 830 ) 831 self._match_r_paren() 832 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 833 834 return super()._parse_id_var(any_token=any_token, tokens=tokens) 835 836 def _parse_show_snowflake(self, this: str) -> exp.Show: 837 scope = None 838 scope_kind = None 839 840 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 841 # which is syntactically valid but has no effect on the output 842 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 843 844 history = self._match_text_seq("HISTORY") 845 846 like = self._parse_string() if self._match(TokenType.LIKE) else None 847 848 if self._match(TokenType.IN): 849 if self._match_text_seq("ACCOUNT"): 850 scope_kind = "ACCOUNT" 851 elif self._match_text_seq("CLASS"): 852 scope_kind = "CLASS" 853 scope = self._parse_table_parts() 854 elif self._match_text_seq("APPLICATION"): 855 scope_kind = "APPLICATION" 856 if self._match_text_seq("PACKAGE"): 857 scope_kind += " PACKAGE" 858 scope = self._parse_table_parts() 859 elif self._match_set(self.DB_CREATABLES): 860 scope_kind = self._prev.text.upper() 861 if self._curr: 862 scope = self._parse_table_parts() 863 elif self._curr: 864 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 865 scope = self._parse_table_parts() 866 867 return self.expression( 868 exp.Show, 869 **{ 870 "terse": terse, 871 "this": this, 872 "history": history, 873 "like": like, 874 "scope": scope, 875 "scope_kind": scope_kind, 876 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 877 "limit": self._parse_limit(), 878 "from": self._parse_string() if self._match(TokenType.FROM) else None, 879 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 880 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 881 }, 882 ) 883 884 def _parse_put(self) -> exp.Put | exp.Command: 885 if self._curr.token_type != TokenType.STRING: 886 return self._parse_as_command(self._prev) 887 888 return self.expression( 889 exp.Put, 890 this=self._parse_string(), 891 target=self._parse_location_path(), 892 properties=self._parse_properties(), 893 ) 894 895 def _parse_get(self) -> t.Optional[exp.Expression]: 896 start = self._prev 897 898 # If we detect GET( then we need to parse a function, not a statement 899 if self._match(TokenType.L_PAREN): 900 self._retreat(self._index - 2) 901 return self._parse_expression() 902 903 target = self._parse_location_path() 904 905 # Parse as command if unquoted file path 906 if self._curr.token_type == TokenType.URI_START: 907 return self._parse_as_command(start) 908 909 return self.expression( 910 exp.Get, 911 this=self._parse_string(), 912 target=target, 913 properties=self._parse_properties(), 914 ) 915 916 def _parse_location_property(self) -> exp.LocationProperty: 917 self._match(TokenType.EQ) 918 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 919 920 def _parse_file_location(self) -> t.Optional[exp.Expression]: 921 # Parse either a subquery or a staged file 922 return ( 923 self._parse_select(table=True, parse_subquery_alias=False) 924 if self._match(TokenType.L_PAREN, advance=False) 925 else self._parse_table_parts() 926 ) 927 928 def _parse_location_path(self) -> exp.Var: 929 start = self._curr 930 self._advance_any(ignore_reserved=True) 931 932 # We avoid consuming a comma token because external tables like @foo and @bar 933 # can be joined in a query with a comma separator, as well as closing paren 934 # in case of subqueries 935 while self._is_connected() and not self._match_set( 936 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 937 ): 938 self._advance_any(ignore_reserved=True) 939 940 return exp.var(self._find_sql(start, self._prev)) 941 942 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 943 this = super()._parse_lambda_arg() 944 945 if not this: 946 return this 947 948 typ = self._parse_types() 949 950 if typ: 951 return self.expression(exp.Cast, this=this, to=typ) 952 953 return this 954 955 def _parse_foreign_key(self) -> exp.ForeignKey: 956 # inlineFK, the REFERENCES columns are implied 957 if self._match(TokenType.REFERENCES, advance=False): 958 return self.expression(exp.ForeignKey) 959 960 # outoflineFK, explicitly names the columns 961 return super()._parse_foreign_key() 962 963 def _parse_file_format_property(self) -> exp.FileFormatProperty: 964 self._match(TokenType.EQ) 965 if self._match(TokenType.L_PAREN, advance=False): 966 expressions = self._parse_wrapped_options() 967 else: 968 expressions = [self._parse_format_name()] 969 970 return self.expression( 971 exp.FileFormatProperty, 972 expressions=expressions, 973 ) 974 975 def _parse_credentials_property(self) -> exp.CredentialsProperty: 976 return self.expression( 977 exp.CredentialsProperty, 978 expressions=self._parse_wrapped_options(), 979 ) 980 981 class Tokenizer(tokens.Tokenizer): 982 STRING_ESCAPES = ["\\", "'"] 983 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 984 RAW_STRINGS = ["$$"] 985 COMMENTS = ["--", "//", ("/*", "*/")] 986 NESTED_COMMENTS = False 987 988 KEYWORDS = { 989 **tokens.Tokenizer.KEYWORDS, 990 "FILE://": TokenType.URI_START, 991 "BYTEINT": TokenType.INT, 992 "EXCLUDE": TokenType.EXCEPT, 993 "FILE FORMAT": TokenType.FILE_FORMAT, 994 "GET": TokenType.GET, 995 "ILIKE ANY": TokenType.ILIKE_ANY, 996 "LIKE ANY": TokenType.LIKE_ANY, 997 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 998 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 999 "MINUS": TokenType.EXCEPT, 1000 "NCHAR VARYING": TokenType.VARCHAR, 1001 "PUT": TokenType.PUT, 1002 "REMOVE": TokenType.COMMAND, 1003 "RM": TokenType.COMMAND, 1004 "SAMPLE": TokenType.TABLE_SAMPLE, 1005 "SQL_DOUBLE": TokenType.DOUBLE, 1006 "SQL_VARCHAR": TokenType.VARCHAR, 1007 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1008 "TAG": TokenType.TAG, 1009 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1010 "TOP": TokenType.TOP, 1011 "WAREHOUSE": TokenType.WAREHOUSE, 1012 "STAGE": TokenType.STAGE, 1013 "STREAMLIT": TokenType.STREAMLIT, 1014 } 1015 KEYWORDS.pop("/*+") 1016 1017 SINGLE_TOKENS = { 1018 **tokens.Tokenizer.SINGLE_TOKENS, 1019 "$": TokenType.PARAMETER, 1020 } 1021 1022 VAR_SINGLE_TOKENS = {"$"} 1023 1024 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1025 1026 class Generator(generator.Generator): 1027 PARAMETER_TOKEN = "$" 1028 MATCHED_BY_SOURCE = False 1029 SINGLE_STRING_INTERVAL = True 1030 JOIN_HINTS = False 1031 TABLE_HINTS = False 1032 QUERY_HINTS = False 1033 AGGREGATE_FILTER_SUPPORTED = False 1034 SUPPORTS_TABLE_COPY = False 1035 COLLATE_IS_FUNC = True 1036 LIMIT_ONLY_LITERALS = True 1037 JSON_KEY_VALUE_PAIR_SEP = "," 1038 INSERT_OVERWRITE = " OVERWRITE INTO" 1039 STRUCT_DELIMITER = ("(", ")") 1040 COPY_PARAMS_ARE_WRAPPED = False 1041 COPY_PARAMS_EQ_REQUIRED = True 1042 STAR_EXCEPT = "EXCLUDE" 1043 SUPPORTS_EXPLODING_PROJECTIONS = False 1044 ARRAY_CONCAT_IS_VAR_LEN = False 1045 SUPPORTS_CONVERT_TIMEZONE = True 1046 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1047 SUPPORTS_MEDIAN = True 1048 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1049 1050 TRANSFORMS = { 1051 **generator.Generator.TRANSFORMS, 1052 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1053 exp.ArgMax: rename_func("MAX_BY"), 1054 exp.ArgMin: rename_func("MIN_BY"), 1055 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1056 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1057 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1058 exp.AtTimeZone: lambda self, e: self.func( 1059 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1060 ), 1061 exp.BitwiseOr: rename_func("BITOR"), 1062 exp.BitwiseXor: rename_func("BITXOR"), 1063 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1064 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1065 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1066 exp.DateAdd: date_delta_sql("DATEADD"), 1067 exp.DateDiff: date_delta_sql("DATEDIFF"), 1068 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1069 exp.DatetimeDiff: timestampdiff_sql, 1070 exp.DateStrToDate: datestrtodate_sql, 1071 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1072 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1073 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1074 exp.DayOfYear: rename_func("DAYOFYEAR"), 1075 exp.Explode: rename_func("FLATTEN"), 1076 exp.Extract: lambda self, e: self.func( 1077 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1078 ), 1079 exp.FileFormatProperty: lambda self, 1080 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1081 exp.FromTimeZone: lambda self, e: self.func( 1082 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1083 ), 1084 exp.GenerateSeries: lambda self, e: self.func( 1085 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1086 ), 1087 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1088 exp.If: if_sql(name="IFF", false_value="NULL"), 1089 exp.JSONExtractArray: _json_extract_value_array_sql, 1090 exp.JSONExtractScalar: lambda self, e: self.func( 1091 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1092 ), 1093 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1094 exp.JSONPathRoot: lambda *_: "", 1095 exp.JSONValueArray: _json_extract_value_array_sql, 1096 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1097 rename_func("EDITDISTANCE") 1098 ), 1099 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1100 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1101 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1102 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1103 exp.MakeInterval: no_make_interval_sql, 1104 exp.Max: max_or_greatest, 1105 exp.Min: min_or_least, 1106 exp.ParseJSON: lambda self, e: self.func( 1107 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1108 ), 1109 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1110 exp.PercentileCont: transforms.preprocess( 1111 [transforms.add_within_group_for_percentiles] 1112 ), 1113 exp.PercentileDisc: transforms.preprocess( 1114 [transforms.add_within_group_for_percentiles] 1115 ), 1116 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1117 exp.RegexpExtract: _regexpextract_sql, 1118 exp.RegexpExtractAll: _regexpextract_sql, 1119 exp.RegexpILike: _regexpilike_sql, 1120 exp.Rand: rename_func("RANDOM"), 1121 exp.Select: transforms.preprocess( 1122 [ 1123 transforms.eliminate_window_clause, 1124 transforms.eliminate_distinct_on, 1125 transforms.explode_projection_to_unnest(), 1126 transforms.eliminate_semi_and_anti_joins, 1127 _transform_generate_date_array, 1128 _eliminate_dot_variant_lookup, 1129 ] 1130 ), 1131 exp.SHA: rename_func("SHA1"), 1132 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1133 exp.StartsWith: rename_func("STARTSWITH"), 1134 exp.EndsWith: rename_func("ENDSWITH"), 1135 exp.StrPosition: lambda self, e: strposition_sql( 1136 self, e, func_name="CHARINDEX", supports_position=True 1137 ), 1138 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1139 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1140 exp.Stuff: rename_func("INSERT"), 1141 exp.StPoint: rename_func("ST_MAKEPOINT"), 1142 exp.TimeAdd: date_delta_sql("TIMEADD"), 1143 exp.Timestamp: no_timestamp_sql, 1144 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1145 exp.TimestampDiff: lambda self, e: self.func( 1146 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1147 ), 1148 exp.TimestampTrunc: timestamptrunc_sql(), 1149 exp.TimeStrToTime: timestrtotime_sql, 1150 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1151 exp.ToArray: rename_func("TO_ARRAY"), 1152 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1153 exp.ToDouble: rename_func("TO_DOUBLE"), 1154 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1155 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1156 exp.TsOrDsToDate: lambda self, e: self.func( 1157 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1158 ), 1159 exp.TsOrDsToTime: lambda self, e: self.func( 1160 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1161 ), 1162 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1163 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1164 exp.Uuid: rename_func("UUID_STRING"), 1165 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1166 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1167 exp.Xor: rename_func("BOOLXOR"), 1168 } 1169 1170 SUPPORTED_JSON_PATH_PARTS = { 1171 exp.JSONPathKey, 1172 exp.JSONPathRoot, 1173 exp.JSONPathSubscript, 1174 } 1175 1176 TYPE_MAPPING = { 1177 **generator.Generator.TYPE_MAPPING, 1178 exp.DataType.Type.NESTED: "OBJECT", 1179 exp.DataType.Type.STRUCT: "OBJECT", 1180 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1181 } 1182 1183 TOKEN_MAPPING = { 1184 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1185 } 1186 1187 PROPERTIES_LOCATION = { 1188 **generator.Generator.PROPERTIES_LOCATION, 1189 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1190 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1191 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1192 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1193 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1194 } 1195 1196 UNSUPPORTED_VALUES_EXPRESSIONS = { 1197 exp.Map, 1198 exp.StarMap, 1199 exp.Struct, 1200 exp.VarMap, 1201 } 1202 1203 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1204 1205 def with_properties(self, properties: exp.Properties) -> str: 1206 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1207 1208 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1209 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1210 values_as_table = False 1211 1212 return super().values_sql(expression, values_as_table=values_as_table) 1213 1214 def datatype_sql(self, expression: exp.DataType) -> str: 1215 expressions = expression.expressions 1216 if ( 1217 expressions 1218 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1219 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1220 ): 1221 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1222 return "OBJECT" 1223 1224 return super().datatype_sql(expression) 1225 1226 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1227 return self.func( 1228 "TO_NUMBER", 1229 expression.this, 1230 expression.args.get("format"), 1231 expression.args.get("precision"), 1232 expression.args.get("scale"), 1233 ) 1234 1235 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1236 milli = expression.args.get("milli") 1237 if milli is not None: 1238 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1239 expression.set("nano", milli_to_nano) 1240 1241 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1242 1243 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1244 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1245 return self.func("TO_GEOGRAPHY", expression.this) 1246 if expression.is_type(exp.DataType.Type.GEOMETRY): 1247 return self.func("TO_GEOMETRY", expression.this) 1248 1249 return super().cast_sql(expression, safe_prefix=safe_prefix) 1250 1251 def trycast_sql(self, expression: exp.TryCast) -> str: 1252 value = expression.this 1253 1254 if value.type is None: 1255 from sqlglot.optimizer.annotate_types import annotate_types 1256 1257 value = annotate_types(value, dialect=self.dialect) 1258 1259 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1260 return super().trycast_sql(expression) 1261 1262 # TRY_CAST only works for string values in Snowflake 1263 return self.cast_sql(expression) 1264 1265 def log_sql(self, expression: exp.Log) -> str: 1266 if not expression.expression: 1267 return self.func("LN", expression.this) 1268 1269 return super().log_sql(expression) 1270 1271 def unnest_sql(self, expression: exp.Unnest) -> str: 1272 unnest_alias = expression.args.get("alias") 1273 offset = expression.args.get("offset") 1274 1275 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1276 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1277 1278 columns = [ 1279 exp.to_identifier("seq"), 1280 exp.to_identifier("key"), 1281 exp.to_identifier("path"), 1282 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1283 value, 1284 exp.to_identifier("this"), 1285 ] 1286 1287 if unnest_alias: 1288 unnest_alias.set("columns", columns) 1289 else: 1290 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1291 1292 table_input = self.sql(expression.expressions[0]) 1293 if not table_input.startswith("INPUT =>"): 1294 table_input = f"INPUT => {table_input}" 1295 1296 explode = f"TABLE(FLATTEN({table_input}))" 1297 alias = self.sql(unnest_alias) 1298 alias = f" AS {alias}" if alias else "" 1299 value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM " 1300 1301 return f"{value}{explode}{alias}" 1302 1303 def show_sql(self, expression: exp.Show) -> str: 1304 terse = "TERSE " if expression.args.get("terse") else "" 1305 history = " HISTORY" if expression.args.get("history") else "" 1306 like = self.sql(expression, "like") 1307 like = f" LIKE {like}" if like else "" 1308 1309 scope = self.sql(expression, "scope") 1310 scope = f" {scope}" if scope else "" 1311 1312 scope_kind = self.sql(expression, "scope_kind") 1313 if scope_kind: 1314 scope_kind = f" IN {scope_kind}" 1315 1316 starts_with = self.sql(expression, "starts_with") 1317 if starts_with: 1318 starts_with = f" STARTS WITH {starts_with}" 1319 1320 limit = self.sql(expression, "limit") 1321 1322 from_ = self.sql(expression, "from") 1323 if from_: 1324 from_ = f" FROM {from_}" 1325 1326 privileges = self.expressions(expression, key="privileges", flat=True) 1327 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1328 1329 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1330 1331 def describe_sql(self, expression: exp.Describe) -> str: 1332 # Default to table if kind is unknown 1333 kind_value = expression.args.get("kind") or "TABLE" 1334 kind = f" {kind_value}" if kind_value else "" 1335 this = f" {self.sql(expression, 'this')}" 1336 expressions = self.expressions(expression, flat=True) 1337 expressions = f" {expressions}" if expressions else "" 1338 return f"DESCRIBE{kind}{this}{expressions}" 1339 1340 def generatedasidentitycolumnconstraint_sql( 1341 self, expression: exp.GeneratedAsIdentityColumnConstraint 1342 ) -> str: 1343 start = expression.args.get("start") 1344 start = f" START {start}" if start else "" 1345 increment = expression.args.get("increment") 1346 increment = f" INCREMENT {increment}" if increment else "" 1347 1348 order = expression.args.get("order") 1349 if order is not None: 1350 order_clause = " ORDER" if order else " NOORDER" 1351 else: 1352 order_clause = "" 1353 1354 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1355 1356 def cluster_sql(self, expression: exp.Cluster) -> str: 1357 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1358 1359 def struct_sql(self, expression: exp.Struct) -> str: 1360 keys = [] 1361 values = [] 1362 1363 for i, e in enumerate(expression.expressions): 1364 if isinstance(e, exp.PropertyEQ): 1365 keys.append( 1366 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1367 ) 1368 values.append(e.expression) 1369 else: 1370 keys.append(exp.Literal.string(f"_{i}")) 1371 values.append(e) 1372 1373 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1374 1375 @unsupported_args("weight", "accuracy") 1376 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1377 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1378 1379 def alterset_sql(self, expression: exp.AlterSet) -> str: 1380 exprs = self.expressions(expression, flat=True) 1381 exprs = f" {exprs}" if exprs else "" 1382 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1383 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1384 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1385 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1386 tag = self.expressions(expression, key="tag", flat=True) 1387 tag = f" TAG {tag}" if tag else "" 1388 1389 return f"SET{exprs}{file_format}{copy_options}{tag}" 1390 1391 def strtotime_sql(self, expression: exp.StrToTime): 1392 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1393 return self.func( 1394 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1395 ) 1396 1397 def timestampsub_sql(self, expression: exp.TimestampSub): 1398 return self.sql( 1399 exp.TimestampAdd( 1400 this=expression.this, 1401 expression=expression.expression * -1, 1402 unit=expression.unit, 1403 ) 1404 ) 1405 1406 def jsonextract_sql(self, expression: exp.JSONExtract): 1407 this = expression.this 1408 1409 # JSON strings are valid coming from other dialects such as BQ 1410 return self.func( 1411 "GET_PATH", 1412 exp.ParseJSON(this=this) if this.is_string else this, 1413 expression.expression, 1414 ) 1415 1416 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1417 this = expression.this 1418 if not isinstance(this, exp.TsOrDsToTimestamp): 1419 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1420 1421 return self.func("TO_CHAR", this, self.format_time(expression)) 1422 1423 def datesub_sql(self, expression: exp.DateSub) -> str: 1424 value = expression.expression 1425 if value: 1426 value.replace(value * (-1)) 1427 else: 1428 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1429 1430 return date_delta_sql("DATEADD")(self, expression) 1431 1432 def select_sql(self, expression: exp.Select) -> str: 1433 limit = expression.args.get("limit") 1434 offset = expression.args.get("offset") 1435 if offset and not limit: 1436 expression.limit(exp.Null(), copy=False) 1437 return super().select_sql(expression) 1438 1439 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1440 is_materialized = expression.find(exp.MaterializedProperty) 1441 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1442 1443 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1444 # For materialized views, COPY GRANTS is located *before* the columns list 1445 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1446 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1447 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1448 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1449 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1450 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1451 1452 this_name = self.sql(expression.this, "this") 1453 copy_grants = self.sql(copy_grants_property) 1454 this_schema = self.schema_columns_sql(expression.this) 1455 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1456 1457 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1458 1459 return super().createable_sql(expression, locations) 1460 1461 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1462 this = expression.this 1463 1464 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1465 # and add it later as part of the WITHIN GROUP clause 1466 order = this if isinstance(this, exp.Order) else None 1467 if order: 1468 expression.set("this", order.this.pop()) 1469 1470 expr_sql = super().arrayagg_sql(expression) 1471 1472 if order: 1473 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1474 1475 return expr_sql 1476 1477 def array_sql(self, expression: exp.Array) -> str: 1478 expressions = expression.expressions 1479 1480 first_expr = seq_get(expressions, 0) 1481 if isinstance(first_expr, exp.Select): 1482 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1483 if first_expr.text("kind").upper() == "STRUCT": 1484 object_construct_args = [] 1485 for expr in first_expr.expressions: 1486 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1487 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1488 name = expr.this if isinstance(expr, exp.Alias) else expr 1489 1490 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1491 1492 array_agg = exp.ArrayAgg( 1493 this=_build_object_construct(args=object_construct_args) 1494 ) 1495 1496 first_expr.set("kind", None) 1497 first_expr.set("expressions", [array_agg]) 1498 1499 return self.sql(first_expr.subquery()) 1500 1501 return inline_array_sql(self, expression)
366class Snowflake(Dialect): 367 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 368 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 369 NULL_ORDERING = "nulls_are_large" 370 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 371 SUPPORTS_USER_DEFINED_TYPES = False 372 SUPPORTS_SEMI_ANTI_JOIN = False 373 PREFER_CTE_ALIAS_COLUMN = True 374 TABLESAMPLE_SIZE_IS_PERCENT = True 375 COPY_PARAMS_ARE_CSV = False 376 ARRAY_AGG_INCLUDES_NULLS = None 377 378 TIME_MAPPING = { 379 "YYYY": "%Y", 380 "yyyy": "%Y", 381 "YY": "%y", 382 "yy": "%y", 383 "MMMM": "%B", 384 "mmmm": "%B", 385 "MON": "%b", 386 "mon": "%b", 387 "MM": "%m", 388 "mm": "%m", 389 "DD": "%d", 390 "dd": "%-d", 391 "DY": "%a", 392 "dy": "%w", 393 "HH24": "%H", 394 "hh24": "%H", 395 "HH12": "%I", 396 "hh12": "%I", 397 "MI": "%M", 398 "mi": "%M", 399 "SS": "%S", 400 "ss": "%S", 401 "FF6": "%f", 402 "ff6": "%f", 403 } 404 405 DATE_PART_MAPPING = { 406 **Dialect.DATE_PART_MAPPING, 407 "ISOWEEK": "WEEKISO", 408 } 409 410 def quote_identifier(self, expression: E, identify: bool = True) -> E: 411 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 412 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 413 if ( 414 isinstance(expression, exp.Identifier) 415 and isinstance(expression.parent, exp.Table) 416 and expression.name.lower() == "dual" 417 ): 418 return expression # type: ignore 419 420 return super().quote_identifier(expression, identify=identify) 421 422 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 423 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 424 SINGLE_TOKENS.pop("$") 425 426 class Parser(parser.Parser): 427 IDENTIFY_PIVOT_STRINGS = True 428 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 429 COLON_IS_VARIANT_EXTRACT = True 430 431 ID_VAR_TOKENS = { 432 *parser.Parser.ID_VAR_TOKENS, 433 TokenType.MATCH_CONDITION, 434 } 435 436 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 437 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 438 439 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 440 441 FUNCTIONS = { 442 **parser.Parser.FUNCTIONS, 443 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 444 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 445 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 446 this=seq_get(args, 1), expression=seq_get(args, 0) 447 ), 448 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 449 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 450 start=seq_get(args, 0), 451 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 452 step=seq_get(args, 2), 453 ), 454 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 455 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 456 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 457 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 458 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 459 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 460 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 461 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 462 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 463 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 464 "DATE_TRUNC": _date_trunc_to_time, 465 "DATEADD": _build_date_time_add(exp.DateAdd), 466 "DATEDIFF": _build_datediff, 467 "DIV0": _build_if_from_div0, 468 "EDITDISTANCE": lambda args: exp.Levenshtein( 469 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 470 ), 471 "FLATTEN": exp.Explode.from_arg_list, 472 "GET_PATH": lambda args, dialect: exp.JSONExtract( 473 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 474 ), 475 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 476 "IFF": exp.If.from_arg_list, 477 "LAST_DAY": lambda args: exp.LastDay( 478 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 479 ), 480 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 481 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 482 "NULLIFZERO": _build_if_from_nullifzero, 483 "OBJECT_CONSTRUCT": _build_object_construct, 484 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 485 "REGEXP_REPLACE": _build_regexp_replace, 486 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 487 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 488 "RLIKE": exp.RegexpLike.from_arg_list, 489 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 490 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 491 "TIMEADD": _build_date_time_add(exp.TimeAdd), 492 "TIMEDIFF": _build_datediff, 493 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 494 "TIMESTAMPDIFF": _build_datediff, 495 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 496 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 497 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 498 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 499 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 500 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 501 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 502 "TRY_TO_TIMESTAMP": _build_datetime( 503 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 504 ), 505 "TO_CHAR": build_timetostr_or_tochar, 506 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 507 "TO_NUMBER": lambda args: exp.ToNumber( 508 this=seq_get(args, 0), 509 format=seq_get(args, 1), 510 precision=seq_get(args, 2), 511 scale=seq_get(args, 3), 512 ), 513 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 514 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 515 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 516 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 517 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 518 "TO_VARCHAR": exp.ToChar.from_arg_list, 519 "ZEROIFNULL": _build_if_from_zeroifnull, 520 } 521 522 FUNCTION_PARSERS = { 523 **parser.Parser.FUNCTION_PARSERS, 524 "DATE_PART": lambda self: self._parse_date_part(), 525 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 526 "LISTAGG": lambda self: self._parse_string_agg(), 527 } 528 FUNCTION_PARSERS.pop("TRIM") 529 530 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 531 532 RANGE_PARSERS = { 533 **parser.Parser.RANGE_PARSERS, 534 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 535 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 536 } 537 538 ALTER_PARSERS = { 539 **parser.Parser.ALTER_PARSERS, 540 "UNSET": lambda self: self.expression( 541 exp.Set, 542 tag=self._match_text_seq("TAG"), 543 expressions=self._parse_csv(self._parse_id_var), 544 unset=True, 545 ), 546 } 547 548 STATEMENT_PARSERS = { 549 **parser.Parser.STATEMENT_PARSERS, 550 TokenType.GET: lambda self: self._parse_get(), 551 TokenType.PUT: lambda self: self._parse_put(), 552 TokenType.SHOW: lambda self: self._parse_show(), 553 } 554 555 PROPERTY_PARSERS = { 556 **parser.Parser.PROPERTY_PARSERS, 557 "CREDENTIALS": lambda self: self._parse_credentials_property(), 558 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 559 "LOCATION": lambda self: self._parse_location_property(), 560 "TAG": lambda self: self._parse_tag(), 561 "USING": lambda self: self._match_text_seq("TEMPLATE") 562 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 563 } 564 565 TYPE_CONVERTERS = { 566 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 567 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 568 } 569 570 SHOW_PARSERS = { 571 "DATABASES": _show_parser("DATABASES"), 572 "TERSE DATABASES": _show_parser("DATABASES"), 573 "SCHEMAS": _show_parser("SCHEMAS"), 574 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 575 "OBJECTS": _show_parser("OBJECTS"), 576 "TERSE OBJECTS": _show_parser("OBJECTS"), 577 "TABLES": _show_parser("TABLES"), 578 "TERSE TABLES": _show_parser("TABLES"), 579 "VIEWS": _show_parser("VIEWS"), 580 "TERSE VIEWS": _show_parser("VIEWS"), 581 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 582 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 583 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 584 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 585 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 586 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 587 "SEQUENCES": _show_parser("SEQUENCES"), 588 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 589 "STAGES": _show_parser("STAGES"), 590 "COLUMNS": _show_parser("COLUMNS"), 591 "USERS": _show_parser("USERS"), 592 "TERSE USERS": _show_parser("USERS"), 593 "FILE FORMATS": _show_parser("FILE FORMATS"), 594 "FUNCTIONS": _show_parser("FUNCTIONS"), 595 "PROCEDURES": _show_parser("PROCEDURES"), 596 "WAREHOUSES": _show_parser("WAREHOUSES"), 597 } 598 599 CONSTRAINT_PARSERS = { 600 **parser.Parser.CONSTRAINT_PARSERS, 601 "WITH": lambda self: self._parse_with_constraint(), 602 "MASKING": lambda self: self._parse_with_constraint(), 603 "PROJECTION": lambda self: self._parse_with_constraint(), 604 "TAG": lambda self: self._parse_with_constraint(), 605 } 606 607 STAGED_FILE_SINGLE_TOKENS = { 608 TokenType.DOT, 609 TokenType.MOD, 610 TokenType.SLASH, 611 } 612 613 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 614 615 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 616 617 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 618 619 LAMBDAS = { 620 **parser.Parser.LAMBDAS, 621 TokenType.ARROW: lambda self, expressions: self.expression( 622 exp.Lambda, 623 this=self._replace_lambda( 624 self._parse_assignment(), 625 expressions, 626 ), 627 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 628 ), 629 } 630 631 def _parse_use(self) -> exp.Use: 632 if self._match_text_seq("SECONDARY", "ROLES"): 633 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 634 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 635 return self.expression( 636 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 637 ) 638 639 return super()._parse_use() 640 641 def _negate_range( 642 self, this: t.Optional[exp.Expression] = None 643 ) -> t.Optional[exp.Expression]: 644 if not this: 645 return this 646 647 query = this.args.get("query") 648 if isinstance(this, exp.In) and isinstance(query, exp.Query): 649 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 650 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 651 # which can produce different results (most likely a SnowFlake bug). 652 # 653 # https://docs.snowflake.com/en/sql-reference/functions/in 654 # Context: https://github.com/tobymao/sqlglot/issues/3890 655 return self.expression( 656 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 657 ) 658 659 return self.expression(exp.Not, this=this) 660 661 def _parse_tag(self) -> exp.Tags: 662 return self.expression( 663 exp.Tags, 664 expressions=self._parse_wrapped_csv(self._parse_property), 665 ) 666 667 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 668 if self._prev.token_type != TokenType.WITH: 669 self._retreat(self._index - 1) 670 671 if self._match_text_seq("MASKING", "POLICY"): 672 policy = self._parse_column() 673 return self.expression( 674 exp.MaskingPolicyColumnConstraint, 675 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 676 expressions=self._match(TokenType.USING) 677 and self._parse_wrapped_csv(self._parse_id_var), 678 ) 679 if self._match_text_seq("PROJECTION", "POLICY"): 680 policy = self._parse_column() 681 return self.expression( 682 exp.ProjectionPolicyColumnConstraint, 683 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 684 ) 685 if self._match(TokenType.TAG): 686 return self._parse_tag() 687 688 return None 689 690 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 691 if self._match(TokenType.TAG): 692 return self._parse_tag() 693 694 return super()._parse_with_property() 695 696 def _parse_create(self) -> exp.Create | exp.Command: 697 expression = super()._parse_create() 698 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 699 # Replace the Table node with the enclosed Identifier 700 expression.this.replace(expression.this.this) 701 702 return expression 703 704 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 705 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 706 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 707 this = self._parse_var() or self._parse_type() 708 709 if not this: 710 return None 711 712 self._match(TokenType.COMMA) 713 expression = self._parse_bitwise() 714 this = map_date_part(this) 715 name = this.name.upper() 716 717 if name.startswith("EPOCH"): 718 if name == "EPOCH_MILLISECOND": 719 scale = 10**3 720 elif name == "EPOCH_MICROSECOND": 721 scale = 10**6 722 elif name == "EPOCH_NANOSECOND": 723 scale = 10**9 724 else: 725 scale = None 726 727 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 728 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 729 730 if scale: 731 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 732 733 return to_unix 734 735 return self.expression(exp.Extract, this=this, expression=expression) 736 737 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 738 if is_map: 739 # Keys are strings in Snowflake's objects, see also: 740 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 741 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 742 return self._parse_slice(self._parse_string()) 743 744 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 745 746 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 747 lateral = super()._parse_lateral() 748 if not lateral: 749 return lateral 750 751 if isinstance(lateral.this, exp.Explode): 752 table_alias = lateral.args.get("alias") 753 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 754 if table_alias and not table_alias.args.get("columns"): 755 table_alias.set("columns", columns) 756 elif not table_alias: 757 exp.alias_(lateral, "_flattened", table=columns, copy=False) 758 759 return lateral 760 761 def _parse_table_parts( 762 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 763 ) -> exp.Table: 764 # https://docs.snowflake.com/en/user-guide/querying-stage 765 if self._match(TokenType.STRING, advance=False): 766 table = self._parse_string() 767 elif self._match_text_seq("@", advance=False): 768 table = self._parse_location_path() 769 else: 770 table = None 771 772 if table: 773 file_format = None 774 pattern = None 775 776 wrapped = self._match(TokenType.L_PAREN) 777 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 778 if self._match_text_seq("FILE_FORMAT", "=>"): 779 file_format = self._parse_string() or super()._parse_table_parts( 780 is_db_reference=is_db_reference 781 ) 782 elif self._match_text_seq("PATTERN", "=>"): 783 pattern = self._parse_string() 784 else: 785 break 786 787 self._match(TokenType.COMMA) 788 789 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 790 else: 791 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 792 793 return table 794 795 def _parse_table( 796 self, 797 schema: bool = False, 798 joins: bool = False, 799 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 800 parse_bracket: bool = False, 801 is_db_reference: bool = False, 802 parse_partition: bool = False, 803 ) -> t.Optional[exp.Expression]: 804 table = super()._parse_table( 805 schema=schema, 806 joins=joins, 807 alias_tokens=alias_tokens, 808 parse_bracket=parse_bracket, 809 is_db_reference=is_db_reference, 810 parse_partition=parse_partition, 811 ) 812 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 813 table_from_rows = table.this 814 for arg in exp.TableFromRows.arg_types: 815 if arg != "this": 816 table_from_rows.set(arg, table.args.get(arg)) 817 818 table = table_from_rows 819 820 return table 821 822 def _parse_id_var( 823 self, 824 any_token: bool = True, 825 tokens: t.Optional[t.Collection[TokenType]] = None, 826 ) -> t.Optional[exp.Expression]: 827 if self._match_text_seq("IDENTIFIER", "("): 828 identifier = ( 829 super()._parse_id_var(any_token=any_token, tokens=tokens) 830 or self._parse_string() 831 ) 832 self._match_r_paren() 833 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 834 835 return super()._parse_id_var(any_token=any_token, tokens=tokens) 836 837 def _parse_show_snowflake(self, this: str) -> exp.Show: 838 scope = None 839 scope_kind = None 840 841 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 842 # which is syntactically valid but has no effect on the output 843 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 844 845 history = self._match_text_seq("HISTORY") 846 847 like = self._parse_string() if self._match(TokenType.LIKE) else None 848 849 if self._match(TokenType.IN): 850 if self._match_text_seq("ACCOUNT"): 851 scope_kind = "ACCOUNT" 852 elif self._match_text_seq("CLASS"): 853 scope_kind = "CLASS" 854 scope = self._parse_table_parts() 855 elif self._match_text_seq("APPLICATION"): 856 scope_kind = "APPLICATION" 857 if self._match_text_seq("PACKAGE"): 858 scope_kind += " PACKAGE" 859 scope = self._parse_table_parts() 860 elif self._match_set(self.DB_CREATABLES): 861 scope_kind = self._prev.text.upper() 862 if self._curr: 863 scope = self._parse_table_parts() 864 elif self._curr: 865 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 866 scope = self._parse_table_parts() 867 868 return self.expression( 869 exp.Show, 870 **{ 871 "terse": terse, 872 "this": this, 873 "history": history, 874 "like": like, 875 "scope": scope, 876 "scope_kind": scope_kind, 877 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 878 "limit": self._parse_limit(), 879 "from": self._parse_string() if self._match(TokenType.FROM) else None, 880 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 881 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 882 }, 883 ) 884 885 def _parse_put(self) -> exp.Put | exp.Command: 886 if self._curr.token_type != TokenType.STRING: 887 return self._parse_as_command(self._prev) 888 889 return self.expression( 890 exp.Put, 891 this=self._parse_string(), 892 target=self._parse_location_path(), 893 properties=self._parse_properties(), 894 ) 895 896 def _parse_get(self) -> t.Optional[exp.Expression]: 897 start = self._prev 898 899 # If we detect GET( then we need to parse a function, not a statement 900 if self._match(TokenType.L_PAREN): 901 self._retreat(self._index - 2) 902 return self._parse_expression() 903 904 target = self._parse_location_path() 905 906 # Parse as command if unquoted file path 907 if self._curr.token_type == TokenType.URI_START: 908 return self._parse_as_command(start) 909 910 return self.expression( 911 exp.Get, 912 this=self._parse_string(), 913 target=target, 914 properties=self._parse_properties(), 915 ) 916 917 def _parse_location_property(self) -> exp.LocationProperty: 918 self._match(TokenType.EQ) 919 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 920 921 def _parse_file_location(self) -> t.Optional[exp.Expression]: 922 # Parse either a subquery or a staged file 923 return ( 924 self._parse_select(table=True, parse_subquery_alias=False) 925 if self._match(TokenType.L_PAREN, advance=False) 926 else self._parse_table_parts() 927 ) 928 929 def _parse_location_path(self) -> exp.Var: 930 start = self._curr 931 self._advance_any(ignore_reserved=True) 932 933 # We avoid consuming a comma token because external tables like @foo and @bar 934 # can be joined in a query with a comma separator, as well as closing paren 935 # in case of subqueries 936 while self._is_connected() and not self._match_set( 937 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 938 ): 939 self._advance_any(ignore_reserved=True) 940 941 return exp.var(self._find_sql(start, self._prev)) 942 943 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 944 this = super()._parse_lambda_arg() 945 946 if not this: 947 return this 948 949 typ = self._parse_types() 950 951 if typ: 952 return self.expression(exp.Cast, this=this, to=typ) 953 954 return this 955 956 def _parse_foreign_key(self) -> exp.ForeignKey: 957 # inlineFK, the REFERENCES columns are implied 958 if self._match(TokenType.REFERENCES, advance=False): 959 return self.expression(exp.ForeignKey) 960 961 # outoflineFK, explicitly names the columns 962 return super()._parse_foreign_key() 963 964 def _parse_file_format_property(self) -> exp.FileFormatProperty: 965 self._match(TokenType.EQ) 966 if self._match(TokenType.L_PAREN, advance=False): 967 expressions = self._parse_wrapped_options() 968 else: 969 expressions = [self._parse_format_name()] 970 971 return self.expression( 972 exp.FileFormatProperty, 973 expressions=expressions, 974 ) 975 976 def _parse_credentials_property(self) -> exp.CredentialsProperty: 977 return self.expression( 978 exp.CredentialsProperty, 979 expressions=self._parse_wrapped_options(), 980 ) 981 982 class Tokenizer(tokens.Tokenizer): 983 STRING_ESCAPES = ["\\", "'"] 984 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 985 RAW_STRINGS = ["$$"] 986 COMMENTS = ["--", "//", ("/*", "*/")] 987 NESTED_COMMENTS = False 988 989 KEYWORDS = { 990 **tokens.Tokenizer.KEYWORDS, 991 "FILE://": TokenType.URI_START, 992 "BYTEINT": TokenType.INT, 993 "EXCLUDE": TokenType.EXCEPT, 994 "FILE FORMAT": TokenType.FILE_FORMAT, 995 "GET": TokenType.GET, 996 "ILIKE ANY": TokenType.ILIKE_ANY, 997 "LIKE ANY": TokenType.LIKE_ANY, 998 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 999 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1000 "MINUS": TokenType.EXCEPT, 1001 "NCHAR VARYING": TokenType.VARCHAR, 1002 "PUT": TokenType.PUT, 1003 "REMOVE": TokenType.COMMAND, 1004 "RM": TokenType.COMMAND, 1005 "SAMPLE": TokenType.TABLE_SAMPLE, 1006 "SQL_DOUBLE": TokenType.DOUBLE, 1007 "SQL_VARCHAR": TokenType.VARCHAR, 1008 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1009 "TAG": TokenType.TAG, 1010 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1011 "TOP": TokenType.TOP, 1012 "WAREHOUSE": TokenType.WAREHOUSE, 1013 "STAGE": TokenType.STAGE, 1014 "STREAMLIT": TokenType.STREAMLIT, 1015 } 1016 KEYWORDS.pop("/*+") 1017 1018 SINGLE_TOKENS = { 1019 **tokens.Tokenizer.SINGLE_TOKENS, 1020 "$": TokenType.PARAMETER, 1021 } 1022 1023 VAR_SINGLE_TOKENS = {"$"} 1024 1025 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1026 1027 class Generator(generator.Generator): 1028 PARAMETER_TOKEN = "$" 1029 MATCHED_BY_SOURCE = False 1030 SINGLE_STRING_INTERVAL = True 1031 JOIN_HINTS = False 1032 TABLE_HINTS = False 1033 QUERY_HINTS = False 1034 AGGREGATE_FILTER_SUPPORTED = False 1035 SUPPORTS_TABLE_COPY = False 1036 COLLATE_IS_FUNC = True 1037 LIMIT_ONLY_LITERALS = True 1038 JSON_KEY_VALUE_PAIR_SEP = "," 1039 INSERT_OVERWRITE = " OVERWRITE INTO" 1040 STRUCT_DELIMITER = ("(", ")") 1041 COPY_PARAMS_ARE_WRAPPED = False 1042 COPY_PARAMS_EQ_REQUIRED = True 1043 STAR_EXCEPT = "EXCLUDE" 1044 SUPPORTS_EXPLODING_PROJECTIONS = False 1045 ARRAY_CONCAT_IS_VAR_LEN = False 1046 SUPPORTS_CONVERT_TIMEZONE = True 1047 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1048 SUPPORTS_MEDIAN = True 1049 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1050 1051 TRANSFORMS = { 1052 **generator.Generator.TRANSFORMS, 1053 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1054 exp.ArgMax: rename_func("MAX_BY"), 1055 exp.ArgMin: rename_func("MIN_BY"), 1056 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1057 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1058 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1059 exp.AtTimeZone: lambda self, e: self.func( 1060 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1061 ), 1062 exp.BitwiseOr: rename_func("BITOR"), 1063 exp.BitwiseXor: rename_func("BITXOR"), 1064 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1065 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1066 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1067 exp.DateAdd: date_delta_sql("DATEADD"), 1068 exp.DateDiff: date_delta_sql("DATEDIFF"), 1069 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1070 exp.DatetimeDiff: timestampdiff_sql, 1071 exp.DateStrToDate: datestrtodate_sql, 1072 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1073 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1074 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1075 exp.DayOfYear: rename_func("DAYOFYEAR"), 1076 exp.Explode: rename_func("FLATTEN"), 1077 exp.Extract: lambda self, e: self.func( 1078 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1079 ), 1080 exp.FileFormatProperty: lambda self, 1081 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1082 exp.FromTimeZone: lambda self, e: self.func( 1083 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1084 ), 1085 exp.GenerateSeries: lambda self, e: self.func( 1086 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1087 ), 1088 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1089 exp.If: if_sql(name="IFF", false_value="NULL"), 1090 exp.JSONExtractArray: _json_extract_value_array_sql, 1091 exp.JSONExtractScalar: lambda self, e: self.func( 1092 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1093 ), 1094 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1095 exp.JSONPathRoot: lambda *_: "", 1096 exp.JSONValueArray: _json_extract_value_array_sql, 1097 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1098 rename_func("EDITDISTANCE") 1099 ), 1100 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1101 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1102 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1103 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1104 exp.MakeInterval: no_make_interval_sql, 1105 exp.Max: max_or_greatest, 1106 exp.Min: min_or_least, 1107 exp.ParseJSON: lambda self, e: self.func( 1108 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1109 ), 1110 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1111 exp.PercentileCont: transforms.preprocess( 1112 [transforms.add_within_group_for_percentiles] 1113 ), 1114 exp.PercentileDisc: transforms.preprocess( 1115 [transforms.add_within_group_for_percentiles] 1116 ), 1117 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1118 exp.RegexpExtract: _regexpextract_sql, 1119 exp.RegexpExtractAll: _regexpextract_sql, 1120 exp.RegexpILike: _regexpilike_sql, 1121 exp.Rand: rename_func("RANDOM"), 1122 exp.Select: transforms.preprocess( 1123 [ 1124 transforms.eliminate_window_clause, 1125 transforms.eliminate_distinct_on, 1126 transforms.explode_projection_to_unnest(), 1127 transforms.eliminate_semi_and_anti_joins, 1128 _transform_generate_date_array, 1129 _eliminate_dot_variant_lookup, 1130 ] 1131 ), 1132 exp.SHA: rename_func("SHA1"), 1133 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1134 exp.StartsWith: rename_func("STARTSWITH"), 1135 exp.EndsWith: rename_func("ENDSWITH"), 1136 exp.StrPosition: lambda self, e: strposition_sql( 1137 self, e, func_name="CHARINDEX", supports_position=True 1138 ), 1139 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1140 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1141 exp.Stuff: rename_func("INSERT"), 1142 exp.StPoint: rename_func("ST_MAKEPOINT"), 1143 exp.TimeAdd: date_delta_sql("TIMEADD"), 1144 exp.Timestamp: no_timestamp_sql, 1145 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1146 exp.TimestampDiff: lambda self, e: self.func( 1147 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1148 ), 1149 exp.TimestampTrunc: timestamptrunc_sql(), 1150 exp.TimeStrToTime: timestrtotime_sql, 1151 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1152 exp.ToArray: rename_func("TO_ARRAY"), 1153 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1154 exp.ToDouble: rename_func("TO_DOUBLE"), 1155 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1156 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1157 exp.TsOrDsToDate: lambda self, e: self.func( 1158 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1159 ), 1160 exp.TsOrDsToTime: lambda self, e: self.func( 1161 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1162 ), 1163 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1164 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1165 exp.Uuid: rename_func("UUID_STRING"), 1166 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1167 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1168 exp.Xor: rename_func("BOOLXOR"), 1169 } 1170 1171 SUPPORTED_JSON_PATH_PARTS = { 1172 exp.JSONPathKey, 1173 exp.JSONPathRoot, 1174 exp.JSONPathSubscript, 1175 } 1176 1177 TYPE_MAPPING = { 1178 **generator.Generator.TYPE_MAPPING, 1179 exp.DataType.Type.NESTED: "OBJECT", 1180 exp.DataType.Type.STRUCT: "OBJECT", 1181 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1182 } 1183 1184 TOKEN_MAPPING = { 1185 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1186 } 1187 1188 PROPERTIES_LOCATION = { 1189 **generator.Generator.PROPERTIES_LOCATION, 1190 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1191 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1192 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1193 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1194 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1195 } 1196 1197 UNSUPPORTED_VALUES_EXPRESSIONS = { 1198 exp.Map, 1199 exp.StarMap, 1200 exp.Struct, 1201 exp.VarMap, 1202 } 1203 1204 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1205 1206 def with_properties(self, properties: exp.Properties) -> str: 1207 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1208 1209 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1210 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1211 values_as_table = False 1212 1213 return super().values_sql(expression, values_as_table=values_as_table) 1214 1215 def datatype_sql(self, expression: exp.DataType) -> str: 1216 expressions = expression.expressions 1217 if ( 1218 expressions 1219 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1220 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1221 ): 1222 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1223 return "OBJECT" 1224 1225 return super().datatype_sql(expression) 1226 1227 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1228 return self.func( 1229 "TO_NUMBER", 1230 expression.this, 1231 expression.args.get("format"), 1232 expression.args.get("precision"), 1233 expression.args.get("scale"), 1234 ) 1235 1236 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1237 milli = expression.args.get("milli") 1238 if milli is not None: 1239 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1240 expression.set("nano", milli_to_nano) 1241 1242 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1243 1244 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1245 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1246 return self.func("TO_GEOGRAPHY", expression.this) 1247 if expression.is_type(exp.DataType.Type.GEOMETRY): 1248 return self.func("TO_GEOMETRY", expression.this) 1249 1250 return super().cast_sql(expression, safe_prefix=safe_prefix) 1251 1252 def trycast_sql(self, expression: exp.TryCast) -> str: 1253 value = expression.this 1254 1255 if value.type is None: 1256 from sqlglot.optimizer.annotate_types import annotate_types 1257 1258 value = annotate_types(value, dialect=self.dialect) 1259 1260 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1261 return super().trycast_sql(expression) 1262 1263 # TRY_CAST only works for string values in Snowflake 1264 return self.cast_sql(expression) 1265 1266 def log_sql(self, expression: exp.Log) -> str: 1267 if not expression.expression: 1268 return self.func("LN", expression.this) 1269 1270 return super().log_sql(expression) 1271 1272 def unnest_sql(self, expression: exp.Unnest) -> str: 1273 unnest_alias = expression.args.get("alias") 1274 offset = expression.args.get("offset") 1275 1276 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1277 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1278 1279 columns = [ 1280 exp.to_identifier("seq"), 1281 exp.to_identifier("key"), 1282 exp.to_identifier("path"), 1283 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1284 value, 1285 exp.to_identifier("this"), 1286 ] 1287 1288 if unnest_alias: 1289 unnest_alias.set("columns", columns) 1290 else: 1291 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1292 1293 table_input = self.sql(expression.expressions[0]) 1294 if not table_input.startswith("INPUT =>"): 1295 table_input = f"INPUT => {table_input}" 1296 1297 explode = f"TABLE(FLATTEN({table_input}))" 1298 alias = self.sql(unnest_alias) 1299 alias = f" AS {alias}" if alias else "" 1300 value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM " 1301 1302 return f"{value}{explode}{alias}" 1303 1304 def show_sql(self, expression: exp.Show) -> str: 1305 terse = "TERSE " if expression.args.get("terse") else "" 1306 history = " HISTORY" if expression.args.get("history") else "" 1307 like = self.sql(expression, "like") 1308 like = f" LIKE {like}" if like else "" 1309 1310 scope = self.sql(expression, "scope") 1311 scope = f" {scope}" if scope else "" 1312 1313 scope_kind = self.sql(expression, "scope_kind") 1314 if scope_kind: 1315 scope_kind = f" IN {scope_kind}" 1316 1317 starts_with = self.sql(expression, "starts_with") 1318 if starts_with: 1319 starts_with = f" STARTS WITH {starts_with}" 1320 1321 limit = self.sql(expression, "limit") 1322 1323 from_ = self.sql(expression, "from") 1324 if from_: 1325 from_ = f" FROM {from_}" 1326 1327 privileges = self.expressions(expression, key="privileges", flat=True) 1328 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1329 1330 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1331 1332 def describe_sql(self, expression: exp.Describe) -> str: 1333 # Default to table if kind is unknown 1334 kind_value = expression.args.get("kind") or "TABLE" 1335 kind = f" {kind_value}" if kind_value else "" 1336 this = f" {self.sql(expression, 'this')}" 1337 expressions = self.expressions(expression, flat=True) 1338 expressions = f" {expressions}" if expressions else "" 1339 return f"DESCRIBE{kind}{this}{expressions}" 1340 1341 def generatedasidentitycolumnconstraint_sql( 1342 self, expression: exp.GeneratedAsIdentityColumnConstraint 1343 ) -> str: 1344 start = expression.args.get("start") 1345 start = f" START {start}" if start else "" 1346 increment = expression.args.get("increment") 1347 increment = f" INCREMENT {increment}" if increment else "" 1348 1349 order = expression.args.get("order") 1350 if order is not None: 1351 order_clause = " ORDER" if order else " NOORDER" 1352 else: 1353 order_clause = "" 1354 1355 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1356 1357 def cluster_sql(self, expression: exp.Cluster) -> str: 1358 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1359 1360 def struct_sql(self, expression: exp.Struct) -> str: 1361 keys = [] 1362 values = [] 1363 1364 for i, e in enumerate(expression.expressions): 1365 if isinstance(e, exp.PropertyEQ): 1366 keys.append( 1367 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1368 ) 1369 values.append(e.expression) 1370 else: 1371 keys.append(exp.Literal.string(f"_{i}")) 1372 values.append(e) 1373 1374 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1375 1376 @unsupported_args("weight", "accuracy") 1377 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1378 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1379 1380 def alterset_sql(self, expression: exp.AlterSet) -> str: 1381 exprs = self.expressions(expression, flat=True) 1382 exprs = f" {exprs}" if exprs else "" 1383 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1384 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1385 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1386 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1387 tag = self.expressions(expression, key="tag", flat=True) 1388 tag = f" TAG {tag}" if tag else "" 1389 1390 return f"SET{exprs}{file_format}{copy_options}{tag}" 1391 1392 def strtotime_sql(self, expression: exp.StrToTime): 1393 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1394 return self.func( 1395 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1396 ) 1397 1398 def timestampsub_sql(self, expression: exp.TimestampSub): 1399 return self.sql( 1400 exp.TimestampAdd( 1401 this=expression.this, 1402 expression=expression.expression * -1, 1403 unit=expression.unit, 1404 ) 1405 ) 1406 1407 def jsonextract_sql(self, expression: exp.JSONExtract): 1408 this = expression.this 1409 1410 # JSON strings are valid coming from other dialects such as BQ 1411 return self.func( 1412 "GET_PATH", 1413 exp.ParseJSON(this=this) if this.is_string else this, 1414 expression.expression, 1415 ) 1416 1417 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1418 this = expression.this 1419 if not isinstance(this, exp.TsOrDsToTimestamp): 1420 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1421 1422 return self.func("TO_CHAR", this, self.format_time(expression)) 1423 1424 def datesub_sql(self, expression: exp.DateSub) -> str: 1425 value = expression.expression 1426 if value: 1427 value.replace(value * (-1)) 1428 else: 1429 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1430 1431 return date_delta_sql("DATEADD")(self, expression) 1432 1433 def select_sql(self, expression: exp.Select) -> str: 1434 limit = expression.args.get("limit") 1435 offset = expression.args.get("offset") 1436 if offset and not limit: 1437 expression.limit(exp.Null(), copy=False) 1438 return super().select_sql(expression) 1439 1440 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1441 is_materialized = expression.find(exp.MaterializedProperty) 1442 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1443 1444 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1445 # For materialized views, COPY GRANTS is located *before* the columns list 1446 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1447 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1448 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1449 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1450 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1451 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1452 1453 this_name = self.sql(expression.this, "this") 1454 copy_grants = self.sql(copy_grants_property) 1455 this_schema = self.schema_columns_sql(expression.this) 1456 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1457 1458 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1459 1460 return super().createable_sql(expression, locations) 1461 1462 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1463 this = expression.this 1464 1465 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1466 # and add it later as part of the WITHIN GROUP clause 1467 order = this if isinstance(this, exp.Order) else None 1468 if order: 1469 expression.set("this", order.this.pop()) 1470 1471 expr_sql = super().arrayagg_sql(expression) 1472 1473 if order: 1474 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1475 1476 return expr_sql 1477 1478 def array_sql(self, expression: exp.Array) -> str: 1479 expressions = expression.expressions 1480 1481 first_expr = seq_get(expressions, 0) 1482 if isinstance(first_expr, exp.Select): 1483 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1484 if first_expr.text("kind").upper() == "STRUCT": 1485 object_construct_args = [] 1486 for expr in first_expr.expressions: 1487 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1488 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1489 name = expr.this if isinstance(expr, exp.Alias) else expr 1490 1491 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1492 1493 array_agg = exp.ArrayAgg( 1494 this=_build_object_construct(args=object_construct_args) 1495 ) 1496 1497 first_expr.set("kind", None) 1498 first_expr.set("expressions", [array_agg]) 1499 1500 return self.sql(first_expr.subquery()) 1501 1502 return inline_array_sql(self, expression)
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
410 def quote_identifier(self, expression: E, identify: bool = True) -> E: 411 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 412 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 413 if ( 414 isinstance(expression, exp.Identifier) 415 and isinstance(expression.parent, exp.Table) 416 and expression.name.lower() == "dual" 417 ): 418 return expression # type: ignore 419 420 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
422 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 423 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 424 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
426 class Parser(parser.Parser): 427 IDENTIFY_PIVOT_STRINGS = True 428 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 429 COLON_IS_VARIANT_EXTRACT = True 430 431 ID_VAR_TOKENS = { 432 *parser.Parser.ID_VAR_TOKENS, 433 TokenType.MATCH_CONDITION, 434 } 435 436 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 437 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 438 439 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 440 441 FUNCTIONS = { 442 **parser.Parser.FUNCTIONS, 443 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 444 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 445 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 446 this=seq_get(args, 1), expression=seq_get(args, 0) 447 ), 448 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 449 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 450 start=seq_get(args, 0), 451 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 452 step=seq_get(args, 2), 453 ), 454 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 455 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 456 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 457 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 458 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 459 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 460 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 461 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 462 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 463 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 464 "DATE_TRUNC": _date_trunc_to_time, 465 "DATEADD": _build_date_time_add(exp.DateAdd), 466 "DATEDIFF": _build_datediff, 467 "DIV0": _build_if_from_div0, 468 "EDITDISTANCE": lambda args: exp.Levenshtein( 469 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 470 ), 471 "FLATTEN": exp.Explode.from_arg_list, 472 "GET_PATH": lambda args, dialect: exp.JSONExtract( 473 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 474 ), 475 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 476 "IFF": exp.If.from_arg_list, 477 "LAST_DAY": lambda args: exp.LastDay( 478 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 479 ), 480 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 481 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 482 "NULLIFZERO": _build_if_from_nullifzero, 483 "OBJECT_CONSTRUCT": _build_object_construct, 484 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 485 "REGEXP_REPLACE": _build_regexp_replace, 486 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 487 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 488 "RLIKE": exp.RegexpLike.from_arg_list, 489 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 490 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 491 "TIMEADD": _build_date_time_add(exp.TimeAdd), 492 "TIMEDIFF": _build_datediff, 493 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 494 "TIMESTAMPDIFF": _build_datediff, 495 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 496 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 497 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 498 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 499 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 500 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 501 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 502 "TRY_TO_TIMESTAMP": _build_datetime( 503 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 504 ), 505 "TO_CHAR": build_timetostr_or_tochar, 506 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 507 "TO_NUMBER": lambda args: exp.ToNumber( 508 this=seq_get(args, 0), 509 format=seq_get(args, 1), 510 precision=seq_get(args, 2), 511 scale=seq_get(args, 3), 512 ), 513 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 514 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 515 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 516 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 517 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 518 "TO_VARCHAR": exp.ToChar.from_arg_list, 519 "ZEROIFNULL": _build_if_from_zeroifnull, 520 } 521 522 FUNCTION_PARSERS = { 523 **parser.Parser.FUNCTION_PARSERS, 524 "DATE_PART": lambda self: self._parse_date_part(), 525 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 526 "LISTAGG": lambda self: self._parse_string_agg(), 527 } 528 FUNCTION_PARSERS.pop("TRIM") 529 530 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 531 532 RANGE_PARSERS = { 533 **parser.Parser.RANGE_PARSERS, 534 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 535 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 536 } 537 538 ALTER_PARSERS = { 539 **parser.Parser.ALTER_PARSERS, 540 "UNSET": lambda self: self.expression( 541 exp.Set, 542 tag=self._match_text_seq("TAG"), 543 expressions=self._parse_csv(self._parse_id_var), 544 unset=True, 545 ), 546 } 547 548 STATEMENT_PARSERS = { 549 **parser.Parser.STATEMENT_PARSERS, 550 TokenType.GET: lambda self: self._parse_get(), 551 TokenType.PUT: lambda self: self._parse_put(), 552 TokenType.SHOW: lambda self: self._parse_show(), 553 } 554 555 PROPERTY_PARSERS = { 556 **parser.Parser.PROPERTY_PARSERS, 557 "CREDENTIALS": lambda self: self._parse_credentials_property(), 558 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 559 "LOCATION": lambda self: self._parse_location_property(), 560 "TAG": lambda self: self._parse_tag(), 561 "USING": lambda self: self._match_text_seq("TEMPLATE") 562 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 563 } 564 565 TYPE_CONVERTERS = { 566 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 567 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 568 } 569 570 SHOW_PARSERS = { 571 "DATABASES": _show_parser("DATABASES"), 572 "TERSE DATABASES": _show_parser("DATABASES"), 573 "SCHEMAS": _show_parser("SCHEMAS"), 574 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 575 "OBJECTS": _show_parser("OBJECTS"), 576 "TERSE OBJECTS": _show_parser("OBJECTS"), 577 "TABLES": _show_parser("TABLES"), 578 "TERSE TABLES": _show_parser("TABLES"), 579 "VIEWS": _show_parser("VIEWS"), 580 "TERSE VIEWS": _show_parser("VIEWS"), 581 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 582 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 583 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 584 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 585 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 586 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 587 "SEQUENCES": _show_parser("SEQUENCES"), 588 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 589 "STAGES": _show_parser("STAGES"), 590 "COLUMNS": _show_parser("COLUMNS"), 591 "USERS": _show_parser("USERS"), 592 "TERSE USERS": _show_parser("USERS"), 593 "FILE FORMATS": _show_parser("FILE FORMATS"), 594 "FUNCTIONS": _show_parser("FUNCTIONS"), 595 "PROCEDURES": _show_parser("PROCEDURES"), 596 "WAREHOUSES": _show_parser("WAREHOUSES"), 597 } 598 599 CONSTRAINT_PARSERS = { 600 **parser.Parser.CONSTRAINT_PARSERS, 601 "WITH": lambda self: self._parse_with_constraint(), 602 "MASKING": lambda self: self._parse_with_constraint(), 603 "PROJECTION": lambda self: self._parse_with_constraint(), 604 "TAG": lambda self: self._parse_with_constraint(), 605 } 606 607 STAGED_FILE_SINGLE_TOKENS = { 608 TokenType.DOT, 609 TokenType.MOD, 610 TokenType.SLASH, 611 } 612 613 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 614 615 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 616 617 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 618 619 LAMBDAS = { 620 **parser.Parser.LAMBDAS, 621 TokenType.ARROW: lambda self, expressions: self.expression( 622 exp.Lambda, 623 this=self._replace_lambda( 624 self._parse_assignment(), 625 expressions, 626 ), 627 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 628 ), 629 } 630 631 def _parse_use(self) -> exp.Use: 632 if self._match_text_seq("SECONDARY", "ROLES"): 633 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 634 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 635 return self.expression( 636 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 637 ) 638 639 return super()._parse_use() 640 641 def _negate_range( 642 self, this: t.Optional[exp.Expression] = None 643 ) -> t.Optional[exp.Expression]: 644 if not this: 645 return this 646 647 query = this.args.get("query") 648 if isinstance(this, exp.In) and isinstance(query, exp.Query): 649 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 650 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 651 # which can produce different results (most likely a SnowFlake bug). 652 # 653 # https://docs.snowflake.com/en/sql-reference/functions/in 654 # Context: https://github.com/tobymao/sqlglot/issues/3890 655 return self.expression( 656 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 657 ) 658 659 return self.expression(exp.Not, this=this) 660 661 def _parse_tag(self) -> exp.Tags: 662 return self.expression( 663 exp.Tags, 664 expressions=self._parse_wrapped_csv(self._parse_property), 665 ) 666 667 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 668 if self._prev.token_type != TokenType.WITH: 669 self._retreat(self._index - 1) 670 671 if self._match_text_seq("MASKING", "POLICY"): 672 policy = self._parse_column() 673 return self.expression( 674 exp.MaskingPolicyColumnConstraint, 675 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 676 expressions=self._match(TokenType.USING) 677 and self._parse_wrapped_csv(self._parse_id_var), 678 ) 679 if self._match_text_seq("PROJECTION", "POLICY"): 680 policy = self._parse_column() 681 return self.expression( 682 exp.ProjectionPolicyColumnConstraint, 683 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 684 ) 685 if self._match(TokenType.TAG): 686 return self._parse_tag() 687 688 return None 689 690 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 691 if self._match(TokenType.TAG): 692 return self._parse_tag() 693 694 return super()._parse_with_property() 695 696 def _parse_create(self) -> exp.Create | exp.Command: 697 expression = super()._parse_create() 698 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 699 # Replace the Table node with the enclosed Identifier 700 expression.this.replace(expression.this.this) 701 702 return expression 703 704 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 705 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 706 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 707 this = self._parse_var() or self._parse_type() 708 709 if not this: 710 return None 711 712 self._match(TokenType.COMMA) 713 expression = self._parse_bitwise() 714 this = map_date_part(this) 715 name = this.name.upper() 716 717 if name.startswith("EPOCH"): 718 if name == "EPOCH_MILLISECOND": 719 scale = 10**3 720 elif name == "EPOCH_MICROSECOND": 721 scale = 10**6 722 elif name == "EPOCH_NANOSECOND": 723 scale = 10**9 724 else: 725 scale = None 726 727 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 728 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 729 730 if scale: 731 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 732 733 return to_unix 734 735 return self.expression(exp.Extract, this=this, expression=expression) 736 737 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 738 if is_map: 739 # Keys are strings in Snowflake's objects, see also: 740 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 741 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 742 return self._parse_slice(self._parse_string()) 743 744 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 745 746 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 747 lateral = super()._parse_lateral() 748 if not lateral: 749 return lateral 750 751 if isinstance(lateral.this, exp.Explode): 752 table_alias = lateral.args.get("alias") 753 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 754 if table_alias and not table_alias.args.get("columns"): 755 table_alias.set("columns", columns) 756 elif not table_alias: 757 exp.alias_(lateral, "_flattened", table=columns, copy=False) 758 759 return lateral 760 761 def _parse_table_parts( 762 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 763 ) -> exp.Table: 764 # https://docs.snowflake.com/en/user-guide/querying-stage 765 if self._match(TokenType.STRING, advance=False): 766 table = self._parse_string() 767 elif self._match_text_seq("@", advance=False): 768 table = self._parse_location_path() 769 else: 770 table = None 771 772 if table: 773 file_format = None 774 pattern = None 775 776 wrapped = self._match(TokenType.L_PAREN) 777 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 778 if self._match_text_seq("FILE_FORMAT", "=>"): 779 file_format = self._parse_string() or super()._parse_table_parts( 780 is_db_reference=is_db_reference 781 ) 782 elif self._match_text_seq("PATTERN", "=>"): 783 pattern = self._parse_string() 784 else: 785 break 786 787 self._match(TokenType.COMMA) 788 789 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 790 else: 791 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 792 793 return table 794 795 def _parse_table( 796 self, 797 schema: bool = False, 798 joins: bool = False, 799 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 800 parse_bracket: bool = False, 801 is_db_reference: bool = False, 802 parse_partition: bool = False, 803 ) -> t.Optional[exp.Expression]: 804 table = super()._parse_table( 805 schema=schema, 806 joins=joins, 807 alias_tokens=alias_tokens, 808 parse_bracket=parse_bracket, 809 is_db_reference=is_db_reference, 810 parse_partition=parse_partition, 811 ) 812 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 813 table_from_rows = table.this 814 for arg in exp.TableFromRows.arg_types: 815 if arg != "this": 816 table_from_rows.set(arg, table.args.get(arg)) 817 818 table = table_from_rows 819 820 return table 821 822 def _parse_id_var( 823 self, 824 any_token: bool = True, 825 tokens: t.Optional[t.Collection[TokenType]] = None, 826 ) -> t.Optional[exp.Expression]: 827 if self._match_text_seq("IDENTIFIER", "("): 828 identifier = ( 829 super()._parse_id_var(any_token=any_token, tokens=tokens) 830 or self._parse_string() 831 ) 832 self._match_r_paren() 833 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 834 835 return super()._parse_id_var(any_token=any_token, tokens=tokens) 836 837 def _parse_show_snowflake(self, this: str) -> exp.Show: 838 scope = None 839 scope_kind = None 840 841 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 842 # which is syntactically valid but has no effect on the output 843 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 844 845 history = self._match_text_seq("HISTORY") 846 847 like = self._parse_string() if self._match(TokenType.LIKE) else None 848 849 if self._match(TokenType.IN): 850 if self._match_text_seq("ACCOUNT"): 851 scope_kind = "ACCOUNT" 852 elif self._match_text_seq("CLASS"): 853 scope_kind = "CLASS" 854 scope = self._parse_table_parts() 855 elif self._match_text_seq("APPLICATION"): 856 scope_kind = "APPLICATION" 857 if self._match_text_seq("PACKAGE"): 858 scope_kind += " PACKAGE" 859 scope = self._parse_table_parts() 860 elif self._match_set(self.DB_CREATABLES): 861 scope_kind = self._prev.text.upper() 862 if self._curr: 863 scope = self._parse_table_parts() 864 elif self._curr: 865 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 866 scope = self._parse_table_parts() 867 868 return self.expression( 869 exp.Show, 870 **{ 871 "terse": terse, 872 "this": this, 873 "history": history, 874 "like": like, 875 "scope": scope, 876 "scope_kind": scope_kind, 877 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 878 "limit": self._parse_limit(), 879 "from": self._parse_string() if self._match(TokenType.FROM) else None, 880 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 881 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 882 }, 883 ) 884 885 def _parse_put(self) -> exp.Put | exp.Command: 886 if self._curr.token_type != TokenType.STRING: 887 return self._parse_as_command(self._prev) 888 889 return self.expression( 890 exp.Put, 891 this=self._parse_string(), 892 target=self._parse_location_path(), 893 properties=self._parse_properties(), 894 ) 895 896 def _parse_get(self) -> t.Optional[exp.Expression]: 897 start = self._prev 898 899 # If we detect GET( then we need to parse a function, not a statement 900 if self._match(TokenType.L_PAREN): 901 self._retreat(self._index - 2) 902 return self._parse_expression() 903 904 target = self._parse_location_path() 905 906 # Parse as command if unquoted file path 907 if self._curr.token_type == TokenType.URI_START: 908 return self._parse_as_command(start) 909 910 return self.expression( 911 exp.Get, 912 this=self._parse_string(), 913 target=target, 914 properties=self._parse_properties(), 915 ) 916 917 def _parse_location_property(self) -> exp.LocationProperty: 918 self._match(TokenType.EQ) 919 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 920 921 def _parse_file_location(self) -> t.Optional[exp.Expression]: 922 # Parse either a subquery or a staged file 923 return ( 924 self._parse_select(table=True, parse_subquery_alias=False) 925 if self._match(TokenType.L_PAREN, advance=False) 926 else self._parse_table_parts() 927 ) 928 929 def _parse_location_path(self) -> exp.Var: 930 start = self._curr 931 self._advance_any(ignore_reserved=True) 932 933 # We avoid consuming a comma token because external tables like @foo and @bar 934 # can be joined in a query with a comma separator, as well as closing paren 935 # in case of subqueries 936 while self._is_connected() and not self._match_set( 937 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 938 ): 939 self._advance_any(ignore_reserved=True) 940 941 return exp.var(self._find_sql(start, self._prev)) 942 943 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 944 this = super()._parse_lambda_arg() 945 946 if not this: 947 return this 948 949 typ = self._parse_types() 950 951 if typ: 952 return self.expression(exp.Cast, this=this, to=typ) 953 954 return this 955 956 def _parse_foreign_key(self) -> exp.ForeignKey: 957 # inlineFK, the REFERENCES columns are implied 958 if self._match(TokenType.REFERENCES, advance=False): 959 return self.expression(exp.ForeignKey) 960 961 # outoflineFK, explicitly names the columns 962 return super()._parse_foreign_key() 963 964 def _parse_file_format_property(self) -> exp.FileFormatProperty: 965 self._match(TokenType.EQ) 966 if self._match(TokenType.L_PAREN, advance=False): 967 expressions = self._parse_wrapped_options() 968 else: 969 expressions = [self._parse_format_name()] 970 971 return self.expression( 972 exp.FileFormatProperty, 973 expressions=expressions, 974 ) 975 976 def _parse_credentials_property(self) -> exp.CredentialsProperty: 977 return self.expression( 978 exp.CredentialsProperty, 979 expressions=self._parse_wrapped_options(), 980 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
982 class Tokenizer(tokens.Tokenizer): 983 STRING_ESCAPES = ["\\", "'"] 984 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 985 RAW_STRINGS = ["$$"] 986 COMMENTS = ["--", "//", ("/*", "*/")] 987 NESTED_COMMENTS = False 988 989 KEYWORDS = { 990 **tokens.Tokenizer.KEYWORDS, 991 "FILE://": TokenType.URI_START, 992 "BYTEINT": TokenType.INT, 993 "EXCLUDE": TokenType.EXCEPT, 994 "FILE FORMAT": TokenType.FILE_FORMAT, 995 "GET": TokenType.GET, 996 "ILIKE ANY": TokenType.ILIKE_ANY, 997 "LIKE ANY": TokenType.LIKE_ANY, 998 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 999 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1000 "MINUS": TokenType.EXCEPT, 1001 "NCHAR VARYING": TokenType.VARCHAR, 1002 "PUT": TokenType.PUT, 1003 "REMOVE": TokenType.COMMAND, 1004 "RM": TokenType.COMMAND, 1005 "SAMPLE": TokenType.TABLE_SAMPLE, 1006 "SQL_DOUBLE": TokenType.DOUBLE, 1007 "SQL_VARCHAR": TokenType.VARCHAR, 1008 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1009 "TAG": TokenType.TAG, 1010 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1011 "TOP": TokenType.TOP, 1012 "WAREHOUSE": TokenType.WAREHOUSE, 1013 "STAGE": TokenType.STAGE, 1014 "STREAMLIT": TokenType.STREAMLIT, 1015 } 1016 KEYWORDS.pop("/*+") 1017 1018 SINGLE_TOKENS = { 1019 **tokens.Tokenizer.SINGLE_TOKENS, 1020 "$": TokenType.PARAMETER, 1021 } 1022 1023 VAR_SINGLE_TOKENS = {"$"} 1024 1025 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1027 class Generator(generator.Generator): 1028 PARAMETER_TOKEN = "$" 1029 MATCHED_BY_SOURCE = False 1030 SINGLE_STRING_INTERVAL = True 1031 JOIN_HINTS = False 1032 TABLE_HINTS = False 1033 QUERY_HINTS = False 1034 AGGREGATE_FILTER_SUPPORTED = False 1035 SUPPORTS_TABLE_COPY = False 1036 COLLATE_IS_FUNC = True 1037 LIMIT_ONLY_LITERALS = True 1038 JSON_KEY_VALUE_PAIR_SEP = "," 1039 INSERT_OVERWRITE = " OVERWRITE INTO" 1040 STRUCT_DELIMITER = ("(", ")") 1041 COPY_PARAMS_ARE_WRAPPED = False 1042 COPY_PARAMS_EQ_REQUIRED = True 1043 STAR_EXCEPT = "EXCLUDE" 1044 SUPPORTS_EXPLODING_PROJECTIONS = False 1045 ARRAY_CONCAT_IS_VAR_LEN = False 1046 SUPPORTS_CONVERT_TIMEZONE = True 1047 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1048 SUPPORTS_MEDIAN = True 1049 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1050 1051 TRANSFORMS = { 1052 **generator.Generator.TRANSFORMS, 1053 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1054 exp.ArgMax: rename_func("MAX_BY"), 1055 exp.ArgMin: rename_func("MIN_BY"), 1056 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1057 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1058 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1059 exp.AtTimeZone: lambda self, e: self.func( 1060 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1061 ), 1062 exp.BitwiseOr: rename_func("BITOR"), 1063 exp.BitwiseXor: rename_func("BITXOR"), 1064 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1065 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1066 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1067 exp.DateAdd: date_delta_sql("DATEADD"), 1068 exp.DateDiff: date_delta_sql("DATEDIFF"), 1069 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1070 exp.DatetimeDiff: timestampdiff_sql, 1071 exp.DateStrToDate: datestrtodate_sql, 1072 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1073 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1074 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1075 exp.DayOfYear: rename_func("DAYOFYEAR"), 1076 exp.Explode: rename_func("FLATTEN"), 1077 exp.Extract: lambda self, e: self.func( 1078 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1079 ), 1080 exp.FileFormatProperty: lambda self, 1081 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1082 exp.FromTimeZone: lambda self, e: self.func( 1083 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1084 ), 1085 exp.GenerateSeries: lambda self, e: self.func( 1086 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1087 ), 1088 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1089 exp.If: if_sql(name="IFF", false_value="NULL"), 1090 exp.JSONExtractArray: _json_extract_value_array_sql, 1091 exp.JSONExtractScalar: lambda self, e: self.func( 1092 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1093 ), 1094 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1095 exp.JSONPathRoot: lambda *_: "", 1096 exp.JSONValueArray: _json_extract_value_array_sql, 1097 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1098 rename_func("EDITDISTANCE") 1099 ), 1100 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1101 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1102 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1103 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1104 exp.MakeInterval: no_make_interval_sql, 1105 exp.Max: max_or_greatest, 1106 exp.Min: min_or_least, 1107 exp.ParseJSON: lambda self, e: self.func( 1108 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1109 ), 1110 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1111 exp.PercentileCont: transforms.preprocess( 1112 [transforms.add_within_group_for_percentiles] 1113 ), 1114 exp.PercentileDisc: transforms.preprocess( 1115 [transforms.add_within_group_for_percentiles] 1116 ), 1117 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1118 exp.RegexpExtract: _regexpextract_sql, 1119 exp.RegexpExtractAll: _regexpextract_sql, 1120 exp.RegexpILike: _regexpilike_sql, 1121 exp.Rand: rename_func("RANDOM"), 1122 exp.Select: transforms.preprocess( 1123 [ 1124 transforms.eliminate_window_clause, 1125 transforms.eliminate_distinct_on, 1126 transforms.explode_projection_to_unnest(), 1127 transforms.eliminate_semi_and_anti_joins, 1128 _transform_generate_date_array, 1129 _eliminate_dot_variant_lookup, 1130 ] 1131 ), 1132 exp.SHA: rename_func("SHA1"), 1133 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1134 exp.StartsWith: rename_func("STARTSWITH"), 1135 exp.EndsWith: rename_func("ENDSWITH"), 1136 exp.StrPosition: lambda self, e: strposition_sql( 1137 self, e, func_name="CHARINDEX", supports_position=True 1138 ), 1139 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1140 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1141 exp.Stuff: rename_func("INSERT"), 1142 exp.StPoint: rename_func("ST_MAKEPOINT"), 1143 exp.TimeAdd: date_delta_sql("TIMEADD"), 1144 exp.Timestamp: no_timestamp_sql, 1145 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1146 exp.TimestampDiff: lambda self, e: self.func( 1147 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1148 ), 1149 exp.TimestampTrunc: timestamptrunc_sql(), 1150 exp.TimeStrToTime: timestrtotime_sql, 1151 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1152 exp.ToArray: rename_func("TO_ARRAY"), 1153 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1154 exp.ToDouble: rename_func("TO_DOUBLE"), 1155 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1156 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1157 exp.TsOrDsToDate: lambda self, e: self.func( 1158 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1159 ), 1160 exp.TsOrDsToTime: lambda self, e: self.func( 1161 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1162 ), 1163 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1164 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1165 exp.Uuid: rename_func("UUID_STRING"), 1166 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1167 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1168 exp.Xor: rename_func("BOOLXOR"), 1169 } 1170 1171 SUPPORTED_JSON_PATH_PARTS = { 1172 exp.JSONPathKey, 1173 exp.JSONPathRoot, 1174 exp.JSONPathSubscript, 1175 } 1176 1177 TYPE_MAPPING = { 1178 **generator.Generator.TYPE_MAPPING, 1179 exp.DataType.Type.NESTED: "OBJECT", 1180 exp.DataType.Type.STRUCT: "OBJECT", 1181 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1182 } 1183 1184 TOKEN_MAPPING = { 1185 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1186 } 1187 1188 PROPERTIES_LOCATION = { 1189 **generator.Generator.PROPERTIES_LOCATION, 1190 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1191 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1192 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1193 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1194 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1195 } 1196 1197 UNSUPPORTED_VALUES_EXPRESSIONS = { 1198 exp.Map, 1199 exp.StarMap, 1200 exp.Struct, 1201 exp.VarMap, 1202 } 1203 1204 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1205 1206 def with_properties(self, properties: exp.Properties) -> str: 1207 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1208 1209 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1210 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1211 values_as_table = False 1212 1213 return super().values_sql(expression, values_as_table=values_as_table) 1214 1215 def datatype_sql(self, expression: exp.DataType) -> str: 1216 expressions = expression.expressions 1217 if ( 1218 expressions 1219 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1220 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1221 ): 1222 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1223 return "OBJECT" 1224 1225 return super().datatype_sql(expression) 1226 1227 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1228 return self.func( 1229 "TO_NUMBER", 1230 expression.this, 1231 expression.args.get("format"), 1232 expression.args.get("precision"), 1233 expression.args.get("scale"), 1234 ) 1235 1236 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1237 milli = expression.args.get("milli") 1238 if milli is not None: 1239 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1240 expression.set("nano", milli_to_nano) 1241 1242 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1243 1244 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1245 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1246 return self.func("TO_GEOGRAPHY", expression.this) 1247 if expression.is_type(exp.DataType.Type.GEOMETRY): 1248 return self.func("TO_GEOMETRY", expression.this) 1249 1250 return super().cast_sql(expression, safe_prefix=safe_prefix) 1251 1252 def trycast_sql(self, expression: exp.TryCast) -> str: 1253 value = expression.this 1254 1255 if value.type is None: 1256 from sqlglot.optimizer.annotate_types import annotate_types 1257 1258 value = annotate_types(value, dialect=self.dialect) 1259 1260 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1261 return super().trycast_sql(expression) 1262 1263 # TRY_CAST only works for string values in Snowflake 1264 return self.cast_sql(expression) 1265 1266 def log_sql(self, expression: exp.Log) -> str: 1267 if not expression.expression: 1268 return self.func("LN", expression.this) 1269 1270 return super().log_sql(expression) 1271 1272 def unnest_sql(self, expression: exp.Unnest) -> str: 1273 unnest_alias = expression.args.get("alias") 1274 offset = expression.args.get("offset") 1275 1276 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1277 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1278 1279 columns = [ 1280 exp.to_identifier("seq"), 1281 exp.to_identifier("key"), 1282 exp.to_identifier("path"), 1283 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1284 value, 1285 exp.to_identifier("this"), 1286 ] 1287 1288 if unnest_alias: 1289 unnest_alias.set("columns", columns) 1290 else: 1291 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1292 1293 table_input = self.sql(expression.expressions[0]) 1294 if not table_input.startswith("INPUT =>"): 1295 table_input = f"INPUT => {table_input}" 1296 1297 explode = f"TABLE(FLATTEN({table_input}))" 1298 alias = self.sql(unnest_alias) 1299 alias = f" AS {alias}" if alias else "" 1300 value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM " 1301 1302 return f"{value}{explode}{alias}" 1303 1304 def show_sql(self, expression: exp.Show) -> str: 1305 terse = "TERSE " if expression.args.get("terse") else "" 1306 history = " HISTORY" if expression.args.get("history") else "" 1307 like = self.sql(expression, "like") 1308 like = f" LIKE {like}" if like else "" 1309 1310 scope = self.sql(expression, "scope") 1311 scope = f" {scope}" if scope else "" 1312 1313 scope_kind = self.sql(expression, "scope_kind") 1314 if scope_kind: 1315 scope_kind = f" IN {scope_kind}" 1316 1317 starts_with = self.sql(expression, "starts_with") 1318 if starts_with: 1319 starts_with = f" STARTS WITH {starts_with}" 1320 1321 limit = self.sql(expression, "limit") 1322 1323 from_ = self.sql(expression, "from") 1324 if from_: 1325 from_ = f" FROM {from_}" 1326 1327 privileges = self.expressions(expression, key="privileges", flat=True) 1328 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1329 1330 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1331 1332 def describe_sql(self, expression: exp.Describe) -> str: 1333 # Default to table if kind is unknown 1334 kind_value = expression.args.get("kind") or "TABLE" 1335 kind = f" {kind_value}" if kind_value else "" 1336 this = f" {self.sql(expression, 'this')}" 1337 expressions = self.expressions(expression, flat=True) 1338 expressions = f" {expressions}" if expressions else "" 1339 return f"DESCRIBE{kind}{this}{expressions}" 1340 1341 def generatedasidentitycolumnconstraint_sql( 1342 self, expression: exp.GeneratedAsIdentityColumnConstraint 1343 ) -> str: 1344 start = expression.args.get("start") 1345 start = f" START {start}" if start else "" 1346 increment = expression.args.get("increment") 1347 increment = f" INCREMENT {increment}" if increment else "" 1348 1349 order = expression.args.get("order") 1350 if order is not None: 1351 order_clause = " ORDER" if order else " NOORDER" 1352 else: 1353 order_clause = "" 1354 1355 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1356 1357 def cluster_sql(self, expression: exp.Cluster) -> str: 1358 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1359 1360 def struct_sql(self, expression: exp.Struct) -> str: 1361 keys = [] 1362 values = [] 1363 1364 for i, e in enumerate(expression.expressions): 1365 if isinstance(e, exp.PropertyEQ): 1366 keys.append( 1367 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1368 ) 1369 values.append(e.expression) 1370 else: 1371 keys.append(exp.Literal.string(f"_{i}")) 1372 values.append(e) 1373 1374 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1375 1376 @unsupported_args("weight", "accuracy") 1377 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1378 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1379 1380 def alterset_sql(self, expression: exp.AlterSet) -> str: 1381 exprs = self.expressions(expression, flat=True) 1382 exprs = f" {exprs}" if exprs else "" 1383 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1384 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1385 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1386 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1387 tag = self.expressions(expression, key="tag", flat=True) 1388 tag = f" TAG {tag}" if tag else "" 1389 1390 return f"SET{exprs}{file_format}{copy_options}{tag}" 1391 1392 def strtotime_sql(self, expression: exp.StrToTime): 1393 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1394 return self.func( 1395 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1396 ) 1397 1398 def timestampsub_sql(self, expression: exp.TimestampSub): 1399 return self.sql( 1400 exp.TimestampAdd( 1401 this=expression.this, 1402 expression=expression.expression * -1, 1403 unit=expression.unit, 1404 ) 1405 ) 1406 1407 def jsonextract_sql(self, expression: exp.JSONExtract): 1408 this = expression.this 1409 1410 # JSON strings are valid coming from other dialects such as BQ 1411 return self.func( 1412 "GET_PATH", 1413 exp.ParseJSON(this=this) if this.is_string else this, 1414 expression.expression, 1415 ) 1416 1417 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1418 this = expression.this 1419 if not isinstance(this, exp.TsOrDsToTimestamp): 1420 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1421 1422 return self.func("TO_CHAR", this, self.format_time(expression)) 1423 1424 def datesub_sql(self, expression: exp.DateSub) -> str: 1425 value = expression.expression 1426 if value: 1427 value.replace(value * (-1)) 1428 else: 1429 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1430 1431 return date_delta_sql("DATEADD")(self, expression) 1432 1433 def select_sql(self, expression: exp.Select) -> str: 1434 limit = expression.args.get("limit") 1435 offset = expression.args.get("offset") 1436 if offset and not limit: 1437 expression.limit(exp.Null(), copy=False) 1438 return super().select_sql(expression) 1439 1440 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1441 is_materialized = expression.find(exp.MaterializedProperty) 1442 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1443 1444 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1445 # For materialized views, COPY GRANTS is located *before* the columns list 1446 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1447 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1448 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1449 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1450 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1451 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1452 1453 this_name = self.sql(expression.this, "this") 1454 copy_grants = self.sql(copy_grants_property) 1455 this_schema = self.schema_columns_sql(expression.this) 1456 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1457 1458 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1459 1460 return super().createable_sql(expression, locations) 1461 1462 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1463 this = expression.this 1464 1465 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1466 # and add it later as part of the WITHIN GROUP clause 1467 order = this if isinstance(this, exp.Order) else None 1468 if order: 1469 expression.set("this", order.this.pop()) 1470 1471 expr_sql = super().arrayagg_sql(expression) 1472 1473 if order: 1474 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1475 1476 return expr_sql 1477 1478 def array_sql(self, expression: exp.Array) -> str: 1479 expressions = expression.expressions 1480 1481 first_expr = seq_get(expressions, 0) 1482 if isinstance(first_expr, exp.Select): 1483 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1484 if first_expr.text("kind").upper() == "STRUCT": 1485 object_construct_args = [] 1486 for expr in first_expr.expressions: 1487 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1488 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1489 name = expr.this if isinstance(expr, exp.Alias) else expr 1490 1491 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1492 1493 array_agg = exp.ArrayAgg( 1494 this=_build_object_construct(args=object_construct_args) 1495 ) 1496 1497 first_expr.set("kind", None) 1498 first_expr.set("expressions", [array_agg]) 1499 1500 return self.sql(first_expr.subquery()) 1501 1502 return inline_array_sql(self, expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1215 def datatype_sql(self, expression: exp.DataType) -> str: 1216 expressions = expression.expressions 1217 if ( 1218 expressions 1219 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1220 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1221 ): 1222 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1223 return "OBJECT" 1224 1225 return super().datatype_sql(expression)
1236 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1237 milli = expression.args.get("milli") 1238 if milli is not None: 1239 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1240 expression.set("nano", milli_to_nano) 1241 1242 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
1244 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1245 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1246 return self.func("TO_GEOGRAPHY", expression.this) 1247 if expression.is_type(exp.DataType.Type.GEOMETRY): 1248 return self.func("TO_GEOMETRY", expression.this) 1249 1250 return super().cast_sql(expression, safe_prefix=safe_prefix)
1252 def trycast_sql(self, expression: exp.TryCast) -> str: 1253 value = expression.this 1254 1255 if value.type is None: 1256 from sqlglot.optimizer.annotate_types import annotate_types 1257 1258 value = annotate_types(value, dialect=self.dialect) 1259 1260 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1261 return super().trycast_sql(expression) 1262 1263 # TRY_CAST only works for string values in Snowflake 1264 return self.cast_sql(expression)
1272 def unnest_sql(self, expression: exp.Unnest) -> str: 1273 unnest_alias = expression.args.get("alias") 1274 offset = expression.args.get("offset") 1275 1276 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1277 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1278 1279 columns = [ 1280 exp.to_identifier("seq"), 1281 exp.to_identifier("key"), 1282 exp.to_identifier("path"), 1283 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1284 value, 1285 exp.to_identifier("this"), 1286 ] 1287 1288 if unnest_alias: 1289 unnest_alias.set("columns", columns) 1290 else: 1291 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1292 1293 table_input = self.sql(expression.expressions[0]) 1294 if not table_input.startswith("INPUT =>"): 1295 table_input = f"INPUT => {table_input}" 1296 1297 explode = f"TABLE(FLATTEN({table_input}))" 1298 alias = self.sql(unnest_alias) 1299 alias = f" AS {alias}" if alias else "" 1300 value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM " 1301 1302 return f"{value}{explode}{alias}"
1304 def show_sql(self, expression: exp.Show) -> str: 1305 terse = "TERSE " if expression.args.get("terse") else "" 1306 history = " HISTORY" if expression.args.get("history") else "" 1307 like = self.sql(expression, "like") 1308 like = f" LIKE {like}" if like else "" 1309 1310 scope = self.sql(expression, "scope") 1311 scope = f" {scope}" if scope else "" 1312 1313 scope_kind = self.sql(expression, "scope_kind") 1314 if scope_kind: 1315 scope_kind = f" IN {scope_kind}" 1316 1317 starts_with = self.sql(expression, "starts_with") 1318 if starts_with: 1319 starts_with = f" STARTS WITH {starts_with}" 1320 1321 limit = self.sql(expression, "limit") 1322 1323 from_ = self.sql(expression, "from") 1324 if from_: 1325 from_ = f" FROM {from_}" 1326 1327 privileges = self.expressions(expression, key="privileges", flat=True) 1328 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1329 1330 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
1332 def describe_sql(self, expression: exp.Describe) -> str: 1333 # Default to table if kind is unknown 1334 kind_value = expression.args.get("kind") or "TABLE" 1335 kind = f" {kind_value}" if kind_value else "" 1336 this = f" {self.sql(expression, 'this')}" 1337 expressions = self.expressions(expression, flat=True) 1338 expressions = f" {expressions}" if expressions else "" 1339 return f"DESCRIBE{kind}{this}{expressions}"
1341 def generatedasidentitycolumnconstraint_sql( 1342 self, expression: exp.GeneratedAsIdentityColumnConstraint 1343 ) -> str: 1344 start = expression.args.get("start") 1345 start = f" START {start}" if start else "" 1346 increment = expression.args.get("increment") 1347 increment = f" INCREMENT {increment}" if increment else "" 1348 1349 order = expression.args.get("order") 1350 if order is not None: 1351 order_clause = " ORDER" if order else " NOORDER" 1352 else: 1353 order_clause = "" 1354 1355 return f"AUTOINCREMENT{start}{increment}{order_clause}"
1360 def struct_sql(self, expression: exp.Struct) -> str: 1361 keys = [] 1362 values = [] 1363 1364 for i, e in enumerate(expression.expressions): 1365 if isinstance(e, exp.PropertyEQ): 1366 keys.append( 1367 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1368 ) 1369 values.append(e.expression) 1370 else: 1371 keys.append(exp.Literal.string(f"_{i}")) 1372 values.append(e) 1373 1374 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
1380 def alterset_sql(self, expression: exp.AlterSet) -> str: 1381 exprs = self.expressions(expression, flat=True) 1382 exprs = f" {exprs}" if exprs else "" 1383 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1384 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1385 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1386 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1387 tag = self.expressions(expression, key="tag", flat=True) 1388 tag = f" TAG {tag}" if tag else "" 1389 1390 return f"SET{exprs}{file_format}{copy_options}{tag}"
1424 def datesub_sql(self, expression: exp.DateSub) -> str: 1425 value = expression.expression 1426 if value: 1427 value.replace(value * (-1)) 1428 else: 1429 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1430 1431 return date_delta_sql("DATEADD")(self, expression)
1440 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1441 is_materialized = expression.find(exp.MaterializedProperty) 1442 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1443 1444 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1445 # For materialized views, COPY GRANTS is located *before* the columns list 1446 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1447 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1448 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1449 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1450 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1451 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1452 1453 this_name = self.sql(expression.this, "this") 1454 copy_grants = self.sql(copy_grants_property) 1455 this_schema = self.schema_columns_sql(expression.this) 1456 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1457 1458 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1459 1460 return super().createable_sql(expression, locations)
1462 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1463 this = expression.this 1464 1465 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1466 # and add it later as part of the WITHIN GROUP clause 1467 order = this if isinstance(this, exp.Order) else None 1468 if order: 1469 expression.set("this", order.this.pop()) 1470 1471 expr_sql = super().arrayagg_sql(expression) 1472 1473 if order: 1474 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1475 1476 return expr_sql
1478 def array_sql(self, expression: exp.Array) -> str: 1479 expressions = expression.expressions 1480 1481 first_expr = seq_get(expressions, 0) 1482 if isinstance(first_expr, exp.Select): 1483 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1484 if first_expr.text("kind").upper() == "STRUCT": 1485 object_construct_args = [] 1486 for expr in first_expr.expressions: 1487 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1488 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1489 name = expr.this if isinstance(expr, exp.Alias) else expr 1490 1491 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1492 1493 array_agg = exp.ArrayAgg( 1494 this=_build_object_construct(args=object_construct_args) 1495 ) 1496 1497 first_expr.set("kind", None) 1498 first_expr.set("expressions", [array_agg]) 1499 1500 return self.sql(first_expr.subquery()) 1501 1502 return inline_array_sql(self, expression)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql