Back to Deck
Terminal window
[utility]
SQL to Directus Seed
SOURCE Python
VERSION 1.0
AUTHOR Cododel
A robust Python utility that parses INSERT INTO statements from SQL dumps (PostgreSQL, MySQL) and converts them into Directus Seed JSON format for directus-sync. Handles complex types, nulls, casts, and automatically generates sync IDs.
Features
- Smart SQL Parsing: Handles comments, multiline statements, quoted identifiers
- Type Detection: Automatically converts strings, numbers, nulls, UUIDs, timestamps
- PostgreSQL Casts: Handles `::uuid` and `::timestamp` type casts
- Sync ID Generation: Auto-generates `_sync_id` based on the primary key
- Multi-table Support: Processes multiple tables with dependency ordering
- Error Handling: Validates column/value count, logs parsing warnings
Use Case
Perfect for migrating data between environments or converting legacy SQL dumps to Directus-compatible seeds:
# Convert PostgreSQL dump to Directus seed
python sql_to_seed.py -i production_dump.sql -o seeds/production.json
# Convert multiple tables
python sql_to_seed.py -i multi_table_dump.sql -o seeds/all_tables.json

Input Example
dump.sql:
INSERT INTO products (id, name, price, created_at)
VALUES (1, 'Product A', 99.99, '2024-01-01'::timestamp), (2, 'Product B', 149.50, '2024-01-02'::timestamp);

INSERT INTO categories (id, name, slug)
VALUES ('uuid-123'::uuid, 'Electronics', 'electronics'), ('uuid-456'::uuid, 'Books', 'books');

Output Example
seed.json:
[
  {
    "collection": "products",
    "meta": { "insert_order": 1, "create": true, "update": true, "delete": true, "preserve_ids": true },
    "data": [
      { "id": 1, "name": "Product A", "price": 99.99, "created_at": "2024-01-01", "_sync_id": "products-1" },
      { "id": 2, "name": "Product B", "price": 149.5, "created_at": "2024-01-02", "_sync_id": "products-2" }
    ]
  },
  {
    "collection": "categories",
    "meta": { "insert_order": 1, "create": true, "update": true, "delete": true, "preserve_ids": true },
    "data": [
      { "id": "uuid-123", "name": "Electronics", "slug": "electronics", "_sync_id": "categories-uuid-123" },
      { "id": "uuid-456", "name": "Books", "slug": "books", "_sync_id": "categories-uuid-456" }
    ]
  }
]

How It Works
- Normalization: Removes comments, standardizes whitespace
- Statement Splitting: Separates multiple INSERT statements
- Parsing: Extracts table name, columns, and values
- Type Conversion: Converts SQL literals to JSON types
- Seed Generation: Creates Directus-compatible JSON structure
- Dependency Ordering: Smart ordering for tables with relationships
Source Code
sql_to_seed.py (Full implementation - 260+ lines)
import reimport jsonimport argparsefrom typing import List, Dict, Any, Tuple, Optionalfrom pathlib import Path
def normalize_sql_content(sql_content: str) -> List[str]:
    """Normalize SQL and split it into individual statements.

    Removes block (``/* */``) and line (``--``) comments, collapses
    whitespace, then splits on semicolons that appear outside
    single-quoted string literals.

    Args:
        sql_content: Raw SQL dump text.

    Returns:
        List of normalized statement strings (trailing ``;`` kept).
        BUG FIX: the original annotation claimed ``-> str`` although a
        list has always been returned.

    NOTE(review): the whitespace-collapsing regexes below are not
    quote-aware, so spacing around commas/parens inside string literals
    (e.g. ``'A, B'``) is also normalized — lossy for such data.
    """
    # Remove comments (block comments first, then line comments).
    sql_content = re.sub(r'/\*.*?\*/', '', sql_content, flags=re.DOTALL)
    sql_content = re.sub(r'--.*$', '', sql_content, flags=re.MULTILINE)

    # Collapse lines and standardize whitespace around punctuation.
    lines = [line.strip() for line in sql_content.splitlines() if line.strip()]
    sql_content = ' '.join(lines)
    sql_content = re.sub(r'\s*,\s*', ',', sql_content)
    sql_content = re.sub(r'\s*\(\s*', '(', sql_content)
    sql_content = re.sub(r'\s*\)\s*', ')', sql_content)
    sql_content = re.sub(r'\s+', ' ', sql_content).strip()

    # Split by semicolon, respecting single-quoted strings.
    statements: List[str] = []
    current_statement: List[str] = []
    in_quotes = False
    for char in sql_content:
        current_statement.append(char)
        if char == "'":
            in_quotes = not in_quotes
        elif char == ';' and not in_quotes:
            statements.append("".join(current_statement).strip())
            current_statement = []

    if current_statement:
        statements.append("".join(current_statement).strip())

    return statements
def parse_value(value_str: str) -> Any:
    """Parse a single SQL literal into a Python/JSON value.

    Handles, in order: quoted strings (with an optional PostgreSQL
    ``::type`` cast), NULL, TRUE/FALSE, integers, floats. Anything
    unrecognized is returned unchanged as raw text.

    Args:
        value_str: One SQL literal, e.g. ``'abc'::uuid`` or ``42``.

    Returns:
        str, None, bool, int, float — or the raw text as a fallback.
    """
    value_str = value_str.strip()

    # Quoted string with optional PostgreSQL cast: 'value'::type
    cast_match = re.match(r"^'(.*?)'(?:::[\w\s]+)?$", value_str)
    if cast_match:
        # BUG FIX: un-escape SQL doubled single quotes ('' -> ').
        return cast_match.group(1).replace("''", "'")

    # Simple quoted string (kept for parity with odd inputs the cast
    # regex might reject).
    if value_str.startswith("'") and value_str.endswith("'"):
        return value_str[1:-1].replace("''", "'")

    # NULL -> JSON null
    if value_str.lower() == 'null':
        return None

    # BUG FIX: boolean literals (common in PostgreSQL dumps) previously
    # fell through and were emitted as the strings "true"/"false".
    if value_str.lower() == 'true':
        return True
    if value_str.lower() == 'false':
        return False

    # Numbers: int first, then float; otherwise return the raw token.
    try:
        return int(value_str)
    except ValueError:
        try:
            return float(value_str)
        except ValueError:
            return value_str
def parse_insert_statement(statement: str) -> Optional[Tuple[str, List[str], List[List[Any]]]]:
    """Parse one ``INSERT INTO`` statement.

    Args:
        statement: A single normalized SQL statement.

    Returns:
        ``(table_name, columns, rows)`` on success, or ``None`` when the
        statement is not an INSERT or yields no usable rows.

    NOTE(review): value tuples are located with a non-greedy
    parenthesis regex, so parenthesized expressions inside values
    (e.g. function calls) will break the split — see Limitations.
    """
    if not statement.lower().startswith('insert into'):
        return None

    # Extract the (optionally double-quoted) table name.
    table_match = re.search(r'insert\s+into\s+("?)([\w\.]+)\1', statement, re.IGNORECASE)
    if not table_match:
        return None
    table_name = table_match.group(2)

    # Extract the column list between the parens preceding VALUES.
    columns_match = re.search(r'\((.*?)\)\s*values', statement, re.IGNORECASE | re.DOTALL)
    if not columns_match:
        return None
    columns = [col.strip().strip('"') for col in columns_match.group(1).split(',')]

    # Extract each "(...)" value tuple after VALUES.
    values_part = statement[columns_match.end():].strip()
    value_tuples_str = re.findall(r'\((.*?)\)', values_part)

    parsed_values = []
    for tuple_str in value_tuples_str:
        values_in_tuple = []
        current_val = []
        in_quotes = False
        # Split on commas that are outside single-quoted strings.
        for char in tuple_str:
            if char == "'":
                in_quotes = not in_quotes
                current_val.append(char)
            elif char == ',' and not in_quotes:
                values_in_tuple.append("".join(current_val).strip())
                current_val = []
            else:
                current_val.append(char)
        values_in_tuple.append("".join(current_val).strip())

        # Skip malformed rows whose arity doesn't match the column list.
        if len(values_in_tuple) == len(columns):
            parsed_values.append([parse_value(v) for v in values_in_tuple])

    # BUG FIX: the original `return a, b, c if cond else None` bound the
    # conditional only to the last element (returning (a, b, None) on
    # failure, which later crashed len(values) in the caller).
    # Parenthesize so the whole tuple — or None — is returned.
    return (table_name, columns, parsed_values) if columns and parsed_values else None
def create_seed_format(table_name: str, columns: List[str], values_list: List[List[Any]]) -> Dict[str, Any]:
    """Create the Directus seed structure for one collection.

    Args:
        table_name: Target collection name.
        columns: Column names, in the same order as each row's values.
        values_list: Parsed rows (one list of values per row).

    Returns:
        Dict with ``collection``, ``meta`` and ``data`` keys; every data
        item carries a generated ``_sync_id``.
    """
    # Prefer 'id', then 'uuid', else fall back to the first column as PK.
    pk_column = 'id' if 'id' in columns else ('uuid' if 'uuid' in columns else columns[0])
    pk_index = columns.index(pk_column)

    items = []
    for row_values in values_list:
        item = dict(zip(columns, row_values))
        pk_value = row_values[pk_index]
        # BUG FIX: compare against None rather than truthiness, so valid
        # falsy PKs (0, empty string) still get a stable "<table>-<pk>"
        # sync id instead of the positional fallback.
        if pk_value is not None:
            item['_sync_id'] = f"{table_name}-{pk_value}"
        else:
            item['_sync_id'] = f"{table_name}-item-{len(items)}"
        items.append(item)

    return {
        "collection": table_name,
        "meta": {
            "insert_order": 1,
            "create": True,
            "update": True,
            "delete": True,
            "preserve_ids": True,
            "ignore_on_update": []
        },
        "data": items
    }
def process_sql_file(input_file: Path, output_file: Path) -> None:
    """Convert every INSERT in *input_file* into seed JSON at *output_file*."""
    sql_content = input_file.read_text(encoding='utf-8')

    all_seeds = []
    for statement in normalize_sql_content(sql_content):
        parsed = parse_insert_statement(statement)
        if parsed is None:
            continue
        table_name, columns, values = parsed
        print(f"Processing {table_name}: {len(values)} rows")
        all_seeds.append(create_seed_format(table_name, columns, values))

    if not all_seeds:
        print("Error: No valid INSERT statements found")
        return

    # Heuristic ordering: translation tables reference their parents,
    # so push them into a later insert pass.
    if len(all_seeds) > 1:
        for seed in all_seeds:
            if 'translations' in seed['collection'].lower():
                seed['meta']['insert_order'] = 2

    # A single table is emitted as a bare object, multiple as an array.
    output_data = all_seeds[0] if len(all_seeds) == 1 else all_seeds

    output_file.write_text(
        json.dumps(output_data, indent=2, ensure_ascii=False),
        encoding='utf-8',
    )
    print(f"✓ Converted {input_file} → {output_file}")
def main() -> None:
    """CLI entry point: parse the arguments and run the conversion."""
    arg_parser = argparse.ArgumentParser(
        description='Convert SQL INSERT to Directus seed')
    arg_parser.add_argument('-i', '--input', type=Path, required=True,
                            help='Input SQL file')
    arg_parser.add_argument('-o', '--output', type=Path, required=True,
                            help='Output JSON file')
    args = arg_parser.parse_args()

    if not args.input.exists():
        print(f"Error: {args.input} not found")
        return

    # Make sure the output directory exists before writing.
    args.output.parent.mkdir(parents=True, exist_ok=True)
    process_sql_file(args.input, args.output)
if __name__ == "__main__":
    main()

Limitations
- Basic regex parsing (not a full SQL parser)
- May struggle with heavily nested parentheses or complex escaped quotes
- Dependency ordering is heuristic (manual adjustment may be needed)
- Tested primarily with PostgreSQL dumps