Skip to content

Commit 4f087fa

Browse files
committed
Thin db export revert
1 parent e992870 commit 4f087fa

File tree

1 file changed

+3
-141
lines changed

1 file changed

+3
-141
lines changed

commands/web/thin-db-export

Lines changed: 3 additions & 141 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,8 @@ DB_NAME=""
45 45

46 46
# Patterns to empty on the copied DB (only applied to tables that exist)
47 47
TRUNCATE_PATTERNS=(
48+
"^node" # nodes and revisions/fields
49+
"^media" # media entities
48 50
"^taxonomy" # taxonomy terms and indexes
49 51
"^comment" # comments
50 52
"^path_alias" # path aliases
@@ -64,12 +66,6 @@ TRUNCATE_PATTERNS=(
64 66
"^history$" # content read markers
65 67
)
66 68

67-
# Patterns for selective cleanup (keep 1 of each type)
68-
SELECTIVE_CLEANUP_PATTERNS=(
69-
"^node" # nodes and revisions/fields
70-
"^media" # media entities
71-
)
72-
73 69
# --------------------------
74 70
# Create copy database
75 71
# --------------------------
@@ -126,7 +122,7 @@ echo "✅ Database copy created."
126 122
# --------------------------
127 123
# Thin the copied DB
128 124
# --------------------------
129-
echo "🗑️ Removing content and rebuildable data from ${DB_NAME} (keeping 1 node of each content type and 1 media of each type)..."
125+
echo "🗑️ Removing content and rebuildable data from ${DB_NAME}..."
130 126

131 127
# Build and execute truncate statements in batches for better performance
132 128
echo "Building truncate statements..."
@@ -154,140 +150,6 @@ echo "Executing batch truncate..."
154 150
drush sql:cli $DRUSH_ARGS < "$TRUNCATE_SQL" || true
155 151
rm -f "$TRUNCATE_SQL"
156 152

157-
# Selective cleanup: keep 1 of each content type/bundle
158-
for pattern in "${SELECTIVE_CLEANUP_PATTERNS[@]}"; do
159-
echo "Selective cleanup for tables matching: ${pattern}"
160-
161-
if [[ "$pattern" == "^node" ]]; then
162-
echo "Keeping 1 node of each content type..."
163-
164-
# Create batch SQL file for node cleanup
165-
NODE_CLEANUP_SQL="/tmp/node_cleanup.sql"
166-
echo "SET FOREIGN_KEY_CHECKS=0;" > "$NODE_CLEANUP_SQL"
167-
168-
# Get nodes to keep (one per type) in a single query
169-
NODES_TO_KEEP=$(drush sql:query $DRUSH_ARGS "
170-
SELECT nfd1.type, MIN(nfd1.nid) as keep_nid
171-
FROM node_field_data nfd1
172-
GROUP BY nfd1.type;
173-
" 2>/dev/null | grep -v '^type' || true)
174-
175-
if [ -n "$NODES_TO_KEEP" ]; then
176-
# Build list of NIDs to keep for IN clause
177-
KEEP_NIDS=""
178-
while IFS=$'\t' read -r content_type keep_nid; do
179-
[ -z "$content_type" ] || [ -z "$keep_nid" ] && continue
180-
echo "Keeping node ID ${keep_nid} for type ${content_type}"
181-
if [ -z "$KEEP_NIDS" ]; then
182-
KEEP_NIDS="$keep_nid"
183-
else
184-
KEEP_NIDS="$KEEP_NIDS,$keep_nid"
185-
fi
186-
done <<< "$NODES_TO_KEEP"
187-
188-
if [ -n "$KEEP_NIDS" ]; then
189-
# Delete nodes not in keep list - batch operation
190-
echo "DELETE FROM node WHERE nid NOT IN ($KEEP_NIDS);" >> "$NODE_CLEANUP_SQL"
191-
echo "DELETE FROM node_field_data WHERE nid NOT IN ($KEEP_NIDS);" >> "$NODE_CLEANUP_SQL"
192-
echo "DELETE FROM node_field_revision WHERE nid NOT IN ($KEEP_NIDS);" >> "$NODE_CLEANUP_SQL"
193-
194-
# Get all node field tables once
195-
NODE_FIELD_TABLES=$(drush sql:query $DRUSH_ARGS "
196-
SELECT TABLE_NAME FROM information_schema.TABLES
197-
WHERE TABLE_SCHEMA='${DB_NAME}' AND TABLE_NAME LIKE 'node\_\_field\_%';
198-
" 2>/dev/null || true)
199-
200-
# Add field table cleanup to batch
201-
for table in $NODE_FIELD_TABLES; do
202-
[ -z "$table" ] && continue
203-
echo "DELETE FROM \`${table}\` WHERE entity_id NOT IN ($KEEP_NIDS);" >> "$NODE_CLEANUP_SQL"
204-
done
205-
206-
# Get all node revision field tables once
207-
NODE_REVISION_TABLES=$(drush sql:query $DRUSH_ARGS "
208-
SELECT TABLE_NAME FROM information_schema.TABLES
209-
WHERE TABLE_SCHEMA='${DB_NAME}' AND TABLE_NAME LIKE 'node\_revision\_\_field\_%';
210-
" 2>/dev/null || true)
211-
212-
# Add revision field table cleanup to batch
213-
for table in $NODE_REVISION_TABLES; do
214-
[ -z "$table" ] && continue
215-
echo "DELETE FROM \`${table}\` WHERE entity_id NOT IN ($KEEP_NIDS);" >> "$NODE_CLEANUP_SQL"
216-
done
217-
fi
218-
fi
219-
220-
echo "SET FOREIGN_KEY_CHECKS=1;" >> "$NODE_CLEANUP_SQL"
221-
echo "Executing batch node cleanup..."
222-
drush sql:cli $DRUSH_ARGS < "$NODE_CLEANUP_SQL" || true
223-
rm -f "$NODE_CLEANUP_SQL"
224-
225-
elif [[ "$pattern" == "^media" ]]; then
226-
echo "Keeping 1 media of each media type..."
227-
228-
# Create batch SQL file for media cleanup
229-
MEDIA_CLEANUP_SQL="/tmp/media_cleanup.sql"
230-
echo "SET FOREIGN_KEY_CHECKS=0;" > "$MEDIA_CLEANUP_SQL"
231-
232-
# Get media to keep (one per type) in a single query
233-
MEDIA_TO_KEEP=$(drush sql:query $DRUSH_ARGS "
234-
SELECT mfd1.bundle, MIN(mfd1.mid) as keep_mid
235-
FROM media_field_data mfd1
236-
GROUP BY mfd1.bundle;
237-
" 2>/dev/null | grep -v '^bundle' || true)
238-
239-
if [ -n "$MEDIA_TO_KEEP" ]; then
240-
# Build list of MIDs to keep for IN clause
241-
KEEP_MIDS=""
242-
while IFS=$'\t' read -r media_type keep_mid; do
243-
[ -z "$media_type" ] || [ -z "$keep_mid" ] && continue
244-
echo "Keeping media ID ${keep_mid} for type ${media_type}"
245-
if [ -z "$KEEP_MIDS" ]; then
246-
KEEP_MIDS="$keep_mid"
247-
else
248-
KEEP_MIDS="$KEEP_MIDS,$keep_mid"
249-
fi
250-
done <<< "$MEDIA_TO_KEEP"
251-
252-
if [ -n "$KEEP_MIDS" ]; then
253-
# Delete media not in keep list - batch operation
254-
echo "DELETE FROM media WHERE mid NOT IN ($KEEP_MIDS);" >> "$MEDIA_CLEANUP_SQL"
255-
echo "DELETE FROM media_field_data WHERE mid NOT IN ($KEEP_MIDS);" >> "$MEDIA_CLEANUP_SQL"
256-
echo "DELETE FROM media_field_revision WHERE mid NOT IN ($KEEP_MIDS);" >> "$MEDIA_CLEANUP_SQL"
257-
258-
# Get all media field tables once
259-
MEDIA_FIELD_TABLES=$(drush sql:query $DRUSH_ARGS "
260-
SELECT TABLE_NAME FROM information_schema.TABLES
261-
WHERE TABLE_SCHEMA='${DB_NAME}' AND TABLE_NAME LIKE 'media\_\_field\_%';
262-
" 2>/dev/null || true)
263-
264-
# Add field table cleanup to batch
265-
for table in $MEDIA_FIELD_TABLES; do
266-
[ -z "$table" ] && continue
267-
echo "DELETE FROM \`${table}\` WHERE entity_id NOT IN ($KEEP_MIDS);" >> "$MEDIA_CLEANUP_SQL"
268-
done
269-
270-
# Get all media revision field tables once
271-
MEDIA_REVISION_TABLES=$(drush sql:query $DRUSH_ARGS "
272-
SELECT TABLE_NAME FROM information_schema.TABLES
273-
WHERE TABLE_SCHEMA='${DB_NAME}' AND TABLE_NAME LIKE 'media\_revision\_\_field\_%';
274-
" 2>/dev/null || true)
275-
276-
# Add revision field table cleanup to batch
277-
for table in $MEDIA_REVISION_TABLES; do
278-
[ -z "$table" ] && continue
279-
echo "DELETE FROM \`${table}\` WHERE entity_id NOT IN ($KEEP_MIDS);" >> "$MEDIA_CLEANUP_SQL"
280-
done
281-
fi
282-
fi
283-
284-
echo "SET FOREIGN_KEY_CHECKS=1;" >> "$MEDIA_CLEANUP_SQL"
285-
echo "Executing batch media cleanup..."
286-
drush sql:cli $DRUSH_ARGS < "$MEDIA_CLEANUP_SQL" || true
287-
rm -f "$MEDIA_CLEANUP_SQL"
288-
fi
289-
done
290-
291 153
# Remove pathauto state keys if table exists (safe on all sites with key_value)
292 154
drush sql:query $DRUSH_ARGS "
293 155
DELETE FROM key_value

0 commit comments

Comments
 (0)