#!/bin/sh

set -eux

TMP_DATABASE_NAME=guix_data_service_small
DEFAULT_BACKUP_DIRECTORY="/var/lib/guix-data-service/dumps"
BACKUP_DIRECTORY="${GUIX_DATA_SERVICE_BACKUP_DIRECTORY:-$DEFAULT_BACKUP_DIRECTORY}"

if [ -z "${1:-}" ]; then
    DATE="$(date "+%Y-%m-%d")"
else
    DATE="$1"
fi

FULL_BACKUP_NAME="guix_data_service_full.dump"
SMALL_BACKUP_NAME="guix_data_service_small.dump"

renice 19 -p $$ || true
ionice -p $$ -c 3 || true

TMP_DATABASE=$(mktemp -d -t guix-data-service-database-XXXX)
URI=$(pg_tmp -w 0 -o "-S2000000" -d "$TMP_DATABASE")

function cleanup {
    pg_tmp stop -w 1 -d "$TMP_DATABASE"
}
trap cleanup EXIT

psql --no-psqlrc "$URI" --command="CREATE USER guix_data_service"
psql --no-psqlrc "$URI" --command="CREATE DATABASE $TMP_DATABASE_NAME WITH OWNER guix_data_service"

URI_FOR_DATABASE="${URI/test/$TMP_DATABASE_NAME}"

pg_restore --username=guix_data_service --jobs=8 --dbname="$URI_FOR_DATABASE" \
           --no-tablespaces \
           "$BACKUP_DIRECTORY/$DATE/$FULL_BACKUP_NAME"

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF

DELETE FROM git_branches
USING (
  SELECT MIN(datetime) AS recent_datetime
  FROM (
   SELECT datetime
   FROM git_branches AS recent_branches
   ORDER BY datetime DESC LIMIT 30
  ) AS data
) AS data2
WHERE datetime < recent_datetime;

EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF

CREATE UNLOGGED TABLE tmp_guix_revision_package_derivations AS
SELECT *
FROM guix_revision_package_derivations
WHERE revision_id IN (
  SELECT id
  FROM guix_revisions
  WHERE commit IN (
    SELECT commit FROM git_branches
  )
);

CREATE UNLOGGED TABLE tmp_guix_revision_system_test_derivations AS
SELECT *
FROM guix_revision_system_test_derivations
WHERE guix_revision_id IN (
  SELECT id
  FROM guix_revisions
  WHERE commit IN (
    SELECT commit FROM git_branches
  )
);

CREATE UNLOGGED TABLE tmp_channel_instances AS
SELECT *
FROM channel_instances
WHERE guix_revision_id IN (
  SELECT id
  FROM guix_revisions
  WHERE commit IN (
    SELECT commit FROM git_branches
  )
);

CREATE UNLOGGED TABLE tmp_package_derivations AS
SELECT * FROM package_derivations WHERE id IN (
  SELECT package_derivation_id FROM tmp_guix_revision_package_derivations
);

EXPLAIN ANALYZE CREATE UNLOGGED TABLE tmp_derivations AS
WITH RECURSIVE derivation_ids(id) AS (
    (
        SELECT tmp_package_derivations.derivation_id FROM tmp_package_derivations
      UNION
        SELECT tmp_guix_revision_system_test_derivations.derivation_id
        FROM tmp_guix_revision_system_test_derivations
      UNION
        SELECT tmp_channel_instances.derivation_id FROM tmp_channel_instances
    )
  UNION
    SELECT derivation_outputs.derivation_id
    FROM derivation_ids
    INNER JOIN derivation_inputs
    ON derivation_ids.id = derivation_inputs.derivation_id
    INNER JOIN derivation_outputs
    ON derivation_inputs.derivation_output_id = derivation_outputs.id
)
SELECT * FROM derivations WHERE id IN (SELECT id FROM derivation_ids);

CREATE UNLOGGED TABLE tmp_derivation_inputs AS
SELECT *
FROM derivation_inputs
WHERE derivation_id IN (SELECT id FROM tmp_derivations);

CREATE UNLOGGED TABLE tmp_derivation_outputs AS
SELECT *
FROM derivation_outputs
WHERE derivation_id IN (SELECT id FROM tmp_derivations);

CREATE UNLOGGED TABLE tmp_derivation_output_details AS
SELECT *
FROM derivation_output_details
WHERE id IN (SELECT derivation_output_details_id FROM tmp_derivation_outputs);

CREATE UNLOGGED TABLE tmp_derivation_sources AS
SELECT *
FROM derivation_sources
WHERE derivation_id IN (SELECT id FROM tmp_derivations);

CREATE UNLOGGED TABLE tmp_derivations_by_output_details_set AS
SELECT *
FROM derivations_by_output_details_set
WHERE derivation_id IN (SELECT id FROM tmp_derivations);

CREATE UNLOGGED TABLE tmp_derivation_output_details_sets AS
SELECT *
FROM derivation_output_details_sets
WHERE id IN (
  SELECT derivation_output_details_set_id
  FROM tmp_derivations_by_output_details_set
);

CREATE UNLOGGED TABLE tmp_builds AS
SELECT * FROM builds WHERE derivation_output_details_set_id IN (
  SELECT id FROM tmp_derivation_output_details_sets
);

CREATE UNLOGGED TABLE tmp_build_status AS
SELECT *
FROM build_status
WHERE build_id IN (SELECT id FROM tmp_builds);

TRUNCATE derivations,
         derivation_inputs,
         derivation_outputs,
         derivation_output_details,
         derivation_sources,
         derivations_by_output_details_set,
         derivation_output_details_sets,
         channel_instances,
         guix_revision_system_test_derivations,
         guix_revision_package_derivations,
         package_derivations,
         builds,
         build_status,
         latest_build_status;

EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF &

CREATE UNLOGGED TABLE tmp_derivation_source_files AS
SELECT *
FROM derivation_source_files
WHERE id IN (
  SELECT derivation_source_file_id
  FROM tmp_derivation_sources
);

CREATE UNLOGGED TABLE tmp_derivation_source_file_nars AS
SELECT *
FROM derivation_source_file_nars
WHERE derivation_source_file_id IN (
  SELECT id FROM tmp_derivation_source_files
);

TRUNCATE derivation_source_files, derivation_source_file_nars, derivation_sources;

EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF &

DELETE FROM guix_revision_lint_warnings WHERE guix_revision_id NOT IN (
  SELECT id FROM guix_revisions WHERE commit IN (SELECT commit FROM git_branches)
);
EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF &

DELETE FROM guix_revision_lint_checkers WHERE guix_revision_id NOT IN (
  SELECT id FROM guix_revisions WHERE commit IN (SELECT commit FROM git_branches)
);
EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF &

DELETE FROM guix_revision_channel_news_entries WHERE guix_revision_id NOT IN (
  SELECT id FROM guix_revisions WHERE commit IN (SELECT commit FROM git_branches)
);
EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF &

TRUNCATE package_derivations_by_guix_revision_range;

INSERT INTO package_derivations_by_guix_revision_range
SELECT DISTINCT
       git_branches.git_repository_id,
       git_branches.name AS branch_name,
       packages.name AS package_name,
       packages.version AS package_version,
       revision_packages.derivation_id AS derivation_id,
       revision_packages.system AS system,
       revision_packages.target AS target,
       first_value(guix_revisions.id)
         OVER package_version AS first_guix_revision_id,
       last_value(guix_revisions.id)
         OVER package_version AS last_guix_revision_id
FROM packages
INNER JOIN (
  SELECT package_derivations.package_id,
         package_derivations.derivation_id,
         package_derivations.system,
         package_derivations.target,
         guix_revision_package_derivations.revision_id
  FROM package_derivations
  INNER JOIN guix_revision_package_derivations
    ON package_derivations.id = guix_revision_package_derivations.package_derivation_id
) AS revision_packages ON packages.id = revision_packages.package_id
INNER JOIN guix_revisions
  ON revision_packages.revision_id = guix_revisions.id
INNER JOIN git_commits
  ON guix_revisions.commit = git_commits.commit
INNER JOIN git_branches
  ON git_branches.id = git_commits.git_branch_id
WINDOW package_version AS (
  PARTITION BY git_branches.git_repository_id, git_branches.name,
               packages.name, packages.version, revision_packages.derivation_id
  ORDER BY git_commits.datetime
  RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ORDER BY packages.name, packages.version;

EOF

wait

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF &

EXPLAIN ANALYZE DELETE FROM guix_revisions WHERE commit NOT IN (SELECT commit FROM git_branches);
EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF &
INSERT INTO derivations
  SELECT * FROM tmp_derivations;

EOF

wait

# Don't do this bit in the background, as it's error prone
psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF

INSERT INTO package_derivations OVERRIDING SYSTEM VALUE
  SELECT * FROM tmp_package_derivations;

INSERT INTO guix_revision_package_derivations
  SELECT * FROM tmp_guix_revision_package_derivations;

INSERT INTO guix_revision_system_test_derivations
  SELECT * FROM tmp_guix_revision_system_test_derivations;

INSERT INTO channel_instances
  SELECT * FROM tmp_channel_instances;

INSERT INTO derivation_output_details_sets OVERRIDING SYSTEM VALUE
  SELECT * FROM tmp_derivation_output_details_sets;

INSERT INTO derivations_by_output_details_set
  SELECT * FROM tmp_derivations_by_output_details_set;

INSERT INTO builds OVERRIDING SYSTEM VALUE
  SELECT * FROM tmp_builds;

INSERT INTO build_status OVERRIDING SYSTEM VALUE
  SELECT * FROM tmp_build_status;

INSERT INTO derivation_output_details OVERRIDING SYSTEM VALUE
  SELECT * FROM tmp_derivation_output_details;

INSERT INTO derivation_outputs OVERRIDING SYSTEM VALUE
  SELECT * FROM tmp_derivation_outputs;

INSERT INTO derivation_inputs
  SELECT * FROM tmp_derivation_inputs;

INSERT INTO derivation_source_files OVERRIDING SYSTEM VALUE
  SELECT * FROM tmp_derivation_source_files;

INSERT INTO derivation_sources
  SELECT * FROM tmp_derivation_sources;

INSERT INTO derivation_source_file_nars
  SELECT * FROM tmp_derivation_source_file_nars;

INSERT INTO latest_build_status
SELECT DISTINCT build_id,
                first_value(timestamp) OVER rows_for_build AS timestamp,
                first_value(status) OVER rows_for_build AS status
FROM build_status
WINDOW rows_for_build AS (
  PARTITION BY build_id
  ORDER BY
    CASE WHEN status = 'scheduled' THEN -2
         WHEN status = 'started' THEN -1
         ELSE 0
    END DESC,
    timestamp DESC
    RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
);

EOF

psql -v ON_ERROR_STOP=1 --echo-queries --no-psqlrc "$URI_FOR_DATABASE" -U guix_data_service <<EOF

DROP TABLE tmp_derivations;
DROP TABLE tmp_package_derivations;
DROP TABLE tmp_guix_revision_package_derivations;
DROP TABLE tmp_builds;
DROP TABLE tmp_build_status;
DROP TABLE tmp_derivation_output_details;
DROP TABLE tmp_derivation_outputs;
DROP TABLE tmp_derivation_inputs;
DROP TABLE tmp_derivation_sources;
DROP TABLE tmp_derivation_source_files;
DROP TABLE tmp_derivation_source_file_nars;

EOF

TEMPORARY_FILE_NAME="${TMPDIR:-/tmp}/guix_data_service_small-$DATE.dump.tmp"

"${PG_DUMP:-pg_dump}" --username=guix_data_service \
        --format=custom --compress=9 --serializable-deferrable \
        --no-comments \
        --username=guix_data_service \
        --file="$TEMPORARY_FILE_NAME" \
        "$URI_FOR_DATABASE"

mv "$TEMPORARY_FILE_NAME" \
   "$BACKUP_DIRECTORY/$DATE/$SMALL_BACKUP_NAME"