Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Dockerfiles to qa citus packages #379

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions packaging_qa/Debian.bookworm.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# QA image for Citus packages on Debian bookworm.
#
# Installs PostgreSQL from apt.postgresql.org, then the Citus, hll and topn
# packages via the Citus community repository script, prepares a data
# directory under /var/lib/postgresql, and starts the server through
# docker-entrypoint.sh.
#
# Build args:
#   VERSION - Citus version; the pinned package version is VERSION followed
#             by ".citus-1".

# Use Debian as the base image
FROM debian:bookworm

# Set environment variables to avoid interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ARG VERSION=13.0.1
ENV CITUS_VERSION=${VERSION}.citus-1
ENV PG_MAJOR=17

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    gnupg \
    lsb-release \
    sudo

# Add PostgreSQL repository.
# The signing key is written straight to disk (not echoed into the build log)
# and the repository is fetched over HTTPS; ca-certificates is installed above.
RUN curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc \
        -o /etc/apt/trusted.gpg.d/postgresql.asc && \
    echo "deb https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list

# Install PostgreSQL $PG_MAJOR and required packages
# NOTE(review): the unversioned postgresql-contrib metapackage may pull in a
# different PostgreSQL major than $PG_MAJOR - confirm it is actually needed.
RUN apt-get update && apt-get install -y postgresql-$PG_MAJOR postgresql-contrib

# Install Citus repository and required extensions.
# curl -f fails on an HTTP error instead of piping an error page into bash.
# NOTE(review): the "citus-13.0" package name is hard-coded while VERSION is a
# build arg; overriding VERSION with another minor release will not match.
RUN curl -fsSL https://install.citusdata.com/community/deb.sh | bash && \
    apt-get update && apt-get install -y \
    postgresql-$PG_MAJOR-citus-13.0=$CITUS_VERSION \
    postgresql-$PG_MAJOR-hll=2.18.citus-1 \
    postgresql-$PG_MAJOR-topn=2.7.0.citus-1

# Ensure PostgreSQL data directory exists and has correct permissions
RUN mkdir -p /var/lib/postgresql/$PG_MAJOR/main && \
    chown -R postgres:postgres /var/lib/postgresql && \
    chmod -R 700 /var/lib/postgresql

# Switch to postgres user before running PostgreSQL
USER postgres

# Initialize the database only if it's not already initialized.
# An explicit if/else is used instead of "test && initdb || echo": with the
# chained form a failing initdb falls through to the echo and the build still
# succeeds. Here an initdb failure aborts the build.
# NOTE(review): the Debian postgresql-NN package presumably creates a "main"
# cluster at install time, which would be why PG_VERSION can already exist -
# confirm.
RUN if [ -f "/var/lib/postgresql/${PG_MAJOR}/main/PG_VERSION" ]; then \
        echo "Database already initialized"; \
    else \
        "/usr/lib/postgresql/${PG_MAJOR}/bin/initdb" -D "/var/lib/postgresql/${PG_MAJOR}/main"; \
    fi

# Fix pg_hba.conf to allow connections: the file is overwritten with trust
# auth for the local postgres user and md5 auth for everything else.
RUN printf '%s\n' \
        "local all postgres trust" \
        "local all all md5" \
        "host all all 127.0.0.1/32 md5" \
        "host all all ::1/128 md5" \
        > /var/lib/postgresql/$PG_MAJOR/main/pg_hba.conf

# Add Citus to PostgreSQL config
RUN echo "shared_preload_libraries = 'citus'" >> /var/lib/postgresql/$PG_MAJOR/main/postgresql.conf

# Create an entrypoint script to start PostgreSQL properly
# (root is needed to write to /usr/local/bin and change the file mode)
USER root
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh

# Switch back to postgres user
USER postgres

# Use custom entrypoint to start PostgreSQL and create the Citus extension
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

# Expose PostgreSQL port
# NOTE(review): listen_addresses is not set anywhere in this file and
# pg_hba.conf only admits local/loopback clients, so connections from outside
# the container will presumably be refused - confirm whether that is intended.
EXPOSE 5432
62 changes: 62 additions & 0 deletions packaging_qa/Debian.bullseye.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# QA image for Citus packages on Debian bullseye (package installation part).
#
# Installs PostgreSQL from apt.postgresql.org, then the Citus, hll and topn
# packages via the Citus community repository script, and prepares a data
# directory under /var/lib/postgresql owned by the postgres user.
#
# Build args:
#   VERSION - Citus version; the pinned package version is VERSION followed
#             by ".citus-1".

# Use Debian as the base image
FROM debian:bullseye

# Set environment variables to avoid interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ARG VERSION=13.0.1
ENV CITUS_VERSION=${VERSION}.citus-1
ENV PG_MAJOR=15

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    gnupg \
    lsb-release \
    sudo

# Add PostgreSQL repository.
# The signing key is written straight to disk (not echoed into the build log)
# and the repository is fetched over HTTPS; ca-certificates is installed above.
RUN curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc \
        -o /etc/apt/trusted.gpg.d/postgresql.asc && \
    echo "deb https://apt.postgresql.org/pub/repos/apt bullseye-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list

# Install PostgreSQL $PG_MAJOR and required packages
# NOTE(review): the unversioned postgresql-contrib metapackage may pull in a
# newer PostgreSQL major than $PG_MAJOR (15 here) - confirm it is needed.
RUN apt-get update && apt-get install -y postgresql-$PG_MAJOR postgresql-contrib

# Install Citus repository and required extensions.
# curl -f fails on an HTTP error instead of piping an error page into bash.
# NOTE(review): the "citus-13.0" package name is hard-coded while VERSION is a
# build arg; overriding VERSION with another minor release will not match.
RUN curl -fsSL https://install.citusdata.com/community/deb.sh | bash && \
    apt-get update && apt-get install -y \
    postgresql-$PG_MAJOR-citus-13.0=$CITUS_VERSION \
    postgresql-$PG_MAJOR-hll=2.18.citus-1 \
    postgresql-$PG_MAJOR-topn=2.7.0.citus-1

# Ensure PostgreSQL data directory exists and has correct permissions
RUN mkdir -p /var/lib/postgresql/$PG_MAJOR/main && \
    chown -R postgres:postgres /var/lib/postgresql && \
    chmod -R 700 /var/lib/postgresql

# Switch to postgres user before running PostgreSQL
USER postgres

# Initialize the database only if it's not already initialized
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would the database already be initialized at this point in the build?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make this more robust, we can check for pg_control in /var/lib/postgresql/${PG_MAJOR}/main/global/, which ensures that the database directory is actually valid before skipping initialization.

# Proposed replacement: run initdb only when global/pg_control is missing from
# the data directory; otherwise report that an existing database was found.
RUN if [ ! -f "/var/lib/postgresql/${PG_MAJOR}/main/global/pg_control" ]; then \
       echo "Initializing new PostgreSQL database..."; \
       /usr/lib/postgresql/${PG_MAJOR}/bin/initdb -D /var/lib/postgresql/${PG_MAJOR}/main; \
    else \
       echo "Existing PostgreSQL database detected, skipping initdb."; \
    fi

Let me know if you think this approach works, and I can push an update!

# Run initdb unless the data directory already contains PG_VERSION.
# An explicit if/else is used instead of "test && initdb || echo": with the
# chained form a failing initdb falls through to the echo and the build still
# succeeds. Here an initdb failure aborts the build.
# NOTE(review): the Debian postgresql-NN package presumably creates a "main"
# cluster at install time, which would be why PG_VERSION can already exist -
# confirm.
RUN if [ -f "/var/lib/postgresql/${PG_MAJOR}/main/PG_VERSION" ]; then \
        echo "Database already initialized"; \
    else \
        "/usr/lib/postgresql/${PG_MAJOR}/bin/initdb" -D "/var/lib/postgresql/${PG_MAJOR}/main"; \
    fi

# Fix pg_hba.conf to allow connections: the file is overwritten with trust
# auth for the local postgres user and md5 auth for everything else.
RUN printf '%s\n' \
        "local all postgres trust" \
        "local all all md5" \
        "host all all 127.0.0.1/32 md5" \
        "host all all ::1/128 md5" \
        > /var/lib/postgresql/$PG_MAJOR/main/pg_hba.conf

# Add Citus to PostgreSQL config
RUN echo "shared_preload_libraries = 'citus'" >> /var/lib/postgresql/$PG_MAJOR/main/postgresql.conf

# Create an entrypoint script to start PostgreSQL properly
# (root is needed to write to /usr/local/bin and change the file mode)
USER root
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh

# Switch back to postgres user
USER postgres

# Use custom entrypoint to start PostgreSQL and create the Citus extension
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

# Expose PostgreSQL port
# NOTE(review): listen_addresses is not set by any instruction here and
# pg_hba.conf only admits local/loopback clients, so connections from outside
# the container will presumably be refused - confirm whether that is intended.
EXPOSE 5432
66 changes: 66 additions & 0 deletions packaging_qa/Ubuntu.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# QA image for Citus packages on Ubuntu.
#
# Installs PostgreSQL from apt.postgresql.org (suite derived from
# lsb_release), then the Citus, hll and topn packages via the Citus community
# repository script, prepares a data directory under /var/lib/postgresql, and
# starts the server through docker-entrypoint.sh.
#
# Build args:
#   VERSION - Citus version; the pinned package version is VERSION followed
#             by ".citus-1".

# Use Ubuntu 22.04 Jammy as the base image
# FROM ubuntu:22.04

# Use Ubuntu 20.04 Focal as the base image
FROM ubuntu:20.04


# Set environment variables to avoid interactive prompts
ENV DEBIAN_FRONTEND=noninteractive
ARG VERSION=13.0.1
ENV CITUS_VERSION=${VERSION}.citus-1
ENV PG_MAJOR=17

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    gnupg \
    lsb-release \
    sudo

# Add PostgreSQL repository.
# The signing key is written straight to disk (not echoed into the build log)
# and the repository is fetched over HTTPS; ca-certificates is installed above.
# The suite name comes from lsb_release so switching the FROM line is enough.
RUN curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc \
        -o /etc/apt/trusted.gpg.d/postgresql.asc && \
    echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list

# Install PostgreSQL $PG_MAJOR and required packages
# NOTE(review): the unversioned postgresql-contrib metapackage may pull in a
# different PostgreSQL major than $PG_MAJOR - confirm it is actually needed.
RUN apt-get update && apt-get install -y postgresql-$PG_MAJOR postgresql-contrib

# Install Citus repository and required extensions.
# curl -f fails on an HTTP error instead of piping an error page into bash.
# NOTE(review): the "citus-13.0" package name is hard-coded while VERSION is a
# build arg; overriding VERSION with another minor release will not match.
RUN curl -fsSL https://install.citusdata.com/community/deb.sh | bash && \
    apt-get update && apt-get install -y \
    postgresql-$PG_MAJOR-citus-13.0=$CITUS_VERSION \
    postgresql-$PG_MAJOR-hll=2.18.citus-1 \
    postgresql-$PG_MAJOR-topn=2.7.0.citus-1

# Ensure PostgreSQL data directory exists and has correct permissions
RUN mkdir -p /var/lib/postgresql/$PG_MAJOR/main && \
    chown -R postgres:postgres /var/lib/postgresql && \
    chmod -R 700 /var/lib/postgresql

# Switch to postgres user before running PostgreSQL
USER postgres

# Initialize the database only if it's not already initialized.
# An explicit if/else is used instead of "test && initdb || echo": with the
# chained form a failing initdb falls through to the echo and the build still
# succeeds. Here an initdb failure aborts the build.
# NOTE(review): the Ubuntu postgresql-NN package presumably creates a "main"
# cluster at install time, which would be why PG_VERSION can already exist -
# confirm.
RUN if [ -f "/var/lib/postgresql/${PG_MAJOR}/main/PG_VERSION" ]; then \
        echo "Database already initialized"; \
    else \
        "/usr/lib/postgresql/${PG_MAJOR}/bin/initdb" -D "/var/lib/postgresql/${PG_MAJOR}/main"; \
    fi

# Fix pg_hba.conf to allow connections: the file is overwritten with trust
# auth for the local postgres user and md5 auth for everything else.
RUN printf '%s\n' \
        "local all postgres trust" \
        "local all all md5" \
        "host all all 127.0.0.1/32 md5" \
        "host all all ::1/128 md5" \
        > /var/lib/postgresql/$PG_MAJOR/main/pg_hba.conf

# Add Citus to PostgreSQL config
RUN echo "shared_preload_libraries = 'citus'" >> /var/lib/postgresql/$PG_MAJOR/main/postgresql.conf

# Create an entrypoint script to start PostgreSQL properly
# (root is needed to write to /usr/local/bin and change the file mode)
USER root
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh

# Switch back to postgres user
USER postgres

# Use custom entrypoint to start PostgreSQL and create the Citus extension
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

# Expose PostgreSQL port
# NOTE(review): listen_addresses is not set anywhere in this file and
# pg_hba.conf only admits local/loopback clients, so connections from outside
# the container will presumably be refused - confirm whether that is intended.
EXPOSE 5432
14 changes: 14 additions & 0 deletions packaging_qa/build_run
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Manual build-and-run commands for the packaging QA images.
# NOTE(review): this reads like a list of commands to pick from rather than a
# script to run top to bottom (it mixes interactive and following commands) -
# confirm intended usage.

# Build an image. Both commands use the same "citus-test" tag, so whichever
# runs last is the image that the tag refers to.
docker build -t citus-test -f Ubuntu.dockerfile .
docker build -t citus-test -f Debian.bookworm.dockerfile .

# Remove any previous container with this name, start a fresh one in the
# background, then open an interactive psql session as the postgres user.
docker rm -f citus-container
docker run -d --name citus-container citus-test
docker exec -it citus-container psql -U postgres

# Feed test.sql from the host into psql inside the container via stdin.
docker exec -i citus-container psql -U postgres < test.sql


# Follow the container's log output (-f keeps streaming until interrupted).
docker logs -f citus-container

# Show which OS release the running container is based on.
docker exec -it citus-container cat /etc/os-release

35 changes: 35 additions & 0 deletions packaging_qa/docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Container entrypoint for the Citus packaging QA images.
#
# Starts PostgreSQL temporarily in the background, creates the citus
# extension via psql, stops the temporary server, and finally replaces this
# process with a foreground postgres server.
#
# Environment:
#   PG_MAJOR          PostgreSQL major version used to build the binary and
#                     data directory paths (default: 16).
#   PG_READY_TIMEOUT  Seconds to wait for the temporary server to accept
#                     connections before giving up (default: 60).
set -euo pipefail

# Set PG_MAJOR to 16 by default if not provided
PG_MAJOR=${PG_MAJOR:-16}
PG_READY_TIMEOUT=${PG_READY_TIMEOUT:-60}

# Define PostgreSQL paths dynamically
readonly PG_CTL="/usr/lib/postgresql/${PG_MAJOR}/bin/pg_ctl"
readonly PG_DATA_DIR="/var/lib/postgresql/${PG_MAJOR}/main"
readonly PG_LOG="/var/lib/postgresql/logfile"
readonly PG_BIN="/usr/lib/postgresql/${PG_MAJOR}/bin/postgres"

# Print the server log to stderr so a failure is visible in `docker logs`.
# Best effort: the log file may not exist yet.
dump_server_log() {
  if [[ -r "$PG_LOG" ]]; then
    cat -- "$PG_LOG" >&2
  fi
}

# Ensure PostgreSQL data directory exists
if [[ ! -d "$PG_DATA_DIR" ]]; then
  echo "Error: PostgreSQL data directory $PG_DATA_DIR does not exist!" >&2
  exit 1
fi

# Start PostgreSQL in the background
if ! "$PG_CTL" -D "$PG_DATA_DIR" -l "$PG_LOG" start; then
  echo "Error: PostgreSQL failed to start; server log follows:" >&2
  dump_server_log
  exit 1
fi

# Wait for PostgreSQL to be ready, but give up after PG_READY_TIMEOUT seconds
# instead of looping forever when the server never comes up.
waited=0
until pg_isready -q -d postgres; do
  if (( waited >= PG_READY_TIMEOUT )); then
    echo "Error: PostgreSQL not ready after ${PG_READY_TIMEOUT}s; server log follows:" >&2
    dump_server_log
    exit 1
  fi
  sleep 1
  waited=$((waited + 1))
done

# Ensure Citus extension is installed
psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS citus;"
psql -U postgres -c "SELECT * FROM pg_extension WHERE extname = 'citus';"

# Stop PostgreSQL before running the container foreground process
"$PG_CTL" -D "$PG_DATA_DIR" stop

# Start PostgreSQL in the foreground; exec makes postgres replace this shell
# so it receives the container's signals directly.
exec "$PG_BIN" -D "$PG_DATA_DIR"
43 changes: 43 additions & 0 deletions packaging_qa/test.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- Smoke test for a Citus installation, intended to be piped into psql.
-- Creates two distributed, co-located tables (events, devices), loads sample
-- rows, adds a foreign key between them, and runs a single-device query and
-- a join/aggregate query.

-- psql meta-command: list installed extensions
\dx

-- events is keyed by (device_id, event_id); device_id is the distribution
-- column used below
CREATE TABLE events (
device_id bigint,
event_id bigserial,
event_time timestamptz default now(),
data jsonb not null,
PRIMARY KEY (device_id, event_id)
);

-- distribute the events table across shards placed locally or on the worker nodes
SELECT create_distributed_table('events', 'device_id');

-- load 1,000,000 events spread over device ids 0..99, each carrying a random
-- "measurement" value
INSERT INTO events (device_id, data)
SELECT s % 100, ('{"measurement":'||random()||'}')::jsonb FROM generate_series(1,1000000) s;

-- get the last 3 events for device 1, routed to a single node
SELECT * FROM events WHERE device_id = 1 ORDER BY event_time DESC, event_id DESC LIMIT 3;


-- device metadata, one row per device_id
CREATE TABLE devices (
device_id bigint primary key,
device_name text,
device_type_id int
);
CREATE INDEX ON devices (device_type_id);

-- co-locate the devices table with the events table
SELECT create_distributed_table('devices', 'device_id', colocate_with := 'events');

-- insert device metadata (ids 0..99, all with device_type_id 55)
INSERT INTO devices (device_id, device_name, device_type_id)
SELECT s, 'device-'||s, 55 FROM generate_series(0, 99) s;

-- optionally: make sure the application can only insert events for a known device
ALTER TABLE events ADD CONSTRAINT device_id_fk
FOREIGN KEY (device_id) REFERENCES devices (device_id);

-- get the average measurement across all devices of type 55, parallelized across shards
SELECT avg((data->>'measurement')::double precision)
FROM events JOIN devices USING (device_id)
WHERE device_type_id = 55;

Loading