Skip to content

Commit

Permalink
zerocopy nonsense via cista::offset
Browse files Browse the repository at this point in the history
  • Loading branch information
falconindy committed Feb 28, 2025
1 parent a4f49e9 commit a318562
Show file tree
Hide file tree
Showing 34 changed files with 11,328 additions and 557 deletions.
8 changes: 1 addition & 7 deletions extra/bash-completion
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ _pkgfile() {
local shortopts=(-l -s -u -b -C -D -g -i -q -R -r -h -V -v -w -z -0)
local longopts=(--list --search --update --binaries --glob --ignorecase
--quiet --regex --help --version --verbose --raw --null)
local longoptsarg=(--compress --config --cachedir --repo)
local longoptsarg=(--config --cachedir --repo)
local allopts=("${shortopts[@]}" "${longopts[@]}" "${longoptsarg[@]}")

local compressopts=(none gzip bzip2 lz4 lzma lzop xz zstd)

# maybe mangle the arguments in case we're looking at a --longopt=$val
[[ $cur = '=' ]] && cur=
if [[ $prev = '=' ]] && __inarray "$prevprev" "${allopts[@]}"; then
Expand All @@ -42,10 +40,6 @@ _pkgfile() {
compopt -o filenames
return 0
;;
-z|--compress)
COMPREPLY=($(compgen -W '${compressopts[*]}' -- "$cur"))
return 0
;;
-R|--repo)
local repos=$(sed '/^\[\(.*\)\]$/!d;s//\1/g;/options/d' /etc/pacman.conf)
COMPREPLY=($(compgen -W '$repos' -- "$cur"))
Expand Down
9 changes: 0 additions & 9 deletions extra/zsh-completion
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,6 @@ _repos(){
compadd "$@" -a repositories
}

_compression(){
local -a cmd _comps
_comps=('none' 'gzip' 'bzip2' 'lzma' 'lzop' 'lz4' 'xz' 'zstd')
typeset -U _comps
compadd "$@" -a _comps
}

_action_none(){
_arguments \
"$_shortopts[@]" \
Expand All @@ -37,7 +30,6 @@ _longopts=(
'--verbose[output more]'
'--raw[disable output justification]'
'--null[null terminate output]'
'--compress=[compress downloaded repos]: :_compression'
'--config=[use an alternate pacman config]: :_files'
'--cachedir=[use an alternate cache directory]: :_files -/'
)
Expand All @@ -58,7 +50,6 @@ _shortopts=(
'*-v[output more]'
'*-w[disable output justification]'
'*-0[null terminate output]'
'*-z[compress downloaded repos]: :_compression'
'*-C[use an alternate pacman config]: :_files'
'*-D[use an alternate cache directory]: :_files -/'
)
Expand Down
9 changes: 0 additions & 9 deletions man/pkgfile.pod
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,6 @@ Avoid justification of 2 column output.

=over 4

=item B<-z>, B<--compress>[B<=>I<COMPRESSION>]

Repack downloaded repos with the optionally supplied compression method, which
may be one of B<none>, B<gzip>, B<bzip2>, B<lzop>, B<lz4>, B<lzma>, B<xz>, or
B<zstd>. If this flag is passed without a compression method, this defaults to
B<gzip>. If this flag is not passed at all, no compression will be applied.
Applying any form of compression will decrease performance, but may be
desirable for disk space concerns.

=back

=head1 GENERAL OPTIONS
Expand Down
9 changes: 0 additions & 9 deletions man/pkgfiled.pod
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,6 @@ On the initial sync, ignore timestamp comparisons and rewrite all found database

Exit after pkgfiled's initial sync, rather than continuing to listen for changes.

=item B<-z>, B<--compress>[B<=>I<COMPRESSION>]

Repack repos with the optionally supplied compression method, which may be one
of B<none>, B<gzip>, B<bzip2>, B<lzop>, B<lz4>, B<lzma>, B<xz>, or B<zstd>. If
this flag is passed without a compression method, this defaults to B<gzip>. If
this flag is not passed at all, no compression will be applied. Applying any
form of compression will decrease performance, but may be desirable for disk
space concerns.

=item B<-h>, B<--help>

Print help and exit.
Expand Down
28 changes: 16 additions & 12 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,7 @@ endif

configure_file(output: 'config.hh', configuration: conf)

add_project_arguments(
'-include',
'config.hh',
'-fno-exceptions',
language: 'cpp',
)
add_project_arguments('-include', 'config.hh', language: 'cpp')

libpcre = dependency('libpcre', version: '>= 8.30')
libarchive = dependency('libarchive', version: '>= 3.2.0')
Expand All @@ -36,6 +31,7 @@ pthreads = dependency('threads')
stdcppfs = cpp.find_library('stdc++fs')
gtest = dependency('gtest', required: false)
gmock = dependency('gmock', required: false)
cista = declare_dependency(include_directories: 'third_party/cista')

pod2man = find_program('pod2man')
pkgconfig = find_program('pkg-config')
Expand All @@ -48,15 +44,15 @@ libcommon = static_library(
src/archive_converter.cc src/archive_converter.hh
src/archive_io.cc src/archive_io.hh
src/archive_reader.cc src/archive_reader.hh
src/compress.cc src/compress.hh
src/db.cc src/db.hh
src/filter.cc src/filter.hh
src/repo.cc src/repo.hh
src/result.cc src/result.hh
src/update.cc src/update.hh
src/queue.hh
'''.split(),
),
dependencies: [libpcre, libarchive, libcurl, pthreads, stdcppfs],
dependencies: [cista, libpcre, libarchive, libcurl, pthreads, stdcppfs],
install: false,
)

Expand All @@ -75,14 +71,15 @@ executable(
'''.split(),
),
link_with: [libcommon],
dependencies: [cista],
install: true,
)

executable(
'pkgfiled',
'src/pkgfiled.cc',
link_with: [libcommon],
dependencies: [libsystemd, stdcppfs],
dependencies: [cista, libsystemd, stdcppfs],
install: true,
)

Expand Down Expand Up @@ -190,7 +187,7 @@ if gtest.found() and gmock.found()
'''.split(),
),
link_with: [libcommon, gtest_main],
dependencies: [gmock, gtest, libpcre],
dependencies: [cista, gmock, gtest, libpcre],
),
protocol: 'gtest',
)
Expand All @@ -203,15 +200,22 @@ py3 = python.find_installation('python3')

python_requirement = '>=3.7'
if py3.found() and py3.language_version().version_compare(python_requirement)
foreach input : ['tests/list.py', 'tests/search.py', 'tests/update.py']
integration_tests = [
'tests/database.py',
'tests/list.py',
'tests/search.py',
'tests/update.py'
]

foreach input : integration_tests
basename = input.split('/')[-1].split('.')[0]

test(
'pkgfile_@0@_integration_test'.format(basename),
py3,
args: [join_paths(meson.project_source_root(), input)],
env: ['PYTHONDONTWRITEBYTECODE=1'],
suite: 'integration'
suite: 'integration',
)
endforeach
else
Expand Down
131 changes: 48 additions & 83 deletions src/archive_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,139 +5,102 @@

#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>

namespace fs = std::filesystem;

namespace pkgfile {

// static
std::unique_ptr<ArchiveConverter> ArchiveConverter::New(
const std::string& reponame, int fd_in, std::string base_filename_out,
int compress, int repo_chunk_bytes) {
const char* error;

auto reader = ReadArchive::New(fd_in, &error);
if (reader == nullptr) {
std::cerr << std::format(
"error: failed to create archive reader for {}: {}\n", reponame, error);
return nullptr;
namespace {

std::pair<std::string_view, std::string_view> ParsePkgname(
std::string_view entryname) {
const auto pkgrel = entryname.rfind('-');
if (pkgrel == entryname.npos) {
return {};
}

auto writer = WriteArchive::New(
MakeArchiveChunkFilename(base_filename_out, 0, true), compress, &error);
if (writer == nullptr) {
std::cerr << std::format("error: failed to open file for writing: {}: {}\n",
base_filename_out, error);
return nullptr;
const auto pkgver = entryname.substr(0, pkgrel).rfind('-');
if (pkgver == entryname.npos) {
return {};
}

return std::make_unique<ArchiveConverter>(
reponame, std::move(base_filename_out), compress, repo_chunk_bytes,
std::move(reader), std::move(writer));
return {entryname.substr(0, pkgver), entryname.substr(pkgver + 1)};
}

} // namespace

// Builds the filename for a single archive chunk: `base_filename`, a dot, and
// `chunk_number` zero-padded to at least three digits. When `tempfile` is
// true, a trailing "~" is appended to the result.
std::string ArchiveConverter::MakeArchiveChunkFilename(
    const std::string& base_filename, int chunk_number, bool tempfile) {
  const char* const suffix = tempfile ? "~" : "";
  return std::format("{}.{:03d}{}", base_filename, chunk_number, suffix);
}

bool ArchiveConverter::NextArchiveChunk() {
if (!out_->Close()) {
return false;
}

const char* error;

auto writer = WriteArchive::New(
MakeArchiveChunkFilename(base_filename_out_, ++chunk_number_, true),
compress_, &error);
if (writer == nullptr) {
std::cerr << std::format("error: failed to open file for writing: {}: {}\n",
base_filename_out_, error);
return false;
}
std::string chunk_name =
MakeArchiveChunkFilename(base_filename_out_, chunk_number_++, true);
cista::buf mmap{cista::mmap{chunk_name.c_str()}};
cista::serialize<cista::mode::NONE>(mmap, data_);

out_ = std::move(writer);
data_.clear();

return true;
}

int ArchiveConverter::WriteCpioEntry(archive_entry* ae,
const fs::path& entryname) {
pkgfile::ArchiveReader reader(in_->read_archive());
int ArchiveConverter::WriteMetaEntry(const fs::path& entryname) {
ArchiveReader reader(in_->read_archive());
std::string_view line;

// discard the first line
reader.GetLine(&line);

std::string entry;
while (reader.GetLine(&line) == ARCHIVE_OK) {
// do the copy, with a slash prepended
std::format_to(std::back_inserter(entry), "/{}\n", line);
auto [name, version] = ParsePkgname(entryname.c_str());
if (name.empty()) {
return 0;
}

// adjust the entry size for removing the first line and adding slashes
archive_entry_set_size(ae, entry.size());

// inodes in cpio archives are dumb.
archive_entry_set_ino64(ae, 0);

// store the metadata as simply $pkgname-$pkgver-$pkgrel
archive_entry_update_pathname_utf8(ae, entryname.parent_path().c_str());

if (archive_write_header(out_->write_archive(), ae) != ARCHIVE_OK) {
std::cerr << std::format("error: failed to write entry header: {}/{}: {}\n",
reponame_, archive_entry_pathname(ae),
strerror(errno));
return -errno;
}
auto& pkg = data_[name];
pkg.version = version;

if (archive_write_data(out_->write_archive(), entry.c_str(), entry.size()) !=
static_cast<ssize_t>(entry.size())) {
std::cerr << std::format("error: failed to write entry: {}/{}: {}\n",
reponame_, archive_entry_pathname(ae),
strerror(errno));
return -errno;
int bytesize = 0;
while (reader.GetLine(&line) == ARCHIVE_OK) {
// do the copy, with a slash prepended
bytesize += pkg.files.emplace_back(std::format("/{}", line)).size();
}

return entry.size();
return bytesize;
}

bool ArchiveConverter::Finalize() {
in_->Close();

if (!out_->Close()) {
return false;
}
NextArchiveChunk();

struct stat st;
fstat(in_->fd(), &st);
in_->Stat(&st);
in_->Close();

const struct timeval times[] = {
{st.st_atim.tv_sec, 0},
{st.st_mtim.tv_sec, 0},
};

for (int i = 0; i <= chunk_number_; ++i) {
for (int i = 0; i < chunk_number_; ++i) {
std::string path = MakeArchiveChunkFilename(base_filename_out_, i, true);
std::string dest = MakeArchiveChunkFilename(base_filename_out_, i, false);

if (utimes(path.c_str(), times) < 0) {
std::cerr << std::format("warning: failed to set filetimes on {}: {}\n",
out_->path(), strerror(errno));
path, strerror(errno));
}

const fs::path dest = path.substr(0, path.size() - 1);

std::error_code ec;
if (fs::rename(path, dest, ec); ec.value() != 0) {
std::cerr << std::format("error: renaming tmpfile to {} failed: {}\n",
dest.string(), ec.message());
dest, ec.message());
}
}

for (int i = chunk_number_ + 1;; ++i) {
for (int i = chunk_number_;; ++i) {
std::string path = MakeArchiveChunkFilename(base_filename_out_, i, false);

std::error_code ec;
Expand All @@ -162,17 +125,19 @@ bool ArchiveConverter::RewriteArchive() {
chunk_size = 0;
}

fs::path entryname = archive_entry_pathname(ae);
const fs::path entryname = archive_entry_pathname(ae);

// ignore everything but the /files metadata
if (entryname.filename() == "files") {
const int bytes_written = WriteCpioEntry(ae, entryname);
if (bytes_written < 0) {
return false;
}
if (entryname.filename() != "files") {
continue;
}

chunk_size += bytes_written;
const int bytes_written = WriteMetaEntry(entryname.parent_path());
if (bytes_written < 0) {
return false;
}

chunk_size += bytes_written;
}

return Finalize();
Expand Down
Loading

0 comments on commit a318562

Please sign in to comment.