Phosphor
Qt6 / Wayland library suite for window-management tools
 
Loading...
Searching...
No Matches
MetadataPackScanStrategy.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2026 fuddlesworth
2// SPDX-License-Identifier: LGPL-2.1-or-later
3
4#pragma once
5
8
9#include <QtCore/QByteArray>
10#include <QtCore/QCryptographicHash>
11#include <QtCore/QDir>
12#include <QtCore/QFile>
13#include <QtCore/QFileInfo>
14#include <QtCore/QHash>
15#include <QtCore/QJsonDocument>
16#include <QtCore/QJsonObject>
17#include <QtCore/QJsonParseError>
18#include <QtCore/QList>
19#include <QtCore/QLoggingCategory>
20#include <QtCore/QString>
21#include <QtCore/QStringList>
22
23#include <algorithm>
24#include <functional>
25#include <optional>
26#include <type_traits>
27#include <utility>
28#include <vector>
29
30namespace PhosphorFsLoader {
31
185template<typename Payload>
187{
188 // The strategy hashes `id` into the per-rescan signature and uses it
189 // as the QHash key for first-wins layering — neither works without
190 // a public QString id member. ShaderInfo and AnimationShaderEffect
191 // both satisfy this; bespoke payloads must too.
192 //
193 // `decltype(... .id)` on an lvalue Payload yields `QString&`; strip
194 // the reference before comparing so the assertion fires only when
195 // the field's type itself isn't `QString`.
196 static_assert(std::is_same_v<std::remove_reference_t<decltype(std::declval<Payload&>().id)>, QString>,
197 "MetadataPackScanStrategy<Payload> requires Payload to expose a public 'QString id' member.");
198
199public:
216 static constexpr int kDefaultMaxEntries = 10'000;
217
230 using Parser =
231 std::function<std::optional<Payload>(const QString& subdirPath, const QJsonObject& root, bool isUser)>;
232
237 using PerEntryWatchPaths = std::function<QStringList(const Payload&)>;
238
243 using PerDirectoryWatchPaths = std::function<QStringList(const QString& searchPath)>;
244
248 using PerSubdirSkip = std::function<bool(const QString& subdirName)>;
249
258 using SignatureContrib = std::function<void(QCryptographicHash&, const Payload&)>;
259
265 using OnCommit = std::function<void()>;
266
279 : m_parser(std::move(parser))
280 , m_onCommit(std::move(onCommit))
281 {
282 // An empty `Parser` would silently skip every entry on every
283 // rescan and look like a configuration bug from the outside ("my
284 // packs all disappeared"). Both real consumers always pass a
285 // real parser; assert in debug builds so a future caller doesn't
286 // have to debug an empty registry from a default-constructed
287 // `std::function`. `OnCommit` is allowed to be empty (a consumer
288 // that doesn't care about content-changed signals).
289 Q_ASSERT_X(static_cast<bool>(m_parser), "MetadataPackScanStrategy",
290 "Parser must not be empty — every rescan would silently skip every entry");
291 }
292
293 ~MetadataPackScanStrategy() override = default;
294
297
300 {
301 m_perEntryWatch = std::move(fn);
302 }
303
306 {
307 m_perDirWatch = std::move(fn);
308 }
309
312 {
313 m_subdirSkip = std::move(fn);
314 }
315
318 {
319 m_sigContrib = std::move(fn);
320 }
321
343 void setUserPath(const QString& path)
344 {
345 m_userPath = path;
346 }
347
354 void setMaxEntries(int cap)
355 {
356 Q_ASSERT_X(cap >= 0, "MetadataPackScanStrategy::setMaxEntries", "cap must be non-negative");
357 m_maxEntries = std::max(0, cap);
358 }
359
372 void setLoggingCategory(const QLoggingCategory& cat)
373 {
374 m_loggingCat = &cat;
375 }
376
397 QStringList performScan(const QStringList& directoriesInScanOrder) override;
398
399 // ─── Accessors used by the consumer registry ────────────────────────────
400
402 const QHash<QString, Payload>& packsById() const
403 {
404 return m_packs;
405 }
406
410 QList<Payload> packs() const
411 {
412 QList<Payload> sorted = m_packs.values();
413 std::sort(sorted.begin(), sorted.end(), [](const Payload& a, const Payload& b) {
414 return a.id < b.id;
415 });
416 return sorted;
417 }
418
423 QStringList packIds() const
424 {
425 QStringList ids = m_packs.keys();
426 std::sort(ids.begin(), ids.end());
427 return ids;
428 }
429
432 Payload pack(const QString& id) const
433 {
434 return m_packs.value(id);
435 }
436
438 bool contains(const QString& id) const
439 {
440 return m_packs.contains(id);
441 }
442
444 int size() const
445 {
446 return m_packs.size();
447 }
448
449private:
450 Parser m_parser;
451 OnCommit m_onCommit;
452 PerEntryWatchPaths m_perEntryWatch;
453 PerDirectoryWatchPaths m_perDirWatch;
454 PerSubdirSkip m_subdirSkip;
455 SignatureContrib m_sigContrib;
456
457 QString m_userPath;
458 int m_maxEntries = kDefaultMaxEntries;
459 const QLoggingCategory* m_loggingCat = nullptr;
460
461 QHash<QString, Payload> m_packs;
462 QByteArray m_lastSignature;
463 bool m_signatureSeeded = false;
464};
465
466// ─── Template implementation ─────────────────────────────────────────────────
467
468namespace detail {
469
// Declaration of the default logging category accessor. `performScan`
// logs through `lcMetadataPackScan()` whenever no category has been
// supplied via `setLoggingCategory`. Only the declaration lives in this
// header; the matching definition is presumably in the exporting
// library's translation unit (NOTE(review): confirm).
472PHOSPHORFSLOADER_EXPORT Q_DECLARE_LOGGING_CATEGORY(lcMetadataPackScan)
473
474} // namespace detail
475
// Run one full rescan over `directoriesInScanOrder`.
//
// The search paths are walked from last to first, so the last element has
// the highest priority. For each immediate subdirectory (minus those
// rejected by the skip predicate) the function arms a watch on its
// `metadata.json`, loads and validates that file, and passes the JSON root
// to the parser. The first entry to claim an id wins; later ones are shadowed.
//
// Afterwards the stored pack set is replaced, a SHA-1 signature over the
// entries and the watched files is computed, and the commit hook (if set)
// is invoked when that signature differs from the previous scan's. On the
// very first scan the hook fires only if at least one entry was found.
//
// Returns the deduplicated, sorted list of file paths to watch: the
// `metadata.json` path of every examined subdirectory (whether or not it
// exists or parses) plus whatever the per-directory and per-entry watch
// callbacks returned.
476template<typename Payload>
477QStringList MetadataPackScanStrategy<Payload>::performScan(const QStringList& directoriesInScanOrder)
478{
479 // Per-entry filesystem fingerprint (`metadata.json` size+mtime +
480 // `isUser`) captured during the parse loop and mixed into the SHA-1
481 // signature below. Decoupled from `Payload` so the strategy can
482 // fingerprint facts the parser doesn't necessarily store, without
483 // requiring Payload to expose them.
484 struct EntryFingerprint
485 {
486 qint64 metadataSize = 0;
487 qint64 metadataMtimeMs = 0;
488 bool isUser = false;
489 };
490 // Single accumulator holding everything we need about an entry: id
491 // (for the signature key + final map key), fingerprint, payload.
492 // Sorted-by-id at the end of the parse pass so signature mixing and
493 // accessor population both consume a stable order without paying
494 // the cost of a parallel `QHash<QString, EntryFingerprint>`.
495 struct Entry
496 {
497 QString id;
498 EntryFingerprint fp;
499 Payload payload;
500 };
501 std::vector<Entry> entries;
502 // Tracks claimed ids for first-wins collision detection during the
503 // parse loop. Cheaper than a parallel `QHash<QString, Payload>` —
504 // one O(1) key insert per entry, no payload duplication.
505 QSet<QString> seenIds;
506
507 QStringList desiredWatches;
508
509 const QLoggingCategory& log = m_loggingCat ? *m_loggingCat : detail::lcMetadataPackScan();
510
511 // Resolve the user path's canonical form once per rescan. Empty
512 // (no user path configured, or the path doesn't exist yet) yields
513 // `false` for every dir — the iterated-dir compare below short-
514 // circuits when this is empty. Canonicalised once here, then
515 // compared per-search-path inside the OUTER loop only — the inner
516 // subdir loop never canonicalises, so cost stays O(searchPaths).
517 const QString canonicalUserPath = m_userPath.isEmpty() ? QString() : QFileInfo(m_userPath).canonicalFilePath();
518
519 bool capTripped = false;
520
521 // Reverse-iterate: highest-priority dirs first, first-wins on id
522 // collision. The base normalises caller input into the canonical
523 // `[lowest, ..., highest]` shape at registration time, so this
524 // reversal is the SSOT for the user-wins semantic the two
525 // consumer registries promise.
526 for (auto dirIt = directoriesInScanOrder.crbegin(); dirIt != directoriesInScanOrder.crend() && !capTripped;
527 ++dirIt) {
528 const QString& searchPath = *dirIt;
529 QDir dirObj(searchPath);
530 if (!dirObj.exists()) {
531 qCDebug(log) << "MetadataPackScanStrategy: search path does not exist:" << searchPath;
532 continue;
533 }
534
535 const bool isUserDir =
536 !canonicalUserPath.isEmpty() && QFileInfo(searchPath).canonicalFilePath() == canonicalUserPath;
537
538 // Per-search-path watch additions (top-level shared files —
539 // shader-pack registry watches `*.glsl` includes here).
540 if (m_perDirWatch) {
541 desiredWatches.append(m_perDirWatch(searchPath));
542 }
543
544 const QStringList subdirs = dirObj.entryList(QDir::Dirs | QDir::NoDotAndDotDot, QDir::Name);
545 for (const QString& subdir : subdirs) {
546 if (m_subdirSkip && m_subdirSkip(subdir)) {
547 continue;
548 }
549 // Per-rescan entry-count DoS guard. Reverse-iteration scans
550 // user-first / system-last, so cap-trip drops *system*
551 // overflow rather than user overrides.
 // The check runs before the subdir is inspected, so the cap
 // trips as soon as one more non-skipped subdir is seen after
 // the limit is reached — even if that subdir would have been
 // rejected. A trip also ends the outer loop, so remaining
 // search paths are neither scanned nor watched this rescan.
552 if (entries.size() >= static_cast<std::size_t>(m_maxEntries)) {
553 capTripped = true;
554 break;
555 }
556
557 const QString subdirPath = dirObj.filePath(subdir);
558 const QString metadataPath = subdirPath + QStringLiteral("/metadata.json");
559
560 // Always re-arm the metadata.json watch — even if parsing
561 // fails or the id collides. An edit that fixes a broken
562 // metadata.json is the most common way an entry transitions
563 // from invisible to visible; we want to wake on it.
564 desiredWatches.append(metadataPath);
565
566 const QFileInfo metadataInfo(metadataPath);
567 if (!metadataInfo.exists()) {
568 qCDebug(log) << "MetadataPackScanStrategy: skipping subdir, no metadata.json:" << subdirPath;
569 continue;
570 }
571
572 // DoS guard: untrusted same-user metadata.json must not
573 // stall the GUI thread with a 2 GB blob. Reuse
574 // `DirectoryLoader::kMaxFileBytes` as the SSOT — same cap
575 // the sister `JsonScanStrategy` enforces on every JSON
576 // file it loads.
577 if (metadataInfo.size() > DirectoryLoader::kMaxFileBytes) {
578 qCWarning(log) << "MetadataPackScanStrategy: skipping oversized metadata.json:" << metadataPath << "("
579 << metadataInfo.size() << "bytes, cap" << DirectoryLoader::kMaxFileBytes << ")";
580 continue;
581 }
582
583 QFile file(metadataPath);
584 if (!file.open(QIODevice::ReadOnly)) {
585 qCWarning(log) << "MetadataPackScanStrategy: failed to open metadata.json:" << metadataPath;
586 continue;
587 }
588
589 QJsonParseError parseError{};
590 const QJsonDocument doc = QJsonDocument::fromJson(file.readAll(), &parseError);
591 if (parseError.error != QJsonParseError::NoError) {
592 qCWarning(log) << "MetadataPackScanStrategy: parse error in" << metadataPath << ":"
593 << parseError.errorString();
594 continue;
595 }
596 if (!doc.isObject()) {
597 qCWarning(log) << "MetadataPackScanStrategy: non-object root in" << metadataPath;
598 continue;
599 }
600
601 // Schema-specific parse. The ctor asserts `m_parser` is
602 // non-null and there is no setter to clear it, so it is
603 // safe to invoke unconditionally.
604 std::optional<Payload> parsed = m_parser(subdirPath, doc.object(), isUserDir);
605 if (!parsed.has_value()) {
606 qCDebug(log) << "MetadataPackScanStrategy: parser declined" << metadataPath;
607 continue;
608 }
609 if (parsed->id.isEmpty()) {
610 qCWarning(log) << "MetadataPackScanStrategy: skipping" << metadataPath << ": empty 'id' field";
611 continue;
612 }
613
614 // First-wins on id collision. Reverse-iteration means a
615 // user-dir entry claims its id before any system-dir entry
616 // can; a colliding system entry is silently shadowed.
617 if (seenIds.contains(parsed->id)) {
618 qCDebug(log) << "MetadataPackScanStrategy: id" << parsed->id
619 << "already registered from a higher-priority dir; shadowed at:" << subdirPath;
620 continue;
621 }
622
623 // Per-payload watches — frag/vert/kwin shaders, etc.
624 if (m_perEntryWatch) {
625 desiredWatches.append(m_perEntryWatch(*parsed));
626 }
627
628 // Capture the metadata.json fingerprint BEFORE the move so
629 // we can mix it into the per-rescan signature below. Any
630 // parser-consumed field's edit shifts the file's mtime
631 // (POSIX guarantees this on a content-change write — the
632 // editors used in our tests and the production save paths
633 // rely on it), so a single mtime+size mix-in covers every
634 // schema field without forcing per-field enumeration in
635 // SignatureContrib.
636 QString id = parsed->id;
637 seenIds.insert(id);
638 entries.push_back(
639 Entry{std::move(id),
640 EntryFingerprint{metadataInfo.size(), metadataInfo.lastModified().toMSecsSinceEpoch(), isUserDir},
641 std::move(*parsed)});
642 }
643 }
644
645 if (capTripped) {
646 qCWarning(log).nospace() << "MetadataPackScanStrategy: reached entry cap (" << m_maxEntries
647 << ") — later entries skipped to protect the GUI thread. Prune the watched search "
648 "paths or raise the cap.";
649 }
650
651 // Sort by id once. Stable hash + stable accessor ordering both fall
652 // out of this single pass — no parallel structures, no second
653 // QHash::keys() + sort.
654 std::sort(entries.begin(), entries.end(), [](const Entry& a, const Entry& b) {
655 return a.id < b.id;
656 });
657
658 // SHA-1 signature in two passes.
659 //
660 // Pass 1 — per-entry attribution. (id, isUser, metadata.json size+mtime,
661 // payload-specific bytes via SignatureContrib). Stable iteration order
662 // is the sorted entries vec; QHash randomisation never leaks into the
663 // signature.
664 //
665 // Pass 2 — watch-set auto-fingerprint. Every distinct file the
666 // strategy is watching (the per-entry metadata.json plus everything
667 // returned by `PerEntryWatchPaths` and `PerDirectoryWatchPaths`)
668 // contributes path|size|mtime|. This is load-bearing for change-only
669 // emit completeness: any file the watcher fires on must shift the
670 // signature, otherwise the rescan runs but `OnCommit` stays silent
671 // and consumers hold stale state. Without this pass, top-level
672 // shared `*.glsl` includes (`common.glsl`, `audio.glsl`, …) and
673 // per-pack auxiliary files (`helpers.glsl`, etc.) would fire the
674 // watcher but not the consumer's content-changed signal.
675 //
676 // The metadata.json mtime+size shows up in BOTH passes (per-entry +
677 // watch-set). Deterministic redundancy — costs nothing, keeps the
678 // per-entry attribution clean.
679 QCryptographicHash hasher(QCryptographicHash::Sha1);
680 for (const Entry& e : entries) {
681 hasher.addData(e.id.toUtf8());
682 hasher.addData(QByteArrayView("|"));
683 hasher.addData(e.fp.isUser ? QByteArrayView("u") : QByteArrayView("s"));
684 hasher.addData(QByteArrayView("|"));
685 hasher.addData(QByteArray::number(e.fp.metadataSize));
686 hasher.addData(QByteArrayView("|"));
687 hasher.addData(QByteArray::number(e.fp.metadataMtimeMs));
688 hasher.addData(QByteArrayView("|"));
689 if (m_sigContrib) {
690 m_sigContrib(hasher, e.payload);
691 }
692 hasher.addData(QByteArrayView("\n"));
693 }
694 // Watch-set pass. Sorted + deduped for deterministic ordering across
695 // rescans regardless of the order paths were appended during the
696 // outer / inner loops above. Note that the watch list is NOT bounded
697 // by `m_maxEntries`: that cap counts parsed entries only, while the
 // metadata.json paths of rejected subdirs and all callback-supplied
 // paths are appended regardless. The cost of this pass is one sort
 // plus one `QFileInfo` stat per distinct watched path.
698 QStringList sortedWatches = desiredWatches;
699 sortedWatches.removeDuplicates();
700 std::sort(sortedWatches.begin(), sortedWatches.end());
701 for (const QString& path : sortedWatches) {
702 hasher.addData(path.toUtf8());
703 hasher.addData(QByteArrayView("|"));
704 const QFileInfo fi(path);
705 if (fi.exists()) {
706 hasher.addData(QByteArray::number(fi.size()));
707 hasher.addData(QByteArrayView("|"));
708 hasher.addData(QByteArray::number(fi.lastModified().toMSecsSinceEpoch()));
709 } else {
710 // Stable sentinel for absent files. `lastModified()` on an
711 // invalid datetime is implementation-defined — explicit
712 // "missing" keeps the hash format portable across Qt
713 // versions and filesystems.
714 hasher.addData(QByteArrayView("missing"));
715 }
716 hasher.addData(QByteArrayView("\n"));
717 }
718 const QByteArray signature = hasher.result();
719
720 QHash<QString, Payload> fresh;
721 fresh.reserve(static_cast<int>(entries.size()));
722 for (Entry& e : entries) {
723 fresh.insert(e.id, std::move(e.payload));
724 }
725
 // First scan: there is no previous signature to compare against, so
 // report a change only when something was actually discovered.
726 const bool isFirstScan = !m_signatureSeeded;
727 const bool changed = isFirstScan ? !fresh.isEmpty() : signature != m_lastSignature;
728
 // Publish the new state before notifying, so the commit hook observes
 // the fresh pack set through the accessors.
729 m_packs = std::move(fresh);
730 m_lastSignature = signature;
731 m_signatureSeeded = true;
732
733 if (changed && m_onCommit) {
734 m_onCommit();
735 }
736
737 // Return the deduped + lex-sorted watch set rather than the raw
738 // append-order `desiredWatches`. The caller (`WatchedDirectorySet`)
739 // dedupes again internally via `QSet<QString> m_watchedFiles`, so
740 // this isn't a correctness fix — but the strategy already paid for
741 // dedup + sort during the signature pass, so handing the cleaned
742 // list back saves the watcher its own dedup pass and removes a
743 // class of "watcher diagnostics in nondeterministic order" from
744 // the system. Costs nothing.
745 return sortedWatches;
746}
747
748} // namespace PhosphorFsLoader
static constexpr qint64 kMaxFileBytes
Per-file size cap.
Definition DirectoryLoader.h:133
Pluggable enumeration / parse / commit policy for WatchedDirectorySet.
Definition IScanStrategy.h:63
Reusable scan strategy for metadata.json-driven subdirectory pack registries.
Definition MetadataPackScanStrategy.h:187
MetadataPackScanStrategy & operator=(const MetadataPackScanStrategy &)=delete
void setUserPath(const QString &path)
Set the user-data search path used for isUser classification.
Definition MetadataPackScanStrategy.h:343
QList< Payload > packs() const
Live entries sorted by id for deterministic ordering.
Definition MetadataPackScanStrategy.h:410
bool contains(const QString &id) const
True if id is registered.
Definition MetadataPackScanStrategy.h:438
void setPerDirectoryWatchPaths(PerDirectoryWatchPaths fn)
Set the per-directory watch-extractor. Default: empty list.
Definition MetadataPackScanStrategy.h:305
MetadataPackScanStrategy(Parser parser, OnCommit onCommit)
Construct with the parser + commit hook (the two always-required policies).
Definition MetadataPackScanStrategy.h:278
const QHash< QString, Payload > & packsById() const
Live entries by id.
Definition MetadataPackScanStrategy.h:402
void setPerSubdirSkip(PerSubdirSkip fn)
Set the per-subdir-name skip predicate. Default: never skip.
Definition MetadataPackScanStrategy.h:311
void setPerEntryWatchPaths(PerEntryWatchPaths fn)
Set the per-payload watch-extractor. Default: empty list.
Definition MetadataPackScanStrategy.h:299
std::function< bool(const QString &subdirName)> PerSubdirSkip
Optional: predicate skipping subdirectories whose bare name matches a sentinel.
Definition MetadataPackScanStrategy.h:248
MetadataPackScanStrategy(const MetadataPackScanStrategy &)=delete
QStringList performScan(const QStringList &directoriesInScanOrder) override
Run a full rescan across directoriesInScanOrder.
Definition MetadataPackScanStrategy.h:477
Payload pack(const QString &id) const
Lookup by id.
Definition MetadataPackScanStrategy.h:432
void setMaxEntries(int cap)
Per-rescan entry cap.
Definition MetadataPackScanStrategy.h:354
void setLoggingCategory(const QLoggingCategory &cat)
Override the logging category used for the strategy's own warnings (cap trip, oversized metadata....
Definition MetadataPackScanStrategy.h:372
std::function< std::optional< Payload >(const QString &subdirPath, const QJsonObject &root, bool isUser)> Parser
Parse one metadata.json into a payload.
Definition MetadataPackScanStrategy.h:231
std::function< void()> OnCommit
Synchronous "the discovered set changed" hook.
Definition MetadataPackScanStrategy.h:265
QStringList packIds() const
Live entry ids in lexicographic order.
Definition MetadataPackScanStrategy.h:423
static constexpr int kDefaultMaxEntries
Hard cap on successfully parsed entries discovered per rescan, summed across every registered search ...
Definition MetadataPackScanStrategy.h:216
std::function< void(QCryptographicHash &, const Payload &)> SignatureContrib
Optional: payload-specific bytes to fan into the per-rescan SHA-1 signature.
Definition MetadataPackScanStrategy.h:258
std::function< QStringList(const Payload &)> PerEntryWatchPaths
Extract the per-payload paths the base must re-arm individual file watches on after every rescan.
Definition MetadataPackScanStrategy.h:237
std::function< QStringList(const QString &searchPath)> PerDirectoryWatchPaths
Optional: per-search-path watch additions beyond per-pack extraction.
Definition MetadataPackScanStrategy.h:243
void setSignatureContrib(SignatureContrib fn)
Set the payload-specific signature contributor. Default: contributes nothing.
Definition MetadataPackScanStrategy.h:317
int size() const
Number of currently registered packs.
Definition MetadataPackScanStrategy.h:444
Definition DirectoryLoader.h:18