fix(activitypub): Drop cached URLs without scheme

- Validate cached file/reply URLs start with http(s) and regenerate if stale
- Add tests for scheme-less cache entries
- Update changelog for 1.3.5 fix
This commit is contained in:
Fabio Manganiello 2026-04-09 03:49:25 +02:00
commit 0e79f90843
Signed by: blacklight
GPG key ID: D90FBA7F76362774
4 changed files with 60 additions and 1 deletions

View file

@ -1,5 +1,15 @@
# Changelog # Changelog
## 1.3.5
### Fixed
- **Stale cached URLs without scheme**: `file_to_url` and
`reply_file_to_url` now validate that cached URLs in `file_urls.json`
have a proper `https://` (or `http://`) scheme. Entries cached before
the URL normalization fix (v1.3.3) are discarded and regenerated,
ensuring ActivityPub object IDs are always fully qualified URLs.
## 1.3.4 ## 1.3.4
### Fixed ### Fixed

View file

@ -224,7 +224,7 @@ class ActivityPubIntegration(ActivityPubRepliesMixin, StartupSyncMixin):
def file_to_url(self, filepath: str) -> str: def file_to_url(self, filepath: str) -> str:
stored = self._get_file_url(filepath) stored = self._get_file_url(filepath)
if stored: if stored and re.match(r"^https?://", stored):
return stored return stored
# Generate the base URL # Generate the base URL

View file

@ -1,5 +1,6 @@
import logging import logging
import os import os
import re
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Callable from typing import Callable
@ -79,6 +80,9 @@ class ActivityPubRepliesMixin(ActivityPubPublishMixin):
if "/None/" in stored or stored.endswith("/None"): if "/None/" in stored or stored.endswith("/None"):
logger.info("Clearing stale URL mapping with None: %s", stored) logger.info("Clearing stale URL mapping with None: %s", stored)
self._remove_reply_file_url(filepath) self._remove_reply_file_url(filepath)
elif not re.match(r"^https?://", stored):
logger.info("Clearing stale URL without scheme: %s", stored)
self._remove_reply_file_url(filepath)
else: else:
return stored return stored

View file

@ -1560,6 +1560,51 @@ class ActivityPubPublishTest(unittest.TestCase):
"https://persist.example/users/frank", "https://persist.example/users/frank",
) )
# ------------------------------------------------------------------
# URL cache validation
# ------------------------------------------------------------------
@skip_if_no_pubby
def test_file_to_url_discards_cached_url_without_scheme(self):
    """
    ``file_to_url`` must not hand back a cached entry that lacks a scheme.

    The cache is seeded with a scheme-less value for the test file, and the
    URL returned by ``file_to_url`` is then required to start with
    ``https://``.
    """
    integration, _, test_file, _ = self._make_integration()
    source_path = str(test_file)

    # Plant a stale mapping, as would have been written before
    # _normalize_url existed.
    integration._set_file_url(source_path, "example.com/article/test-post")

    resolved = integration.file_to_url(source_path)
    has_scheme = resolved.startswith("https://")
    self.assertTrue(
        has_scheme,
        f"Expected URL with scheme, got: {resolved}",
    )
@skip_if_no_pubby
def test_reply_file_to_url_discards_cached_url_without_scheme(self):
    """
    ``reply_file_to_url`` must not hand back a scheme-less cached reply URL.

    A reply file is created under a fresh ``replies`` directory, the reply
    URL cache is seeded with a value that has no scheme, and the URL
    returned by ``reply_file_to_url`` is then required to start with
    ``https://``.
    """
    integration, _, _, pages_dir = self._make_integration()

    # Build <parent of pages_dir>/replies/some-article/my-reply.md on disk
    # and point the integration at the new replies directory.
    replies_root = pages_dir.parent / "replies"
    replies_root.mkdir()
    integration.replies_dir = str(replies_root)

    article_dir = replies_root / "some-article"
    article_dir.mkdir()

    reply_path = article_dir / "my-reply.md"
    reply_path.write_text(
        "[//]: # (reply-to: https://remote.example/post/1)\n\nGreat post!"
    )
    reply_key = str(reply_path)

    # Plant a stale reply mapping that has no scheme.
    integration._set_reply_file_url(
        reply_key, "example.com/reply/some-article/my-reply"
    )

    resolved = integration.reply_file_to_url(reply_key)
    has_scheme = resolved.startswith("https://")
    self.assertTrue(
        has_scheme,
        f"Expected URL with scheme, got: {resolved}",
    )
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Non-blocking / concurrency # Non-blocking / concurrency
# ------------------------------------------------------------------ # ------------------------------------------------------------------