diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2026-04-13 16:32:08 -0400 |
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2026-04-13 16:55:02 -0400 |
| commit | c28f9e360110cd797d47d57cb29d4e4498fb1e0b (patch) | |
| tree | c65223e3785241c73c6fc9d1e1abb10421f54cf6 /org | |
| parent | spine may be run against a document-markup zip pod (diff) | |
- claude contributed src
- processes a zip from a URL, using the system-installed curl for the download
Diffstat (limited to 'org')
| -rw-r--r-- | org/in_zip_pod.org | 149 | ||||
| -rw-r--r-- | org/spine.org | 27 |
2 files changed, 174 insertions, 2 deletions
diff --git a/org/in_zip_pod.org b/org/in_zip_pod.org
index bc5b069..e44b10e 100644
--- a/org/in_zip_pod.org
+++ b/org/in_zip_pod.org
@@ -241,6 +241,155 @@ template spineExtractZipPod() {
     return "";
   }
 
+  /+ ↓ download a zip pod from a URL to a temp file +/
+  enum size_t MAX_DOWNLOAD_SIZE = 200 * 1024 * 1024; /+ 200 MB download limit +/
+  enum int DOWNLOAD_TIMEOUT = 120; /+ seconds +/
+
+  static auto rgx_url_zip = ctRegex!(`^https?://[a-zA-Z0-9._:/-]+[.]zip$`);
+
+  /+ ↓ result of downloadZipUrl: local_path is filled on success,
+     error_msg on failure +/
+  struct DownloadResult {
+    string local_path; /+ path to downloaded temp file +/
+    bool ok;
+    string error_msg;
+  }
+
+  /+ ↓ true when arg is longer than 8 characters and starts with http:// or https:// +/
+  bool isUrl(string arg) {
+    return arg.length > 8
+      && (arg[0..8] == "https://" || arg[0..7] == "http://");
+  }
+
+  /+ ↓ download the zip at url into tempDir/spine-zip-pod/ using the system curl;
+     on success ok is true and local_path names the downloaded file,
+     otherwise error_msg describes the failure +/
+  @trusted DownloadResult downloadZipUrl(string url) {
+    import std.algorithm.searching : startsWith;
+    import std.process : execute, ProcessException;
+    DownloadResult result;
+    result.ok = false;
+    /+ ↓ validate URL scheme +/
+    immutable bool is_https = url.startsWith("https://");
+    if (url.length < 8 || !(is_https || url.startsWith("http://"))) {
+      result.error_msg = "only http/https URLs are supported: " ~ url;
+      return result;
+    }
+    if (!is_https) {
+      stderr.writeln("WARNING: downloading over insecure http: ", url);
+    }
+    /+ ↓ validate URL format +/
+    if (!(url.matchFirst(rgx_url_zip))) {
+      result.error_msg = "URL does not match expected zip URL pattern: " ~ url;
+      return result;
+    }
+    /+ ↓ reject URLs that could target internal services; best effort only:
+       the literal host is checked, names are not resolved and redirect
+       targets followed by curl are not re-validated; bracketed IPv6
+       literals never reach this point as rgx_url_zip has no '[' or ']' +/
+    {
+      import std.uni : toLower;
+      /+ strip scheme to get host portion +/
+      string after_scheme = url[(is_https ? 8 : 7) .. $].toLower;
+      /+ extract host (up to first / or :) +/
+      string host = after_scheme;
+      foreach (i, c; after_scheme) {
+        if (c == '/' || c == ':') {
+          host = after_scheme[0..i];
+          break;
+        }
+      }
+      /+ a trailing dot is the fully qualified spelling of the same host name +/
+      if (host.length > 0 && host[$-1] == '.') host = host[0..$-1];
+      if (host.length == 0) {
+        result.error_msg = "URL has no host: " ~ url;
+        return result;
+      }
+      /+ prefix match, so that e.g. "example10.com" is not taken for 10.0.0.0/8 +/
+      bool is_private = host == "localhost"
+        || host == "0.0.0.0"
+        || host.startsWith("127.")
+        || host.startsWith("169.254.")
+        || host.startsWith("10.")
+        || host.startsWith("192.168.");
+      /+ 172.16.0.0/12: second octet 16 through 31 +/
+      foreach (octet; 16 .. 32) {
+        if (host.startsWith("172." ~ octet.to!string ~ ".")) is_private = true;
+      }
+      if (is_private) {
+        result.error_msg = "URL targets a local/private address: " ~ url;
+        return result;
+      }
+    }
+    /+ ↓ derive filename from URL +/
+    string url_basename = url.baseName;
+    if (url_basename.length == 0 || url_basename.indexOf('.') < 0) {
+      result.error_msg = "cannot determine filename from URL: " ~ url;
+      return result;
+    }
+    /+ ↓ create temp directory for download +/
+    string tmp_base = tempDir.buildPath("spine-zip-pod");
+    try {
+      if (!exists(tmp_base))
+        mkdirRecurse(tmp_base);
+    } catch (FileException ex) {
+      result.error_msg = "failed to create temp directory: " ~ ex.msg;
+      return result;
+    }
+    string tmp_file = tmp_base.buildPath(url_basename);
+    /+ ↓ download using curl; execute throws ProcessException when curl cannot
+       be started (e.g. it is not installed), report that as a failed download +/
+    int curl_status;
+    string curl_output;
+    try {
+      auto curl_result = execute([
+        "curl",
+        "--silent", "--show-error",
+        "--fail", /+ fail on HTTP errors +/
+        "--location", /+ follow redirects +/
+        "--max-redirs", "5", /+ limit redirects +/
+        "--max-time", DOWNLOAD_TIMEOUT.to!string,
+        "--max-filesize", MAX_DOWNLOAD_SIZE.to!string,
+        "--proto", "=https,http", /+ restrict protocols +/
+        "--proto-redir", "=https,http", /+ same restriction when redirected +/
+        "--output", tmp_file,
+        url
+      ]);
+      curl_status = curl_result.status;
+      curl_output = curl_result.output;
+    } catch (ProcessException ex) {
+      result.error_msg = "download failed: " ~ url ~ " - cannot run curl: " ~ ex.msg;
+      return result;
+    }
+    if (curl_status != 0) {
+      result.error_msg = "download failed: " ~ url;
+      if (curl_output.length > 0)
+        result.error_msg ~= " - " ~ curl_output;
+      /+ clean up partial download +/
+      try { if (exists(tmp_file)) remove(tmp_file); } catch (FileException) {}
+      return result;
+    }
+    if (!exists(tmp_file) || !tmp_file.isFile) {
+      result.error_msg = "download produced no file: " ~ url;
+      return result;
+    }
+    result.local_path = tmp_file;
+    result.ok = true;
+    return result;
+  }
+
+  /+ ↓ clean up a downloaded temp file +/
+  void cleanupDownload(ref DownloadResult dlr) {
+    if (dlr.local_path.length > 0 && exists(dlr.local_path)) {
+      try {
+        remove(dlr.local_path);
+      } catch (FileException ex) {
+        stderr.writeln("WARNING: failed to clean up downloaded file: ", dlr.local_path);
+      }
+    }
+    dlr.ok = false;
+  }
+
   /+ ↓ clean up extracted temp directory +/
   void cleanupZipPod(ref ZipPodResult zpr) {
     if (zpr.pod_dir.length > 0 && exists(zpr.pod_dir)) {
diff --git a/org/spine.org b/org/spine.org
index c218df0..a8b7cf9 100644
--- a/org/spine.org
+++ b/org/spine.org
@@ -119,6 +119,10 @@ string program_name = "spine";
     foreach (ref _zpr; _zip_pod_extractions) {
       cleanupZipPod(_zpr);
     }
+    /+ ↓ clean up any downloaded temp files +/
+    foreach (ref _dlr; _url_downloads) {
+      cleanupDownload(_dlr);
+    }
   }
 #+END_SRC
 
@@ -1065,12 +1069,31 @@ auto _conf_file_details = configFilePaths!()(_manifested, _env, _opt_action.conf
 /+ ↓ track extracted zip pod temp directories for cleanup +/
 mixin spineExtractZipPod;
 ZipPodResult[] _zip_pod_extractions;
+DownloadResult[] _url_downloads;
+/+ ↓ pre-process args: resolve URL arguments to local temp files +/
+string[] _resolved_args;
+foreach (arg; args[1..$]) {
+  if (isUrl(arg)) {
+    auto _dlr = downloadZipUrl(arg);
+    if (_dlr.ok) {
+      _url_downloads ~= _dlr;
+      _resolved_args ~= _dlr.local_path;
+      if (_opt_action.vox_gt_1) {
+        writeln("downloaded: ", arg, " -> ", _dlr.local_path);
+      }
+    } else {
+      writeln("ERROR >> Download failed: ", arg, " - ", _dlr.error_msg);
+    }
+  } else {
+    _resolved_args ~= arg;
+  }
+}
 ConfComposite _siteConfig;
 if (
   _opt_action.require_processing_files
   && _opt_action.config_path_set.empty
 ) {
-  foreach(arg; args[1..$]) {
+  foreach(arg; _resolved_args) {
     if (!(arg.match(rgx.flag_action))) { /+ cli markup source path +/ // get first input markup source file names for processing
       string _config_arg = arg;
       /+ ↓ if first non-flag arg is a zip, extract for config discovery +/
@@ -1127,7 +1150,7 @@ if (!(_opt_action.skip_output)) {
 #+BEGIN_SRC d
 ConfComposite _make_and_meta_struct = _siteConfig;
 destroy(_siteConfig);
-foreach(arg; args[1..$]) {
+foreach(arg; _resolved_args) {
   if (arg.match(rgx.flag_action)) { /+ cli instruction, flag do +/
     flag_action ~= " " ~ arg; // flags not taken by getopt
   } else if (_opt_action.require_processing_files) { /+ cli, assumed to be path to source files +/
