[PATCH] Return 304 on snapshot download when ETag matches

[PATCH] Return 304 on snapshot download when ETag matches

From: benaryorg
This allows to take advantage of the If-None-Match header to provide the ETag returned earlier.
Since the ETag is the commit hex verbatim this is basically content addressed data, hence it can be easily compared and cached.
My limited testing has not shown any implementation actually doing this right now however, but at least with this implementation there is merit to client-side implementations.

A similar implementation for blob may be possible which is also effectively content addressed.
Most other endpoints include the list of branches and shouldn't be cached to that extent.

The tests run just fine for me, and have been failing for the better part of the day so I'm pretty sure they actually work.
Running the entire thing in production also works for me.
The mechanism seems to heavily clash with server-side caching too, and since serving the same 200 response for a cacheable request doesn't make a lot of sense but serving a 304 to a request not qualifying for a 304 treatment I took the easy way out of disabling the feature when caching is enabled.
Future improvements could make this more fine-grained or ideally even move this into the caching layer entirely.

Due to my limited knowledge of the codebase I'm not sure if the functions and checks all ended up in places that make sense and are maintainable, so if anyone can recommend some improvements for that, please go ahead.
At the very least I was happy to write some C again.

Signed-off-by: benaryorg <binary@benary.org>
---
 cgit.c                  |  1 +
 cgit.h                  |  1 +
 tests/t0107-snapshot.sh | 15 ++++++++++++++-
 ui-shared.c             | 20 ++++++++++++++++++++
 ui-shared.h             |  1 +
 ui-snapshot.c           | 17 ++++++++++++++++-
 6 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/cgit.c b/cgit.c
index dd28a79..4f42d2b 100644
--- a/cgit.c
+++ b/cgit.c
@@ -419,6 +419,7 @@ static void prepare_context(void)
 	ctx.env.server_port = getenv("SERVER_PORT");
 	ctx.env.http_cookie = getenv("HTTP_COOKIE");
 	ctx.env.http_referer = getenv("HTTP_REFERER");
+	ctx.env.if_none_match = getenv("HTTP_IF_NONE_MATCH");
 	ctx.env.content_length = getenv("CONTENT_LENGTH") ? strtoul(getenv("CONTENT_LENGTH"), NULL, 10) : 0;
 	ctx.env.authenticated = 0;
 	ctx.page.mimetype = "text/html";
diff --git a/cgit.h b/cgit.h
index 72fcd84..6bf7c6a 100644
--- a/cgit.h
+++ b/cgit.h
@@ -298,6 +298,7 @@ struct cgit_environment {
 	const char *server_port;
 	const char *http_cookie;
 	const char *http_referer;
+	const char *if_none_match;
 	unsigned int content_length;
 	int authenticated;
 };
diff --git a/tests/t0107-snapshot.sh b/tests/t0107-snapshot.sh
index 0811ec4..fe1ce0e 100755
--- a/tests/t0107-snapshot.sh
+++ b/tests/t0107-snapshot.sh
@@ -3,6 +3,10 @@
 test_description='Verify snapshot'
 . ./setup.sh
 
+test_expect_success 'disable cache' '
+	sed -i -E "s/^cache-size=.*/cache-size=0/" cgitrc
+'
+
 test_expect_success 'get foo/snapshot/master.tar.gz' '
 	cgit_url "foo/snapshot/master.tar.gz" >tmp
 '
@@ -12,7 +16,16 @@ test_expect_success 'check html headers' '
 	grep "Content-Type: application/x-gzip" &&
 
 	head -n 2 tmp |
-	grep "Content-Disposition: inline; filename=.master.tar.gz."
+	grep "Content-Disposition: inline; filename=.master.tar.gz." &&
+
+	head -n 5 tmp |
+	grep "^ETag: " >etag
+'
+
+test_expect_success 'check not modified with etag' '
+	HTTP_IF_NONE_MATCH="$(cut -d " " -f 2- etag)" cgit_url "foo/snapshot/master.tar.gz" |
+	head -n 1 |
+	grep "^Status: 304 Not modified"
 '
 
 test_expect_success 'strip off the header lines' '
diff --git a/ui-shared.c b/ui-shared.c
index 72a1505..b494d8d 100644
--- a/ui-shared.c
+++ b/ui-shared.c
@@ -1239,3 +1239,23 @@ void cgit_set_title_from_path(const char *path)
 	strbuf_addf(&sb, " - %s", ctx.page.title);
 	ctx.page.title = strbuf_detach(&sb, NULL);
 }
+
+int cgit_cache_if_none_match_etag(const char *etag) {
+	int len;
+
+	if(!ctx.env.if_none_match) {
+		return 0;
+	}
+	// if-none-match has double quotes in the value
+	len = strlen(ctx.env.if_none_match);
+	if(len != strlen(etag) + 2) {
+		return 0;
+	}
+	// length is at least 2 due to the check above
+	if(ctx.env.if_none_match[0] != '"' || ctx.env.if_none_match[len - 1] != '"') {
+		return 0;
+	}
+
+	// again, no length checks required
+	return !strncmp(ctx.env.if_none_match + 1, etag, len - 2);
+}
diff --git a/ui-shared.h b/ui-shared.h
index 6964873..c82fa98 100644
--- a/ui-shared.h
+++ b/ui-shared.h
@@ -84,4 +84,5 @@ extern void cgit_add_hidden_formfields(int incl_head, int incl_search,
 				       const char *page);
 
 extern void cgit_set_title_from_path(const char *path);
+extern int cgit_cache_if_none_match_etag(const char *etag);
 #endif /* UI_SHARED_H */
diff --git a/ui-snapshot.c b/ui-snapshot.c
index 2801393..e38c757 100644
--- a/ui-snapshot.c
+++ b/ui-snapshot.c
@@ -169,7 +169,15 @@ static int make_snapshot(const struct cgit_snapshot_format *format,
 				"Not a commit reference: %s", hex);
 		return 1;
 	}
+
 	ctx.page.etag = oid_to_hex(&oid);
+	if(!ctx.cfg.cache_size && cgit_cache_if_none_match_etag(ctx.page.etag)) {
+		ctx.page.status = 304;
+		ctx.page.statusmsg = "Not modified";
+		cgit_print_http_headers();
+		exit(0);
+	}
+
 	ctx.page.mimetype = xstrdup(format->mimetype);
 	ctx.page.filename = xstrdup(filename);
 	cgit_print_http_headers();
@@ -193,6 +201,14 @@ static int write_sig(const struct cgit_snapshot_format *format,
 		return 0;
 	}
 
+	ctx.page.etag = oid_to_hex(note);
+	if(!ctx.cfg.cache_size && cgit_cache_if_none_match_etag(ctx.page.etag)) {
+		ctx.page.status = 304;
+		ctx.page.statusmsg = "Not modified";
+		cgit_print_http_headers();
+		exit(0);
+	}
+
 	buf = read_object_file(note, &type, &size);
 	if (!buf) {
 		cgit_print_error_page(404, "Not found", "Not found");
@@ -201,7 +217,6 @@ static int write_sig(const struct cgit_snapshot_format *format,
 
 	html("X-Content-Type-Options: nosniff\n");
 	html("Content-Security-Policy: default-src 'none'\n");
-	ctx.page.etag = oid_to_hex(note);
 	ctx.page.mimetype = xstrdup("application/pgp-signature");
 	ctx.page.filename = xstrdup(filename);
 	cgit_print_http_headers();
-- 
2.47.2