summary refs log tree commit diff
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2013-05-28 14:17:00 +0200
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2013-08-12 13:14:10 -0600
commit: 23debef62104c70600be2b745ec3957538eeac6e (patch)
tree: 3b68eab7624907836b0e614328a529e686224830
parent: 830eb6f6ff9e6dded2bfae99d7cb46f049790470 (diff)
robots.txt: disallow access to snapshots
My dmesg is filled with the oom killer bringing down processes while the
Bingbot downloads every snapshot for every commit of the Linux kernel in
tar.xz format. Sure, I should be running with memory limits, and now I'm
using cgroups, but a more general solution is to prevent crawlers from
wasting resources like that in the first place.

Suggested-by: Natanael Copa <ncopa@alpinelinux.org>
Suggested-by: Julius Plenz <plenz@cis.fu-berlin.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-rw-r--r-- Makefile 1
-rw-r--r-- robots.txt 3
2 files changed, 4 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 00b3269..f11b60f 100644
--- a/Makefile
+++ b/Makefile
@@ -78,6 +78,7 @@ install: all
 	$(INSTALL) -m 0644 cgit.css $(DESTDIR)$(CGIT_DATA_PATH)/cgit.css
 	$(INSTALL) -m 0644 cgit.png $(DESTDIR)$(CGIT_DATA_PATH)/cgit.png
 	$(INSTALL) -m 0644 favicon.ico $(DESTDIR)$(CGIT_DATA_PATH)/favicon.ico
+	$(INSTALL) -m 0644 robots.txt $(DESTDIR)$(CGIT_DATA_PATH)/robots.txt
 	$(INSTALL) -m 0755 -d $(DESTDIR)$(filterdir)
 	$(COPYTREE)  filters/* $(DESTDIR)$(filterdir)
 
diff --git a/robots.txt b/robots.txt
new file mode 100644
index 0000000..4ce948f
--- /dev/null
+++ b/robots.txt
@@ -0,0 +1,3 @@
+User-agent: *
+Disallow: /*/snapshot/*
+Allow: /