From 90f6ecd024850ff4c4a8cbf014533f7629ccde7a Mon Sep 17 00:00:00 2001
From: antanst
Date: Fri, 27 Dec 2024 12:11:35 +0200
Subject: [PATCH] Add README.md and Makefile.

---
Review note: the Makefile hunk was amended (rm -f in clean, .PHONY, target
comments); the blob id on its index line is from the original commit.

 .gitignore |  1 +
 Makefile   | 43 +++++++++++++++++++++++++++++++++++++++++++
 README.md  | 30 ++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+)
 create mode 100644 Makefile
 create mode 100644 README.md

diff --git a/.gitignore b/.gitignore
index b456597..cee4eaf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@
 /db/initdb.sql
 /gemini-grc
 run*.sh
+/main
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..78c15dc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,43 @@
+# Recipes run under oksh; change SHELL if it is installed elsewhere.
+SHELL := /usr/local/bin/oksh
+export PATH := $(PATH)
+
+# These targets are commands, not files to be built.
+.PHONY: all clean debug test fmt lint lintfix build
+
+# Default: format, lint with fixes, test, then rebuild from scratch.
+all: fmt lintfix test clean build
+
+# Remove the built binary; -f so a checkout without ./main does not fail.
+clean:
+	rm -f ./main
+
+# Show PATH, GOPATH and the location of each required tool.
+debug:
+	@echo "PATH: $(PATH)"
+	@echo "GOPATH: $(shell go env GOPATH)"
+	@which go
+	@which gofumpt
+	@which gci
+	@which golangci-lint
+
+# Test
+test:
+	go test ./...
+
+# Format code
+fmt:
+	gofumpt -l -w .
+	gci write .
+
+# Run linter
+lint: fmt
+	golangci-lint run
+
+# Run linter and fix
+lintfix: fmt
+	golangci-lint run --fix
+
+# Build the binary from main.go
+build:
+	go build ./main.go
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..eab5653
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+# gemini-grc
+
+A Gemini crawler.
+
+URLs to visit as well as data from visited URLs are stored as "snapshots" in the database.
+This makes it easily extendable as a "wayback machine" of Gemini.
+
+## Done
+- [x] Concurrent downloading with workers
+- [x] Concurrent connection limit per host
+- [x] URL Blacklist
+- [x] Save image/* and text/* files
+- [x] Configuration via environment variables
+- [x] Storing snapshots in PostgreSQL
+- [x] Proper response header & body UTF-8 and format validation
+- [x] Follow robots.txt, see gemini://geminiprotocol.net/docs/companion/robots.gmi
+- [x] Handle redirects (3X status codes)
+- [x] Better URL normalization
+
+## TODO
+- [ ] Add snapshot hash and support snapshot history
+- [ ] Add web interface
+- [ ] Provide a TLS cert for sites that require it, like Astrobotany
+
+## TODO with lower priority
+- [ ] Gopher
+- [ ] Scroll gemini://auragem.letz.dev/devlog/20240316.gmi
+- [ ] Spartan
+- [ ] Nex
+- [ ] SuperTXT https://supertxt.net/00-intro.html