Allow filtering by remote addresses (#52)
* Added the possibility to define rules for remote addresses * Added change in changelog * Added check for X-Real-Ip and X-Forwarded-For when checking for remote address filtering * cmd/anubis: refine IP filtering logic * Optimize the configuration so that the IP trie is created once at application start instead of dynamically being created every request. * Document the changes in the changelog and docs site. * Allow pure IP range filtering. * Allow user agent based IP range filtering. * Allow path based IP range filtering. * Create --debug-x-real-ip-default flag for testing Anubis locally without a HTTP load balancer. --------- Co-authored-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent
e7b9b17b92
commit
d6d879133e
11 changed files with 554 additions and 27 deletions
|
@ -8,22 +8,332 @@
|
|||
{
|
||||
"name": "googlebot",
|
||||
"user_agent_regex": "\\+http\\:\\/\\/www\\.google\\.com/bot\\.html",
|
||||
"action": "ALLOW"
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"2001:4860:4801:10::/64",
|
||||
"2001:4860:4801:11::/64",
|
||||
"2001:4860:4801:12::/64",
|
||||
"2001:4860:4801:13::/64",
|
||||
"2001:4860:4801:14::/64",
|
||||
"2001:4860:4801:15::/64",
|
||||
"2001:4860:4801:16::/64",
|
||||
"2001:4860:4801:17::/64",
|
||||
"2001:4860:4801:18::/64",
|
||||
"2001:4860:4801:19::/64",
|
||||
"2001:4860:4801:1a::/64",
|
||||
"2001:4860:4801:1b::/64",
|
||||
"2001:4860:4801:1c::/64",
|
||||
"2001:4860:4801:1d::/64",
|
||||
"2001:4860:4801:1e::/64",
|
||||
"2001:4860:4801:1f::/64",
|
||||
"2001:4860:4801:20::/64",
|
||||
"2001:4860:4801:21::/64",
|
||||
"2001:4860:4801:22::/64",
|
||||
"2001:4860:4801:23::/64",
|
||||
"2001:4860:4801:24::/64",
|
||||
"2001:4860:4801:25::/64",
|
||||
"2001:4860:4801:26::/64",
|
||||
"2001:4860:4801:27::/64",
|
||||
"2001:4860:4801:28::/64",
|
||||
"2001:4860:4801:29::/64",
|
||||
"2001:4860:4801:2::/64",
|
||||
"2001:4860:4801:2a::/64",
|
||||
"2001:4860:4801:2b::/64",
|
||||
"2001:4860:4801:2c::/64",
|
||||
"2001:4860:4801:2d::/64",
|
||||
"2001:4860:4801:2e::/64",
|
||||
"2001:4860:4801:2f::/64",
|
||||
"2001:4860:4801:31::/64",
|
||||
"2001:4860:4801:32::/64",
|
||||
"2001:4860:4801:33::/64",
|
||||
"2001:4860:4801:34::/64",
|
||||
"2001:4860:4801:35::/64",
|
||||
"2001:4860:4801:36::/64",
|
||||
"2001:4860:4801:37::/64",
|
||||
"2001:4860:4801:38::/64",
|
||||
"2001:4860:4801:39::/64",
|
||||
"2001:4860:4801:3a::/64",
|
||||
"2001:4860:4801:3b::/64",
|
||||
"2001:4860:4801:3c::/64",
|
||||
"2001:4860:4801:3d::/64",
|
||||
"2001:4860:4801:3e::/64",
|
||||
"2001:4860:4801:40::/64",
|
||||
"2001:4860:4801:41::/64",
|
||||
"2001:4860:4801:42::/64",
|
||||
"2001:4860:4801:43::/64",
|
||||
"2001:4860:4801:44::/64",
|
||||
"2001:4860:4801:45::/64",
|
||||
"2001:4860:4801:46::/64",
|
||||
"2001:4860:4801:47::/64",
|
||||
"2001:4860:4801:48::/64",
|
||||
"2001:4860:4801:49::/64",
|
||||
"2001:4860:4801:4a::/64",
|
||||
"2001:4860:4801:4b::/64",
|
||||
"2001:4860:4801:4c::/64",
|
||||
"2001:4860:4801:50::/64",
|
||||
"2001:4860:4801:51::/64",
|
||||
"2001:4860:4801:52::/64",
|
||||
"2001:4860:4801:53::/64",
|
||||
"2001:4860:4801:54::/64",
|
||||
"2001:4860:4801:55::/64",
|
||||
"2001:4860:4801:56::/64",
|
||||
"2001:4860:4801:60::/64",
|
||||
"2001:4860:4801:61::/64",
|
||||
"2001:4860:4801:62::/64",
|
||||
"2001:4860:4801:63::/64",
|
||||
"2001:4860:4801:64::/64",
|
||||
"2001:4860:4801:65::/64",
|
||||
"2001:4860:4801:66::/64",
|
||||
"2001:4860:4801:67::/64",
|
||||
"2001:4860:4801:68::/64",
|
||||
"2001:4860:4801:69::/64",
|
||||
"2001:4860:4801:6a::/64",
|
||||
"2001:4860:4801:6b::/64",
|
||||
"2001:4860:4801:6c::/64",
|
||||
"2001:4860:4801:6d::/64",
|
||||
"2001:4860:4801:6e::/64",
|
||||
"2001:4860:4801:6f::/64",
|
||||
"2001:4860:4801:70::/64",
|
||||
"2001:4860:4801:71::/64",
|
||||
"2001:4860:4801:72::/64",
|
||||
"2001:4860:4801:73::/64",
|
||||
"2001:4860:4801:74::/64",
|
||||
"2001:4860:4801:75::/64",
|
||||
"2001:4860:4801:76::/64",
|
||||
"2001:4860:4801:77::/64",
|
||||
"2001:4860:4801:78::/64",
|
||||
"2001:4860:4801:79::/64",
|
||||
"2001:4860:4801:80::/64",
|
||||
"2001:4860:4801:81::/64",
|
||||
"2001:4860:4801:82::/64",
|
||||
"2001:4860:4801:83::/64",
|
||||
"2001:4860:4801:84::/64",
|
||||
"2001:4860:4801:85::/64",
|
||||
"2001:4860:4801:86::/64",
|
||||
"2001:4860:4801:87::/64",
|
||||
"2001:4860:4801:88::/64",
|
||||
"2001:4860:4801:90::/64",
|
||||
"2001:4860:4801:91::/64",
|
||||
"2001:4860:4801:92::/64",
|
||||
"2001:4860:4801:93::/64",
|
||||
"2001:4860:4801:94::/64",
|
||||
"2001:4860:4801:95::/64",
|
||||
"2001:4860:4801:96::/64",
|
||||
"2001:4860:4801:a0::/64",
|
||||
"2001:4860:4801:a1::/64",
|
||||
"2001:4860:4801:a2::/64",
|
||||
"2001:4860:4801:a3::/64",
|
||||
"2001:4860:4801:a4::/64",
|
||||
"2001:4860:4801:a5::/64",
|
||||
"2001:4860:4801:c::/64",
|
||||
"2001:4860:4801:f::/64",
|
||||
"192.178.5.0/27",
|
||||
"192.178.6.0/27",
|
||||
"192.178.6.128/27",
|
||||
"192.178.6.160/27",
|
||||
"192.178.6.192/27",
|
||||
"192.178.6.32/27",
|
||||
"192.178.6.64/27",
|
||||
"192.178.6.96/27",
|
||||
"34.100.182.96/28",
|
||||
"34.101.50.144/28",
|
||||
"34.118.254.0/28",
|
||||
"34.118.66.0/28",
|
||||
"34.126.178.96/28",
|
||||
"34.146.150.144/28",
|
||||
"34.147.110.144/28",
|
||||
"34.151.74.144/28",
|
||||
"34.152.50.64/28",
|
||||
"34.154.114.144/28",
|
||||
"34.155.98.32/28",
|
||||
"34.165.18.176/28",
|
||||
"34.175.160.64/28",
|
||||
"34.176.130.16/28",
|
||||
"34.22.85.0/27",
|
||||
"34.64.82.64/28",
|
||||
"34.65.242.112/28",
|
||||
"34.80.50.80/28",
|
||||
"34.88.194.0/28",
|
||||
"34.89.10.80/28",
|
||||
"34.89.198.80/28",
|
||||
"34.96.162.48/28",
|
||||
"35.247.243.240/28",
|
||||
"66.249.64.0/27",
|
||||
"66.249.64.128/27",
|
||||
"66.249.64.160/27",
|
||||
"66.249.64.224/27",
|
||||
"66.249.64.32/27",
|
||||
"66.249.64.64/27",
|
||||
"66.249.64.96/27",
|
||||
"66.249.65.0/27",
|
||||
"66.249.65.128/27",
|
||||
"66.249.65.160/27",
|
||||
"66.249.65.192/27",
|
||||
"66.249.65.224/27",
|
||||
"66.249.65.32/27",
|
||||
"66.249.65.64/27",
|
||||
"66.249.65.96/27",
|
||||
"66.249.66.0/27",
|
||||
"66.249.66.128/27",
|
||||
"66.249.66.160/27",
|
||||
"66.249.66.192/27",
|
||||
"66.249.66.224/27",
|
||||
"66.249.66.32/27",
|
||||
"66.249.66.64/27",
|
||||
"66.249.66.96/27",
|
||||
"66.249.68.0/27",
|
||||
"66.249.68.128/27",
|
||||
"66.249.68.32/27",
|
||||
"66.249.68.64/27",
|
||||
"66.249.68.96/27",
|
||||
"66.249.69.0/27",
|
||||
"66.249.69.128/27",
|
||||
"66.249.69.160/27",
|
||||
"66.249.69.192/27",
|
||||
"66.249.69.224/27",
|
||||
"66.249.69.32/27",
|
||||
"66.249.69.64/27",
|
||||
"66.249.69.96/27",
|
||||
"66.249.70.0/27",
|
||||
"66.249.70.128/27",
|
||||
"66.249.70.160/27",
|
||||
"66.249.70.192/27",
|
||||
"66.249.70.224/27",
|
||||
"66.249.70.32/27",
|
||||
"66.249.70.64/27",
|
||||
"66.249.70.96/27",
|
||||
"66.249.71.0/27",
|
||||
"66.249.71.128/27",
|
||||
"66.249.71.160/27",
|
||||
"66.249.71.192/27",
|
||||
"66.249.71.224/27",
|
||||
"66.249.71.32/27",
|
||||
"66.249.71.64/27",
|
||||
"66.249.71.96/27",
|
||||
"66.249.72.0/27",
|
||||
"66.249.72.128/27",
|
||||
"66.249.72.160/27",
|
||||
"66.249.72.192/27",
|
||||
"66.249.72.224/27",
|
||||
"66.249.72.32/27",
|
||||
"66.249.72.64/27",
|
||||
"66.249.72.96/27",
|
||||
"66.249.73.0/27",
|
||||
"66.249.73.128/27",
|
||||
"66.249.73.160/27",
|
||||
"66.249.73.192/27",
|
||||
"66.249.73.224/27",
|
||||
"66.249.73.32/27",
|
||||
"66.249.73.64/27",
|
||||
"66.249.73.96/27",
|
||||
"66.249.74.0/27",
|
||||
"66.249.74.128/27",
|
||||
"66.249.74.160/27",
|
||||
"66.249.74.192/27",
|
||||
"66.249.74.32/27",
|
||||
"66.249.74.64/27",
|
||||
"66.249.74.96/27",
|
||||
"66.249.75.0/27",
|
||||
"66.249.75.128/27",
|
||||
"66.249.75.160/27",
|
||||
"66.249.75.192/27",
|
||||
"66.249.75.224/27",
|
||||
"66.249.75.32/27",
|
||||
"66.249.75.64/27",
|
||||
"66.249.75.96/27",
|
||||
"66.249.76.0/27",
|
||||
"66.249.76.128/27",
|
||||
"66.249.76.160/27",
|
||||
"66.249.76.192/27",
|
||||
"66.249.76.224/27",
|
||||
"66.249.76.32/27",
|
||||
"66.249.76.64/27",
|
||||
"66.249.76.96/27",
|
||||
"66.249.77.0/27",
|
||||
"66.249.77.128/27",
|
||||
"66.249.77.160/27",
|
||||
"66.249.77.192/27",
|
||||
"66.249.77.224/27",
|
||||
"66.249.77.32/27",
|
||||
"66.249.77.64/27",
|
||||
"66.249.77.96/27",
|
||||
"66.249.78.0/27",
|
||||
"66.249.78.32/27",
|
||||
"66.249.79.0/27",
|
||||
"66.249.79.128/27",
|
||||
"66.249.79.160/27",
|
||||
"66.249.79.192/27",
|
||||
"66.249.79.224/27",
|
||||
"66.249.79.32/27",
|
||||
"66.249.79.64/27",
|
||||
"66.249.79.96/27"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "bingbot",
|
||||
"user_agent_regex": "\\+http\\:\\/\\/www\\.bing\\.com/bingbot\\.htm",
|
||||
"action": "ALLOW"
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"157.55.39.0/24",
|
||||
"207.46.13.0/24",
|
||||
"40.77.167.0/24",
|
||||
"13.66.139.0/24",
|
||||
"13.66.144.0/24",
|
||||
"52.167.144.0/24",
|
||||
"13.67.10.16/28",
|
||||
"13.69.66.240/28",
|
||||
"13.71.172.224/28",
|
||||
"139.217.52.0/28",
|
||||
"191.233.204.224/28",
|
||||
"20.36.108.32/28",
|
||||
"20.43.120.16/28",
|
||||
"40.79.131.208/28",
|
||||
"40.79.186.176/28",
|
||||
"52.231.148.0/28",
|
||||
"20.79.107.240/28",
|
||||
"51.105.67.0/28",
|
||||
"20.125.163.80/28",
|
||||
"40.77.188.0/22",
|
||||
"65.55.210.0/24",
|
||||
"199.30.24.0/23",
|
||||
"40.77.202.0/24",
|
||||
"40.77.139.0/25",
|
||||
"20.74.197.0/28",
|
||||
"20.15.133.160/27",
|
||||
"40.77.177.0/24",
|
||||
"40.77.178.0/23"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "qwantbot",
|
||||
"user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/",
|
||||
"action": "ALLOW"
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"91.242.162.0/24"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kagibot",
|
||||
"user_agent_regex": "\\+https\\:\\/\\/kagi\\.com/bot",
|
||||
"action": "ALLOW"
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"216.18.205.234/32",
|
||||
"35.212.27.76/32",
|
||||
"104.254.65.50/32",
|
||||
"209.151.156.194/32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "marginalia",
|
||||
"user_agent_regex": "search\\.marginalia\\.nu",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"193.183.0.162/31",
|
||||
"193.183.0.164/30",
|
||||
"193.183.0.168/30",
|
||||
"193.183.0.172/31",
|
||||
"193.183.0.174/32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "us-artificial-intelligence-scraper",
|
||||
|
|
|
@ -3,6 +3,7 @@ package config
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
|
@ -28,17 +29,19 @@ type Bot struct {
|
|||
UserAgentRegex *string `json:"user_agent_regex"`
|
||||
PathRegex *string `json:"path_regex"`
|
||||
Action Rule `json:"action"`
|
||||
RemoteAddr []string `json:"remote_addresses"`
|
||||
Challenge *ChallengeRules `json:"challenge,omitempty"`
|
||||
}
|
||||
|
||||
var (
|
||||
ErrNoBotRulesDefined = errors.New("config: must define at least one (1) bot rule")
|
||||
ErrBotMustHaveName = errors.New("config.Bot: must set name")
|
||||
ErrBotMustHaveUserAgentOrPath = errors.New("config.Bot: must set either user_agent_regex, path_regex")
|
||||
ErrBotMustHaveUserAgentOrPath = errors.New("config.Bot: must set either user_agent_regex, path_regex, or remote_addresses")
|
||||
ErrBotMustHaveUserAgentOrPathNotBoth = errors.New("config.Bot: must set either user_agent_regex, path_regex, and not both")
|
||||
ErrUnknownAction = errors.New("config.Bot: unknown action")
|
||||
ErrInvalidUserAgentRegex = errors.New("config.Bot: invalid user agent regex")
|
||||
ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex")
|
||||
ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR")
|
||||
)
|
||||
|
||||
func (b Bot) Valid() error {
|
||||
|
@ -48,7 +51,7 @@ func (b Bot) Valid() error {
|
|||
errs = append(errs, ErrBotMustHaveName)
|
||||
}
|
||||
|
||||
if b.UserAgentRegex == nil && b.PathRegex == nil {
|
||||
if b.UserAgentRegex == nil && b.PathRegex == nil && (b.RemoteAddr == nil || len(b.RemoteAddr) == 0) {
|
||||
errs = append(errs, ErrBotMustHaveUserAgentOrPath)
|
||||
}
|
||||
|
||||
|
@ -68,6 +71,14 @@ func (b Bot) Valid() error {
|
|||
}
|
||||
}
|
||||
|
||||
if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 {
|
||||
for _, cidr := range b.RemoteAddr {
|
||||
if _, _, err := net.ParseCIDR(cidr); err != nil {
|
||||
errs = append(errs, ErrInvalidCIDR, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch b.Action {
|
||||
case RuleAllow, RuleChallenge, RuleDeny:
|
||||
// okay
|
||||
|
|
|
@ -129,6 +129,44 @@ func TestBotValid(t *testing.T) {
|
|||
},
|
||||
err: ErrChallengeRuleHasWrongAlgorithm,
|
||||
},
|
||||
{
|
||||
name: "invalid cidr range",
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
RemoteAddr: []string{"0.0.0.0/33"},
|
||||
},
|
||||
err: ErrInvalidCIDR,
|
||||
},
|
||||
{
|
||||
name: "only filter by IP range",
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
RemoteAddr: []string{"0.0.0.0/0"},
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "filter by user agent and IP range",
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
RemoteAddr: []string{"0.0.0.0/0"},
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "filter by path and IP range",
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
PathRegex: p("^.*$"),
|
||||
RemoteAddr: []string{"0.0.0.0/0"},
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, cs := range tests {
|
||||
|
|
12
cmd/anubis/internal/config/testdata/good/allow_everyone.json
vendored
Normal file
12
cmd/anubis/internal/config/testdata/good/allow_everyone.json
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"bots": [
|
||||
{
|
||||
"name": "everyones-invited",
|
||||
"remote_addresses": [
|
||||
"0.0.0.0/0",
|
||||
"::/0"
|
||||
],
|
||||
"action": "ALLOW"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -53,6 +53,7 @@ var (
|
|||
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
|
||||
target = flag.String("target", "http://localhost:3923", "target to reverse proxy to")
|
||||
healthcheck = flag.Bool("healthcheck", false, "run a health check against Anubis")
|
||||
debugXRealIPDefault = flag.String("debug-x-real-ip-default", "", "If set, replace empty X-Real-Ip headers with this value, useful only for debugging Anubis and running it locally")
|
||||
|
||||
//go:embed static botPolicies.json
|
||||
static embed.FS
|
||||
|
@ -210,9 +211,21 @@ func main() {
|
|||
|
||||
mux.HandleFunc("/", s.maybeReverseProxy)
|
||||
|
||||
srv := http.Server{Handler: mux}
|
||||
var h http.Handler
|
||||
h = mux
|
||||
h = internal.DefaultXRealIP(*debugXRealIPDefault, h)
|
||||
|
||||
srv := http.Server{Handler: h}
|
||||
listener, url := setupListener(*bindNetwork, *bind)
|
||||
slog.Info("listening", "url", url, "difficulty", *challengeDifficulty, "serveRobotsTXT", *robotsTxt, "target", *target, "version", anubis.Version)
|
||||
slog.Info(
|
||||
"listening",
|
||||
"url", url,
|
||||
"difficulty", *challengeDifficulty,
|
||||
"serveRobotsTXT", *robotsTxt,
|
||||
"target", *target,
|
||||
"version", anubis.Version,
|
||||
"debug-x-real-ip-default", *debugXRealIPDefault,
|
||||
)
|
||||
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
|
@ -364,11 +377,7 @@ type Server struct {
|
|||
}
|
||||
|
||||
func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request) {
|
||||
cr, rule := s.check(r)
|
||||
r.Header.Add("X-Anubis-Rule", cr.Name)
|
||||
r.Header.Add("X-Anubis-Action", string(cr.Rule))
|
||||
lg := slog.With(
|
||||
"check_result", cr,
|
||||
"user_agent", r.UserAgent(),
|
||||
"accept_language", r.Header.Get("Accept-Language"),
|
||||
"priority", r.Header.Get("Priority"),
|
||||
|
@ -376,6 +385,17 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request) {
|
|||
r.Header.Get("X-Forwarded-For"),
|
||||
"x-real-ip", r.Header.Get("X-Real-Ip"),
|
||||
)
|
||||
|
||||
cr, rule, err := s.check(r)
|
||||
if err != nil {
|
||||
lg.Error("check failed", "err", err)
|
||||
templ.Handler(base("Oh noes!", errorPage("Internal Server Error: administrator has misconfigured Anubis. Please contact the administrator and ask them to look for the logs around \"maybeReverseProxy\"")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
r.Header.Add("X-Anubis-Rule", cr.Name)
|
||||
r.Header.Add("X-Anubis-Action", string(cr.Rule))
|
||||
lg = lg.With("check_result", cr)
|
||||
policyApplications.WithLabelValues(cr.Name, string(cr.Rule)).Add(1)
|
||||
|
||||
ip := r.Header.Get("X-Real-Ip")
|
||||
|
@ -530,11 +550,22 @@ func (s *Server) renderIndex(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
|
||||
func (s *Server) makeChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
cr, rule := s.check(r)
|
||||
challenge := s.challengeFor(r, rule.Challenge.Difficulty)
|
||||
|
||||
lg := slog.With("user_agent", r.UserAgent(), "accept_language", r.Header.Get("Accept-Language"), "priority", r.Header.Get("Priority"), "x-forwarded-for", r.Header.Get("X-Forwarded-For"), "x-real-ip", r.Header.Get("X-Real-Ip"))
|
||||
|
||||
cr, rule, err := s.check(r)
|
||||
if err != nil {
|
||||
lg.Error("check failed", "err", err)
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
json.NewEncoder(w).Encode(struct {
|
||||
Error string `json:"error"`
|
||||
}{
|
||||
Error: "Internal Server Error: administrator has misconfigured Anubis. Please contact the administrator and ask them to look for the logs around \"makeChallenge\"",
|
||||
})
|
||||
return
|
||||
}
|
||||
lg = lg.With("check_result", cr)
|
||||
challenge := s.challengeFor(r, rule.Challenge.Difficulty)
|
||||
|
||||
json.NewEncoder(w).Encode(struct {
|
||||
Challenge string `json:"challenge"`
|
||||
Rules *config.ChallengeRules `json:"rules"`
|
||||
|
@ -547,16 +578,22 @@ func (s *Server) makeChallenge(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
|
||||
func (s *Server) passChallenge(w http.ResponseWriter, r *http.Request) {
|
||||
cr, rule := s.check(r)
|
||||
lg := slog.With(
|
||||
"user_agent", r.UserAgent(),
|
||||
"accept_language", r.Header.Get("Accept-Language"),
|
||||
"priority", r.Header.Get("Priority"),
|
||||
"x-forwarded-for", r.Header.Get("X-Forwarded-For"),
|
||||
"x-real-ip", r.Header.Get("X-Real-Ip"),
|
||||
"cr", cr,
|
||||
)
|
||||
|
||||
cr, rule, err := s.check(r)
|
||||
if err != nil {
|
||||
lg.Error("check failed", "err", err)
|
||||
templ.Handler(base("Oh noes!", errorPage("Internal Server Error: administrator has misconfigured Anubis. Please contact the administrator and ask them to look for the logs around \"passChallenge\".")), templ.WithStatus(http.StatusInternalServerError)).ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
lg = lg.With("check_result", cr)
|
||||
|
||||
nonceStr := r.FormValue("nonce")
|
||||
if nonceStr == "" {
|
||||
clearCookie(w)
|
||||
|
|
|
@ -5,13 +5,16 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"github.com/TecharoHQ/anubis/cmd/anubis/internal/config"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/yl2chen/cidranger"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -32,8 +35,9 @@ type Bot struct {
|
|||
Name string
|
||||
UserAgent *regexp.Regexp
|
||||
Path *regexp.Regexp
|
||||
Action config.Rule
|
||||
Action config.Rule `json:"action"`
|
||||
Challenge *config.ChallengeRules
|
||||
Ranger cidranger.Ranger
|
||||
}
|
||||
|
||||
func (b Bot) Hash() (string, error) {
|
||||
|
@ -77,6 +81,19 @@ func parseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedCon
|
|||
Action: b.Action,
|
||||
}
|
||||
|
||||
if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 {
|
||||
parsedBot.Ranger = cidranger.NewPCTrieRanger()
|
||||
|
||||
for _, cidr := range b.RemoteAddr {
|
||||
_, rng, err := net.ParseCIDR(cidr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[unexpected] range %s not parsing: %w", cidr, err)
|
||||
}
|
||||
|
||||
parsedBot.Ranger.Insert(cidranger.NewBasicRangerEntry(*rng))
|
||||
}
|
||||
}
|
||||
|
||||
if b.UserAgentRegex != nil {
|
||||
userAgent, err := regexp.Compile(*b.UserAgentRegex)
|
||||
if err != nil {
|
||||
|
@ -140,18 +157,47 @@ func cr(name string, rule config.Rule) CheckResult {
|
|||
}
|
||||
}
|
||||
|
||||
func (s *Server) checkRemoteAddress(b Bot, addr net.IP) bool {
|
||||
if b.Ranger == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
ok, err := b.Ranger.Contains(addr)
|
||||
if err != nil {
|
||||
log.Panicf("[unexpected] something very funky is going on, %q does not have a calculable network number: %v", addr.String(), err)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
// Check evaluates the list of rules, and returns the result
|
||||
func (s *Server) check(r *http.Request) (CheckResult, *Bot) {
|
||||
func (s *Server) check(r *http.Request) (CheckResult, *Bot, error) {
|
||||
host := r.Header.Get("X-Real-Ip")
|
||||
if host == "" {
|
||||
return zilch[CheckResult](), nil, fmt.Errorf("[misconfiguration] X-Real-Ip header is not set")
|
||||
}
|
||||
|
||||
addr := net.ParseIP(host)
|
||||
if addr == nil {
|
||||
return zilch[CheckResult](), nil, fmt.Errorf("[misconfiguration] %q is not an IP address", host)
|
||||
}
|
||||
|
||||
for _, b := range s.policy.Bots {
|
||||
if b.UserAgent != nil {
|
||||
if b.UserAgent.MatchString(r.UserAgent()) {
|
||||
return cr("bot/"+b.Name, b.Action), &b
|
||||
if uaMatch := b.UserAgent.MatchString(r.UserAgent()); uaMatch || (uaMatch && s.checkRemoteAddress(b, addr)) {
|
||||
return cr("bot/"+b.Name, b.Action), &b, nil
|
||||
}
|
||||
}
|
||||
|
||||
if b.Path != nil {
|
||||
if b.Path.MatchString(r.URL.Path) {
|
||||
return cr("bot/"+b.Name, b.Action), &b
|
||||
if pathMatch := b.Path.MatchString(r.URL.Path); pathMatch || (pathMatch && s.checkRemoteAddress(b, addr)) {
|
||||
return cr("bot/"+b.Name, b.Action), &b, nil
|
||||
}
|
||||
}
|
||||
|
||||
if b.Ranger != nil {
|
||||
if s.checkRemoteAddress(b, addr) {
|
||||
return cr("bot/"+b.Name, b.Action), &b, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -162,5 +208,5 @@ func (s *Server) check(r *http.Request) (CheckResult, *Bot) {
|
|||
ReportAs: defaultDifficulty,
|
||||
Algorithm: config.AlgorithmFast,
|
||||
},
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
|
|
@ -40,6 +40,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
- [KagiBot](https://kagi.com/bot) is allowed through the filter [#44](https://github.com/TecharoHQ/anubis/pull/44)
|
||||
- Fixed hang when navigator.hardwareConcurrency is undefined
|
||||
- Support Unix domain sockets [#45](https://github.com/TecharoHQ/anubis/pull/45)
|
||||
- Allow filtering by remote addresses:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "qwantbot",
|
||||
"user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": ["91.242.162.0/24"]
|
||||
}
|
||||
```
|
||||
|
||||
This also works at an IP range level:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "internal-network",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": ["100.64.0.0/10"]
|
||||
}
|
||||
```
|
||||
|
||||
## 1.13.0
|
||||
|
||||
|
|
|
@ -68,6 +68,8 @@ There are three actions that can be returned from a rule:
|
|||
|
||||
Name your rules in lower case using kebab-case. Rule names will be exposed in Prometheus metrics.
|
||||
|
||||
### Challenge configuration
|
||||
|
||||
Rules can also have their own challenge settings. These are customized using the `"challenge"` key. For example, here is a rule that makes challenges artificially hard for connections with the substring "bot" in their user agent:
|
||||
|
||||
```json
|
||||
|
@ -91,6 +93,33 @@ Challenges can be configured with these settings:
|
|||
| `report_as` | `4` | What difficulty the UI should report to the user. Useful for messing with industrial-scale scraping efforts. |
|
||||
| `algorithm` | `"fast"` | The algorithm used on the client to run proof-of-work calculations. This must be set to `"fast"` or `"slow"`. See [Proof-of-Work Algorithm Selection](./algorithm-selection) for more details. |
|
||||
|
||||
### Remote IP based filtering
|
||||
|
||||
The `remote_addresses` field of a Bot rule allows you to set the IP range that this ruleset applies to.
|
||||
|
||||
For example, you can allow a search engine to connect if and only if its IP address matches the ones they published:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "qwantbot",
|
||||
"user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": ["91.242.162.0/24"]
|
||||
}
|
||||
```
|
||||
|
||||
This also works at an IP range level without any other checks:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "internal-network",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": ["100.64.0.0/10"]
|
||||
}
|
||||
```
|
||||
|
||||
## Risk calculation for downstream services
|
||||
|
||||
In case your service needs it for risk calculation reasons, Anubis exposes information about the rules that any requests match using a few headers:
|
||||
|
||||
| Header | Explanation | Example |
|
||||
|
|
1
go.mod
1
go.mod
|
@ -7,6 +7,7 @@ require (
|
|||
github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456
|
||||
github.com/golang-jwt/jwt/v5 v5.2.1
|
||||
github.com/prometheus/client_golang v1.21.1
|
||||
github.com/yl2chen/cidranger v1.0.2
|
||||
)
|
||||
|
||||
require (
|
||||
|
|
8
go.sum
8
go.sum
|
@ -16,6 +16,7 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
|
|||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cli/browser v1.3.0 h1:LejqCrpWr+1pRqmEPDGnTZOjsMe7sehifLynZJuqJpo=
|
||||
github.com/cli/browser v1.3.0/go.mod h1:HH8s+fOAxjhQoBUAsKuPCbqUuxZDhQ2/aD+SzsEfBTk=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ=
|
||||
|
@ -47,6 +48,7 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
|
|||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/natefinch/atomic v1.0.1 h1:ZPYKxkqQOx3KZ+RsbnP/YsgvxWQPGxjC0oBt2AhwV0A=
|
||||
github.com/natefinch/atomic v1.0.1/go.mod h1:N/D/ELrljoqDyT3rZrsUmtsuzvHkeB/wWjHV22AZRbM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.21.1 h1:DOvXXTqVzvkIewV/CDPFdejpMCGeMcbGCQ8YOmu+Ibk=
|
||||
|
@ -57,8 +59,12 @@ github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ
|
|||
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU=
|
||||
github.com/yl2chen/cidranger v1.0.2/go.mod h1:9U1yz7WPYDwf0vpNWFaeRh0bjwz5RVgRy/9UEQfHl0g=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
|
@ -137,5 +143,7 @@ golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ
|
|||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM=
|
||||
google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"net/http"
|
||||
|
||||
"github.com/TecharoHQ/anubis"
|
||||
|
@ -8,13 +9,27 @@ import (
|
|||
|
||||
// UnchangingCache sets the Cache-Control header to cache a response for 1 year if
|
||||
// and only if the application is compiled in "release" mode by Docker.
|
||||
func UnchangingCache(h http.Handler) http.Handler {
|
||||
func UnchangingCache(next http.Handler) http.Handler {
|
||||
if anubis.Version == "devel" {
|
||||
return h
|
||||
return next
|
||||
}
|
||||
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Cache-Control", "public, max-age=31536000")
|
||||
h.ServeHTTP(w, r)
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
|
||||
// DefaultXRealIP sets the X-Real-Ip header to the given value if and only if
|
||||
// it is not an empty string.
|
||||
func DefaultXRealIP(defaultIP string, next http.Handler) http.Handler {
|
||||
if defaultIP == "" {
|
||||
slog.Debug("skipping middleware, defaultIP is empty")
|
||||
return next
|
||||
}
|
||||
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
r.Header.Set("X-Real-Ip", defaultIP)
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue