aboutsummaryrefslogtreecommitdiff
path: root/IkiWiki/Plugin/blogspam.pm
blob: 3835f52ca2b0a46df1f3a436f69b7bb576431e05 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/perl
package IkiWiki::Plugin::blogspam;

use warnings;
use strict;
use IkiWiki 3.00;
use Encode;

my $defaulturl='http://test.blogspam.net:9999/';
my $client;

sub import {
	hook(type => "getsetup", id => "blogspam",  call => \&getsetup);
	hook(type => "checkconfig", id => "blogspam", call => \&checkconfig);
	hook(type => "checkcontent", id => "blogspam", call => \&checkcontent);
}

sub getsetup () {
	return
		plugin => {
			safe => 1,
			rebuild => 0,
			section => "auth",
		},
		blogspam_pagespec => {
			type => 'pagespec',
			example => 'postcomment(*)',
			description => 'PageSpec of pages to check for spam',
			link => 'ikiwiki/PageSpec',
			safe => 1,
			rebuild => 0,
		},
		blogspam_options => {
			type => "string",
			example => "blacklist=1.2.3.4,blacklist=8.7.6.5,max-links=10",
			description => "options to send to blogspam server",
			link => "http://blogspam.net/api/2.0/testComment.html#options",
			safe => 1,
			rebuild => 0,
		},
		blogspam_server => {
			type => "string",
			default => $defaulturl,
			description => "blogspam server JSON url",
			safe => 1,
			rebuild => 0,
		},
}

sub checkconfig () {
	# This is done at checkconfig time because printing an error
	# if the module is missing when a spam is posted would not
	# let the admin know about the problem.
	eval q{
		use JSON;
		use HTTP::Request;
	};
	error $@ if $@;

	# Using the for_url parameter makes sure we crash if used
	# with an older IkiWiki.pm that didn't automatically try
	# to use LWPx::ParanoidAgent.
	$client=useragent(for_url => $config{blogspam_server});
}

sub checkcontent (@) {
	my %params=@_;
	my $session=$params{session};
	
	my $spec='!admin()';
 	if (exists $config{blogspam_pagespec} &&
	    length $config{blogspam_pagespec}) {
		$spec.=" and (".$config{blogspam_pagespec}.")";
	}

	my $user=$session->param("name");
	return undef unless pagespec_match($params{page}, $spec,
		(defined $user ? (user => $user) : ()),
		(defined $session->remote_addr() ? (ip => $session->remote_addr()) : ()),
		location => $params{page});

	my $url=$defaulturl;
	$url = $config{blogspam_server} if exists $config{blogspam_server};

	my @options = split(",", $config{blogspam_options})
		if exists $config{blogspam_options};

	# Allow short comments and whitespace-only edits, unless the user
	# has overridden min-words themselves.
	push @options, "min-words=0"
		unless grep /^min-words=/i, @options;
	# Wiki pages can have a lot of urls, unless the user specifically
	# wants to limit them.
	push @options, "exclude=lotsaurls"
		unless grep /^max-links/i, @options;
	# Unless the user specified a size check, disable such checking.
	push @options, "exclude=size"
		unless grep /^(?:max|min)-size/i, @options;
	# This test has absurd false positives on words like "alpha"
	# and "buy".
	push @options, "exclude=stopwords";

	my %req=(
		ip => $session->remote_addr(),
		comment => encode_utf8(defined $params{diff} ? $params{diff} : $params{content}),
		subject => encode_utf8(defined $params{subject} ? $params{subject} : ""),
		name => encode_utf8(defined $params{author} ? $params{author} : ""),
		link => encode_utf8(exists $params{url} ? $params{url} : ""),
		options => join(",", @options),
		site => encode_utf8($config{url}),
		version => "ikiwiki ".$IkiWiki::version,
	);
	eval q{use JSON; use HTTP::Request}; # errors handled in checkconfig()
	my $res = $client->request(
		HTTP::Request->new(
			'POST',
			$url,
			[ 'Content-Type' => 'application/json' ],
			to_json(\%req),
		),
	);

	if (! ref $res || ! $res->is_success()) {
		debug("failed to get response from blogspam server ($url)");
		return undef;
	}
	my $details = from_json($res->content);
	if ($details->{result} eq 'SPAM') {
		eval q{use Data::Dumper};
		debug("blogspam server reports $details->{reason}: ".Dumper(\%req));
		return gettext("Sorry, but that looks like spam to <a href=\"http://blogspam.net/\">blogspam</a>: ").$details->{reason};
	}
	elsif ($details->{result} ne 'OK') {
		debug("blogspam server failure: ".$res->content);
		return undef;
	}
	else {
		return undef;
	}
}

1