1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#!/usr/bin/perl -w
#
# Version history:
#
# sep-offprint 1.11 - John MacFarlane - August 16, 2007
# + use File::Spec and File::Basename for platform-independent
# manipulation of files and directories.
# sep-offprint 1.1 - John MacFarlane - August 16, 2007
# + added --output|o option to specify output filename
# sep-offprint 1.0 - John MacFarlane - July 19, 2007
# + include supplements in the order they are linked to
# + always put notes at the end
# + removed --localpath option; use file:/// URL instead
# sep-offprint 0.9 - John MacFarlane - March 8, 2007
# + fixed regex for stripping off SEP header (thanks to George Galfalvi)
# sep-offprint 0.8 - John MacFarlane - February 22, 2007
# + strip off "(Stanford Encyclopedia of Philosophy)" from
# HTML title (thanks to Uri Nodelman)
# sep-offprint 0.7 - John MacFarlane - January 23, 2007
# + include supplements, if present (thanks to Dan Robins)
# + removed unnecessary call to lwp-rget (Dan Robins)
# + added --linkcolor option (JM and Dan Robins)
# + added error checking: error exit if index.html not found
# + fixed '--version' and adjusted '--help' output
# sep-offprint 0.6 - John MacFarlane - August 30, 2006
# sep-offprint 0.5 - John MacFarlane - August 25, 2006
# sep-offprint 0.4 - John MacFarlane - August 22, 2006
# sep-offprint 0.3 - John MacFarlane - May 25, 2005
#
# Synopsis:
#
# produces a PDF or postscript "offprint" of a Stanford
# Encyclopedia of Philosophy (SEP) article
#
# Argument is an entry name from SEP, as it appears in the URL.
# For example, to get the article on classical logic, which is at
# http://plato.stanford.edu/entries/logic-classical/, just type
#
# perl sep-offprint logic-classical
#
# and it will create logic-classical.pdf.
#
# There are many command-line options. For a list, type
#
# perl sep-offprint --help
#
# The programs html2ps and ps2pdf must be in the user's path:
#
# html2ps can be found at http://user.it.uu.se/~jan/html2ps.html.
# Download the tarball or zip file and run the "install" script.
#
# ps2pdf is part of Ghostscript -- many users will have it
# already: http://www.cs.wisc.edu/~ghost/doc/AFPL/get851.htm
#
# In addition, the LWP package for Perl must be installed.
#
# For more information and updates, see
# http://philosophy.berkeley.edu/macfarlane/sep-offprint.html

my $version_number = '1.11';

use Getopt::Long;
use File::Temp qw/ tempdir /;
use File::Copy;
use File::Basename;
use File::Spec;

# printhelp - print the usage message on STDERR and terminate the script.
#
# Takes no arguments and never returns: the text is emitted through die(),
# so the process ends with a non-zero exit status.  The message ends in a
# newline, which keeps Perl from appending "at ... line N" to it.
#
# The default shown for --align is "justify", which is the value the script
# itself uses as its default and writes into the html2ps style sheet.

sub printhelp {
    die
"Produces a PDF offprint from a Stanford Encyclopedia of Philosophy article.
(http://plato.stanford.edu/)

Usage: sep-offprint [options] <entry name>

Examples: sep-offprint russell
sep-offprint --1up --ps --paper a4 frege

Options (* indicates a default):

--1up print one page per sheet, portrait orientation
--2up print two pages per sheet, landscape orientation*
--ps produce postscript (PS) output
--pdf produce PDF output*
--output <filename> name of output file (defaults to <entryname>.ps|pdf)
--font <font> use <font> (Times*, Helvetica, Palatino, Courier)
--size <size> use <size> (10pt, 12pt, 14pt*, 16pt)
--align <align> use <align> (left, justify*)
--paper <papersize> specify <papersize> (letter*, legal, a4)
--linkcolor <color> specify color of hyperlinks (black*, gray, blue, ...)
--help this message
--version prints version number\n";
}

# slurp - read an entire file and return its contents as one string.
#
# Argument: the name of the file to read.
# Returns:  the complete contents of the file.
# Dies if the file cannot be opened; the message includes the system error.

sub slurp {
    my $file = shift;

    # Three-argument open with a lexical handle: the name is used literally
    # (mode characters and trailing whitespace in it are not interpreted),
    # and no global FILE handle is touched.
    open(my $fh, '<', $file) or die "Couldn't open $file to read: $!";

    # Undefine the input record separator for this scope only, so that a
    # single read returns the whole file.
    local $/;
    my $contents = <$fh>;
    close($fh);
    return $contents;
}

# uniq - remove duplicates from a list, preserving the order in which each
# value first appears.
#
# Arguments: the list of values to filter.
# Returns:   in list context, the values with later repeats dropped;
#            in scalar context, the number of distinct values (grep's count).

sub uniq {
    my @in = @_;

    # Lexical bookkeeping hash: nothing leaks into a package-level %seen.
    my %seen;
    return grep { !$seen{$_}++ } @in;
}

# preprocess_html - rewrite an HTML file in place, stripping the navigation
# header and replacing numeric character references with named entities,
# plain letters or images.
#
# Argument: the name of the file to process.  When called without an
#           argument the name is taken from $_, so the statement-modifier
#           form "preprocess_html foreach LIST" keeps working.
# Returns:  1 on success.
# Dies if the file cannot be read, opened for writing, or completely written.

sub preprocess_html {
    my $file = @_ ? shift : $_;
    my $contents = slurp($file);

    # get rid of header stuff
    $contents =~ s/<body>.*?<!--DO NOT MODIFY THIS LINE AND ABOVE-->/<body><div id="content"><div id="aueditable">/gs;

    # get rid of "(Stanford Encyclopedia of Philosophy)" in title:
    $contents =~ s/<title>(.*)\ \(Stanford Encyclopedia of Philosophy\)/<title>$1/;

    # make publication date into regular paragraph
    $contents =~ s/<br \/><span class="xsmall">(.*)<\/span><\/h1>/<\/h1><p>$1<\/p>/g;

    # center copyright notice
    $contents =~ s/<div id="foot">(.*?)<\/div>/<center>$1<\/center>/gs;

    # replace unicode character references
    # (lexical table: nothing is left behind in a package-level hash)
    my %replacements = (
        "&\#133;" => "&hellip;",
        "&\#145;" => "&lsquo;",
        "&\#146;" => "&rsquo;",
        "&\#147;" => "&ldquo;",
        "&\#148;" => "&rdquo;",
        "&\#149;" => "&bull;",
        "&\#150;" => "&minus;",
        "&\#257;" => "a",
        "&\#261;" => "a",
        "&\#263;" => "c",
        "&\#269;" => "c",
        "&\#281;" => "e",
        "&\#299;" => "i",
        "&\#321;" => "L",
        "&\#322;" => "l",
        "&\#324;" => "n",
        "&\#333;" => "o",
        "&\#345;" => "r",
        "&\#346;" => "S",
        "&\#347;" => "s",
        "&\#351;" => "s",
        "&\#363;" => "u",
        "&\#365;" => "u",
        "&\#369;" => "u",
        "&\#378;" => "z",
        "&\#380;" => "z",
        "&\#381;" => "Z",
        "&\#599;" => "u",
        "&\#768;" => "",
        "&\#769;" => "",
        "&\#770;" => "",
        "&\#771;" => "",
        "&\#772;" => "",
        "&\#773;" => "",
        "&\#775;" => "",
        "&\#803;" => "",
        "&\#8209;" => "-",
        "&\#8600;" => "<img alt=\"southeast-arrow\" src=\"http:\/\/plato.stanford.edu\/symbols\/searrow.gif\">",
        "<sup>&\#9484;<\/sup>" => "<img alt=\"left-corner-quote\" src=\"http:\/\/plato.stanford.edu\/symbols\/l-corner-quote.gif\">",
        "<sup>&\#9488;<\/sup>" => "<img alt=\"right-corner-quote\" src=\"http:\/\/plato.stanford.edu\/symbols\/r-corner-quote.gif\">",
        "&\#8463;" => "<img alt=\"hbar\" src=\"http:\/\/plato.stanford.edu\/symbols\/hbar.gif\">",
        "&\#9633;" => "<img alt=\"Box\" src=\"http:\/\/plato.stanford.edu\/symbols\/Box.gif\">"
    );
    for my $ref (keys %replacements) {
        my $rep = $replacements{$ref};

        # \Q...\E: the keys are literal text, never regex syntax.
        $contents =~ s/\Q$ref\E/$rep/g;
    }

    # write back to file
    open(my $out, '>', $file) or die "Couldn't open $file to write: $!";
    print {$out} $contents;

    # Buffered write errors only surface at close, so check it.
    close($out) or die "Couldn't finish writing $file: $!";
    return 1;
}

#
# parse command-line options
#
# The option values, the derived entry name, the source URL and the output
# path are stored in the same variables as before, because the later
# sections of the script read them.
#

GetOptions( '1up|1'       => \$oneup,
            '2up|2'       => \$twoup,
            'ps'          => \$ps,
            'pdf'         => \$pdf,
            'output|o=s'  => \$outfile,
            'font=s'      => \$fontfamily,
            'size=s'      => \$fontsize,
            'align=s'     => \$textalign,
            'paper=s'     => \$papersize,
            'linkcolor=s' => \$linkcolor,
            'help|h'      => \$help,
            'version|v'   => \$version)
    or printhelp();    # unknown or malformed option: show usage and stop

if ($version) { die "sep-offprint $version_number\n"; }

# An explicit --help, or no entry name at all, both end in the usage message.
if ($help or not @ARGV) { printhelp(); }

$sourceArg = $ARGV[0];

# remove trailing slash, if any, from sourceArg:
$sourceArg =~ s{/$}{};

# derive entry name from argument:
$entryname = $sourceArg;

# remove uppercase and spaces
$entryname =~ tr/A-Z/a-z/;
$entryname =~ tr/ /-/;

# remove /index.html if specified
$entryname =~ s{/index\.html$}{};

# remove URL prefix (everything before slash)
$entryname =~ s{.*/}{};

if ($sourceArg =~ /^file:/) {
    # file URL was specified - use local source.  The download step appends
    # "/index.html" to $source itself, so drop one that was typed explicitly
    # rather than ending up with ".../index.html/index.html".
    ($source = $sourceArg) =~ s{/index\.html$}{};
}
else {
    $source = "http://plato.stanford.edu/entries/$entryname/";
}
$footer = $source;

$current = File::Spec->curdir; # working directory from which sep-offprint is run

# fill in defaults for everything that was not given on the command line
if (not ($pdf or $ps)) { $pdf = 1 }
$twoup        = $oneup ? 0 : 1;
$fontsize   ||= "14pt";
$outfile    ||= $entryname;
$fontfamily ||= "Times";
$textalign  ||= "justify";
$papersize  ||= "letter";
$linkcolor  ||= "black";

# Earlier help output listed "justified"; the value is written verbatim into
# the style sheet, where the keyword is "justify", so accept the old spelling.
$textalign = "justify" if $textalign eq "justified";

# strip .pdf or .ps extension from outfile name, and add path:
# (the absolute path is computed here, before the script changes directory)
my($filename, $directories, $suffix) = fileparse($outfile,qr/\.pdf|\.ps/);
if (not $directories) { $directories = $current };
my $outpath = File::Spec->rel2abs(File::Spec->catfile($directories,$filename));

# create temporary directory (CLEANUP => 1 asks File::Temp to remove it at exit)
$temp = tempdir ( CLEANUP => 1 );

# get all the source files and put them in temp directory,
# then change to temp directory

# The download below and the scratch files created afterwards use relative
# names, so stop here if the temp directory cannot be entered; otherwise
# they would be written into the directory the script was started from.
chdir $temp or die "Could not change to temporary directory $temp: $!\n";

# download all the HTML files
print STDERR "Retrieving files...\n";

# stderr is merged into the captured text; the names of the downloaded
# .html files are later extracted from it.
$downloadedFiles = `lwp-rget --limit=200 $source/index.html 2>&1`;
(-e "index.html") or die "Could not retrieve files from $source\nAre you sure you have the right entry name?\n";

# create blank html file to work around html2ps bug.
# without this blank file after notes.html, html2ps will cut off
# the last page of an entry if it occurs in the left column in 2up mode.

$blank = "blankpage";

# Lexical handle and three-argument open; the shared bareword FILE handle
# is no longer used here.
open(my $blank_fh, '>', $blank) or die "unable to open $blank: $!";

print {$blank_fh} <<EOF;
<html>
<head>
<title>&nbsp;</title>
</head>
<body>
<p>&nbsp;</p>
</body>
</html>
EOF

# Buffered write errors only show up at close, so check it.
close($blank_fh) or die "unable to write $blank: $!";

# create a configuration file with appropriate footers
#
# The style settings chosen on the command line ($fontsize, $fontfamily,
# $textalign, $linkcolor, $twoup, $papersize) and the $footer text are
# interpolated into the heredoc; \@ and \$ keep "@page", "@html2ps", "$T"
# and "$N" literal in the written file.

$html2psrc = "html2psrc";

# Lexical handle and three-argument open; the shared bareword FILE handle
# is no longer used here.
open(my $rc_fh, '>', $html2psrc) or die "unable to open $html2psrc: $!";

print {$rc_fh} <<EOF;
BODY {
font-size: $fontsize;
font-family: $fontfamily;
text-align: $textalign;
}
A:link {
color: $linkcolor;
}
\@page {
margin-left: 2.5cm;
margin-right: 2.5cm;
margin-top: 2.5cm;
margin-bottom: 2.5cm;
}
\@html2ps {
option {
twoup: $twoup;
landscape: $twoup;
number: 0;
}
paper { type: $papersize }
header {
right: "STANFORD ENCYCLOPEDIA OF PHILOSOPHY";
left: \$T;
}
footer {
left: \$N;
right: $footer;
}
}
EOF

# Buffered write errors only show up at close, so check it.
close($rc_fh) or die "unable to write $html2psrc: $!";

# name of temporary file to hold postscript output of html2ps
$pstemp = "pstemp";

# preprocess all the html files in the working (i.e., temp) directory
# ($_ is set by the loop and also passed explicitly)
preprocess_html($_) foreach glob('*.html');

#
# determine the order in which the HTML pages should be processed:
#

# every line of the captured download output that ends in ".html"
@htmlFiles = $downloadedFiles =~ /^.*\.html$/gim;

# make a space-separated list of the HTML files to process, in order
my $orderedHtmlFiles = join(' ', @htmlFiles);

# set $notes to "notes.html" if there are notes
my $notes = "";
if ($orderedHtmlFiles =~ /notes\.html/) {
    $notes = "notes.html"
}

# discard index.html and notes.html from the list
$orderedHtmlFiles =~ s/(index|notes)\.html//g;

print STDERR "Creating offprint...\n";

# call html2ps to create the postscript version of the entry:
# index.html first, then the other pages in download order, then the notes
# (if any), then the blank page.  The command is passed as a list so that no
# shell interprets the downloaded names; splitting on whitespace yields the
# same words a shell would have seen for plain file names.
my @html2ps_cmd = ('html2ps', '-D', '-U', '-f', $html2psrc, '-o', $pstemp,
                   'index.html',
                   split(' ', $orderedHtmlFiles),
                   ($notes ? ($notes) : ()),
                   $blank);
if (system(@html2ps_cmd) != 0) {
    warn $? == -1 ? "Could not run html2ps: $!\n"
                  : "html2ps exited with status " . ($? >> 8) . "\n";
}

# Without the postscript file neither output format can be produced.
(-e $pstemp) or die "html2ps did not produce any output; no offprint created\n";

# create pdf if requested
# (list form: an output path containing spaces is passed intact)
if ($pdf) {
    if (system('ps2pdf', "-sPAPERSIZE=$papersize", $pstemp, "$outpath.pdf") == 0) {
        print "Created $outpath.pdf\n";
    }
    else {
        warn "ps2pdf failed; $outpath.pdf was not created\n";
    }
}

# copy ps file if requested
if ($ps) {
    if (copy($pstemp, "$outpath.ps")) {
        print "Created $outpath.ps\n";
    }
    else {
        warn "Could not copy postscript output to $outpath.ps: $!\n";
    }
}

# note: temporary directory will be deleted automatically on exit