Old file.c
1 /*
2 * file.c
3 */
4
5 /*
6 * mpage: a program to reduce pages of print so that several pages
7 * of output appear on one printed page.
8 *
9 * Copyright (c) 1994-2004 Marcel J.E. Mol, The Netherlands
10 * Copyright (c) 1988 Mark P. Hahn, Herndon, Virginia
11 *
12 * Permission is granted to anyone to make or distribute verbatim
13 * copies of this document as received, in any medium, provided
14 * that this copyright notice is preserved, and that the
15 * distributor grants the recipient permission for further
16 * redistribution as permitted by this notice.
17 *
18 */
19
20
21 #include "mpage.h"
22
23
24 static int looks_utf8(FILE *fp);
25
26 /*
27 * do_file converts one file into postscript for output. The file type is
28 * determined then the proper conversion routine is selected.
29 */
30 void
31 do_file(fname, asheet, outfd)
32 char *fname;
33 struct sheet *asheet;
34 FILE *outfd;
35 {
36 FILE *fd;
37 int in_type;
38
39 /*
40 * Open fname and try to figure out what type of file it is
41 */
42 if ((fd = fopen(fname, "r")) == NULL) {
43 fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
44 perror(MPAGE);
45 return;
46 }
47
48 /*
49 * if we have the pr option, then we have to assume it's a text file
50 */
51 if (opt_pr || opt_input == IN_ASCII)
52 in_type = IN_ASCII;
53 else {
54 /*
55 * check for the cutomary characters that flag a postscript file
56 */
57 if (ps_check(fd))
58 in_type = IN_PS;
59 else
60 in_type = IN_ASCII;
61 }
62
63 /*
64 * For text input check if input is UTF-8 or not
65 */
66 if (in_type == IN_ASCII && check_utf8 && looks_utf8 (fd))
67 use_utf8 = 1;
68
69 (void) fclose(fd);
70
71 if (opt_pr) {
72 do_pr_file(fname, asheet, outfd);
73 return;
74 }
75
76
77 /*
78 * if not using pr(1), open fname and run th file trough the
79 * specific processor.
80 */
81 if ((fd = fopen(fname, "r")) == NULL) {
82 fprintf(stderr, "%s: cannot open %s\n", MPAGE, fname);
83 perror(MPAGE);
84 return;
85 }
86
87 switch (in_type) {
88 case IN_ASCII: do_text_doc(fd, asheet, outfd, fname);
89 break;
90 case IN_PS: do_ps_doc(fd, asheet, outfd, fname);
91 break;
92 /* Default figure out ourselfes */
93 }
94
95 (void) fclose(fd);
96
97 return;
98
99 } /* do_file */
100
101
102
103 /*
104 * do_pr_file processes one text file into postscript, but first runs the file
105 * through pr(1).
106 */
107 void
108 do_pr_file(fname, asheet, outfd)
109 char *fname;
110 struct sheet *asheet;
111 FILE *outfd;
112 {
113 FILE *fd;
114 char command[LINESIZE];
115
116 /*
117 * build the proper command based upon a specified
118 * header or not
119 */
120 #define DASHES "-- "
121 if (opt_header != NULL)
122 (void)sprintf(command, "%s -l%d -w%d -h \"%s\" %s%s", prprog,
123 asheet->sh_plength, asheet->sh_cwidth, opt_header,
124 fname[0] == '-' ? DASHES : "", fname);
125 else
126 (void)sprintf(command, "%s -l%d -w%d %s%s", prprog,
127 asheet->sh_plength, asheet->sh_cwidth,
128 fname[0] == '-' ? DASHES : "", fname);
129 /*
130 * open a pipe to the proper pr(1) command, and pr provides
131 * us with the input
132 */
133 if ((fd = popen(command, "r")) == NULL) {
134 fprintf(stderr, "%s: cannot create pipe for '%s'\n", MPAGE, command);
135 perror(MPAGE);
136 }
137 else {
138 do_text_doc(fd, asheet, outfd, fname);
139 (void)pclose(fd);
140 }
141
142 return;
143
144 } /* do_pr_file */
145
146
147
148 /*
149 * do_stdin uses do_????_doc to process the standard input
150 */
151 void
152 do_stdin(asheet, outfd)
153 struct sheet *asheet;
154 FILE *outfd;
155 {
156 #if 1
157 FILE *fd;
158 char buffer[LINESIZE];
159 char tmpfile[LINESIZE];
160 int incnt, outcnt;
161 int tmpfd;
162
163 /*
164 * Now the utf8 patch is in we always create a temporary file.
165 * So now is the time to just create a temp file and continue
166 * as if a filename was passed. This has some minor change
167 * on the output pages as it does nit show <stdin> anymore
168 * but the tmpfilename
169 */
170
171 (void) strcpy(tmpfile, "/tmp/mpage-stdin-XXXXXX");
172 if ( (tmpfd = mkstemp(tmpfile)) == -1) {
173 fprintf(stderr, "%s: cannot create temporary file", MPAGE);
174 perror(MPAGE);
175 return;
176 }
177 close(tmpfd);
178 if ((fd = fopen (tmpfile, "w")) == NULL) {
179 fprintf(stderr, "%s: cannot reopen temporary file", MPAGE);
180 perror(MPAGE);
181 return;
182 }
183
184 do {
185 incnt = fread(buffer, 1, sizeof buffer, stdin);
186 outcnt = fwrite(buffer, 1, incnt, fd);
187 } while (incnt && outcnt);
188 (void) fclose(fd);
189
190 do_file(tmpfile, asheet, outfd);
191
192 (void) unlink(tmpfile);
193
194 return;
195
196 #else
197
198 FILE *fd;
199 char command[LINESIZE];
200 char tmpfile[LINESIZE];
201 char buffer[LINESIZE];
202 int incnt, outcnt;
203 int tmpfd;
204 if (opt_pr) {
205 Debug(DB_STDIN, "%%do_stdin: pr option selects text\n", 0);
206 /*
207 * if pr(1) is to be used we need to read the input
208 * and pass it to a pr(1) command which will write
209 * a temporary file; this temporary file will then
210 * be used as input to the do_doc routine
211 */
212 (void)strcpy(tmpfile, "/tmp/mpageXXXXXX");
213 if ( (tmpfd = mkstemp(tmpfile)) == -1) {
214 fprintf(stderr, "%s: cannot create temporary file", MPAGE);
215 perror(MPAGE);
216 return;
217 }
218 close(tmpfd);
219 if (opt_header != NULL)
220 (void)sprintf(command, "%s -l%d -w%d -h \"%s\" > %s", prprog,
221 asheet->sh_plength, asheet->sh_cwidth,
222 opt_header, tmpfile);
223 else
224 (void)sprintf(command, "%s -l%d -w%d > %s", prprog,
225 asheet->sh_plength, asheet->sh_cwidth, tmpfile);
226 /*
227 * open a pipe to the pr(1) command which will create a
228 * temporary file for convertin into PS
229 */
230 if ((fd = popen(command, "w")) == NULL) {
231 fprintf(stderr, "%s: cannot create pipe for '%s'\n",
232 MPAGE, command);
233 perror(MPAGE);
234 return;
235 }
236 #ifdef DEBUG
237 errno = 0;
238 Debug(DB_STDIN, "%% sizeof buffer == %d\n", sizeof buffer);
239 #endif
240 /*
241 * read input to mpage and pass it onto the pr(1) command
242 */
243 do {
244 incnt = fread(buffer, 1, sizeof buffer, stdin);
245 outcnt = fwrite(buffer, 1, incnt, fd);
246 Debug(DB_STDIN, "%% incnt == %d,", incnt);
247 Debug(DB_STDIN, " outcnt == %d,", outcnt);
248 Debug(DB_STDIN, " errno == %d\n", errno);
249 } while (incnt && outcnt);
250 Debug(DB_STDIN, "%% Done with while\n", 0);
251 (void)pclose(fd);
252 Debug(DB_STDIN, "%% closed pipe, looking for tmpfile\n", 0);
253 /*
254 * now open the temporary file and use do_doc to
255 * convert it to PS
256 */
257 if ((fd = fopen(tmpfile, "r")) == NULL) {
258 fprintf(stderr, "%s: cannot open %s\n", MPAGE, tmpfile);
259 perror(MPAGE);
260 }
261 else {
262 /*
263 * check if the input is UTF-8 or not
264 */
265 if (looks_utf8 (fd))
266 use_utf8 = 1;
267 Debug(DB_STDIN, "%% got tmpfile, now do_doc\n", 0);
268 do_text_doc(fd, asheet, outfd, command);
269 (void)fclose(fd);
270 }
271 /*
272 * tidy up by removing our temp file
273 */
274 Debug(DB_STDIN, "%% now remove '%s'\n", tmpfile);
275 (void)unlink(tmpfile);
276 }
277 else {
278 FILE *tfd;
279 int dont_close = 0;
280
281 /*
282 * store the input to the temporary file to guess encoding correctly
283 */
284 (void)strcpy(tmpfile, "/tmp/mpageXXXXXX");
285 if ( (tmpfd = mkstemp(tmpfile)) == -1) {
286 fprintf(stderr, "%s: cannot create temporary file", MPAGE);
287 tmpfile[0] = 0;
288 }
289 close(tmpfd);
290 if (tmpfile[0] && (tfd = fopen (tmpfile, "w"))) {
291 do {
292 incnt = fread(buffer, 1, sizeof buffer, stdin);
293 outcnt = fwrite(buffer, 1, incnt, tfd);
294 } while (incnt && outcnt);
295 fclose (tfd);
296 if ((fd = fopen(tmpfile, "r")) == NULL) {
297 fprintf(stderr, "%s: cannot open %s\n", MPAGE, tmpfile);
298 perror(MPAGE);
299 /* we should already read the input from stdin.
300 * so probably it can't recovers
301 */
302 return;
303 }
304 } else {
305 /* try to use stdin */
306 fd = stdin;
307 dont_close = 1;
308 }
309 /*
310 * check that the input is whether UTF-8 or not.
311 */
312 if (looks_utf8 (fd))
313 use_utf8 = 1;
314 /*
315 * check for the cutomary flag at the start of postscript files
316 */
317 if (ps_check(fd)) {
318 /*
319 * found the flag signaling PS input
320 */
321 Debug(DB_STDIN, "%%do_stdin: is postscript\n", 0);
322 do_ps_doc(fd, asheet, outfd, "stdin");
323 }
324 else {
325 /*
326 * no postscript flag, print the ascii text
327 */
328 Debug(DB_STDIN, "%%do_stdin: not postscript\n", 0);
329 do_text_doc(fd, asheet, outfd, "stdin");
330 }
331 if (!dont_close)
332 fclose (fd);
333 /* remove the temporary file */
334 if (tmpfile[0])
335 (void)unlink(tmpfile);
336 }
337
338 return;
339 #endif
340
341 } /* do_stdin */
342
343
344
345 /*
346 * iswanted () returns 1 if the specified page needs to be printed.
347 * returns 0 if not.
348 */
349 int
350 iswanted(int sn)
351 {
352 int i;
353
354 Debug(DB_STDIN, "%%iswanted: opt_jarg: %d\n", opt_jarg);
355 Debug(DB_STDIN, "%%iswanted: sn: %d\n", sn);
356 if (!opt_jarg) {
357 Debug(DB_STDIN, "%%iswanted: wanted page %d\n", sn);
358 ps_outpages++;
359 return 1;
360 }
361 for (i = 0; i < opt_jarg; i++) {
362 Debug(DB_STDIN, "%%iswanted: i: %d\n", i);
363 Debug(DB_STDIN, "%%iswanted: opt_first[i]: %d\n", opt_first[i]);
364 Debug(DB_STDIN, "%%iswanted: opt_alt[i]: %d\n", opt_alt[i]);
365 Debug(DB_STDIN, "%%iswanted: opt_last[i]: %d\n", opt_last[i]);
366 if ((sn >= opt_first[i] && (opt_alt[i] <= 1 || (sn - opt_first[i]) % opt_alt[i] == 0) ) &&
367 (sn <= opt_last[i])) {
368 Debug(DB_STDIN, "%%iswanted: wanted page %d\n", sn);
369 ps_outpages++;
370 return 1;
371 }
372 }
373 Debug(DB_STDIN, "%%iswanted: unwanted page %d\n", sn);
374
375 return 0;
376
377 } /* iswanted */
378
379
380
381 /*
382 * do_sheets() is called from do_xxx_doc() to render the sheets;
383 * it does sheet selection and reversal.
384 */
385 void
386 do_sheets(sheetfunc, inf, asheet, outf)
387 int (*sheetfunc)();
388 FILE *inf;
389 struct sheet *asheet;
390 FILE *outf;
391 {
392 FILE *nullf = NULL;
393 register int sheetno;
394 int max_opt_last;
395
396 max_opt_last = 0;
397 for (sheetno = 0; sheetno < opt_jarg; sheetno++)
398 if (max_opt_last < opt_last[sheetno])
399 max_opt_last = opt_last[sheetno];
400 if (max_opt_last == 0)
401 max_opt_last = MAXINT;
402
403 Debug(DB_STDIN, "%%do_sheets: max_opt_last: %d\n", max_opt_last);
404
405 nullf = fopen("/dev/null", "w");
406
407 if (opt_reverse) {
408 FILE *revf;
409 long *pagebase;
410 int pageroom;
411
412 revf = tmpfile();
413 if (revf == NULL) {
414 fprintf(stderr, "%s: can't create temporary file\n", MPAGE);
415 exit(1);
416 }
417 pageroom = 50;
418 pagebase = (long *)malloc(pageroom * sizeof(long));
419 if(pagebase == NULL) {
420 fprintf(stderr, "%s: can't malloc 50 words\n", MPAGE);
421 exit(1);
422 }
423 pagebase[0] = 0;
424
425 for (sheetno = 1; sheetno <= max_opt_last; ) {
426 if ((*sheetfunc)(inf, asheet, iswanted(sheetno) ? revf : nullf)
427 == FILE_EOF)
428 break;
429
430 if (ferror(revf))
431 break;
432
433 pagebase[sheetno++] = ftell(revf);
434 if (sheetno >= pageroom) {
435 pageroom *= 4;
436 pagebase = (long *)realloc(pagebase, pageroom * sizeof(long));
437 if (pagebase == NULL) {
438 fprintf(stderr, "%s: can't malloc %d words\n",
439 MPAGE, pageroom);
440 exit(1);
441 }
442
443 }
444 }
445
446 if (ferror(revf))
447 fprintf(stderr, "%s: error writing to temporary file\n", MPAGE);
448 else {
449 pagebase[sheetno] = ftell(revf);
450 rewind(revf);
451
452 while (--sheetno >= 0) {
453 register int i, n;
454 char buf[BUFSIZ];
455
456 fseek(revf, pagebase[sheetno], 0);
457 for(i = pagebase[sheetno+1]-pagebase[sheetno]; i>0; i-=n) {
458 n = i < BUFSIZ ? i : BUFSIZ;
459 if (fread(buf, n, 1, revf) != 1) {
460 fprintf(stderr, "%s: Premature EOF on temp file\n",
461 MPAGE);
462 break;
463 }
464 (void) fwrite(buf, n, 1, outf);
465 }
466 }
467 }
468 fclose(revf);
469 free(pagebase);
470
471 }
472 else {
473 /* Normal, non-reversed pages */
474 sheetno = 1;
475 while (sheetno <= max_opt_last &&
476 (*sheetfunc)(inf, asheet, iswanted(sheetno) ?
477 outf : nullf) != FILE_EOF)
478 sheetno++;
479 }
480
481 if (nullf)
482 fclose(nullf);
483
484 return;
485
486 } /* do_sheets */
487
488 /*
489 * The code below is taken from ascmagic.c in file-4.02.
490 * The looks_utf8() function has been modified to work on a file handle directly.
491 */
492 /*
493 * This table reflects a particular philosophy about what constitutes
494 * "text," and there is room for disagreement about it.
495 *
496 * Version 3.31 of the file command considered a file to be ASCII if
497 * each of its characters was approved by either the isascii() or
498 * isalpha() function. On most systems, this would mean that any
499 * file consisting only of characters in the range 0x00 ... 0x7F
500 * would be called ASCII text, but many systems might reasonably
501 * consider some characters outside this range to be alphabetic,
502 * so the file command would call such characters ASCII. It might
503 * have been more accurate to call this "considered textual on the
504 * local system" than "ASCII."
505 *
506 * It considered a file to be "International language text" if each
507 * of its characters was either an ASCII printing character (according
508 * to the real ASCII standard, not the above test), a character in
509 * the range 0x80 ... 0xFF, or one of the following control characters:
510 * backspace, tab, line feed, vertical tab, form feed, carriage return,
511 * escape. No attempt was made to determine the language in which files
512 * of this type were written.
513 *
514 *
515 * The table below considers a file to be ASCII if all of its characters
516 * are either ASCII printing characters (again, according to the X3.4
517 * standard, not isascii()) or any of the following controls: bell,
518 * backspace, tab, line feed, form feed, carriage return, esc, nextline.
519 *
520 * I include bell because some programs (particularly shell scripts)
521 * use it literally, even though it is rare in normal text. I exclude
522 * vertical tab because it never seems to be used in real text. I also
523 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
524 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
525 * character to. It might be more appropriate to include it in the 8859
526 * set instead of the ASCII set, but it's got to be included in *something*
527 * we recognize or EBCDIC files aren't going to be considered textual.
528 * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
529 * and Latin characters, so these should possibly be allowed. But they
530 * make a real mess on VT100-style displays if they're not paired properly,
531 * so we are probably better off not calling them text.
532 *
533 * A file is considered to be ISO-8859 text if its characters are all
534 * either ASCII, according to the above definition, or printing characters
535 * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
536 *
537 * Finally, a file is considered to be international text from some other
538 * character code if its characters are all either ISO-8859 (according to
539 * the above definition) or characters in the range 0x80 ... 0x9F, which
540 * ISO-8859 considers to be control characters but the IBM PC and Macintosh
541 * consider to be printing characters.
542 */
543
#define F 0   /* character never appears in text */
#define T 1   /* character appears in plain ASCII text */
#define I 2   /* character appears in ISO-8859 text */
#define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */

/*
 * Classification of every byte value, indexed by the byte itself.
 * The table is only ever read, hence const.
 */
static const char text_chars[256] = {
    /*                  BEL BS HT LF    FF CR    */
    F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
    /*                              ESC          */
    F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
    /*            NEL                            */
    X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
    X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
    I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
};

/*
 * utf8_scan examines the nbytes bytes at buf.
 *
 * Returns 1 when at least one complete multi-byte sequence was seen and
 * nothing invalid was found; returns 0 for a 7-bit byte that text_chars
 * does not class as plain text, a continuation byte in lead position, an
 * impossible lead byte, or a missing continuation byte.  A sequence cut
 * off by the end of the buffer is not an error: the verdict reached so
 * far is returned.
 */
static int
utf8_scan(const unsigned char *buf, size_t nbytes)
{
    size_t i;
    int gotone = 0;

    for (i = 0; i < nbytes; i++) {
        unsigned char lead = buf[i];
        int following;

        if ((lead & 0x80) == 0) {       /* 0xxxxxxx is plain ASCII */
            /*
             * Even if the whole file is valid UTF-8 sequences,
             * still reject it if it uses weird control characters.
             */
            if (text_chars[lead] != T)
                return 0;
            continue;
        }

        if ((lead & 0x40) == 0)         /* 10xxxxxx never 1st byte */
            return 0;

        /* 11xxxxxx begins UTF-8: count the continuation bytes expected */
        if ((lead & 0x20) == 0)         /* 110xxxxx */
            following = 1;
        else if ((lead & 0x10) == 0)    /* 1110xxxx */
            following = 2;
        else if ((lead & 0x08) == 0)    /* 11110xxx */
            following = 3;
        else if ((lead & 0x04) == 0)    /* 111110xx */
            following = 4;
        else if ((lead & 0x02) == 0)    /* 1111110x */
            following = 5;
        else
            return 0;

        while (following-- > 0) {
            i++;
            if (i >= nbytes)
                return gotone;          /* sequence runs past the data */
            if ((buf[i] & 0xc0) != 0x80)    /* must be 10xxxxxx */
                return 0;
        }

        gotone = 1;
    }

    return gotone;  /* don't claim it's UTF-8 if it's all 7-bit */
}

/*
 * looks_utf8 reports whether the rest of fp, from its current position
 * to the end of the file, looks like UTF-8 text (see utf8_scan).
 *
 * The data is read into a temporary buffer and the stream is positioned
 * back where it was before returning.  Returns 1 for UTF-8, and 0
 * otherwise, including when the stream cannot be positioned, nothing is
 * left to read, or the buffer cannot be allocated.
 */
static int
looks_utf8(FILE *fp)
{
    long start, end;
    size_t nbytes;
    unsigned char *buf;
    int result;

    /* remember the current position and find out how much follows it */
    start = ftell(fp);
    if (start < 0 || fseek(fp, 0L, SEEK_END) != 0)
        return 0;
    end = ftell(fp);
    if (fseek(fp, start, SEEK_SET) != 0 || end <= start)
        return 0;

    buf = (unsigned char *) malloc((size_t) (end - start));
    if (buf == NULL)
        return 0;

    /* only the bytes actually read are examined */
    nbytes = fread(buf, 1, (size_t) (end - start), fp);

    /* put the stream back where the caller left it */
    (void) fseek(fp, start, SEEK_SET);

    result = utf8_scan(buf, nbytes);
    free(buf);

    return result;
}
653 }