Fri, 21 Feb 2003 12:28:06 +0000
g4 tables
1 /*
2 * t2p: Create a PDF file from the contents of one or more TIFF
3 * bilevel image files. The images in the resulting PDF file
4 * will be compressed using ITU-T T.6 (G4) fax encoding.
5 *
6 * PDF routines
7 * $Id: pdf_prim.c,v 1.5 2003/02/21 02:49:11 eric Exp $
8 * Copyright 2001, 2002, 2003 Eric Smith <eric@brouhaha.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. Note that permission is
13 * not granted to redistribute this program under the terms of any
14 * other version of the General Public License.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA
24 */
27 #include <stdarg.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
34 #include "bitblt.h"
35 #include "pdf.h"
36 #include "pdf_util.h"
37 #include "pdf_prim.h"
38 #include "pdf_private.h"
/* One node in the singly linked list that stores the elements of a
   PDF array object.  A NULL next pointer ends the list. */
struct pdf_array_elem
{
  struct pdf_array_elem *next;  /* following element, or NULL */
  struct pdf_obj *val;          /* value held by this element */
};
/* Payload of an array object: head and tail of the element list.  The
   tail pointer lets pdf_add_array_elem() append without walking the
   list. */
struct pdf_array
{
  struct pdf_array_elem *first, *last;
};
/* One key/value pair of a dictionary object, chained into a singly
   linked list. */
struct pdf_dict_entry
{
  struct pdf_dict_entry *next;  /* following entry, or NULL */
  char *key;                    /* entry name, compared with strcmp() */
  struct pdf_obj *val;          /* value stored under the key */
};
/* Payload of a dictionary object: the head of its entry list. */
struct pdf_dict
{
  struct pdf_dict_entry *first;  /* first entry, or NULL when empty */
};
69 #define STREAM_BUF_SIZE 4096
71 struct pdf_stream
72 {
73 struct pdf_obj *stream_dict;
74 struct pdf_obj *length;
75 pdf_stream_write_callback callback;
76 void *app_data; /* arg to pass to callback */
77 struct pdf_obj *filters; /* name or array of names */
78 struct pdf_obj *decode_parms;
80 /* The following fields are used by pdf_stream_write_bits() and
81 pdf_stream_flush_bits(). */
82 uint32_t byte_idx; /* index to next byte position in data buffer */
83 uint32_t bit_idx; /* index to next bit position in data buffer,
84 0 = MSB, 7 = LSB */
85 uint8_t data [STREAM_BUF_SIZE];
86 };
89 struct pdf_obj
90 {
91 /* these fields only apply to indirectly referenced objects */
92 struct pdf_obj *prev;
93 struct pdf_obj *next;
94 unsigned long obj_num;
95 unsigned long obj_gen;
96 long int file_offset;
98 /* these fields apply to all objects */
99 unsigned long ref_count;
100 pdf_obj_type type;
101 union {
102 bool boolean;
103 char *name;
104 char *string;
105 unsigned long integer;
106 double real;
107 struct pdf_obj *ind_ref;
108 struct pdf_dict dict;
109 struct pdf_array array;
110 struct pdf_stream stream;
111 } val;
112 };
115 struct pdf_obj *ref (struct pdf_obj *obj)
116 {
117 obj->ref_count++;
118 return (obj);
119 }
122 void unref (struct pdf_obj *obj)
123 {
124 if ((--obj->ref_count) == 0)
125 {
126 /* $$$ free the object */
127 }
128 }
131 struct pdf_obj *pdf_deref_ind_obj (struct pdf_obj *ind_obj)
132 {
133 pdf_assert (ind_obj->type == PT_IND_REF);
134 return (ind_obj->val.ind_ref);
135 }
138 void pdf_set_dict_entry (struct pdf_obj *dict_obj, char *key, struct pdf_obj *val)
139 {
140 struct pdf_dict_entry *entry;
142 if (dict_obj->type == PT_IND_REF)
143 dict_obj = pdf_deref_ind_obj (dict_obj);
145 pdf_assert (dict_obj->type == PT_DICTIONARY);
147 /* replacing existing entry? */
148 for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
149 if (strcmp (entry->key, key) == 0)
150 {
151 unref (entry->val);
152 entry->val = ref (val);
153 return;
154 }
156 /* new entry */
157 entry = pdf_calloc (1, sizeof (struct pdf_dict_entry));
159 entry->next = dict_obj->val.dict.first;
160 dict_obj->val.dict.first = entry;
162 entry->key = pdf_strdup (key);
163 entry->val = ref (val);
164 }
167 struct pdf_obj *pdf_get_dict_entry (struct pdf_obj *dict_obj, char *key)
168 {
169 struct pdf_dict_entry *entry;
171 if (dict_obj->type == PT_IND_REF)
172 dict_obj = pdf_deref_ind_obj (dict_obj);
174 pdf_assert (dict_obj->type == PT_DICTIONARY);
176 for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
177 if (strcmp (entry->key, key) == 0)
178 return (entry->val);
180 return (NULL);
181 }
184 void pdf_add_array_elem (struct pdf_obj *array_obj, struct pdf_obj *val)
185 {
186 struct pdf_array_elem *elem = pdf_calloc (1, sizeof (struct pdf_array_elem));
188 if (array_obj->type == PT_IND_REF)
189 array_obj = pdf_deref_ind_obj (array_obj);
191 pdf_assert (array_obj->type == PT_ARRAY);
193 elem->val = ref (val);
195 if (! array_obj->val.array.first)
196 array_obj->val.array.first = elem;
197 else
198 array_obj->val.array.last->next = elem;
200 array_obj->val.array.last = elem;
201 }
204 struct pdf_obj *pdf_new_obj (pdf_obj_type type)
205 {
206 struct pdf_obj *obj = pdf_calloc (1, sizeof (struct pdf_obj));
207 obj->type = type;
208 return (obj);
209 }
212 struct pdf_obj *pdf_new_bool (bool val)
213 {
214 struct pdf_obj *obj = pdf_new_obj (PT_BOOL);
215 obj->val.boolean = val;
216 return (obj);
217 }
220 struct pdf_obj *pdf_new_name (char *name)
221 {
222 struct pdf_obj *obj = pdf_new_obj (PT_NAME);
223 obj->val.name = pdf_strdup (name);
224 return (obj);
225 }
228 struct pdf_obj *pdf_new_string (char *str)
229 {
230 struct pdf_obj *obj = pdf_new_obj (PT_STRING);
231 obj->val.string = pdf_strdup (str);
232 return (obj);
233 }
236 struct pdf_obj *pdf_new_integer (unsigned long val)
237 {
238 struct pdf_obj *obj = pdf_new_obj (PT_INTEGER);
239 obj->val.integer = val;
240 return (obj);
241 }
244 struct pdf_obj *pdf_new_real (double val)
245 {
246 struct pdf_obj *obj = pdf_new_obj (PT_REAL);
247 obj->val.real = val;
248 return (obj);
249 }
252 struct pdf_obj *pdf_new_stream (pdf_file_handle pdf_file,
253 struct pdf_obj *stream_dict,
254 pdf_stream_write_callback callback,
255 void *app_data)
256 {
257 struct pdf_obj *obj = pdf_new_obj (PT_STREAM);
259 obj->val.stream.stream_dict = stream_dict;
260 obj->val.stream.length = pdf_new_ind_ref (pdf_file, pdf_new_integer (0));
261 pdf_set_dict_entry (obj->val.stream.stream_dict, "Length", obj->val.stream.length);
263 obj->val.stream.callback = callback;
264 obj->val.stream.app_data = app_data;
265 return (obj);
266 }
269 /* $$$ currently limited to one filter per stream */
270 void pdf_stream_add_filter (struct pdf_obj *stream,
271 char *filter_name,
272 struct pdf_obj *decode_parms)
273 {
274 if (stream->type == PT_IND_REF)
275 stream = pdf_deref_ind_obj (stream);
277 pdf_assert (stream->type == PT_STREAM);
279 pdf_set_dict_entry (stream->val.stream.stream_dict, "Filter", pdf_new_name (filter_name));
280 if (decode_parms)
281 pdf_set_dict_entry (stream->val.stream.stream_dict, "DecodeParms", decode_parms);
282 }
285 struct pdf_obj *pdf_new_ind_ref (pdf_file_handle pdf_file, struct pdf_obj *obj)
286 {
287 struct pdf_obj *ind_obj;
289 pdf_assert (obj->type != PT_IND_REF);
291 ind_obj = pdf_new_obj (PT_IND_REF);
293 ind_obj->type = PT_IND_REF;
294 ind_obj->val.ind_ref = obj;
296 /* is there already an indirect reference to this object? */
297 if (! obj->obj_num)
298 {
299 /* no, assign object number/generation and add to linked list */
300 if (! pdf_file->first_ind_obj)
301 {
302 obj->obj_num = 1;
303 pdf_file->first_ind_obj = pdf_file->last_ind_obj = obj;
304 }
305 else
306 {
307 obj->obj_num = pdf_file->last_ind_obj->obj_num + 1;
308 pdf_file->last_ind_obj->next = obj;
309 obj->prev = pdf_file->last_ind_obj;
310 pdf_file->last_ind_obj = obj;
311 }
312 }
314 return (ind_obj);
315 }
318 unsigned long pdf_get_integer (struct pdf_obj *obj)
319 {
320 if (obj->type == PT_IND_REF)
321 obj = pdf_deref_ind_obj (obj);
323 pdf_assert (obj->type == PT_INTEGER);
325 return (obj->val.integer);
326 }
328 void pdf_set_integer (struct pdf_obj *obj, unsigned long val)
329 {
330 if (obj->type == PT_IND_REF)
331 obj = pdf_deref_ind_obj (obj);
333 pdf_assert (obj->type == PT_INTEGER);
335 obj->val.integer = val;
336 }
339 double pdf_get_real (struct pdf_obj *obj)
340 {
341 if (obj->type == PT_IND_REF)
342 obj = pdf_deref_ind_obj (obj);
344 pdf_assert (obj->type == PT_REAL);
346 return (obj->val.real);
347 }
349 void pdf_set_real (struct pdf_obj *obj, double val)
350 {
351 if (obj->type == PT_IND_REF)
352 obj = pdf_deref_ind_obj (obj);
354 pdf_assert (obj->type == PT_REAL);
356 obj->val.real = val;
357 }
/* Decide whether character c must be written as a #xx hex escape when
   it appears inside a PDF name.

   Returns 1 for anything outside the printable range '!'..'~', for the
   PDF delimiter characters, for backslash, and for '#'.  The number
   sign has to be escaped as well because it introduces the hex escape
   itself; written raw, a reader would take the two characters after it
   as a character code.  Returns 0 for every other character. */
static int name_char_needs_quoting (char c)
{
  if ((c < '!') || (c > '~'))
    return 1;

  switch (c)
    {
    case '#':
    case '%':
    case '/':
    case '\\':
    case '(':
    case ')':
    case '<':
    case '>':
    case '[':
    case ']':
    case '{':
    case '}':
      return 1;
    default:
      return 0;
    }
}
369 void pdf_write_name (pdf_file_handle pdf_file, char *s)
370 {
371 fprintf (pdf_file->f, "/");
372 while (*s)
373 if (name_char_needs_quoting (*s))
374 fprintf (pdf_file->f, "#%02x", 0xff & *(s++));
375 else
376 fprintf (pdf_file->f, "%c", *(s++));
377 fprintf (pdf_file->f, " ");
378 }
/* Decide whether character c must be escaped inside a PDF literal
   string.  Returns 1 for anything outside ' '..'~', for backslash and
   for either parenthesis; returns 0 otherwise. */
static int string_char_needs_quoting (char c)
{
  if (c < ' ')
    return 1;
  if (c > '~')
    return 1;

  switch (c)
    {
    case '\\':
    case '(':
    case ')':
      return 1;
    default:
      return 0;
    }
}
388 void pdf_write_string (pdf_file_handle pdf_file, char *s)
389 {
390 fprintf (pdf_file->f, "(");
391 while (*s)
392 if (string_char_needs_quoting (*s))
393 fprintf (pdf_file->f, "\\%03o", 0xff & *(s++));
394 else
395 fprintf (pdf_file->f, "%c", *(s++));
396 fprintf (pdf_file->f, ") ");
397 }
400 void pdf_write_real (pdf_file_handle pdf_file, double num)
401 {
402 /* $$$ not actually good enough, precision needs to be variable,
403 and no exponent is allowed */
404 fprintf (pdf_file->f, "%0f ", num);
405 }
408 void pdf_write_ind_ref (pdf_file_handle pdf_file, struct pdf_obj *ind_obj)
409 {
410 struct pdf_obj *obj = pdf_deref_ind_obj (ind_obj);
411 fprintf (pdf_file->f, "%ld %ld R ", obj->obj_num, obj->obj_gen);
412 }
415 void pdf_write_array (pdf_file_handle pdf_file, struct pdf_obj *array_obj)
416 {
417 struct pdf_array_elem *elem;
419 pdf_assert (array_obj->type == PT_ARRAY);
421 fprintf (pdf_file->f, "[ ");
422 for (elem = array_obj->val.array.first; elem; elem = elem->next)
423 {
424 pdf_write_obj (pdf_file, elem->val);
425 fprintf (pdf_file->f, " ");
426 }
427 fprintf (pdf_file->f, "] ");
428 }
431 void pdf_write_dict (pdf_file_handle pdf_file, struct pdf_obj *dict_obj)
432 {
433 struct pdf_dict_entry *entry;
435 pdf_assert (dict_obj->type == PT_DICTIONARY);
437 fprintf (pdf_file->f, "<<\r\n");
438 for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
439 {
440 pdf_write_name (pdf_file, entry->key);
441 fprintf (pdf_file->f, " ");
442 pdf_write_obj (pdf_file, entry->val);
443 fprintf (pdf_file->f, "\r\n");
444 }
445 fprintf (pdf_file->f, ">>\r\n");
446 }
449 void pdf_stream_write_data (pdf_file_handle pdf_file,
450 struct pdf_obj *stream,
451 char *data,
452 unsigned long len)
453 {
454 while (len)
455 {
456 unsigned long l2 = fwrite (data, 1, len, pdf_file->f);
457 data += l2;
458 len -= l2;
459 if (ferror (pdf_file->f))
460 pdf_fatal ("error writing stream data\n");
461 }
462 }
465 void pdf_stream_flush_bits (pdf_file_handle pdf_file,
466 struct pdf_obj *stream)
467 {
468 struct pdf_stream *s = & stream->val.stream;
470 if (s->bit_idx)
471 {
472 /* zero remaining bits in last byte */
473 s->data [s->byte_idx] &= ~ ((1 << (8 - s->bit_idx)) - 1);
474 s->byte_idx++;
475 s->bit_idx = 0;
476 }
477 pdf_stream_write_data (pdf_file, stream,
478 (char *) & s->data [0],
479 s->byte_idx);
480 s->byte_idx = 0;
481 }
484 static void pdf_stream_advance_byte (pdf_file_handle pdf_file,
485 struct pdf_obj *stream)
486 {
487 struct pdf_stream *s = & stream->val.stream;
489 s->byte_idx++;
490 s->bit_idx = 0;
491 if (s->byte_idx == STREAM_BUF_SIZE)
492 pdf_stream_flush_bits (pdf_file, stream);
493 }
496 void pdf_stream_write_bits (pdf_file_handle pdf_file,
497 struct pdf_obj *stream,
498 uint32_t count,
499 uint32_t bits)
500 {
501 struct pdf_stream *s = & stream->val.stream;
503 uint32_t b2; /* how many bits will fit in byte in data buffer */
504 uint32_t c2; /* how many bits to transfer on this iteration */
505 uint32_t d2; /* bits to transfer on this iteration */
507 while (count)
508 {
509 b2 = 8 - s->bit_idx;
510 if (b2 >= count)
511 c2 = count;
512 else
513 c2 = b2;
514 d2 = bits >> (count - c2);
515 s->data [s->byte_idx] |= (d2 << (b2 + c2));
516 s->bit_idx += c2;
517 if (s->bit_idx > 7)
518 pdf_stream_advance_byte (pdf_file, stream);
519 count -= c2;
520 }
521 }
524 void pdf_stream_printf (pdf_file_handle pdf_file,
525 struct pdf_obj *stream,
526 char *fmt, ...)
527 {
528 va_list ap;
530 va_start (ap, fmt);
531 vfprintf (pdf_file->f, fmt, ap);
532 va_end (ap);
533 }
536 void pdf_write_stream (pdf_file_handle pdf_file, struct pdf_obj *stream)
537 {
538 unsigned long begin_pos, end_pos;
540 pdf_assert (stream->type == PT_STREAM);
542 pdf_write_dict (pdf_file, stream->val.stream.stream_dict);
543 fprintf (pdf_file->f, "stream\r\n");
544 begin_pos = ftell (pdf_file->f);
545 stream->val.stream.callback (pdf_file,
546 stream,
547 stream->val.stream.app_data);
548 end_pos = ftell (pdf_file->f);
549 fprintf (pdf_file->f, "\r\nendstream\r\n");
551 pdf_set_integer (stream->val.stream.length, end_pos - begin_pos);
552 }
555 void pdf_write_obj (pdf_file_handle pdf_file, struct pdf_obj *obj)
556 {
557 switch (obj->type)
558 {
559 case PT_NULL:
560 fprintf (pdf_file->f, "null ");
561 break;
562 case PT_BOOL:
563 if (obj->val.boolean)
564 fprintf (pdf_file->f, "true ");
565 else
566 fprintf (pdf_file->f, "false ");
567 break;
568 case PT_NAME:
569 pdf_write_name (pdf_file, obj->val.name);
570 break;
571 case PT_STRING:
572 pdf_write_string (pdf_file, obj->val.string);
573 break;
574 case PT_INTEGER:
575 fprintf (pdf_file->f, "%ld ", obj->val.integer);
576 break;
577 case PT_REAL:
578 pdf_write_real (pdf_file, obj->val.real);
579 break;
580 case PT_IND_REF:
581 pdf_write_ind_ref (pdf_file, obj);
582 break;
583 case PT_DICTIONARY:
584 pdf_write_dict (pdf_file, obj);
585 break;
586 case PT_ARRAY:
587 pdf_write_array (pdf_file, obj);
588 break;
589 case PT_STREAM:
590 pdf_write_stream (pdf_file, obj);
591 break;
592 default:
593 pdf_fatal ("bad object type\n");
594 }
595 }
598 void pdf_write_ind_obj (pdf_file_handle pdf_file, struct pdf_obj *ind_obj)
599 {
600 struct pdf_obj *obj;
602 if (ind_obj->type == PT_IND_REF)
603 obj = pdf_deref_ind_obj (ind_obj);
604 else
605 obj = ind_obj;
607 obj->file_offset = ftell (pdf_file->f);
608 fprintf (pdf_file->f, "%ld %ld obj\r\n", obj->obj_num, obj->obj_gen);
609 pdf_write_obj (pdf_file, obj);
610 fprintf (pdf_file->f, "endobj\r\n");
611 }
614 void pdf_write_all_ind_obj (pdf_file_handle pdf_file)
615 {
616 struct pdf_obj *ind_obj;
617 for (ind_obj = pdf_file->first_ind_obj; ind_obj; ind_obj = ind_obj->next)
618 if (! ind_obj->file_offset)
619 pdf_write_ind_obj (pdf_file, ind_obj);
620 }
623 unsigned long pdf_write_xref (pdf_file_handle pdf_file)
624 {
625 struct pdf_obj *ind_obj;
626 pdf_file->xref_offset = ftell (pdf_file->f);
627 fprintf (pdf_file->f, "xref\r\n");
628 fprintf (pdf_file->f, "0 %ld\r\n", pdf_file->last_ind_obj->obj_num + 1);
629 fprintf (pdf_file->f, "0000000000 65535 f\r\n");
630 for (ind_obj = pdf_file->first_ind_obj; ind_obj; ind_obj = ind_obj->next)
631 fprintf (pdf_file->f, "%010ld 00000 n\r\n", ind_obj->file_offset);
632 return (pdf_file->last_ind_obj->obj_num + 1);
633 }