Fri, 07 Mar 2003 11:35:36 +0000
more work on pdf_add_tree_element().
1 /*
2 * t2p: Create a PDF file from the contents of one or more TIFF
3 * bilevel image files. The images in the resulting PDF file
4 * will be compressed using ITU-T T.6 (G4) fax encoding.
5 *
6 * PDF routines
7 * $Id: pdf_prim.c,v 1.7 2003/03/07 03:02:31 eric Exp $
8 * Copyright 2001, 2002, 2003 Eric Smith <eric@brouhaha.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. Note that permission is
13 * not granted to redistribute this program under the terms of any
14 * other version of the General Public License.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA
24 */
27 #include <stdarg.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
34 #include "bitblt.h"
35 #include "pdf.h"
36 #include "pdf_util.h"
37 #include "pdf_prim.h"
38 #include "pdf_private.h"
/* One node of the singly linked list that stores the elements of a
   PDF array object. */
struct pdf_array_elem
{
  struct pdf_array_elem *next;  /* following element; list walkers stop when this is NULL */
  struct pdf_obj *val;          /* the element's value */
};
/* Payload of a PT_ARRAY object: a linked list of elements.  Both ends
   are tracked; pdf_add_array_elem() appends through 'last'. */
struct pdf_array
{
  struct pdf_array_elem *first;  /* head of the list, NULL while the array is empty */
  struct pdf_array_elem *last;   /* tail of the list */
};
/* One key/value pair of a PDF dictionary, kept in a singly linked list. */
struct pdf_dict_entry
{
  struct pdf_dict_entry *next;  /* following entry; list walkers stop when this is NULL */
  char *key;                    /* entry name; a copy made with pdf_strdup() */
  struct pdf_obj *val;          /* entry value; a reference is taken with ref() */
};
/* Payload of a PT_DICTIONARY object.  pdf_set_dict_entry() inserts new
   entries at the head of the list. */
struct pdf_dict
{
  struct pdf_dict_entry *first;  /* head of the entry list, NULL while empty */
};
/* Size in bytes of the per-stream buffer (struct pdf_stream.data) that
   pdf_stream_write_bits() fills before it is flushed to the file. */
#define STREAM_BUF_SIZE 4096
/* Payload of a PT_STREAM object. */
struct pdf_stream
{
  struct pdf_obj *stream_dict;  /* dictionary written ahead of the stream data */
  struct pdf_obj *length;       /* indirect reference to an integer; created as 0 by
                                   pdf_new_stream() and set by pdf_write_stream()
                                   once the data has been written */
  pdf_stream_write_callback callback;  /* called by pdf_write_stream() to emit the data */
  void *app_data;  /* arg to pass to callback */
  struct pdf_obj *filters;  /* name or array of names */
  struct pdf_obj *decode_parms;
  /* NOTE(review): filters and decode_parms are not read or written by
     any code visible in this file section -- confirm whether they are
     still needed. */

  /* The following fields are used by pdf_stream_write_bits() and
     pdf_stream_flush_bits(). */
  uint32_t byte_idx;  /* index to next byte position in data buffer */
  uint32_t bit_idx;   /* index to next bit position in data buffer,
                         0 = MSB, 7 = LSB */
  uint8_t data [STREAM_BUF_SIZE];
};
/* A PDF object of any type.  The 'type' field selects which member of
   the 'val' union is meaningful. */
struct pdf_obj
{
  /* these fields only apply to indirectly referenced objects */
  struct pdf_obj *prev;     /* neighbours in the file's list of indirect */
  struct pdf_obj *next;     /* objects, linked by pdf_new_ind_ref() */
  unsigned long obj_num;    /* object number, assigned from 1 upward;
                               0 means no number has been assigned yet */
  unsigned long obj_gen;    /* generation number */
  long int file_offset;     /* set from ftell() by pdf_write_ind_obj();
                               pdf_write_all_ind_obj() treats 0 as
                               "not yet written" */

  /* these fields apply to all objects */
  unsigned long ref_count;  /* maintained by ref() and unref() */
  pdf_obj_type type;
  union {
    bool boolean;
    char *name;
    char *string;
    long integer;
    double real;
    struct pdf_obj *ind_ref;  /* target object of a PT_IND_REF */
    struct pdf_dict dict;
    struct pdf_array array;
    struct pdf_stream stream;
  } val;
};
115 struct pdf_obj *ref (struct pdf_obj *obj)
116 {
117 obj->ref_count++;
118 return (obj);
119 }
122 void unref (struct pdf_obj *obj)
123 {
124 if ((--obj->ref_count) == 0)
125 {
126 /* $$$ free the object */
127 }
128 }
131 struct pdf_obj *pdf_deref_ind_obj (struct pdf_obj *ind_obj)
132 {
133 pdf_assert (ind_obj->type == PT_IND_REF);
134 return (ind_obj->val.ind_ref);
135 }
138 void pdf_set_dict_entry (struct pdf_obj *dict_obj, char *key, struct pdf_obj *val)
139 {
140 struct pdf_dict_entry *entry;
142 if (dict_obj->type == PT_IND_REF)
143 dict_obj = pdf_deref_ind_obj (dict_obj);
145 pdf_assert (dict_obj->type == PT_DICTIONARY);
147 /* replacing existing entry? */
148 for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
149 if (strcmp (entry->key, key) == 0)
150 {
151 unref (entry->val);
152 entry->val = ref (val);
153 return;
154 }
156 /* new entry */
157 entry = pdf_calloc (1, sizeof (struct pdf_dict_entry));
159 entry->next = dict_obj->val.dict.first;
160 dict_obj->val.dict.first = entry;
162 entry->key = pdf_strdup (key);
163 entry->val = ref (val);
164 }
167 struct pdf_obj *pdf_get_dict_entry (struct pdf_obj *dict_obj, char *key)
168 {
169 struct pdf_dict_entry *entry;
171 if (dict_obj->type == PT_IND_REF)
172 dict_obj = pdf_deref_ind_obj (dict_obj);
174 pdf_assert (dict_obj->type == PT_DICTIONARY);
176 for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
177 if (strcmp (entry->key, key) == 0)
178 return (entry->val);
180 return (NULL);
181 }
184 void pdf_add_array_elem (struct pdf_obj *array_obj, struct pdf_obj *val)
185 {
186 struct pdf_array_elem *elem = pdf_calloc (1, sizeof (struct pdf_array_elem));
188 if (array_obj->type == PT_IND_REF)
189 array_obj = pdf_deref_ind_obj (array_obj);
191 pdf_assert (array_obj->type == PT_ARRAY);
193 elem->val = ref (val);
195 if (! array_obj->val.array.first)
196 array_obj->val.array.first = elem;
197 else
198 array_obj->val.array.last->next = elem;
200 array_obj->val.array.last = elem;
201 }
204 struct pdf_obj *pdf_new_obj (pdf_obj_type type)
205 {
206 struct pdf_obj *obj = pdf_calloc (1, sizeof (struct pdf_obj));
207 obj->type = type;
208 return (obj);
209 }
212 struct pdf_obj *pdf_new_bool (bool val)
213 {
214 struct pdf_obj *obj = pdf_new_obj (PT_BOOL);
215 obj->val.boolean = val;
216 return (obj);
217 }
220 struct pdf_obj *pdf_new_name (char *name)
221 {
222 struct pdf_obj *obj = pdf_new_obj (PT_NAME);
223 obj->val.name = pdf_strdup (name);
224 return (obj);
225 }
228 struct pdf_obj *pdf_new_string (char *str)
229 {
230 struct pdf_obj *obj = pdf_new_obj (PT_STRING);
231 obj->val.string = pdf_strdup (str);
232 return (obj);
233 }
236 struct pdf_obj *pdf_new_integer (long val)
237 {
238 struct pdf_obj *obj = pdf_new_obj (PT_INTEGER);
239 obj->val.integer = val;
240 return (obj);
241 }
244 struct pdf_obj *pdf_new_real (double val)
245 {
246 struct pdf_obj *obj = pdf_new_obj (PT_REAL);
247 obj->val.real = val;
248 return (obj);
249 }
252 struct pdf_obj *pdf_new_stream (pdf_file_handle pdf_file,
253 struct pdf_obj *stream_dict,
254 pdf_stream_write_callback callback,
255 void *app_data)
256 {
257 struct pdf_obj *obj = pdf_new_obj (PT_STREAM);
259 obj->val.stream.stream_dict = stream_dict;
260 obj->val.stream.length = pdf_new_ind_ref (pdf_file, pdf_new_integer (0));
261 pdf_set_dict_entry (obj->val.stream.stream_dict, "Length", obj->val.stream.length);
263 obj->val.stream.callback = callback;
264 obj->val.stream.app_data = app_data;
265 return (obj);
266 }
269 /* $$$ currently limited to one filter per stream */
270 void pdf_stream_add_filter (struct pdf_obj *stream,
271 char *filter_name,
272 struct pdf_obj *decode_parms)
273 {
274 if (stream->type == PT_IND_REF)
275 stream = pdf_deref_ind_obj (stream);
277 pdf_assert (stream->type == PT_STREAM);
279 pdf_set_dict_entry (stream->val.stream.stream_dict, "Filter", pdf_new_name (filter_name));
280 if (decode_parms)
281 pdf_set_dict_entry (stream->val.stream.stream_dict, "DecodeParms", decode_parms);
282 }
285 struct pdf_obj *pdf_new_ind_ref (pdf_file_handle pdf_file, struct pdf_obj *obj)
286 {
287 struct pdf_obj *ind_obj;
289 pdf_assert (obj->type != PT_IND_REF);
291 ind_obj = pdf_new_obj (PT_IND_REF);
293 ind_obj->type = PT_IND_REF;
294 ind_obj->val.ind_ref = obj;
296 /* is there already an indirect reference to this object? */
297 if (! obj->obj_num)
298 {
299 /* no, assign object number/generation and add to linked list */
300 if (! pdf_file->first_ind_obj)
301 {
302 obj->obj_num = 1;
303 pdf_file->first_ind_obj = pdf_file->last_ind_obj = obj;
304 }
305 else
306 {
307 obj->obj_num = pdf_file->last_ind_obj->obj_num + 1;
308 pdf_file->last_ind_obj->next = obj;
309 obj->prev = pdf_file->last_ind_obj;
310 pdf_file->last_ind_obj = obj;
311 }
312 }
314 return (ind_obj);
315 }
318 long pdf_get_integer (struct pdf_obj *obj)
319 {
320 if (obj->type == PT_IND_REF)
321 obj = pdf_deref_ind_obj (obj);
323 pdf_assert (obj->type == PT_INTEGER);
325 return (obj->val.integer);
326 }
328 void pdf_set_integer (struct pdf_obj *obj, long val)
329 {
330 if (obj->type == PT_IND_REF)
331 obj = pdf_deref_ind_obj (obj);
333 pdf_assert (obj->type == PT_INTEGER);
335 obj->val.integer = val;
336 }
339 double pdf_get_real (struct pdf_obj *obj)
340 {
341 if (obj->type == PT_IND_REF)
342 obj = pdf_deref_ind_obj (obj);
344 pdf_assert (obj->type == PT_REAL);
346 return (obj->val.real);
347 }
349 void pdf_set_real (struct pdf_obj *obj, double val)
350 {
351 if (obj->type == PT_IND_REF)
352 obj = pdf_deref_ind_obj (obj);
354 pdf_assert (obj->type == PT_REAL);
356 obj->val.real = val;
357 }
360 int pdf_compare_obj (struct pdf_obj *o1, struct pdf_obj *o2)
361 {
362 if (o1->type == PT_IND_REF)
363 o1 = pdf_deref_ind_obj (o1);
365 if (o2->type == PT_IND_REF)
366 o2 = pdf_deref_ind_obj (o2);
368 pdf_assert (o1->type == o2->type);
370 switch (o1->type)
371 {
372 case PT_INTEGER:
373 if (o1->val.integer < o2->val.integer)
374 return (-1);
375 if (o1->val.integer > o2->val.integer)
376 return (1);
377 return (0);
378 case PT_REAL:
379 if (o1->val.real < o2->val.real)
380 return (-1);
381 if (o1->val.real > o2->val.real)
382 return (1);
383 return (0);
384 case PT_STRING:
385 return (strcmp (o1->val.string, o2->val.string));
386 default:
387 pdf_fatal ("invalid object type for comparison\n");
388 }
389 }
/* Decide whether character c must be written as a #xx hex escape
 * inside a PDF name.
 *
 * Returns 1 for anything outside the printable range '!'..'~', for the
 * PDF delimiter characters, for backslash, and for '#' itself; returns
 * 0 otherwise.  '#' has to be escaped because it introduces the escape
 * sequence: written literally, a reader would take the two characters
 * after it as hex digits.
 */
static int name_char_needs_quoting (char c)
{
  if ((c < '!') || (c > '~'))
    return (1);

  switch (c)
    {
    case '#':
    case '/':
    case '\\':
    case '(':
    case ')':
    case '<':
    case '>':
    case '[':
    case ']':
    case '{':
    case '}':
    case '%':
      return (1);
    default:
      return (0);
    }
}
401 void pdf_write_name (pdf_file_handle pdf_file, char *s)
402 {
403 fprintf (pdf_file->f, "/");
404 while (*s)
405 if (name_char_needs_quoting (*s))
406 fprintf (pdf_file->f, "#%02x", 0xff & *(s++));
407 else
408 fprintf (pdf_file->f, "%c", *(s++));
409 fprintf (pdf_file->f, " ");
410 }
/* Decide whether character c must be written as an octal escape inside
   a PDF literal string.  Returns 1 for anything outside ' '..'~' and
   for backslash and the two parentheses; returns 0 otherwise. */
static int string_char_needs_quoting (char c)
{
  if (c < ' ' || c > '~')
    return 1;

  switch (c)
    {
    case '\\':
    case '(':
    case ')':
      return 1;
    default:
      return 0;
    }
}
420 void pdf_write_string (pdf_file_handle pdf_file, char *s)
421 {
422 fprintf (pdf_file->f, "(");
423 while (*s)
424 if (string_char_needs_quoting (*s))
425 fprintf (pdf_file->f, "\\%03o", 0xff & *(s++));
426 else
427 fprintf (pdf_file->f, "%c", *(s++));
428 fprintf (pdf_file->f, ") ");
429 }
432 void pdf_write_real (pdf_file_handle pdf_file, double num)
433 {
434 /* $$$ not actually good enough, precision needs to be variable,
435 and no exponent is allowed */
436 fprintf (pdf_file->f, "%0f ", num);
437 }
440 void pdf_write_ind_ref (pdf_file_handle pdf_file, struct pdf_obj *ind_obj)
441 {
442 struct pdf_obj *obj = pdf_deref_ind_obj (ind_obj);
443 fprintf (pdf_file->f, "%ld %ld R ", obj->obj_num, obj->obj_gen);
444 }
447 void pdf_write_array (pdf_file_handle pdf_file, struct pdf_obj *array_obj)
448 {
449 struct pdf_array_elem *elem;
451 pdf_assert (array_obj->type == PT_ARRAY);
453 fprintf (pdf_file->f, "[ ");
454 for (elem = array_obj->val.array.first; elem; elem = elem->next)
455 {
456 pdf_write_obj (pdf_file, elem->val);
457 fprintf (pdf_file->f, " ");
458 }
459 fprintf (pdf_file->f, "] ");
460 }
463 void pdf_write_dict (pdf_file_handle pdf_file, struct pdf_obj *dict_obj)
464 {
465 struct pdf_dict_entry *entry;
467 pdf_assert (dict_obj->type == PT_DICTIONARY);
469 fprintf (pdf_file->f, "<<\r\n");
470 for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
471 {
472 pdf_write_name (pdf_file, entry->key);
473 fprintf (pdf_file->f, " ");
474 pdf_write_obj (pdf_file, entry->val);
475 fprintf (pdf_file->f, "\r\n");
476 }
477 fprintf (pdf_file->f, ">>\r\n");
478 }
481 void pdf_stream_write_data (pdf_file_handle pdf_file,
482 struct pdf_obj *stream,
483 char *data,
484 unsigned long len)
485 {
486 while (len)
487 {
488 unsigned long l2 = fwrite (data, 1, len, pdf_file->f);
489 data += l2;
490 len -= l2;
491 if (ferror (pdf_file->f))
492 pdf_fatal ("error writing stream data\n");
493 }
494 }
497 void pdf_stream_flush_bits (pdf_file_handle pdf_file,
498 struct pdf_obj *stream)
499 {
500 struct pdf_stream *s = & stream->val.stream;
502 if (s->bit_idx)
503 {
504 /* zero remaining bits in last byte */
505 s->data [s->byte_idx] &= ~ ((1 << (8 - s->bit_idx)) - 1);
506 s->byte_idx++;
507 s->bit_idx = 0;
508 }
509 pdf_stream_write_data (pdf_file, stream,
510 (char *) & s->data [0],
511 s->byte_idx);
512 s->byte_idx = 0;
513 }
516 static void pdf_stream_advance_byte (pdf_file_handle pdf_file,
517 struct pdf_obj *stream)
518 {
519 struct pdf_stream *s = & stream->val.stream;
521 s->byte_idx++;
522 s->bit_idx = 0;
523 if (s->byte_idx == STREAM_BUF_SIZE)
524 pdf_stream_flush_bits (pdf_file, stream);
525 }
528 void pdf_stream_write_bits (pdf_file_handle pdf_file,
529 struct pdf_obj *stream,
530 uint32_t count,
531 uint32_t bits)
532 {
533 struct pdf_stream *s = & stream->val.stream;
535 uint32_t b2; /* how many bits will fit in byte in data buffer */
536 uint32_t c2; /* how many bits to transfer on this iteration */
537 uint32_t d2; /* bits to transfer on this iteration */
539 while (count)
540 {
541 b2 = 8 - s->bit_idx;
542 if (b2 >= count)
543 c2 = count;
544 else
545 c2 = b2;
546 d2 = bits >> (count - c2);
547 s->data [s->byte_idx] |= (d2 << (b2 + c2));
548 s->bit_idx += c2;
549 if (s->bit_idx > 7)
550 pdf_stream_advance_byte (pdf_file, stream);
551 count -= c2;
552 }
553 }
556 void pdf_stream_printf (pdf_file_handle pdf_file,
557 struct pdf_obj *stream,
558 char *fmt, ...)
559 {
560 va_list ap;
562 va_start (ap, fmt);
563 vfprintf (pdf_file->f, fmt, ap);
564 va_end (ap);
565 }
568 void pdf_write_stream (pdf_file_handle pdf_file, struct pdf_obj *stream)
569 {
570 unsigned long begin_pos, end_pos;
572 pdf_assert (stream->type == PT_STREAM);
574 pdf_write_dict (pdf_file, stream->val.stream.stream_dict);
575 fprintf (pdf_file->f, "stream\r\n");
576 begin_pos = ftell (pdf_file->f);
577 stream->val.stream.callback (pdf_file,
578 stream,
579 stream->val.stream.app_data);
580 end_pos = ftell (pdf_file->f);
581 fprintf (pdf_file->f, "\r\nendstream\r\n");
583 pdf_set_integer (stream->val.stream.length, end_pos - begin_pos);
584 }
587 void pdf_write_obj (pdf_file_handle pdf_file, struct pdf_obj *obj)
588 {
589 switch (obj->type)
590 {
591 case PT_NULL:
592 fprintf (pdf_file->f, "null ");
593 break;
594 case PT_BOOL:
595 if (obj->val.boolean)
596 fprintf (pdf_file->f, "true ");
597 else
598 fprintf (pdf_file->f, "false ");
599 break;
600 case PT_NAME:
601 pdf_write_name (pdf_file, obj->val.name);
602 break;
603 case PT_STRING:
604 pdf_write_string (pdf_file, obj->val.string);
605 break;
606 case PT_INTEGER:
607 fprintf (pdf_file->f, "%ld ", obj->val.integer);
608 break;
609 case PT_REAL:
610 pdf_write_real (pdf_file, obj->val.real);
611 break;
612 case PT_IND_REF:
613 pdf_write_ind_ref (pdf_file, obj);
614 break;
615 case PT_DICTIONARY:
616 pdf_write_dict (pdf_file, obj);
617 break;
618 case PT_ARRAY:
619 pdf_write_array (pdf_file, obj);
620 break;
621 case PT_STREAM:
622 pdf_write_stream (pdf_file, obj);
623 break;
624 default:
625 pdf_fatal ("bad object type\n");
626 }
627 }
630 void pdf_write_ind_obj (pdf_file_handle pdf_file, struct pdf_obj *ind_obj)
631 {
632 struct pdf_obj *obj;
634 if (ind_obj->type == PT_IND_REF)
635 obj = pdf_deref_ind_obj (ind_obj);
636 else
637 obj = ind_obj;
639 obj->file_offset = ftell (pdf_file->f);
640 fprintf (pdf_file->f, "%ld %ld obj\r\n", obj->obj_num, obj->obj_gen);
641 pdf_write_obj (pdf_file, obj);
642 fprintf (pdf_file->f, "endobj\r\n");
643 }
646 void pdf_write_all_ind_obj (pdf_file_handle pdf_file)
647 {
648 struct pdf_obj *ind_obj;
649 for (ind_obj = pdf_file->first_ind_obj; ind_obj; ind_obj = ind_obj->next)
650 if (! ind_obj->file_offset)
651 pdf_write_ind_obj (pdf_file, ind_obj);
652 }
655 unsigned long pdf_write_xref (pdf_file_handle pdf_file)
656 {
657 struct pdf_obj *ind_obj;
658 pdf_file->xref_offset = ftell (pdf_file->f);
659 fprintf (pdf_file->f, "xref\r\n");
660 fprintf (pdf_file->f, "0 %ld\r\n", pdf_file->last_ind_obj->obj_num + 1);
661 fprintf (pdf_file->f, "0000000000 65535 f\r\n");
662 for (ind_obj = pdf_file->first_ind_obj; ind_obj; ind_obj = ind_obj->next)
663 fprintf (pdf_file->f, "%010ld 00000 n\r\n", ind_obj->file_offset);
664 return (pdf_file->last_ind_obj->obj_num + 1);
665 }