Also, the image can now be fed in through stdin (useful for a `pv image.iso | ./recover-pdfs ...` workflow).
134 lines
3.0 KiB
C
134 lines
3.0 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/stat.h>
|
|
|
|
#define CHUNK 4096
|
|
#define MAX_PDF_SIZE (100LL * 1024 * 1024) // 100 MB
|
|
|
|
/*
 * Check whether `buf` begins with a well-formed PDF header line.
 *
 * Accepted form: "%PDF-" <digit> "." <digit> [<digit>] followed by either
 * "\n" or "\r\n" (for example "%PDF-1.4\n", "%PDF-1.7\r\n", "%PDF-1.10\n").
 *
 * buf: candidate bytes; only the first `len` bytes are ever read.
 * len: number of valid bytes available at `buf`.
 *
 * Returns 1 when a complete header, including its line terminator, lies
 * within the first `len` bytes; returns 0 otherwise.
 */
int valid_pdf_header(const unsigned char *buf, size_t len) {
    /* The shortest possible header, "%PDF-d.d\n", is 9 bytes long. */
    if (len < 9)
        return 0;

    if (memcmp(buf, "%PDF-", 5) != 0)
        return 0;

    /* Major version digit, dot, first minor version digit. */
    if (buf[5] < '0' || buf[5] > '9')
        return 0;
    if (buf[6] != '.')
        return 0;
    if (buf[7] < '0' || buf[7] > '9')
        return 0;

    size_t i = 8;

    /* Optional second minor version digit (index 8 is covered by len >= 9). */
    if (buf[i] >= '0' && buf[i] <= '9')
        i++;

    /* i may now equal len: the terminator must still be inside the buffer. */
    if (i >= len)
        return 0;

    if (buf[i] == '\n')
        return 1;

    /* "\r\n" needs one more byte; never look past the supplied length. */
    if (buf[i] == '\r' && i + 1 < len && buf[i + 1] == '\n')
        return 1;

    return 0;
}
|
|
|
|
/*
 * Locate the first PDF end-of-file marker ("%%EOF") inside a byte range.
 *
 * buf: bytes to search.
 * len: number of bytes available at `buf`.
 *
 * Returns the offset of the marker's first byte, or (size_t)-1 when the
 * range does not contain a complete marker.
 */
size_t find_eof(const unsigned char *buf, size_t len) {
    static const char marker[] = "%%EOF";
    const size_t marker_len = sizeof marker - 1;
    const size_t not_found = (size_t)-1;

    /* A range shorter than the marker cannot contain it. */
    if (len < marker_len)
        return not_found;

    /* Last position at which a full marker still fits in the range. */
    const unsigned char *last_start = buf + (len - marker_len);
    const unsigned char *cursor = buf;

    while (cursor <= last_start) {
        /* Jump straight to the next '%' that could begin a marker. */
        const unsigned char *hit =
            memchr(cursor, '%', (size_t)(last_start - cursor) + 1);
        if (hit == NULL)
            break;
        if (memcmp(hit, marker, marker_len) == 0)
            return (size_t)(hit - buf);
        cursor = hit + 1;
    }

    return not_found;
}
|
|
|
|
/*
 * Write one carved PDF to "<outdir>/pdf_NNNN.pdf".
 *
 * data:   bytes of the PDF.
 * len:    number of bytes to write from `data`.
 * idx:    sequence number used in the file name (zero-padded to 4 digits).
 * outdir: directory that receives the file.
 *
 * The function returns nothing; every failure (path too long, file cannot
 * be created, short write, failed close) is reported on stderr so that a
 * missing or truncated output file never goes unnoticed.
 */
void write_pdf(const unsigned char *data, size_t len, int idx, const char *outdir) {
    char path[512];

    /* A truncated path would silently point at a different file. */
    int n = snprintf(path, sizeof(path), "%s/pdf_%04d.pdf", outdir, idx);
    if (n < 0 || (size_t)n >= sizeof(path)) {
        fprintf(stderr, "write_pdf: output path too long for pdf %d\n", idx);
        return;
    }

    FILE *f = fopen(path, "wb");
    if (!f) {
        perror(path);
        return;
    }

    int failed = (fwrite(data, 1, len, f) != len);

    /* fclose flushes buffered data, so it can fail even after fwrite succeeds. */
    if (fclose(f) != 0)
        failed = 1;

    if (failed)
        fprintf(stderr, "write_pdf: failed to write %s\n", path);
}
|
|
|
|
int main(int argc, char **argv) {
|
|
if (argc != 3) {
|
|
fprintf(stderr, "Usage: %s <image_file> <output_dir>\n", argv[0]);
|
|
return 1;
|
|
}
|
|
|
|
const char *infile = argv[1];
|
|
const char *outdir = argv[2];
|
|
|
|
mkdir(outdir, 0755);
|
|
|
|
if (strcmp(infile, "-") == 0) {
|
|
infile = "/dev/stdin";
|
|
}
|
|
FILE *f = fopen(infile, "rb");
|
|
if (!f) {
|
|
perror("open input");
|
|
return 1;
|
|
}
|
|
|
|
unsigned char *buf = malloc(CHUNK * 2);
|
|
size_t buf_len = 0;
|
|
|
|
unsigned char *pdf_buf = NULL;
|
|
size_t pdf_size = 0;
|
|
|
|
int collecting = 0;
|
|
int pdf_count = 0;
|
|
|
|
while (!feof(f)) {
|
|
size_t r = fread(buf + buf_len, 1, CHUNK, f);
|
|
buf_len += r;
|
|
|
|
size_t i = 0;
|
|
|
|
while (i < buf_len) {
|
|
|
|
if (!collecting) {
|
|
if (i + 10 < buf_len && valid_pdf_header(buf + i, buf_len - i)) {
|
|
collecting = 1;
|
|
pdf_buf = malloc(1024);
|
|
pdf_size = 0;
|
|
}
|
|
}
|
|
|
|
if (collecting) {
|
|
|
|
// HARD SIZE LIMIT CHECK
|
|
if (pdf_size >= MAX_PDF_SIZE) {
|
|
free(pdf_buf);
|
|
pdf_buf = NULL;
|
|
pdf_size = 0;
|
|
collecting = 0;
|
|
continue;
|
|
}
|
|
|
|
if (pdf_size % 1024 == 0) {
|
|
pdf_buf = realloc(pdf_buf, pdf_size + 1024);
|
|
}
|
|
|
|
pdf_buf[pdf_size++] = buf[i];
|
|
|
|
size_t eof_pos = find_eof(pdf_buf, pdf_size);
|
|
if (eof_pos != (size_t)-1) {
|
|
size_t end = eof_pos + 5;
|
|
|
|
write_pdf(pdf_buf, end, pdf_count++, outdir);
|
|
|
|
free(pdf_buf);
|
|
pdf_buf = NULL;
|
|
pdf_size = 0;
|
|
collecting = 0;
|
|
}
|
|
}
|
|
|
|
i++;
|
|
}
|
|
|
|
buf_len = 0;
|
|
}
|
|
|
|
free(buf);
|
|
fclose(f);
|
|
|
|
return 0;
|
|
}
|