More exact PDF pattern and filesize limit

Also, the image can now be fed in through stdin (useful for a `pv image.iso |
./revocer-pdfs ...` workflow).
This commit is contained in:
2026-04-11 16:19:27 +01:00
parent 822a00bd0f
commit 38dfc87459

View File

@@ -4,17 +4,31 @@
#include <sys/stat.h> #include <sys/stat.h>
#define CHUNK 4096 #define CHUNK 4096
#define MAX_PDF_SIZE (100LL * 1024 * 1024) // 100 MB
int match_at(const unsigned char *buf, size_t len, const char *pat) { int valid_pdf_header(const unsigned char *buf, size_t len) {
size_t p = strlen(pat); if (len < 9) return 0;
if (len < p) return 0;
return memcmp(buf, pat, p) == 0; if (memcmp(buf, "%PDF-", 5) != 0)
return 0;
if (buf[5] < '0' || buf[5] > '9') return 0;
if (buf[6] != '.') return 0;
if (buf[7] < '0' || buf[7] > '9') return 0;
size_t i = 8;
if (buf[i] >= '0' && buf[i] <= '9') i++;
if (buf[i] == '\r' && buf[i + 1] == '\n') return 1;
if (buf[i] == '\n') return 1;
return 0;
} }
size_t find_in_buffer(const unsigned char *buf, size_t len, const char *pat) { size_t find_eof(const unsigned char *buf, size_t len) {
size_t p = strlen(pat); for (size_t i = 0; i + 5 <= len; i++) {
for (size_t i = 0; i + p <= len; i++) { if (memcmp(buf + i, "%%EOF", 5) == 0)
if (memcmp(buf + i, pat, p) == 0)
return i; return i;
} }
return (size_t)-1; return (size_t)-1;
@@ -42,6 +56,9 @@ int main(int argc, char **argv) {
mkdir(outdir, 0755); mkdir(outdir, 0755);
if (strcmp(infile, "-") == 0) {
infile = "/dev/stdin";
}
FILE *f = fopen(infile, "rb"); FILE *f = fopen(infile, "rb");
if (!f) { if (!f) {
perror("open input"); perror("open input");
@@ -50,11 +67,12 @@ int main(int argc, char **argv) {
unsigned char *buf = malloc(CHUNK * 2); unsigned char *buf = malloc(CHUNK * 2);
size_t buf_len = 0; size_t buf_len = 0;
int pdf_count = 0;
unsigned char *file_data = NULL; unsigned char *pdf_buf = NULL;
size_t file_size = 0; size_t pdf_size = 0;
int collecting = 0; int collecting = 0;
int pdf_count = 0;
while (!feof(f)) { while (!feof(f)) {
size_t r = fread(buf + buf_len, 1, CHUNK, f); size_t r = fread(buf + buf_len, 1, CHUNK, f);
@@ -65,28 +83,40 @@ int main(int argc, char **argv) {
while (i < buf_len) { while (i < buf_len) {
if (!collecting) { if (!collecting) {
if (i + 5 < buf_len && memcmp(buf + i, "%PDF-", 5) == 0) { if (i + 10 < buf_len && valid_pdf_header(buf + i, buf_len - i)) {
collecting = 1; collecting = 1;
file_data = malloc(1024); pdf_buf = malloc(1024);
file_size = 0; pdf_size = 0;
} }
} }
if (collecting) { if (collecting) {
if (file_size % 1024 == 0) {
file_data = realloc(file_data, file_size + 1024); // HARD SIZE LIMIT CHECK
if (pdf_size >= MAX_PDF_SIZE) {
free(pdf_buf);
pdf_buf = NULL;
pdf_size = 0;
collecting = 0;
continue;
} }
file_data[file_size++] = buf[i]; if (pdf_size % 1024 == 0) {
pdf_buf = realloc(pdf_buf, pdf_size + 1024);
}
if (file_size > 6) { pdf_buf[pdf_size++] = buf[i];
if (memmem(file_data, file_size, "%%EOF", 5)) {
write_pdf(file_data, file_size, pdf_count++, outdir); size_t eof_pos = find_eof(pdf_buf, pdf_size);
free(file_data); if (eof_pos != (size_t)-1) {
file_data = NULL; size_t end = eof_pos + 5;
file_size = 0;
collecting = 0; write_pdf(pdf_buf, end, pdf_count++, outdir);
}
free(pdf_buf);
pdf_buf = NULL;
pdf_size = 0;
collecting = 0;
} }
} }