/*
 * Carve embedded PDF documents out of an arbitrary binary file.
 *
 * The input is scanned for the "%PDF-" header signature; from there bytes are
 * collected until the first "%%EOF" trailer marker, and the collected span is
 * written to <outdir>/pdf_NNNN.pdf.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#define CHUNK 4096

/* Signatures that delimit a carved document. */
static const unsigned char PDF_HEADER[] = { '%', 'P', 'D', 'F', '-' };
static const unsigned char PDF_TRAILER[] = { '%', '%', 'E', 'O', 'F' };

enum {
    PDF_HEADER_LEN = sizeof PDF_HEADER,
    PDF_TRAILER_LEN = sizeof PDF_TRAILER,
    PDF_INITIAL_CAP = 1024
};

/*
 * Return 1 if the first strlen(pat) bytes of buf equal pat, 0 otherwise
 * (including when buf holds fewer than strlen(pat) bytes).
 */
int match_at(const unsigned char *buf, size_t len, const char *pat)
{
    size_t pat_len = strlen(pat);

    if (len < pat_len) {
        return 0;
    }
    if (pat_len == 0) {
        return 1; /* empty pattern matches; avoids memcmp on a possibly NULL buf */
    }
    return memcmp(buf, pat, pat_len) == 0;
}

/*
 * Return the offset of the first occurrence of pat in buf[0..len), or
 * (size_t)-1 when pat does not occur. An empty pattern matches at offset 0.
 */
size_t find_in_buffer(const unsigned char *buf, size_t len, const char *pat)
{
    size_t pat_len = strlen(pat);

    if (pat_len > len) {
        return (size_t)-1;
    }
    if (pat_len == 0) {
        return 0;
    }
    /* len - pat_len cannot underflow thanks to the check above. */
    for (size_t i = 0; i <= len - pat_len; i++) {
        if (memcmp(buf + i, pat, pat_len) == 0) {
            return i;
        }
    }
    return (size_t)-1;
}

/*
 * Write len bytes of data to "<outdir>/pdf_<idx>.pdf" (idx zero-padded to
 * four digits). Failures are reported on stderr; the function itself has no
 * return value, so callers are not interrupted by a failed write.
 */
void write_pdf(const unsigned char *data, size_t len, int idx, const char *outdir)
{
    char path[512];
    int n = snprintf(path, sizeof path, "%s/pdf_%04d.pdf", outdir, idx);

    if (n < 0 || (size_t)n >= sizeof path) {
        /* A truncated path could silently land the file somewhere else. */
        fprintf(stderr, "output path too long for document %d\n", idx);
        return;
    }

    FILE *out = fopen(path, "wb");
    if (!out) {
        perror(path);
        return;
    }
    if (fwrite(data, 1, len, out) != len) {
        perror(path);
    }
    if (fclose(out) != 0) {
        /* fclose flushes buffered data, so a failure here means data loss. */
        perror(path);
    }
}

/*
 * Incremental carving state. Bytes are fed one at a time, so signatures that
 * straddle a read-chunk boundary are detected like any other.
 */
struct pdf_carver {
    unsigned char *data; /* bytes of the document being collected */
    size_t size;         /* number of valid bytes in data */
    size_t cap;          /* allocated size of data */
    size_t hdr_matched;  /* header bytes matched so far while searching */
    int collecting;      /* non-zero once a header has been seen */
};

/* Append one byte to the document buffer, doubling it as needed; -1 on OOM. */
static int carver_append(struct pdf_carver *c, unsigned char byte)
{
    if (c->size == c->cap) {
        size_t new_cap;

        if (c->cap == 0) {
            new_cap = PDF_INITIAL_CAP;
        } else if (c->cap > SIZE_MAX / 2) {
            return -1;
        } else {
            new_cap = c->cap * 2;
        }

        /* Keep the old pointer until realloc succeeds so it is never lost. */
        unsigned char *grown = realloc(c->data, new_cap);
        if (!grown) {
            return -1;
        }
        c->data = grown;
        c->cap = new_cap;
    }
    c->data[c->size++] = byte;
    return 0;
}

/* Go back to searching for a header; the allocation is kept for reuse. */
static void carver_restart(struct pdf_carver *c)
{
    c->size = 0;
    c->hdr_matched = 0;
    c->collecting = 0;
}

/* Release the document buffer and return the carver to its initial state. */
static void carver_free(struct pdf_carver *c)
{
    free(c->data);
    c->data = NULL;
    c->cap = 0;
    carver_restart(c);
}

/*
 * Feed one input byte.
 *
 * Returns 1 when c->data[0..c->size) now holds a complete document (header
 * through the first "%%EOF"); the caller should consume it and then call
 * carver_restart(). Returns 0 when more input is needed and -1 if memory
 * could not be allocated.
 *
 * Note: the document is cut at the FIRST trailer marker, so a PDF carrying
 * incremental updates (several "%%EOF" markers) is truncated at the first.
 */
static int carver_feed(struct pdf_carver *c, unsigned char byte)
{
    if (!c->collecting) {
        if (byte == PDF_HEADER[c->hdr_matched]) {
            c->hdr_matched++;
        } else {
            /* '%' occurs only at the start of the header, so after a
             * mismatch the only possible partial match is a fresh '%'. */
            c->hdr_matched = (byte == PDF_HEADER[0]) ? 1 : 0;
        }
        if (c->hdr_matched < PDF_HEADER_LEN) {
            return 0;
        }

        /* Full header seen: start a new document that begins with it. */
        c->collecting = 1;
        c->hdr_matched = 0;
        c->size = 0;
        for (size_t k = 0; k < PDF_HEADER_LEN; k++) {
            if (carver_append(c, PDF_HEADER[k]) != 0) {
                return -1;
            }
        }
        return 0;
    }

    if (carver_append(c, byte) != 0) {
        return -1;
    }
    /* The trailer can only have just completed at the newest byte, so it is
     * enough to compare the tail instead of rescanning the whole buffer. */
    if (c->size >= PDF_TRAILER_LEN &&
        memcmp(c->data + c->size - PDF_TRAILER_LEN, PDF_TRAILER,
               PDF_TRAILER_LEN) == 0) {
        return 1;
    }
    return 0;
}

/* Define PDF_CARVE_NO_MAIN to link the helpers above into a test harness. */
#ifndef PDF_CARVE_NO_MAIN
/*
 * Usage: prog <input-file> <output-dir>
 *
 * Creates the output directory if needed, scans the input file and writes
 * every carved document into it. Returns 0 on success and 1 on a usage
 * error, an I/O error on the input, or memory exhaustion. A document still
 * open when the input ends (no trailer found) is discarded.
 */
int main(int argc, char **argv)
{
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <input-file> <output-dir>\n", argv[0]);
        return 1;
    }
    const char *infile = argv[1];
    const char *outdir = argv[2];

    /* An already existing directory is fine; anything else is fatal. */
    if (mkdir(outdir, 0755) != 0 && errno != EEXIST) {
        perror("create output directory");
        return 1;
    }

    FILE *f = fopen(infile, "rb");
    if (!f) {
        perror("open input");
        return 1;
    }

    unsigned char buf[CHUNK];
    struct pdf_carver carver = {0};
    int pdf_count = 0;
    int status = 0;
    size_t got;

    /* Loop on the fread result rather than feof(): a read error sets the
     * error indicator, not EOF, and would otherwise spin forever. */
    while (status == 0 && (got = fread(buf, 1, sizeof buf, f)) > 0) {
        for (size_t i = 0; i < got; i++) {
            int rc = carver_feed(&carver, buf[i]);

            if (rc < 0) {
                fprintf(stderr, "out of memory\n");
                status = 1;
                break;
            }
            if (rc > 0) {
                write_pdf(carver.data, carver.size, pdf_count++, outdir);
                carver_restart(&carver);
            }
        }
    }
    if (ferror(f)) {
        perror("read input");
        status = 1;
    }

    carver_free(&carver);
    fclose(f);
    return status;
}
#endif /* PDF_CARVE_NO_MAIN */