/*
 * recover-pdfs.c -- carve embedded PDF documents out of an arbitrary file.
 *
 * Provenance: added as recover-pdfs.c in commit
 * 822a00bd0f2f149d724a5d095e97791d125eb2b1 ("Initial commit.", Seán Healy,
 * Sat Apr 11 16:08:00 2026 +0100). The same commit adds a .gitignore with the
 * single entry "recover-pdfs" (presumably the compiled binary).
 *
 * Usage: recover-pdfs <infile> <outdir>
 *
 * The input is scanned for the "%PDF-" header. From each header onward, bytes
 * are collected up to and including the first "%%EOF" trailer, and each
 * collected span is written to <outdir>/pdf_NNNN.pdf (numbered from 0000).
 *
 * NOTE(review): collection stops at the FIRST "%%EOF". A document that
 * contains more than one trailer would be cut short -- confirm whether that
 * matters for the inputs this tool is used on.
 *
 * Define RECOVER_PDFS_NO_MAIN to compile everything except main(), e.g. to
 * link the helpers into a test harness.
 */

#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L /* mkdir(2) */
#endif

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>

/* Number of bytes requested from the input per fread() call. */
#define CHUNK 4096

#define PDF_HEADER "%PDF-"
#define PDF_HEADER_LEN (sizeof PDF_HEADER - 1)
#define PDF_TRAILER "%%EOF"
#define PDF_TRAILER_LEN (sizeof PDF_TRAILER - 1)

/*
 * Return 1 if the first strlen(pat) bytes of buf equal pat, 0 otherwise
 * (including when fewer than strlen(pat) bytes are available).
 * An empty pattern always matches.
 */
int match_at(const unsigned char *buf, size_t len, const char *pat) {
    size_t pat_len = strlen(pat);

    if (pat_len > len) {
        return 0;
    }
    if (pat_len == 0) {
        return 1; /* avoid calling memcmp on a possibly NULL buf */
    }
    return memcmp(buf, pat, pat_len) == 0;
}

/*
 * Return the offset of the first occurrence of pat within buf[0..len), or
 * (size_t)-1 if pat does not occur. An empty pattern is found at offset 0.
 */
size_t find_in_buffer(const unsigned char *buf, size_t len, const char *pat) {
    size_t pat_len = strlen(pat);

    if (pat_len > len) {
        return (size_t)-1;
    }
    if (pat_len == 0) {
        return 0;
    }
    /* len - pat_len cannot wrap: pat_len <= len was checked above. */
    for (size_t i = 0; i <= len - pat_len; i++) {
        if (memcmp(buf + i, pat, pat_len) == 0) {
            return i;
        }
    }
    return (size_t)-1;
}

/*
 * Write data[0..len) to "<outdir>/pdf_<idx, zero-padded to 4 digits>.pdf".
 *
 * Best effort: the function returns nothing, so every failure (path too long,
 * open, short write, close) is reported on stderr and the call returns.
 */
void write_pdf(const unsigned char *data, size_t len, int idx, const char *outdir) {
    char path[512];
    int n = snprintf(path, sizeof path, "%s/pdf_%04d.pdf", outdir, idx);

    if (n < 0 || (size_t)n >= sizeof path) {
        /* A truncated path would silently write to the wrong file. */
        fprintf(stderr, "write_pdf: output path too long for pdf %d\n", idx);
        return;
    }

    FILE *out = fopen(path, "wb");
    if (!out) {
        perror(path);
        return;
    }

    int ok = fwrite(data, 1, len, out) == len;
    int err = errno; /* capture before fclose() can overwrite it */

    /* fclose() flushes buffered data, so it can be where the write fails. */
    if (fclose(out) != 0 && ok) {
        ok = 0;
        err = errno;
    }
    if (!ok) {
        fprintf(stderr, "%s: write failed: %s\n", path, strerror(err));
    }
}

/* Growable byte buffer holding the PDF currently being collected. */
struct byte_buf {
    unsigned char *data;
    size_t len;
    size_t cap;
};

/*
 * Append one byte to b, doubling the capacity when full.
 * Returns 0 on success, -1 on allocation failure (b is left unchanged).
 */
static int byte_buf_push(struct byte_buf *b, unsigned char byte) {
    if (b->len == b->cap) {
        size_t new_cap = b->cap ? b->cap * 2 : 1024;
        if (new_cap < b->cap) {
            return -1; /* capacity arithmetic wrapped around */
        }
        /* Keep the old pointer until realloc has succeeded. */
        unsigned char *grown = realloc(b->data, new_cap);
        if (!grown) {
            return -1;
        }
        b->data = grown;
        b->cap = new_cap;
    }
    b->data[b->len++] = byte;
    return 0;
}

/*
 * Scan the stream `in` and hand every "%PDF-" ... "%%EOF" span to write_pdf().
 *
 * Returns the number of spans found (write_pdf reports its own failures and
 * does not affect the count), or -1 on a read error or allocation failure.
 * A PDF still open when the input ends is discarded with a warning.
 */
static int extract_pdfs(FILE *in, const char *outdir) {
    /* Room for one chunk plus the carried-over tail (< PDF_HEADER_LEN). */
    unsigned char buf[CHUNK + PDF_HEADER_LEN];
    size_t buf_len = 0;
    struct byte_buf pdf = {0};
    int collecting = 0;
    int pdf_count = 0;
    int rc = 0;

    for (;;) {
        /* Loop on fread's result; feof() only turns true after a failed read. */
        size_t got = fread(buf + buf_len, 1, CHUNK, in);
        if (got == 0) {
            break;
        }
        buf_len += got;

        size_t i = 0;
        while (i < buf_len) {
            if (!collecting) {
                size_t off = find_in_buffer(buf + i, buf_len - i, PDF_HEADER);
                if (off == (size_t)-1) {
                    /*
                     * No complete header here. The last PDF_HEADER_LEN - 1
                     * bytes may be the start of a header that finishes in
                     * the next chunk, so keep them.
                     */
                    const size_t keep = PDF_HEADER_LEN - 1;
                    if (buf_len - i > keep) {
                        i = buf_len - keep;
                    }
                    break;
                }
                i += off;
                collecting = 1;
                pdf.len = 0; /* reuse the allocation from the previous PDF */
            }

            if (byte_buf_push(&pdf, buf[i++]) != 0) {
                fprintf(stderr, "out of memory while collecting pdf %d\n", pdf_count);
                rc = -1;
                goto done;
            }

            /*
             * The trailer is checked after every byte, so its first
             * occurrence is always at the very end of the collected data;
             * comparing the tail is enough and survives chunk boundaries.
             */
            if (pdf.len >= PDF_TRAILER_LEN &&
                memcmp(pdf.data + pdf.len - PDF_TRAILER_LEN, PDF_TRAILER,
                       PDF_TRAILER_LEN) == 0) {
                write_pdf(pdf.data, pdf.len, pdf_count++, outdir);
                collecting = 0;
            }
        }

        /* Move the unconsumed tail to the front for the next read. */
        buf_len -= i;
        memmove(buf, buf + i, buf_len);
    }

    if (ferror(in)) {
        perror("read input");
        rc = -1;
    } else if (collecting) {
        fprintf(stderr,
                "warning: input ended before %s; discarding %zu-byte partial PDF\n",
                PDF_TRAILER, pdf.len);
    }

done:
    free(pdf.data);
    return rc == 0 ? pdf_count : -1;
}

#ifndef RECOVER_PDFS_NO_MAIN
/*
 * Entry point: recover-pdfs <infile> <outdir>.
 * Creates <outdir> if needed (an existing directory is fine) and extracts
 * every PDF found in <infile> into it. Exits 0 on success, 1 on usage
 * error, I/O error or allocation failure.
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <infile> <outdir>\n", argv[0]);
        return 1;
    }

    const char *infile = argv[1];
    const char *outdir = argv[2];

    if (mkdir(outdir, 0755) != 0 && errno != EEXIST) {
        perror("create output directory");
        return 1;
    }

    FILE *f = fopen(infile, "rb");
    if (!f) {
        perror("open input");
        return 1;
    }

    int found = extract_pdfs(f, outdir);
    fclose(f);

    return found < 0 ? 1 : 0;
}
#endif /* RECOVER_PDFS_NO_MAIN */