Files
recover-pdfs/recover-pdfs.c
2026-04-11 16:08:45 +01:00

104 lines
2.4 KiB
C

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#define CHUNK 4096
/*
 * Test whether the buffer begins with the NUL-terminated pattern `pat`.
 *
 * buf/len  bytes to inspect
 * pat      pattern to compare against (its terminator is not compared)
 *
 * Returns 1 when the first strlen(pat) bytes of buf equal pat, 0 when they
 * differ or when buf is shorter than the pattern.
 */
int match_at(const unsigned char *buf, size_t len, const char *pat) {
    const size_t need = strlen(pat);

    /* Length is checked first so memcmp never reads past the buffer. */
    return (len >= need && memcmp(buf, pat, need) == 0) ? 1 : 0;
}
/*
 * Locate the first occurrence of the NUL-terminated pattern `pat` inside
 * the byte buffer buf[0..len).
 *
 * Returns the offset of the first match, or (size_t)-1 when the pattern
 * does not occur. An empty pattern matches at offset 0.
 */
size_t find_in_buffer(const unsigned char *buf, size_t len, const char *pat) {
    const size_t pat_len = strlen(pat);

    /* A pattern longer than the buffer can never match. */
    if (pat_len > len) {
        return (size_t)-1;
    }

    /* Highest offset at which the whole pattern still fits. */
    const size_t last_start = len - pat_len;
    size_t pos = 0;
    while (pos <= last_start) {
        if (!memcmp(&buf[pos], pat, pat_len)) {
            return pos;
        }
        pos++;
    }
    return (size_t)-1;
}
/*
 * Write one recovered PDF to "<outdir>/pdf_NNNN.pdf".
 *
 * data/len  bytes to store
 * idx       sequence number used in the file name (zero-padded to 4 digits)
 * outdir    directory that receives the file; it must already exist
 *
 * The function returns nothing; every failure (path too long, cannot open,
 * short write, failed close) is reported on stderr so that a lost file does
 * not go unnoticed. A partially written file is left in place.
 */
void write_pdf(const unsigned char *data, size_t len, int idx, const char *outdir) {
    char path[512];
    int n = snprintf(path, sizeof(path), "%s/pdf_%04d.pdf", outdir, idx);

    /* Refuse a truncated path: it would name a different file than intended. */
    if (n < 0 || (size_t)n >= sizeof(path)) {
        fprintf(stderr, "write_pdf: output path too long for pdf_%04d.pdf\n", idx);
        return;
    }

    FILE *f = fopen(path, "wb");
    if (!f) {
        perror(path);
        return;
    }

    if (fwrite(data, 1, len, f) != len) {
        perror(path);
    }

    /* fclose flushes buffered data, so a failure here also means data loss. */
    if (fclose(f) != 0) {
        perror(path);
    }
}
/*
 * Append one byte to a growable heap buffer, doubling its capacity when full.
 * Returns 0 on success, -1 when memory cannot be obtained (the buffer is
 * left valid and unchanged in that case).
 */
static int append_byte(unsigned char **data, size_t *size, size_t *cap, unsigned char byte) {
    if (*size == *cap) {
        size_t new_cap = (*cap != 0) ? *cap * 2 : 1024;
        if (new_cap <= *cap) {
            return -1; /* capacity computation overflowed */
        }
        unsigned char *grown = realloc(*data, new_cap);
        if (!grown) {
            return -1;
        }
        *data = grown;
        *cap = new_cap;
    }
    (*data)[(*size)++] = byte;
    return 0;
}

/*
 * Carve PDF files out of a raw image.
 *
 * Usage: <prog> <image_file> <output_dir>
 *
 * The input is streamed in CHUNK-sized reads. Every byte run that starts
 * with "%PDF-" and ends with the next "%%EOF" is handed to write_pdf(),
 * numbered in order of discovery starting at 0.
 *
 * NOTE(review): carving stops at the FIRST "%%EOF" after a header, so a PDF
 * that carries several end markers is cut at the first one -- confirm this
 * is acceptable for the images being recovered.
 *
 * Returns 0 on success, 1 on usage error, I/O error, or memory exhaustion.
 */
int main(int argc, char **argv) {
    static const unsigned char header[] = { '%', 'P', 'D', 'F', '-' };
    static const unsigned char trailer[] = { '%', '%', 'E', 'O', 'F' };

    if (argc != 3) {
        fprintf(stderr, "Usage: %s <image_file> <output_dir>\n", argv[0]);
        return 1;
    }
    const char *infile = argv[1];
    const char *outdir = argv[2];

    /* An already existing output directory is fine; anything else is fatal. */
    if (mkdir(outdir, 0755) != 0 && errno != EEXIST) {
        perror("create output dir");
        return 1;
    }

    FILE *f = fopen(infile, "rb");
    if (!f) {
        perror("open input");
        return 1;
    }

    int rc = 1;
    int pdf_count = 0;
    int collecting = 0;           /* non-zero while inside a PDF */
    size_t matched = 0;           /* header bytes matched so far while scanning */
    unsigned char *file_data = NULL;
    size_t file_size = 0;
    size_t file_cap = 0;
    size_t got = 0;

    unsigned char *buf = malloc(CHUNK);
    if (!buf) {
        perror("allocate read buffer");
        fclose(f);
        return 1;
    }

    /* Loop on fread's result: it ends on EOF and on read errors alike. */
    while ((got = fread(buf, 1, CHUNK, f)) > 0) {
        for (size_t i = 0; i < got; i++) {
            const unsigned char c = buf[i];

            if (!collecting) {
                /*
                 * Incremental header match; the state survives across chunk
                 * boundaries. '%' occurs only at the start of the header, so
                 * after a mismatch the only possible restart is a fresh '%'.
                 */
                if (c == header[matched]) {
                    matched++;
                } else {
                    matched = (c == header[0]) ? 1 : 0;
                }
                if (matched < sizeof(header)) {
                    continue;
                }

                /* Full header seen: start a new file seeded with it. */
                matched = 0;
                file_size = 0;
                for (size_t k = 0; k < sizeof(header); k++) {
                    if (append_byte(&file_data, &file_size, &file_cap, header[k]) != 0) {
                        goto oom;
                    }
                }
                collecting = 1;
                continue;
            }

            if (append_byte(&file_data, &file_size, &file_cap, c) != 0) {
                goto oom;
            }

            /*
             * Only the tail needs checking: any earlier end marker would
             * already have closed this file when its last byte arrived.
             */
            if (file_size >= sizeof(trailer) &&
                memcmp(file_data + file_size - sizeof(trailer), trailer, sizeof(trailer)) == 0) {
                write_pdf(file_data, file_size, pdf_count++, outdir);
                file_size = 0; /* the buffer itself is reused for the next PDF */
                collecting = 0;
            }
        }
    }

    if (ferror(f)) {
        perror("read input");
        goto cleanup;
    }

    if (collecting) {
        fprintf(stderr, "warning: input ended inside a PDF; %zu bytes discarded\n", file_size);
    }
    rc = 0;
    goto cleanup;

oom:
    fprintf(stderr, "out of memory while collecting a PDF\n");

cleanup:
    free(file_data);
    free(buf);
    fclose(f);
    return rc;
}