<?php
// The opening tag must be the very first bytes of this file: anything before it
// (even a single blank line) is sent to the client as output, which makes later
// header() calls fail and prepends stray whitespace to the CSV download.

// OCR batches can take a long time, so the execution time limit is disabled.
set_time_limit(0);

// Request parameters. Query values can arrive as arrays (e.g. ?dir[]=x); anything
// that is not a plain string is treated as absent so that string-only functions
// such as is_dir() never receive an array.
$action = (isset($_GET['action']) && is_string($_GET['action'])) ? $_GET['action'] : '';
// NOTE(review): the directory comes straight from the query string and is not
// restricted to any base path here -- confirm that access to this script is limited.
$targetDir = (isset($_GET['dir']) && is_string($_GET['dir'])) ? $_GET['dir'] : '';

// Locations of the progress state file and the text log, relative to the
// current working directory.
$progressFile = 'progress.json';
$logFile = 'logs/log.txt';

/**
 * Append a timestamped line to the text log.
 *
 * The log path is read from the global $logFile when it holds a non-empty
 * string; otherwise 'logs/log.txt' is used. The parent directory is created
 * on demand so the first write on a fresh install does not fail.
 *
 * @param string $msg Message text; it is written as "[HH:MM:SS] <msg>\n".
 * @return void
 */
function logMessage($msg) {
    global $logFile;
    $path = (is_string($logFile) && $logFile !== '') ? $logFile : 'logs/log.txt';

    $dir = dirname($path);
    // The second is_dir() covers another request creating the directory first.
    if (!is_dir($dir) && !mkdir($dir, 0775, true) && !is_dir($dir)) {
        return; // Logging is best-effort; mkdir() has already raised a warning.
    }

    // LOCK_EX keeps lines written by concurrent requests from interleaving.
    file_put_contents($path, "[" . date("H:i:s") . "] $msg\n", FILE_APPEND | LOCK_EX);
}

/**
 * Recursively collect the paths of all PDF files below a directory.
 *
 * @param string $dir Directory to scan.
 * @return string[] Path names of regular files whose extension is "pdf"
 *                  (compared case-insensitively), in iteration order.
 */
function getFiles($dir) {
    $pdfPaths = [];
    $walker = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($dir));

    foreach ($walker as $entry) {
        // Only regular files count; directories (including "." and "..") are skipped.
        $isPdf = $entry->isFile() && strtolower($entry->getExtension()) === 'pdf';
        if ($isPdf) {
            $pdfPaths[] = $entry->getPathname();
        }
    }

    return $pdfPaths;
}

/**
 * Build a filesystem-safe base name from the first words of a text.
 *
 * HTML tags are stripped, every character other than ASCII letters, digits,
 * "_" and "-" is removed, and the first $maxWords non-empty words are joined
 * with underscores. The result is cut to 60 bytes and stripped of leading and
 * trailing "_" / "-" so it never starts with an option-like dash or ends in a
 * dangling separator.
 *
 * @param string $text     Source text (e.g. OCR output).
 * @param int    $maxWords Maximum number of words to keep; values below 0 count as 0.
 * @return string Sanitized name without extension; '' when nothing usable remains.
 */
function sanitizeFilename($text, $maxWords = 10) {
    $plain = strip_tags((string) $text);

    // Remove disallowed characters first but keep whitespace, so that words made
    // only of punctuation vanish instead of leaving empty slots between separators.
    $plain = preg_replace('/[^a-zA-Z0-9_\s-]/', '', $plain);
    if ($plain === null) {
        return '';
    }

    // PREG_SPLIT_NO_EMPTY drops the empty pieces caused by leading/trailing whitespace.
    $words = preg_split('/\s+/', $plain, -1, PREG_SPLIT_NO_EMPTY);
    if ($words === false) {
        return '';
    }

    $name = implode('_', array_slice($words, 0, max(0, (int) $maxWords)));
    $name = substr($name, 0, 60);

    // Truncation can leave a trailing separator; trim both ends.
    return trim($name, '_-');
}

/**
 * Render the first page of a PDF to a JPEG by shelling out to pdftoppm.
 *
 * @param string $pdfPath   Path of the PDF to render.
 * @param string $imageBase Output path without extension; ".jpg" is appended to
 *                          form the returned path.
 * @return string|false Path of the generated JPEG, or false when the command
 *                      failed or did not produce the expected file.
 */
function convertPdfToImage($pdfPath, $imageBase) {
    $cmd = "pdftoppm -f 1 -singlefile -jpeg " . escapeshellarg($pdfPath) . " " . escapeshellarg($imageBase);
    exec($cmd, $output, $code);

    $imagePath = "$imageBase.jpg";

    // A zero exit status alone is not trusted: callers read and delete the image,
    // so only report success when the file is really there.
    return ($code === 0 && is_file($imagePath)) ? $imagePath : false;
}

/**
 * Run the tesseract CLI on an image and return the recognised text.
 *
 * tesseract is given an output base name and the text is read back from
 * "<base>.txt". Both that file and the placeholder file created by tempnam()
 * are removed before returning, so no temporary files are left behind.
 *
 * @param string $imagePath Path of the image to OCR.
 * @param string $lang      Language code passed to tesseract's -l option.
 * @return string Recognised text, or '' when nothing could be read.
 */
function runTesseract($imagePath, $lang = 'eng') {
    $outputBase = tempnam(sys_get_temp_dir(), 'ocr_');
    if ($outputBase === false) {
        return '';
    }
    $textFile = $outputBase . ".txt";

    $cmd = "tesseract " . escapeshellarg($imagePath) . " " . escapeshellarg($outputBase)
        . " -l " . escapeshellarg($lang);
    exec($cmd);

    $text = '';
    if (is_file($textFile)) {
        $contents = file_get_contents($textFile);
        if ($contents !== false) {
            $text = $contents;
        }
    }

    // tempnam() creates $outputBase itself as an empty file; remove it together
    // with the .txt result.
    foreach ([$outputBase, $textFile] as $leftover) {
        if (is_file($leftover)) {
            unlink($leftover);
        }
    }

    return $text;
}

/**
 * Load the saved processing state from the file named by the global $progressFile.
 *
 * The result always contains the keys 'files' (array), 'index' (int >= 0) and
 * 'paused' (bool). A missing, unreadable or corrupt progress file yields the
 * default state instead of null, so callers can index the result safely.
 * Additional keys present in the file are passed through unchanged.
 *
 * @return array Progress state.
 */
function loadProgress() {
    global $progressFile;
    $defaults = ['files' => [], 'index' => 0, 'paused' => false];

    if (!is_string($progressFile) || !is_file($progressFile)) {
        return $defaults;
    }

    $raw = file_get_contents($progressFile);
    $decoded = ($raw === false) ? null : json_decode($raw, true);
    if (!is_array($decoded)) {
        // Unreadable or invalid JSON: fall back rather than hand null to callers.
        return $defaults;
    }

    // Stored values win; defaults fill in whatever the file does not provide.
    $state = array_merge($defaults, $decoded);
    if (!is_array($state['files'])) {
        $state['files'] = [];
    }
    $state['index'] = max(0, (int) $state['index']);
    $state['paused'] = (bool) $state['paused'];

    return $state;
}

/**
 * Persist the processing state as pretty-printed JSON to the file named by the
 * global $progressFile.
 *
 * If the data cannot be encoded (for example a path containing bytes that are
 * not valid UTF-8), the existing progress file is left untouched instead of
 * being overwritten with an empty string.
 *
 * @param array $data Progress state to store.
 * @return bool True when the state was written, false otherwise.
 */
function saveProgress($data) {
    global $progressFile;

    $json = json_encode($data, JSON_PRETTY_PRINT);
    if ($json === false) {
        error_log('saveProgress: could not encode progress data: ' . json_last_error_msg());
        return false;
    }

    // LOCK_EX serialises writers so two requests cannot interleave their output.
    return file_put_contents($progressFile, $json, LOCK_EX) !== false;
}

// Action "start": scan the requested directory for PDFs and store a fresh
// progress record (position 0, not paused). The request ends here either way.
if ($action === 'start') {
    if (is_dir($targetDir)) {
        $pdfList = getFiles($targetDir);
        saveProgress(['files' => $pdfList, 'index' => 0, 'paused' => false]);
        logMessage("Started processing " . count($pdfList) . " PDFs.");
    } else {
        // Nothing is scanned or saved for a path that is not a directory.
        logMessage("Invalid directory: $targetDir");
    }
    exit;
}

// Action "pause": mark the stored progress as paused, log it, and end the request.
if ($action === 'pause') {
    $state = loadProgress();
    $state['paused'] = true; // the only field this action changes
    saveProgress($state);

    logMessage("Paused processing.");
    exit;
}

// Action "resume": process the next batch of queued PDFs.
//
// For each file the first page is rendered to a JPEG, OCR'd, and the file is
// renamed after the first words of the recognised text. A batch ends after 20
// successful renames, when the queue is exhausted, or when another request has
// set the paused flag in the progress file. The new position is then saved with
// paused = true and the request ends.
if ($action === 'resume') {
    $progress = loadProgress();
    if (!is_array($progress)) {
        // A corrupt progress file can decode to a non-array; treat it as an empty queue.
        $progress = [];
    }
    $progress['paused'] = false;
    saveProgress($progress);

    // Tolerate a progress record without a file list (e.g. resume before start).
    $files = (isset($progress['files']) && is_array($progress['files']))
        ? array_values($progress['files'])
        : [];
    $index = isset($progress['index']) ? max(0, (int) $progress['index']) : 0;
    $total = count($files);
    $count = 0;
    $batchLimit = 20;

    if (!is_dir('logs')) {
        mkdir('logs', 0775, true);
    }
    $logCsv = fopen("logs/rename_log.csv", "a");
    if ($logCsv === false) {
        // Keep renaming; only the CSV record is lost.
        logMessage("Could not open logs/rename_log.csv; renames will not be recorded.");
    }

    while ($index < $total && $count < $batchLimit) {
        // Re-read the stored state so a "pause" issued by another request while
        // this batch is running actually stops it.
        $latest = loadProgress();
        if (is_array($latest) && !empty($latest['paused'])) {
            logMessage("Pause requested; stopping batch.");
            break;
        }

        $pdf = $files[$index];
        $index++;

        // Cheap check first: files ending in _<digits>.pdf are treated as already
        // renamed, so there is no point running pdftoppm and tesseract on them.
        if (preg_match('/_\d+\.pdf$/', basename($pdf))) {
            logMessage("Skipping already renamed: $pdf");
            continue;
        }

        $dir = dirname($pdf);
        $imageBase = sys_get_temp_dir() . '/' . uniqid('ocrimg_');
        $jpgPath = convertPdfToImage($pdf, $imageBase);
        if (!$jpgPath) {
            logMessage("Failed to convert: $pdf");
            continue;
        }
        $ocr = runTesseract($jpgPath);
        if (is_file($jpgPath)) {
            unlink($jpgPath);
        }

        if (trim($ocr) === '') {
            logMessage("No OCR found for: $pdf");
            continue;
        }

        $newName = sanitizeFilename($ocr);
        if ($newName === '') {
            // Without this guard the file would be renamed to a bare ".pdf".
            logMessage("No usable name from OCR for: $pdf");
            continue;
        }
        if ($newName . '.pdf' === basename($pdf)) {
            // The file already carries its OCR-derived name; the collision loop
            // below would otherwise see the file itself and append "_1".
            logMessage("Already named: $pdf");
            continue;
        }

        // Find a free target name by appending _1, _2, ... on collision.
        $suffix = 1;
        $finalPath = $dir . '/' . $newName . '.pdf';
        while (file_exists($finalPath)) {
            $finalPath = $dir . '/' . $newName . "_$suffix.pdf";
            $suffix++;
        }

        if (rename($pdf, $finalPath)) {
            logMessage("Renamed: " . basename($pdf) . " => " . basename($finalPath));
            if ($logCsv !== false) {
                // Separator, enclosure and escape are passed explicitly (their
                // current defaults) because relying on the default escape is
                // deprecated as of PHP 8.4.
                fputcsv($logCsv, [basename($pdf), basename($finalPath)], ',', '"', '\\');
            }
            $count++;
        } else {
            logMessage("Failed to rename: $pdf");
        }
    }

    if ($logCsv !== false) {
        fclose($logCsv);
    }
    $progress['index'] = $index;
    $progress['paused'] = true;
    saveProgress($progress);
    logMessage("Paused after $count files.");
    exit;
}

// Action "export": send the rename log as a CSV download.
if ($action === 'export') {
    $csvPath = 'logs/rename_log.csv';

    // Without this check a missing log would produce attachment headers followed
    // by a PHP warning as the "CSV" body.
    if (!is_file($csvPath) || !is_readable($csvPath)) {
        http_response_code(404);
        header('Content-Type: text/plain');
        echo "No rename log available.";
        exit;
    }

    header('Content-Type: text/csv');
    header('Content-Disposition: attachment; filename="rename_log.csv"');
    readfile($csvPath);
    exit;
}

// Fallback response: every recognised action handler ends with exit, so this is
// reached only when no action matched.
echo "OK";
