Directory Privacy on /public_html/epmc-ui/ */

declare(strict_types=1);

session_start();

// NOTE(review): display_errors is enabled on a web-facing page, so warnings and
// uncaught-exception output are sent to the browser. Consider disabling it and
// logging to a file instead once the tool is stable.
ini_set('display_errors', '1');
error_reporting(E_ALL);
set_time_limit(0);

/* -----------------------
   CONFIGURATION
------------------------ */
$ROOT      = "/home/cpe0003707/epmc";
$AUTH_FILE = $ROOT . "/.ui_auth.php";
$DB_CONF   = $ROOT . "/.db.php";
$STATE_DIR = $ROOT . "/state";
$LOCK_FILE = $STATE_DIR . "/ui_run.lock";

// Best-effort creation of working directories (they normally exist already).
@mkdir($ROOT . "/logs", 0755, true);
@mkdir($STATE_DIR, 0755, true);

$PHP  = "/usr/local/bin/php";
$PERL = "/usr/bin/perl";

$CMD_FETCH   = "$PHP -q $ROOT/scripts/epmc_daily_fetch.php";
$CMD_MERGE   = "$PHP -q $ROOT/scripts/merge_xml.php";
$CMD_EXTRACT = "$PERL $ROOT/scripts/extract_pubmed_emails.pl";
$CMD_IMPORT  = "$PHP -q $ROOT/scripts/import_email_csv.php";

/* -----------------------
   HELPERS
------------------------ */

/**
 * Escape a string for safe output in an HTML context.
 */
function h(string $s): string
{
    return htmlspecialchars($s, ENT_QUOTES, 'UTF-8');
}

/**
 * Read a request parameter as a string.
 *
 * Array-valued parameters (e.g. ?from[]=x) would raise a TypeError under
 * strict_types when handed to string-typed helpers, so anything that is not a
 * string is replaced by $default.
 */
function request_string(array $source, string $key, string $default = ''): string
{
    $value = $source[$key] ?? $default;
    return is_string($value) ? $value : $default;
}

/**
 * Validate a YYYY-MM-DD date string.
 *
 * Returns the input unchanged when it is a well-formed, real calendar date and
 * an empty string otherwise. The pattern is anchored with \z (not $) so a
 * trailing newline is rejected, and checkdate() rejects impossible dates such
 * as 2024-02-31 that DateTime would otherwise roll over or refuse to parse.
 */
function safe_date(?string $d): string
{
    if ($d === null || $d === '') {
        return "";
    }
    if (preg_match('/^(\d{4})-(\d{2})-(\d{2})\z/', $d, $m) !== 1) {
        return "";
    }
    return checkdate((int)$m[2], (int)$m[3], (int)$m[1]) ? $d : "";
}

/**
 * Build a DateTime in UTC from a date string.
 */
function dt_utc(string $ymd): DateTime
{
    return new DateTime($ymd, new DateTimeZone("UTC"));
}

/**
 * Format a DateTime as YYYY-MM-DD.
 */
function ymd(DateTime $dt): string
{
    return $dt->format("Y-m-d");
}

/**
 * Abort the request with HTTP 403 unless the session is marked as logged in.
 */
function require_auth(): void
{
    if (empty($_SESSION['epmc_ui_authed'])) {
        http_response_code(403);
        echo "Forbidden";
        exit;
    }
}

/**
 * Load the auth config file; returns [] when it is missing or does not
 * return an array.
 */
function load_auth(string $path): array
{
    if (!file_exists($path)) {
        return [];
    }
    $a = require $path;
    return is_array($a) ? $a : [];
}

/**
 * Execute a command and return [output, exitCode].
 * stderr is redirected into stdout so both appear in the output.
 */
function run_cmd_code(string $cmd): array
{
    $lines = [];
    $code = 0;
    exec($cmd . " 2>&1", $lines, $code);
    return [implode("\n", $lines), $code];
}

/**
 * Open a PDO MySQL connection using the config array returned by $dbConfPath.
 *
 * @throws RuntimeException when the config file is missing or malformed
 */
function db_connect(string $dbConfPath): PDO
{
    if (!file_exists($dbConfPath)) {
        throw new RuntimeException("Missing DB config: $dbConfPath");
    }
    $db = require $dbConfPath;
    if (!is_array($db) || empty($db['host']) || empty($db['db']) || empty($db['user'])) {
        throw new RuntimeException("Invalid DB config format in .db.php");
    }

    $charset = $db['charset'] ?? 'utf8mb4';
    // The charset is interpolated into SQL below, so only accept a plain identifier.
    if (!is_string($charset) || preg_match('/^[A-Za-z0-9_]+\z/', $charset) !== 1) {
        throw new RuntimeException("Invalid DB config format in .db.php");
    }

    $dsn = "mysql:host={$db['host']};dbname={$db['db']};charset={$charset}";
    $pdo = new PDO($dsn, $db['user'], $db['pass'] ?? '', [
        PDO::ATTR_ERRMODE            => PDO::ERRMODE_EXCEPTION,
        PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    ]);
    $pdo->exec("SET NAMES $charset");
    return $pdo;
}

/**
 * Stream rows to the client as a CSV attachment, then end the request.
 *
 * NOTE(review): cell values are written as-is; if exported fields can start
 * with =, +, - or @ a spreadsheet may treat them as formulas — confirm whether
 * that matters for consumers of these exports.
 */
function csv_download(string $filename, array $header, iterable $rows): void
{
    header('Content-Type: text/csv; charset=utf-8');
    header('Content-Disposition: attachment; filename="'.$filename.'"');
    $out = fopen('php://output', 'w');
    fputcsv($out, $header);
    foreach ($rows as $r) {
        fputcsv($out, $r);
    }
    fclose($out);
    exit;
}

/**
 * Column list shared by every CSV export (also used as the CSV header row).
 */
function epmc_export_columns(): array
{
    return [
        "email", "author_name", "article_title", "doi", "first_publication_date",
        "affiliation", "grant_ids", "grant_agencies", "created_at", "updated_at",
    ];
}

/**
 * Stream an executed statement as a CSV download (ends the request).
 * Rows are yielded one at a time, in export-column order.
 */
function export_stmt_as_csv(PDOStatement $stmt, string $filename): void
{
    $columns = epmc_export_columns();
    $rows = (function () use ($stmt, $columns) {
        while ($r = $stmt->fetch()) {
            $line = [];
            foreach ($columns as $c) {
                $line[] = $r[$c];
            }
            yield $line;
        }
    })();
    csv_download($filename, $columns, $rows);
}

/**
 * Safe recursive delete with guard (only deletes inside /home/cpe0003707/epmc/raw/).
 *
 * The guard compares resolved paths against the root *plus a trailing slash*,
 * so sibling directories such as ".../raw_backup", "../" traversal, and the
 * root itself are refused. Symlinks are removed as links and never followed.
 *
 * @return bool true when $dir itself was removed
 */
function rrmdir_safe(string $dir): bool
{
    $allowedRoot = "/home/cpe0003707/epmc/raw";
    $dir = rtrim($dir, "/");

    if ($dir === "" || is_link($dir) || !is_dir($dir)) {
        return false;
    }

    $real     = realpath($dir);
    $rootReal = realpath($allowedRoot);
    if ($real === false || $rootReal === false) {
        return false;
    }
    if (strncmp($real, $rootReal . "/", strlen($rootReal) + 1) !== 0) {
        return false; // guard
    }

    $items = scandir($dir);
    if ($items === false) {
        return false;
    }

    foreach ($items as $item) {
        if ($item === "." || $item === "..") {
            continue;
        }
        $path = $dir . "/" . $item;
        if (is_link($path) || !is_dir($path)) {
            @unlink($path);
        } else {
            rrmdir_safe($path);
        }
    }

    return @rmdir($dir);
}

/**
 * Emit a JSON response body.
 * Invalid UTF-8 (e.g. in captured command output) is substituted rather than
 * making json_encode() fail and produce an empty response.
 */
function json_reply(array $payload): void
{
    echo json_encode($payload, JSON_INVALID_UTF8_SUBSTITUTE);
}

/**
 * Run the selected pipeline steps for one date range and return the JSON
 * response payload.
 *
 * Steps (each optional, keyed in $steps): fetch in 10-day chunks, merge,
 * extract, import. After a successful import the raw folder for the range is
 * deleted. The first failing command stops the run.
 *
 * @param array $steps flags keyed 'fetch', 'merge', 'extract', 'import'
 * @param array $cmds  command prefixes keyed 'fetch', 'merge', 'extract', 'import'
 * @return array payload with 'ok' and 'output', plus 'error' on failure or 'cleanup' on success
 */
function run_month_pipeline(string $from, string $to, string $mode, array $steps, array $cmds): array
{
    $out = "";
    $cleanupDone = false;

    $monthDir = "/home/cpe0003707/epmc/raw/" . $mode . "_" . $from . "_to_" . $to;
    @mkdir($monthDir, 0755, true);

    $out .= "MONTH ($from → $to)\n";
    $out .= "RAW FOLDER: $monthDir\n\n";

    // 1) FETCH as 10-day chunks
    if (!empty($steps['fetch'])) {
        $out .= "▶ FETCH (10-day chunks)\n";

        $cur = dt_utc($from);
        $end = dt_utc($to);
        $chunkNo = 1;

        while ($cur <= $end) {
            $chunkEnd = clone $cur;
            $chunkEnd->modify("+9 days");
            if ($chunkEnd > $end) {
                $chunkEnd = clone $end;
            }

            $cs = ymd($cur);
            $ce = ymd($chunkEnd);
            $out .= " - Chunk $chunkNo: $cs → $ce\n";

            [$o, $code] = run_cmd_code(
                $cmds['fetch']
                . " " . escapeshellarg($cs)
                . " " . escapeshellarg($ce)
                . " " . escapeshellarg($mode)
                . " " . escapeshellarg($monthDir)
            );
            $out .= $o . "\n";
            if ($code !== 0) {
                return ['ok'=>false, 'error'=>"FETCH failed (exit $code)", 'output'=>$out];
            }

            // Next chunk starts the day after this one ended.
            $cur = clone $chunkEnd;
            $cur->modify("+1 day");
            $chunkNo++;
        }
        $out .= "\n";
    }

    // 2) MERGE month folder
    if (!empty($steps['merge'])) {
        $out .= "▶ MERGE (month folder)\n";
        [$o, $code] = run_cmd_code($cmds['merge'] . " " . escapeshellarg($monthDir));
        $out .= $o . "\n";
        if ($code !== 0) {
            return ['ok'=>false, 'error'=>"MERGE failed (exit $code)", 'output'=>$out];
        }
        $out .= "\n";
    }

    // 3) EXTRACT
    if (!empty($steps['extract'])) {
        $out .= "▶ EXTRACT\n";
        [$o, $code] = run_cmd_code($cmds['extract']);
        $out .= $o . "\n";
        if ($code !== 0) {
            return ['ok'=>false, 'error'=>"EXTRACT failed (exit $code)", 'output'=>$out];
        }
        $out .= "\n";
    }

    // 4) IMPORT + CLEANUP raw folder immediately if success
    if (!empty($steps['import'])) {
        $out .= "▶ IMPORT\n";
        [$o, $code] = run_cmd_code($cmds['import']);
        $out .= $o . "\n";
        if ($code !== 0) {
            return ['ok'=>false, 'error'=>"IMPORT failed (exit $code). Raw folder kept.", 'output'=>$out];
        }

        // Cleanup raw month folder immediately after successful import
        $out .= "\n▶ CLEANUP RAW FILES\n";
        $cleanupDone = rrmdir_safe($monthDir);
        $out .= $cleanupDone
            ? "Deleted raw folder: $monthDir\n"
            : "Cleanup failed (folder kept): $monthDir\n";
    }

    return ['ok'=>true, 'output'=>$out, 'cleanup'=>$cleanupDone];
}

/* -----------------------
   LOGIN / LOGOUT
------------------------ */
$auth = load_auth($AUTH_FILE);

if (isset($_GET['logout'])) {
    session_destroy();
    header("Location: ./");
    exit;
}

$login_error = "";
if (isset($_POST['login_user'], $_POST['login_pass'])) {
    $u = trim((string)$_POST['login_user']);
    $p = (string)$_POST['login_pass'];

    if (!$auth || empty($auth['user']) || empty($auth['pass_hash'])) {
        $login_error = "Auth config missing. Create /home/cpe0003707/epmc/.ui_auth.php";
    } elseif (hash_equals((string)$auth['user'], $u) && password_verify($p, (string)$auth['pass_hash'])) {
        // Fresh session id on login to prevent session fixation.
        session_regenerate_id(true);
        $_SESSION['epmc_ui_authed'] = 1;
        header("Location: ./");
        exit;
    } else {
        $login_error = "Invalid username or password.";
    }
}

/* -----------------------
   DOWNLOAD ROUTES
------------------------ */
$action = $_GET['action'] ?? "";

// Shared SELECT clause; identical column order to the CSV header.
$exportSelect = "SELECT " . implode(", ", epmc_export_columns()) . " FROM epmc_emails";

if ($action === "download_all") {
    require_auth();
    $pdo = db_connect($DB_CONF);
    $stmt = $pdo->query($exportSelect . " ORDER BY id DESC");
    export_stmt_as_csv($stmt, "epmc_emails_all.csv");
}

if ($action === "download_latest") {
    require_auth();
    $pdo = db_connect($DB_CONF);
    $stmt = $pdo->query($exportSelect . " ORDER BY id DESC LIMIT 2000");
    export_stmt_as_csv($stmt, "epmc_emails_latest_2000.csv");
}

if ($action === "download_filtered") {
    require_auth();
    $from = safe_date(request_string($_GET, 'from'));
    $to   = safe_date(request_string($_GET, 'to'));
    if ($from === "" || $to === "") {
        http_response_code(400);
        echo "Invalid dates";
        exit;
    }

    $pdo = db_connect($DB_CONF);
    $sql = $exportSelect . " WHERE first_publication_date IS NOT NULL AND first_publication_date BETWEEN :f AND :t ORDER BY id DESC";
    $stmt = $pdo->prepare($sql);
    $stmt->execute([':f'=>$from, ':t'=>$to]);
    export_stmt_as_csv($stmt, "epmc_emails_{$from}_to_{$to}.csv");
}

/* -----------------------
   API: RUN ONE MONTH
   (fetch in 10-day chunks, then merge/extract/import)
   Deletes month folder immediately after successful import.
------------------------ */
if (isset($_GET['api']) && $_GET['api'] === 'run_month') {
    require_auth();
    header('Content-Type: application/json; charset=utf-8');
    set_time_limit(0);

    $from = safe_date(request_string($_POST, 'from'));
    $to   = safe_date(request_string($_POST, 'to'));
    $mode = strtoupper(request_string($_POST, 'mode', 'UPDATE_DATE'));
    if (!in_array($mode, ['UPDATE_DATE','FIRST_PDATE'], true)) {
        $mode = 'UPDATE_DATE';
    }

    $steps = [
        'fetch'   => !empty($_POST['do_fetch']),
        'merge'   => !empty($_POST['do_merge']),
        'extract' => !empty($_POST['do_extract']),
        'import'  => !empty($_POST['do_import']),
    ];

    if ($from === '' || $to === '') {
        json_reply(['ok'=>false,'error'=>'Invalid dates. Use YYYY-MM-DD.']);
        exit;
    }
    // Both values are validated YYYY-MM-DD, so string comparison orders them correctly.
    if ($from > $to) {
        json_reply(['ok'=>false,'error'=>"Invalid dates. 'from' must not be after 'to'."]);
        exit;
    }

    // lock to prevent parallel runs
    $lockFp = fopen($LOCK_FILE, "c");
    if (!$lockFp || !flock($lockFp, LOCK_EX | LOCK_NB)) {
        if ($lockFp) {
            fclose($lockFp);
        }
        json_reply(['ok'=>false,'error'=>'Another run is already in progress.']);
        exit;
    }

    // The pipeline returns its response instead of calling exit: PHP does not
    // run finally blocks on exit, so exiting inside the try would skip the
    // explicit unlock below.
    try {
        $response = run_month_pipeline($from, $to, $mode, $steps, [
            'fetch'   => $CMD_FETCH,
            'merge'   => $CMD_MERGE,
            'extract' => $CMD_EXTRACT,
            'import'  => $CMD_IMPORT,
        ]);
    } finally {
        flock($lockFp, LOCK_UN);
        fclose($lockFp);
    }

    json_reply($response);
    exit;
}

/* -----------------------
   If not authed -> show login page
------------------------ */
$authed = !empty($_SESSION['epmc_ui_authed']);
if (!$authed) { ?> Login — Fortune Journals EPMC Data Extractor

Fortune Journals EPMC Data Extractor

Secure access required to run searches and downloads.

Auth config missing. Create /home/cpe0003707/epmc/.ui_auth.php
null,'updated'=>null]; try { $pdo = db_connect($DB_CONF); $stats['count'] = (int)$pdo->query("SELECT COUNT(*) c FROM epmc_emails")->fetch()['c']; $stats['updated'] = $pdo->query("SELECT MAX(updated_at) m FROM epmc_emails")->fetch()['m'] ?? null; } catch (Throwable $e) {} $dlFrom = $from; $dlTo = $to; ?> Fortune Journals EPMC Data Extractor

Fortune Journals EPMC Data Extractor

Search Europe PMC (title keywords), extract emails + metadata, store into MySQL.
DB rows:
Last update:
Logout

Run Search + Extraction

UPDATE_DATE daily updates  •  FIRST_PDATE backfill by publication date  •  Note: month runs fetch in 10-day chunks and clean up raw files after import.
1) Download
Fetch in 10-day chunks inside each month
2) Merge
Merge month folder → pubmed.txt
3) Extract
Creates email.csv + Emails_not_found_doi.csv
4) Import to DB
Upsert to MySQL then delete raw month folder
Run month-by-month automatically
Recommended for long ranges (years). Shows live progress.
Go to Downloads

Download Saved Data from Database

Exports come from CCM_EPMC_Database.epmc_emails.
Tip: For historical backfills, use FIRST_PDATE in Run Pipeline, then download filtered range here.

Quick Info

Database rows
Total rows in epmc_emails

LIVE
Last updated
Max(updated_at)

TIME
Storage policy
Raw files cleanup
Auto-delete
after import
✅ Keep /home/cpe0003707/epmc/.db.php and .ui_auth.php outside public_html.
✅ Enable cPanel → Directory Privacy for /public_html/epmc-ui/.