From 5da134f016ceaa366999489874e63cd82cae7a38 Mon Sep 17 00:00:00 2001 From: kjs Date: Thu, 26 Mar 2026 16:30:53 +0900 Subject: [PATCH] feat: add web crawling management functionality - Introduced a new crawling management feature allowing users to configure, execute, and log web crawls. - Added CRUD operations for crawl configurations, including URL analysis and preview capabilities. - Implemented a new service for handling crawling logic and scheduling tasks. - Integrated cheerio for HTML parsing and axios for HTTP requests. - Created a sample HTML page for testing crawling functionality. This commit enhances the application's data collection capabilities from external websites. --- backend-node/package-lock.json | 282 +++++++ backend-node/package.json | 2 + backend-node/src/app.ts | 7 + .../src/controllers/crawlController.ts | 124 +++ backend-node/src/routes/crawlRoutes.ts | 32 + backend-node/src/services/crawlService.ts | 489 +++++++++++ .../admin/automaticMng/crawlingList/page.tsx | 763 ++++++++++++++++++ .../components/layout/AdminPageRenderer.tsx | 1 + 8 files changed, 1700 insertions(+) create mode 100644 backend-node/src/controllers/crawlController.ts create mode 100644 backend-node/src/routes/crawlRoutes.ts create mode 100644 backend-node/src/services/crawlService.ts create mode 100644 frontend/app/(main)/admin/automaticMng/crawlingList/page.tsx diff --git a/backend-node/package-lock.json b/backend-node/package-lock.json index f482dc7b..24ef7619 100644 --- a/backend-node/package-lock.json +++ b/backend-node/package-lock.json @@ -13,6 +13,7 @@ "axios": "^1.11.0", "bcryptjs": "^2.4.3", "bwip-js": "^4.8.0", + "cheerio": "^1.2.0", "compression": "^1.7.4", "cors": "^2.8.5", "docx": "^9.5.1", @@ -36,6 +37,7 @@ "nodemailer": "^6.10.1", "oracledb": "^6.9.0", "pg": "^8.16.3", + "playwright": "^1.58.2", "quill": "^2.0.3", "react-quill": "^2.0.0", "redis": "^4.6.10", @@ -4408,6 +4410,12 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "license": "MIT" }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/bowser": { "version": "2.12.1", "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.12.1.tgz", @@ -4704,6 +4712,79 @@ "node": ">=10" } }, + "node_modules/cheerio": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz", + "integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==", + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.1", + "htmlparser2": "^10.1.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.19.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=20.18.1" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/cheerio/node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/cheerio/node_modules/htmlparser2": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", + "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "entities": "^7.0.1" + } + }, "node_modules/chokidar": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", @@ -5091,6 +5172,34 @@ "node": ">= 8" } }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -5539,6 +5648,31 @@ "node": ">=8.10.0" } }, + "node_modules/encoding-sniffer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", + "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/encoding-sniffer/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/ent": { "version": "2.2.2", "resolved": "https://registry.npmjs.org/ent/-/ent-2.2.2.tgz", @@ -9020,6 +9154,18 @@ "node": ">=8" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -9254,6 +9400,55 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/parseley": { "version": "0.12.1", "resolved": "https://registry.npmjs.org/parseley/-/parseley-0.12.1.tgz", @@ -9525,6 +9720,50 @@ "node": ">=8" } }, + "node_modules/playwright": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", + "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==", + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz", + "integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==", + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/playwright/node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/postgres-array": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", @@ -11146,6 +11385,15 @@ "dev": true, "license": "MIT" }, + "node_modules/undici": { + "version": "7.24.6", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.24.6.tgz", + "integrity": "sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA==", + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/undici-types": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", @@ -11310,6 +11558,40 @@ "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", "license": "BSD-2-Clause" }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-encoding/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/whatwg-url": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", diff --git a/backend-node/package.json b/backend-node/package.json index 53ee00b8..2217eff6 100644 --- a/backend-node/package.json +++ b/backend-node/package.json @@ -27,6 +27,7 @@ "axios": "^1.11.0", "bcryptjs": "^2.4.3", "bwip-js": "^4.8.0", + "cheerio": "^1.2.0", "compression": "^1.7.4", "cors": "^2.8.5", "docx": "^9.5.1", @@ -50,6 +51,7 @@ "nodemailer": "^6.10.1", "oracledb": "^6.9.0", "pg": "^8.16.3", + "playwright": "^1.58.2", "quill": "^2.0.3", "react-quill": "^2.0.0", "redis": "^4.6.10", diff --git a/backend-node/src/app.ts b/backend-node/src/app.ts index 7a3e0071..c087ddfb 100644 --- a/backend-node/src/app.ts +++ b/backend-node/src/app.ts @@ -115,6 +115,7 @@ import workHistoryRoutes from "./routes/workHistoryRoutes"; // 작업 이력 관 import tableHistoryRoutes from "./routes/tableHistoryRoutes"; // 테이블 변경 이력 조회 import bomRoutes from "./routes/bomRoutes"; // BOM 이력/버전 관리 import productionRoutes from "./routes/productionRoutes"; // 생산계획 관리 +import crawlRoutes from "./routes/crawlRoutes"; // 웹 크롤링 import roleRoutes from "./routes/roleRoutes"; // 권한 그룹 관리 import departmentRoutes from "./routes/departmentRoutes"; // 부서 관리 import tableCategoryValueRoutes from "./routes/tableCategoryValueRoutes"; // 카테고리 값 관리 @@ -325,6 +326,7 @@ app.use("/api/work-history", workHistoryRoutes); // 작업 이력 관리 app.use("/api/table-history", tableHistoryRoutes); // 테이블 변경 이력 조회 app.use("/api/bom", bomRoutes); // BOM 이력/버전 관리 app.use("/api/production", productionRoutes); // 생산계획 관리 +app.use("/api/crawl", crawlRoutes); // 웹 크롤링 app.use("/api/material-status", materialStatusRoutes); // 자재현황 app.use("/api/process-info", processInfoRoutes); // 공정정보관리 app.use("/api/roles", roleRoutes); // 권한 그룹 관리 @@ -415,6 +417,11 @@ async function initializeServices() { try { await BatchSchedulerService.initializeScheduler(); logger.info(`⏰ 배치 스케줄러가 시작되었습니다.`); + + // 크롤링 스케줄러 초기화 + const { CrawlService } = await import("./services/crawlService"); + await CrawlService.initializeScheduler(); + logger.info(`🕷️ 크롤링 스케줄러가 시작되었습니다.`); } catch (error) { logger.error(`❌ 배치 스케줄러 초기화 실패:`, error); } diff --git a/backend-node/src/controllers/crawlController.ts b/backend-node/src/controllers/crawlController.ts new file mode 100644 index 00000000..c4f66c94 --- /dev/null +++ b/backend-node/src/controllers/crawlController.ts @@ -0,0 +1,124 @@ +import { Request, Response } from "express"; +import { CrawlService } from "../services/crawlService"; +import { logger } from "../utils/logger"; + +interface AuthenticatedRequest extends Request { + user?: { companyCode: string; userId: string }; +} + +// 설정 목록 조회 +export async function getCrawlConfigs(req: AuthenticatedRequest, res: Response) { + try { + const companyCode = req.user?.companyCode || "*"; + const configs = await CrawlService.getConfigs(companyCode); + return res.json({ success: true, data: configs }); + } catch (error: any) { + logger.error("크롤링 설정 조회 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// 설정 상세 조회 +export async function getCrawlConfig(req: AuthenticatedRequest, res: Response) { + try { + const config = await CrawlService.getConfigById(req.params.id); + if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." }); + return res.json({ success: true, data: config }); + } catch (error: any) { + logger.error("크롤링 설정 상세 조회 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// 설정 생성 +export async function createCrawlConfig(req: AuthenticatedRequest, res: Response) { + try { + const data = { + ...req.body, + company_code: req.user?.companyCode || req.body.company_code, + writer: req.user?.userId, + }; + const config = await CrawlService.createConfig(data); + return res.json({ success: true, data: config }); + } catch (error: any) { + logger.error("크롤링 설정 생성 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// 설정 수정 +export async function updateCrawlConfig(req: AuthenticatedRequest, res: Response) { + try { + const config = await CrawlService.updateConfig(req.params.id, req.body); + if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." }); + return res.json({ success: true, data: config }); + } catch (error: any) { + logger.error("크롤링 설정 수정 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// 설정 삭제 +export async function deleteCrawlConfig(req: AuthenticatedRequest, res: Response) { + try { + await CrawlService.deleteConfig(req.params.id); + return res.json({ success: true }); + } catch (error: any) { + logger.error("크롤링 설정 삭제 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// 미리보기 +export async function previewCrawl(req: AuthenticatedRequest, res: Response) { + try { + const { url, row_selector, column_mappings, method, headers, request_body } = req.body; + if (!url) return res.status(400).json({ success: false, message: "URL은 필수입니다." }); + + const result = await CrawlService.preview(url, row_selector, column_mappings || [], method, headers, request_body); + return res.json({ success: true, data: result }); + } catch (error: any) { + logger.error("크롤링 미리보기 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// URL 자동 분석 — 페이지의 테이블/리스트 구조를 감지 +export async function analyzeUrl(req: AuthenticatedRequest, res: Response) { + try { + const { url } = req.body; + if (!url) return res.status(400).json({ success: false, message: "URL은 필수입니다." }); + + const result = await CrawlService.analyzeUrl(url); + return res.json({ success: true, data: result }); + } catch (error: any) { + logger.error("URL 분석 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// 수동 실행 +export async function executeCrawl(req: AuthenticatedRequest, res: Response) { + try { + const config = await CrawlService.getConfigById(req.params.id); + if (!config) return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." }); + + const result = await CrawlService.executeCrawl(config); + return res.json({ success: true, data: result }); + } catch (error: any) { + logger.error("크롤링 수동 실행 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} + +// 실행 로그 조회 +export async function getCrawlLogs(req: AuthenticatedRequest, res: Response) { + try { + const limit = parseInt(req.query.limit as string) || 20; + const logs = await CrawlService.getLogs(req.params.id, limit); + return res.json({ success: true, data: logs }); + } catch (error: any) { + logger.error("크롤링 로그 조회 실패:", error); + return res.status(500).json({ success: false, message: error.message }); + } +} diff --git a/backend-node/src/routes/crawlRoutes.ts b/backend-node/src/routes/crawlRoutes.ts new file mode 100644 index 00000000..93b6176e --- /dev/null +++ b/backend-node/src/routes/crawlRoutes.ts @@ -0,0 +1,32 @@ +import { Router } from "express"; +import { authenticateToken } from "../middleware/authMiddleware"; +import { + getCrawlConfigs, + getCrawlConfig, + createCrawlConfig, + updateCrawlConfig, + deleteCrawlConfig, + previewCrawl, + analyzeUrl, + executeCrawl, + getCrawlLogs, +} from "../controllers/crawlController"; + +const router = Router(); + +// 설정 CRUD +router.get("/configs", authenticateToken, getCrawlConfigs); +router.get("/configs/:id", authenticateToken, getCrawlConfig); +router.post("/configs", authenticateToken, createCrawlConfig); +router.put("/configs/:id", authenticateToken, updateCrawlConfig); +router.delete("/configs/:id", authenticateToken, deleteCrawlConfig); + +// 분석 & 미리보기 & 실행 +router.post("/analyze", authenticateToken, analyzeUrl); +router.post("/preview", authenticateToken, previewCrawl); +router.post("/execute/:id", authenticateToken, executeCrawl); + +// 실행 로그 +router.get("/configs/:id/logs", authenticateToken, getCrawlLogs); + +export default router; diff --git a/backend-node/src/services/crawlService.ts b/backend-node/src/services/crawlService.ts new file mode 100644 index 00000000..8c829917 --- /dev/null +++ b/backend-node/src/services/crawlService.ts @@ -0,0 +1,489 @@ +import * as cheerio from "cheerio"; +import axios from "axios"; +import cron, { ScheduledTask } from "node-cron"; +import { query } from "../database/db"; +import { logger } from "../utils/logger"; + +export interface CrawlConfig { + id: string; + company_code: string; + name: string; + url: string; + method: string; + headers: Record; + request_body?: string; + selector_type: string; + row_selector: string; + column_mappings: Array<{ + selector: string; + column: string; + type: "text" | "number" | "date"; + attribute?: string; // href, src 등 속성값 추출 + }>; + target_table: string; + upsert_key?: string; + cron_schedule?: string; + is_active: string; + writer?: string; +} + +export interface CrawlResult { + collected: number; + saved: number; + errors: string[]; +} + +const DEFAULT_HEADERS = { + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7", +}; + +export class CrawlService { + private static scheduledTasks: Map = new Map(); + + // ─── 스케줄러 ─── + + static async initializeScheduler() { + try { + const configs = await query( + `SELECT * FROM crawl_configs WHERE is_active = 'Y' AND cron_schedule IS NOT NULL AND cron_schedule != ''` + ); + + logger.info(`크롤링 스케줄러: ${configs.length}개 설정 등록`); + + for (const config of configs) { + this.scheduleConfig(config); + } + } catch (error) { + logger.error("크롤링 스케줄러 초기화 실패:", error); + } + } + + static scheduleConfig(config: CrawlConfig) { + if (!config.cron_schedule || !cron.validate(config.cron_schedule)) { + logger.warn(`크롤링 [${config.name}]: 유효하지 않은 cron 표현식 - ${config.cron_schedule}`); + return; + } + + // 기존 스케줄 제거 + if (this.scheduledTasks.has(config.id)) { + this.scheduledTasks.get(config.id)!.stop(); + this.scheduledTasks.delete(config.id); + } + + const task = cron.schedule( + config.cron_schedule, + async () => { + logger.info(`크롤링 [${config.name}] 스케줄 실행 시작`); + await this.executeCrawl(config); + }, + { timezone: "Asia/Seoul" } + ); + + this.scheduledTasks.set(config.id, task); + logger.info(`크롤링 [${config.name}] 스케줄 등록: ${config.cron_schedule}`); + } + + static unscheduleConfig(configId: string) { + if (this.scheduledTasks.has(configId)) { + this.scheduledTasks.get(configId)!.stop(); + this.scheduledTasks.delete(configId); + } + } + + // ─── CRUD ─── + + static async getConfigs(companyCode: string) { + const condition = companyCode === "*" ? "" : "WHERE company_code = $1"; + const params = companyCode === "*" ? [] : [companyCode]; + return query(`SELECT * FROM crawl_configs ${condition} ORDER BY created_date DESC`, params); + } + + static async getConfigById(id: string) { + const rows = await query(`SELECT * FROM crawl_configs WHERE id = $1`, [id]); + return rows[0] || null; + } + + static async createConfig(data: Partial) { + const result = await query( + `INSERT INTO crawl_configs (company_code, name, url, method, headers, request_body, selector_type, row_selector, column_mappings, target_table, upsert_key, cron_schedule, is_active, writer) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) RETURNING *`, + [ + data.company_code, + data.name, + data.url, + data.method || "GET", + JSON.stringify(data.headers || {}), + data.request_body || null, + data.selector_type || "css", + data.row_selector || null, + JSON.stringify(data.column_mappings || []), + data.target_table, + data.upsert_key || null, + data.cron_schedule || null, + data.is_active || "Y", + data.writer || null, + ] + ); + + const config = result[0]; + if (config.is_active === "Y" && config.cron_schedule) { + this.scheduleConfig(config); + } + return config; + } + + static async updateConfig(id: string, data: Partial) { + const result = await query( + `UPDATE crawl_configs SET + name = COALESCE($2, name), + url = COALESCE($3, url), + method = COALESCE($4, method), + headers = COALESCE($5, headers), + request_body = $6, + selector_type = COALESCE($7, selector_type), + row_selector = $8, + column_mappings = COALESCE($9, column_mappings), + target_table = COALESCE($10, target_table), + upsert_key = $11, + cron_schedule = $12, + is_active = COALESCE($13, is_active), + updated_date = now() + WHERE id = $1 RETURNING *`, + [ + id, + data.name, + data.url, + data.method, + data.headers ? JSON.stringify(data.headers) : null, + data.request_body ?? null, + data.selector_type, + data.row_selector ?? null, + data.column_mappings ? JSON.stringify(data.column_mappings) : null, + data.target_table, + data.upsert_key ?? null, + data.cron_schedule ?? null, + data.is_active, + ] + ); + + const config = result[0]; + if (config) { + this.unscheduleConfig(id); + if (config.is_active === "Y" && config.cron_schedule) { + this.scheduleConfig(config); + } + } + return config; + } + + static async deleteConfig(id: string) { + this.unscheduleConfig(id); + await query(`DELETE FROM crawl_configs WHERE id = $1`, [id]); + } + + // ─── 크롤링 실행 ─── + + static async executeCrawl(config: CrawlConfig): Promise { + const logId = await this.createLog(config.id, config.company_code); + const errors: string[] = []; + let collected = 0; + let saved = 0; + + try { + // 1. HTTP 요청 + const headers = { ...DEFAULT_HEADERS, ...(typeof config.headers === "string" ? JSON.parse(config.headers) : config.headers || {}) }; + const response = await axios({ + method: (config.method || "GET") as any, + url: config.url, + headers, + data: config.request_body || undefined, + timeout: 30000, + responseType: "text", + }); + + const html = response.data; + const htmlPreview = typeof html === "string" ? html.substring(0, 2000) : ""; + + // 2. DOM 파싱 + const $ = cheerio.load(html); + const mappings = typeof config.column_mappings === "string" + ? JSON.parse(config.column_mappings) + : config.column_mappings || []; + + // 3. 행 추출 + const rows: Record[] = []; + + if (config.row_selector) { + $(config.row_selector).each((_, el) => { + const row: Record = {}; + for (const mapping of mappings) { + const $el = $(el).find(mapping.selector); + const raw = mapping.attribute ? $el.attr(mapping.attribute) || "" : $el.text().trim(); + row[mapping.column] = this.castValue(raw, mapping.type); + } + rows.push(row); + }); + } else { + // row_selector 없으면 column_mappings의 selector로 직접 추출 (단일 행) + const row: Record = {}; + for (const mapping of mappings) { + const $el = $(mapping.selector); + const raw = mapping.attribute ? $el.attr(mapping.attribute) || "" : $el.text().trim(); + row[mapping.column] = this.castValue(raw, mapping.type); + } + rows.push(row); + } + + collected = rows.length; + + // 4. DB 저장 + for (const row of rows) { + try { + row.company_code = config.company_code; + + if (config.upsert_key) { + await this.upsertRow(config.target_table, row, config.upsert_key, config.company_code); + } else { + await this.insertRow(config.target_table, row); + } + saved++; + } catch (err: any) { + errors.push(`행 저장 실패: ${err.message}`); + } + } + + // 5. 상태 업데이트 + await this.updateLog(logId, "success", collected, saved, null, htmlPreview); + await query( + `UPDATE crawl_configs SET last_executed_at = now(), last_status = 'success', last_error = null WHERE id = $1`, + [config.id] + ); + + logger.info(`크롤링 [${config.name}] 완료: ${collected}건 수집, ${saved}건 저장`); + } catch (error: any) { + const errMsg = error.message || "Unknown error"; + errors.push(errMsg); + await this.updateLog(logId, "fail", collected, saved, errMsg, null); + await query( + `UPDATE crawl_configs SET last_executed_at = now(), last_status = 'fail', last_error = $2 WHERE id = $1`, + [config.id, errMsg] + ); + logger.error(`크롤링 [${config.name}] 실패:`, error); + } + + return { collected, saved, errors }; + } + + // ─── URL 자동 분석 ─── + + static async analyzeUrl(url: string) { + const response = await axios({ + method: "GET", + url, + headers: DEFAULT_HEADERS, + timeout: 15000, + responseType: "text", + }); + + const $ = cheerio.load(response.data); + const tables: Array<{ + index: number; + selector: string; + caption: string; + headers: string[]; + rowCount: number; + sampleRows: string[][]; + }> = []; + + // HTML 자동 감지 + $("table").each((i, tableEl) => { + const $table = $(tableEl); + // 헤더 추출 + const headers: string[] = []; + $table.find("thead th, thead td, tr:first-child th").each((_, th) => { + headers.push($(th).text().trim()); + }); + // 헤더가 없으면 첫 행에서 추출 시도 + if (headers.length === 0) { + $table.find("tr:first-child td").each((_, td) => { + headers.push($(td).text().trim()); + }); + } + + // 데이터 행 수 + const bodyRows = $table.find("tbody tr"); + const allRows = bodyRows.length > 0 ? bodyRows : $table.find("tr").slice(headers.length > 0 ? 1 : 0); + const rowCount = allRows.length; + + // 샘플 (최대 3행) + const sampleRows: string[][] = []; + allRows.slice(0, 3).each((_, tr) => { + const cells: string[] = []; + $(tr).find("td, th").each((_, td) => { + cells.push($(td).text().trim()); + }); + sampleRows.push(cells); + }); + + if (headers.length > 0 || rowCount > 0) { + // 선택자 생성 + let selector = "table"; + const id = $table.attr("id"); + const cls = $table.attr("class"); + if (id) selector = `table#${id}`; + else if (cls) selector = `table.${cls.split(/\s+/)[0]}`; + else if (i > 0) selector = `table:nth-of-type(${i + 1})`; + + const caption = $table.find("caption").text().trim() || $table.attr("summary") || ""; + + tables.push({ + index: i, + selector, + caption, + headers, + rowCount, + sampleRows, + }); + } + }); + + return { + title: $("title").text().trim(), + tableCount: tables.length, + tables, + htmlLength: response.data.length, + }; + } + + // ─── 미리보기 ─── + + static async preview( + url: string, + rowSelector: string, + columnMappings: CrawlConfig["column_mappings"], + method = "GET", + headers: Record = {}, + requestBody?: string + ) { + const mergedHeaders = { ...DEFAULT_HEADERS, ...headers }; + const response = await axios({ + method: method as any, + url, + headers: mergedHeaders, + data: requestBody || undefined, + timeout: 15000, + responseType: "text", + }); + + const $ = cheerio.load(response.data); + const rows: Record[] = []; + + if (rowSelector) { + $(rowSelector) + .slice(0, 10) // 미리보기는 10행까지 + .each((_, el) => { + const row: Record = {}; + for (const mapping of columnMappings) { + const $el = $(el).find(mapping.selector); + const raw = mapping.attribute ? $el.attr(mapping.attribute) || "" : $el.text().trim(); + row[mapping.column] = this.castValue(raw, mapping.type); + } + rows.push(row); + }); + } + + return { + totalElements: rowSelector ? $(rowSelector).length : 0, + previewRows: rows, + htmlLength: response.data.length, + }; + } + + // ─── 유틸 ─── + + private static castValue(raw: string, type: string): any { + if (!raw) return null; + switch (type) { + case "number": { + const cleaned = raw.replace(/[^0-9.\-]/g, ""); + const num = parseFloat(cleaned); + return isNaN(num) ? null : num; + } + case "date": + return raw; + default: + return raw; + } + } + + private static async insertRow(tableName: string, row: Record) { + const cols = Object.keys(row); + const vals = Object.values(row); + const placeholders = cols.map((_, i) => `$${i + 1}`).join(", "); + const colNames = cols.map((c) => `"${c}"`).join(", "); + + await query(`INSERT INTO "${tableName}" (${colNames}) VALUES (${placeholders})`, vals); + } + + private static async upsertRow(tableName: string, row: Record, upsertKey: string, companyCode: string) { + const existing = await query( + `SELECT 1 FROM "${tableName}" WHERE "${upsertKey}" = $1 AND company_code = $2 LIMIT 1`, + [row[upsertKey], companyCode] + ); + + if (existing.length > 0) { + const setClauses: string[] = []; + const vals: any[] = []; + let idx = 1; + for (const [k, v] of Object.entries(row)) { + if (k === upsertKey || k === "company_code") continue; + setClauses.push(`"${k}" = $${idx}`); + vals.push(v); + idx++; + } + if (setClauses.length > 0) { + vals.push(row[upsertKey], companyCode); + await query( + `UPDATE "${tableName}" SET ${setClauses.join(", ")}, updated_date = now() WHERE "${upsertKey}" = $${idx} AND company_code = $${idx + 1}`, + vals + ); + } + } else { + await this.insertRow(tableName, row); + } + } + + private static async createLog(configId: string, companyCode: string): Promise { + const result = await query( + `INSERT INTO crawl_execution_logs (config_id, company_code, status) VALUES ($1, $2, 'running') RETURNING id`, + [configId, companyCode] + ); + return result[0].id; + } + + private static async updateLog( + logId: string, + status: string, + collected: number, + saved: number, + errorMessage: string | null, + htmlPreview: string | null + ) { + await query( + `UPDATE crawl_execution_logs SET status = $2, rows_collected = $3, rows_saved = $4, error_message = $5, response_html_preview = $6, finished_at = now() WHERE id = $1`, + [logId, status, collected, saved, errorMessage, htmlPreview] + ); + } + + // ─── 로그 조회 ─── + + static async getLogs(configId: string, limit = 20) { + return query( + `SELECT * FROM crawl_execution_logs WHERE config_id = $1 ORDER BY started_at DESC LIMIT $2`, + [configId, limit] + ); + } +} diff --git a/frontend/app/(main)/admin/automaticMng/crawlingList/page.tsx b/frontend/app/(main)/admin/automaticMng/crawlingList/page.tsx new file mode 100644 index 00000000..47837764 --- /dev/null +++ b/frontend/app/(main)/admin/automaticMng/crawlingList/page.tsx @@ -0,0 +1,763 @@ +"use client"; + +/** + * 크롤링 관리 — 외부 웹사이트 데이터 수집 설정/실행/로그 관리 + */ + +import React, { useState, useEffect, useCallback } from "react"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Switch } from "@/components/ui/switch"; +import { Label } from "@/components/ui/label"; +import { Textarea } from "@/components/ui/textarea"; +import { Badge } from "@/components/ui/badge"; +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"; +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogFooter, +} from "@/components/ui/dialog"; +import { + Plus, + Search, + Play, + Pencil, + Trash2, + RefreshCw, + Globe, + Eye, + Clock, + CheckCircle, + XCircle, + Loader2, + Check, + ChevronsUpDown, +} from "lucide-react"; +import { toast } from "sonner"; +import { apiClient } from "@/lib/api/client"; +import { tableTypeApi } from "@/lib/api/screen"; +import { useConfirmDialog } from "@/components/common/ConfirmDialog"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { Command, CommandEmpty, CommandGroup, CommandInput, CommandItem, CommandList } from "@/components/ui/command"; +import { cn } from "@/lib/utils"; + +interface CrawlConfig { + id: string; + company_code: string; + name: string; + url: string; + method: string; + headers: Record; + request_body?: string; + selector_type: string; + row_selector: string; + column_mappings: Array<{ + selector: string; + column: string; + type: string; + attribute?: string; + }>; + target_table: string; + upsert_key?: string; + cron_schedule?: string; + is_active: string; + last_executed_at?: string; + last_status?: string; + last_error?: string; +} + +interface CrawlLog { + id: string; + status: string; + rows_collected: number; + rows_saved: number; + error_message?: string; + started_at: string; + finished_at?: string; +} + +const EMPTY_CONFIG: Partial = { + name: "", + url: "", + method: "GET", + headers: {}, + selector_type: "css", + row_selector: "", + column_mappings: [], + target_table: "", + upsert_key: "", + cron_schedule: "", + is_active: "Y", +}; + +export default function CrawlingManagementPage() { + const [configs, setConfigs] = useState([]); + const [selectedId, setSelectedId] = useState(null); + const [searchText, setSearchText] = useState(""); + const [loading, setLoading] = useState(false); + + // 모달 + const [modalOpen, setModalOpen] = useState(false); + const [modalMode, setModalMode] = useState<"add" | "edit">("add"); + const [form, setForm] = useState>(EMPTY_CONFIG); + const [saving, setSaving] = useState(false); + + // 테이블/컬럼 목록 + const [allTables, setAllTables] = useState>([]); + const [targetColumns, setTargetColumns] = useState>([]); + const [tablePopoverOpen, setTablePopoverOpen] = useState(false); + + // URL 분석 + const [analyzing, setAnalyzing] = useState(false); + const [analyzedTables, setAnalyzedTables] = useState>([]); + const [selectedAnalyzedIdx, setSelectedAnalyzedIdx] = useState(null); + + // 컬럼 매핑 시각적 폼 + const [mappingRows, setMappingRows] = useState>([]); + + // 미리보기 + const [previewOpen, setPreviewOpen] = useState(false); + const [previewData, setPreviewData] = useState(null); + const [previewing, setPreviewing] = useState(false); + + // 실행 로그 + const [logs, setLogs] = useState([]); + const [logsLoading, setLogsLoading] = useState(false); + + // 실행 중 + const [executing, setExecuting] = useState(null); + + const { confirm, ConfirmDialogComponent } = useConfirmDialog(); + + // ─── 데이터 로드 ─── + + const loadConfigs = useCallback(async () => { + setLoading(true); + try { + const res = await apiClient.get("/crawl/configs"); + setConfigs(res.data.data || []); + } catch { + toast.error("크롤링 설정 로드 실패"); + } finally { + setLoading(false); + } + }, []); + + useEffect(() => { + loadConfigs(); + tableTypeApi.getTables().then((t) => setAllTables(t || [])).catch(() => {}); + }, [loadConfigs]); + + // target_table 변경 시 컬럼 목록 로드 + useEffect(() => { + if (!form.target_table) { setTargetColumns([]); return; } + tableTypeApi.getColumns(form.target_table).then((cols) => { + setTargetColumns(cols.map((c: any) => ({ columnName: c.columnName || c.column_name, columnLabel: c.displayName || c.columnLabel || c.column_label }))); + }).catch(() => setTargetColumns([])); + }, [form.target_table]); + + const loadLogs = useCallback(async (configId: string) => { + setLogsLoading(true); + try { + const res = await apiClient.get(`/crawl/configs/${configId}/logs?limit=20`); + setLogs(res.data.data || []); + } catch { + setLogs([]); + } finally { + setLogsLoading(false); + } + }, []); + + useEffect(() => { + if (selectedId) loadLogs(selectedId); + else setLogs([]); + }, [selectedId, loadLogs]); + + // ─── 필터링 ─── + + const filteredConfigs = configs.filter( + (c) => + !searchText || + c.name.toLowerCase().includes(searchText.toLowerCase()) || + c.url.toLowerCase().includes(searchText.toLowerCase()) + ); + + const selectedConfig = configs.find((c) => c.id === selectedId); + + // ─── CRUD ─── + + const openAddModal = () => { + setModalMode("add"); + setForm({ ...EMPTY_CONFIG }); + setMappingRows([]); + setAnalyzedTables([]); + setSelectedAnalyzedIdx(null); + setModalOpen(true); + }; + + const openEditModal = (config: CrawlConfig) => { + setModalMode("edit"); + setForm({ ...config }); + const mappings = typeof config.column_mappings === "string" + ? JSON.parse(config.column_mappings) : config.column_mappings || []; + setMappingRows(mappings.map((m: any) => ({ selector: m.selector || "", column: m.column || "", type: m.type || "text" }))); + setAnalyzedTables([]); + setSelectedAnalyzedIdx(null); + setModalOpen(true); + }; + + // URL 분석 + const handleAnalyze = async () => { + if (!form.url) { toast.error("URL을 입력하세요."); return; } + setAnalyzing(true); + try { + const res = await apiClient.post("/crawl/analyze", { url: form.url }); + const data = res.data.data; + setAnalyzedTables(data.tables || []); + if (data.tables?.length > 0) { + toast.success(`${data.tables.length}개 테이블 감지됨`); + } else { + toast.info("페이지에서 테이블을 찾지 못했습니다."); + } + } catch (err: any) { + toast.error(err.response?.data?.message || "URL 분석 실패"); + } finally { + setAnalyzing(false); + } + }; + + // 분석된 테이블 선택 시 자동 매핑 생성 + const handleSelectAnalyzedTable = (idx: number) => { + const table = analyzedTables[idx]; + if (!table) return; + setSelectedAnalyzedIdx(idx); + setForm((p) => ({ ...p, row_selector: `${table.selector} tbody tr` })); + // 헤더 기반으로 컬럼 매핑 자동 생성 + const newMappings = table.headers.map((h, i) => ({ + selector: `td:nth-child(${i + 1})`, + column: h.replace(/\s+/g, "_").replace(/[^a-zA-Z0-9_가-힣]/g, "").toLowerCase() || `col_${i + 1}`, + type: "text", + })); + setMappingRows(newMappings); + }; + + const handleSave = async () => { + if (!form.name || !form.url || !form.target_table) { + toast.error("이름, URL, 대상 테이블은 필수입니다."); + return; + } + + setSaving(true); + try { + const payload = { + ...form, + column_mappings: mappingRows.filter((m) => m.selector && m.column), + headers: form.headers || {}, + }; + + if (modalMode === "add") { + await apiClient.post("/crawl/configs", payload); + toast.success("크롤링 설정이 생성되었습니다."); + } else { + await apiClient.put(`/crawl/configs/${form.id}`, payload); + toast.success("크롤링 설정이 수정되었습니다."); + } + + setModalOpen(false); + loadConfigs(); + } catch (err: any) { + toast.error(err.response?.data?.message || "저장 실패"); + } finally { + setSaving(false); + } + }; + + const handleDelete = async (id: string) => { + const ok = await confirm("정말 삭제하시겠습니까?", { variant: "destructive", confirmText: "삭제" }); + if (!ok) return; + try { + await apiClient.delete(`/crawl/configs/${id}`); + toast.success("삭제되었습니다."); + if (selectedId === id) setSelectedId(null); + loadConfigs(); + } catch { + toast.error("삭제 실패"); + } + }; + + // ─── 실행 & 미리보기 ─── + + const handleExecute = async (id: string) => { + setExecuting(id); + try { + const res = await apiClient.post(`/crawl/execute/${id}`); + const data = res.data.data; + toast.success(`수집 ${data.collected}건, 저장 ${data.saved}건`); + loadConfigs(); + if (selectedId === id) loadLogs(id); + } catch (err: any) { + toast.error(err.response?.data?.message || "실행 실패"); + } finally { + setExecuting(null); + } + }; + + const handlePreview = async () => { + setPreviewing(true); + try { + const res = await apiClient.post("/crawl/preview", { + url: form.url, + row_selector: form.row_selector, + column_mappings: mappingRows.filter((m) => m.selector && m.column), + method: form.method, + headers: form.headers || {}, + request_body: form.request_body, + }); + setPreviewData(res.data.data); + setPreviewOpen(true); + } catch (err: any) { + toast.error(err.response?.data?.message || "미리보기 실패"); + } finally { + setPreviewing(false); + } + }; + + // ─── 렌더링 ─── + + return ( +
+ {/* 좌측: 설정 목록 */} +
+
+

크롤링 설정

+
+ + +
+
+ +
+
+ + setSearchText(e.target.value)} + placeholder="검색..." + className="h-8 pl-8 text-xs" + /> +
+
+ +
+ {filteredConfigs.length === 0 ? ( +
+ {loading ? "로딩 중..." : "설정이 없습니다."} +
+ ) : ( + filteredConfigs.map((config) => ( +
setSelectedId(config.id)} + > +
+
+ + {config.name} +
+ + {config.is_active === "Y" ? "활성" : "비활성"} + +
+
{config.url}
+
+ {config.cron_schedule && ( + + {config.cron_schedule} + + )} + {config.last_status && ( + + {config.last_status === "success" ? ( + + ) : ( + + )} + {config.last_status} + + )} +
+
+ )) + )} +
+
+ + {/* 우측: 상세 + 로그 */} +
+ {selectedConfig ? ( + <> + {/* 상세 정보 */} +
+
+

{selectedConfig.name}

+
+ + + +
+
+ +
+
+ URL +
{selectedConfig.url}
+
+
+ 대상 테이블 +
{selectedConfig.target_table}
+
+
+ 행 선택자 +
{selectedConfig.row_selector || "-"}
+
+
+ 스케줄 +
{selectedConfig.cron_schedule || "수동 실행"}
+
+
+ UPSERT 키 +
{selectedConfig.upsert_key || "-"}
+
+
+ 컬럼 매핑 +
{(selectedConfig.column_mappings || []).length}개
+
+
+ + {selectedConfig.last_error && ( +
+ {selectedConfig.last_error} +
+ )} +
+ + {/* 실행 로그 */} +
+
+

실행 로그

+ +
+
+ {logs.length === 0 ? ( +
실행 로그가 없습니다.
+ ) : ( +
+ + + + + + + + + + + {logs.map((log) => ( + + + + + + + + ))} + +
상태시작수집저장에러
+ + {log.status} + + {new Date(log.started_at).toLocaleString("ko-KR")}{log.rows_collected}{log.rows_saved} + {log.error_message || "-"} +
+ )} + + + + ) : ( +
+ 좌측에서 크롤링 설정을 선택하세요. +
+ )} + + + {/* 추가/수정 모달 */} + + + + {modalMode === "add" ? "크롤링 설정 추가" : "크롤링 설정 수정"} + +
+ {/* STEP 1: 기본 정보 */} +
+

1. 기본 정보

+
+
+ + setForm((p) => ({ ...p, name: e.target.value }))} placeholder="예: 철강 시세 수집" className="h-8 text-xs" /> +
+
+
+ + setForm((p) => ({ ...p, cron_schedule: e.target.value }))} placeholder="0 9 * * 1-5" className="h-8 text-xs font-mono" /> +
+
+ setForm((p) => ({ ...p, is_active: v ? "Y" : "N" }))} /> + +
+
+
+
+ + {/* STEP 2: URL 입력 + 분석 */} +
+

2. 수집할 웹페이지

+
+ setForm((p) => ({ ...p, url: e.target.value }))} placeholder="https://example.com/prices" className="h-8 flex-1 text-xs font-mono" /> + +
+ + {/* 분석 결과: 감지된 테이블 목록 */} + {analyzedTables.length > 0 && ( +
+ +
+ {analyzedTables.map((t, idx) => ( +
handleSelectAnalyzedTable(idx)} + > +
+ {t.caption || `테이블 ${idx + 1}`} + {t.rowCount}행 · {t.headers.length}열 +
+ {t.headers.length > 0 && ( +
+ 컬럼: {t.headers.join(", ")} +
+ )} + {t.sampleRows.length > 0 && ( +
+ 샘플: {t.sampleRows[0].join(" | ")} +
+ )} +
+ ))} +
+
+ )} +
+ + {/* STEP 3: 컬럼 매핑 (시각적 폼) */} +
+
+

3. 컬럼 매핑

+ +
+ {mappingRows.length === 0 ? ( +
+ 위에서 "페이지 분석"을 클릭하면 자동으로 매핑이 생성됩니다. +
+ ) : ( +
+
+ CSS 선택자 + 저장 컬럼명 + 타입 + +
+ {mappingRows.map((row, i) => ( +
+ { const n = [...mappingRows]; n[i] = { ...n[i], selector: e.target.value }; setMappingRows(n); }} placeholder="td:nth-child(1)" className="h-7 text-xs font-mono" /> + { const n = [...mappingRows]; n[i] = { ...n[i], column: e.target.value }; setMappingRows(n); }} placeholder="item_name" className="h-7 text-xs" /> + + +
+ ))} +
+ )} + {form.row_selector && ( +
+ 행 선택자: {form.row_selector} +
+ )} +
+ + {/* STEP 4: 저장 대상 */} +
+

4. 저장 설정

+
+
+ + + + + + + + + 테이블을 찾을 수 없습니다. + + {allTables.map((t) => ( + { setForm((p) => ({ ...p, target_table: t.tableName, upsert_key: "" })); setTablePopoverOpen(false); }}> + + {t.displayName || t.tableName} + {t.displayName && ({t.tableName})} + + ))} + + + + +
+
+ + +
+
+
+
+ + + + + +
+
+ + {/* 미리보기 모달 */} + + + + 크롤링 미리보기 + + {previewData && ( +
+
+ + 총 요소: {previewData.totalElements} + + + HTML 크기: {(previewData.htmlLength / 1024).toFixed(1)}KB + + + 미리보기 행: {previewData.previewRows?.length || 0} + +
+ {previewData.previewRows?.length > 0 ? ( +
+ + + + {Object.keys(previewData.previewRows[0]).map((key) => ( + + ))} + + + + {previewData.previewRows.map((row: any, i: number) => ( + + {Object.values(row).map((val: any, j: number) => ( + + ))} + + ))} + +
+ {key} +
+ {val != null ? String(val) : "-"} +
+
+ ) : ( +
+ 추출된 데이터가 없습니다. 선택자를 확인하세요. +
+ )} +
+ )} +
+
+ + {ConfirmDialogComponent} + + ); +} diff --git a/frontend/components/layout/AdminPageRenderer.tsx b/frontend/components/layout/AdminPageRenderer.tsx index 49a136c5..94813685 100644 --- a/frontend/components/layout/AdminPageRenderer.tsx +++ b/frontend/components/layout/AdminPageRenderer.tsx @@ -89,6 +89,7 @@ const ADMIN_PAGE_REGISTRY: Record> = { // 자동화 관리 "/admin/automaticMng/flowMgmtList": dynamic(() => import("@/app/(main)/admin/automaticMng/flowMgmtList/page"), { ssr: false, loading: LoadingFallback }), "/admin/automaticMng/batchmngList": dynamic(() => import("@/app/(main)/admin/automaticMng/batchmngList/page"), { ssr: false, loading: LoadingFallback }), + "/admin/automaticMng/crawlingList": dynamic(() => import("@/app/(main)/admin/automaticMng/crawlingList/page"), { ssr: false, loading: LoadingFallback }), // 설계 관리 (커스텀 페이지) "/design/task-management": dynamic(() => import("@/app/(main)/design/task-management/page"), { ssr: false, loading: LoadingFallback }),