feat: add web crawling management functionality

- Introduced a new crawling management feature allowing users to configure, execute, and log web crawls.
- Added CRUD operations for crawl configurations, including URL analysis and preview capabilities.
- Implemented a new service for handling crawling logic and scheduling tasks.
- Integrated cheerio for HTML parsing and axios for HTTP requests.
- Created a sample HTML page for testing crawling functionality.

This commit enhances the application's data collection capabilities from external websites.
This commit is contained in:
kjs 2026-03-26 16:30:53 +09:00
parent 07777e314b
commit 5da134f016
8 changed files with 1700 additions and 0 deletions

View File

@ -13,6 +13,7 @@
"axios": "^1.11.0",
"bcryptjs": "^2.4.3",
"bwip-js": "^4.8.0",
"cheerio": "^1.2.0",
"compression": "^1.7.4",
"cors": "^2.8.5",
"docx": "^9.5.1",
@ -36,6 +37,7 @@
"nodemailer": "^6.10.1",
"oracledb": "^6.9.0",
"pg": "^8.16.3",
"playwright": "^1.58.2",
"quill": "^2.0.3",
"react-quill": "^2.0.0",
"redis": "^4.6.10",
@ -4408,6 +4410,12 @@
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
"license": "MIT"
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
"license": "ISC"
},
"node_modules/bowser": {
"version": "2.12.1",
"resolved": "https://registry.npmjs.org/bowser/-/bowser-2.12.1.tgz",
@ -4704,6 +4712,79 @@
"node": ">=10"
}
},
"node_modules/cheerio": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz",
"integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==",
"license": "MIT",
"dependencies": {
"cheerio-select": "^2.1.0",
"dom-serializer": "^2.0.0",
"domhandler": "^5.0.3",
"domutils": "^3.2.2",
"encoding-sniffer": "^0.2.1",
"htmlparser2": "^10.1.0",
"parse5": "^7.3.0",
"parse5-htmlparser2-tree-adapter": "^7.1.0",
"parse5-parser-stream": "^7.1.2",
"undici": "^7.19.0",
"whatwg-mimetype": "^4.0.0"
},
"engines": {
"node": ">=20.18.1"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/cheerio-select": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-select": "^5.1.0",
"css-what": "^6.1.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/cheerio/node_modules/entities": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz",
"integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/cheerio/node_modules/htmlparser2": {
"version": "10.1.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz",
"integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"license": "MIT",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3",
"domutils": "^3.2.2",
"entities": "^7.0.1"
}
},
"node_modules/chokidar": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz",
@ -5091,6 +5172,34 @@
"node": ">= 8"
}
},
"node_modules/css-select": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz",
"integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz",
"integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==",
"license": "BSD-2-Clause",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/debug": {
"version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@ -5539,6 +5648,31 @@
"node": ">=8.10.0"
}
},
"node_modules/encoding-sniffer": {
"version": "0.2.1",
"resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz",
"integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==",
"license": "MIT",
"dependencies": {
"iconv-lite": "^0.6.3",
"whatwg-encoding": "^3.1.1"
},
"funding": {
"url": "https://github.com/fb55/encoding-sniffer?sponsor=1"
}
},
"node_modules/encoding-sniffer/node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"license": "MIT",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/ent": {
"version": "2.2.2",
"resolved": "https://registry.npmjs.org/ent/-/ent-2.2.2.tgz",
@ -9020,6 +9154,18 @@
"node": ">=8"
}
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"license": "BSD-2-Clause",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/object-assign": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@ -9254,6 +9400,55 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/parse5": {
"version": "7.3.0",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz",
"integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==",
"license": "MIT",
"dependencies": {
"entities": "^6.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-htmlparser2-tree-adapter": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz",
"integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==",
"license": "MIT",
"dependencies": {
"domhandler": "^5.0.3",
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5-parser-stream": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz",
"integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==",
"license": "MIT",
"dependencies": {
"parse5": "^7.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/parse5/node_modules/entities": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz",
"integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/parseley": {
"version": "0.12.1",
"resolved": "https://registry.npmjs.org/parseley/-/parseley-0.12.1.tgz",
@ -9525,6 +9720,50 @@
"node": ">=8"
}
},
"node_modules/playwright": {
"version": "1.58.2",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz",
"integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==",
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.58.2"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.58.2",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz",
"integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==",
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/playwright/node_modules/fsevents": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/postgres-array": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz",
@ -11146,6 +11385,15 @@
"dev": true,
"license": "MIT"
},
"node_modules/undici": {
"version": "7.24.6",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.24.6.tgz",
"integrity": "sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA==",
"license": "MIT",
"engines": {
"node": ">=20.18.1"
}
},
"node_modules/undici-types": {
"version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
@ -11310,6 +11558,40 @@
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"license": "BSD-2-Clause"
},
"node_modules/whatwg-encoding": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz",
"integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==",
"deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation",
"license": "MIT",
"dependencies": {
"iconv-lite": "0.6.3"
},
"engines": {
"node": ">=18"
}
},
"node_modules/whatwg-encoding/node_modules/iconv-lite": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
"integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
"license": "MIT",
"dependencies": {
"safer-buffer": ">= 2.1.2 < 3.0.0"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/whatwg-mimetype": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz",
"integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==",
"license": "MIT",
"engines": {
"node": ">=18"
}
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",

View File

@ -27,6 +27,7 @@
"axios": "^1.11.0",
"bcryptjs": "^2.4.3",
"bwip-js": "^4.8.0",
"cheerio": "^1.2.0",
"compression": "^1.7.4",
"cors": "^2.8.5",
"docx": "^9.5.1",
@ -50,6 +51,7 @@
"nodemailer": "^6.10.1",
"oracledb": "^6.9.0",
"pg": "^8.16.3",
"playwright": "^1.58.2",
"quill": "^2.0.3",
"react-quill": "^2.0.0",
"redis": "^4.6.10",

View File

@ -115,6 +115,7 @@ import workHistoryRoutes from "./routes/workHistoryRoutes"; // 작업 이력 관
import tableHistoryRoutes from "./routes/tableHistoryRoutes"; // 테이블 변경 이력 조회
import bomRoutes from "./routes/bomRoutes"; // BOM 이력/버전 관리
import productionRoutes from "./routes/productionRoutes"; // 생산계획 관리
import crawlRoutes from "./routes/crawlRoutes"; // 웹 크롤링
import roleRoutes from "./routes/roleRoutes"; // 권한 그룹 관리
import departmentRoutes from "./routes/departmentRoutes"; // 부서 관리
import tableCategoryValueRoutes from "./routes/tableCategoryValueRoutes"; // 카테고리 값 관리
@ -325,6 +326,7 @@ app.use("/api/work-history", workHistoryRoutes); // 작업 이력 관리
app.use("/api/table-history", tableHistoryRoutes); // table change history lookup
app.use("/api/bom", bomRoutes); // BOM history / version management
app.use("/api/production", productionRoutes); // production plan management
app.use("/api/crawl", crawlRoutes); // web crawling
app.use("/api/material-status", materialStatusRoutes); // material status
app.use("/api/process-info", processInfoRoutes); // process information management
app.use("/api/roles", roleRoutes); // permission group management
@ -415,6 +417,11 @@ async function initializeServices() {
try {
await BatchSchedulerService.initializeScheduler();
logger.info(`⏰ 배치 스케줄러가 시작되었습니다.`);
// 크롤링 스케줄러 초기화
const { CrawlService } = await import("./services/crawlService");
await CrawlService.initializeScheduler();
logger.info(`🕷️ 크롤링 스케줄러가 시작되었습니다.`);
} catch (error) {
logger.error(`❌ 배치 스케줄러 초기화 실패:`, error);
}

View File

@ -0,0 +1,124 @@
import { Request, Response } from "express";
import { CrawlService } from "../services/crawlService";
import { logger } from "../utils/logger";
/**
 * Express request extended with the `user` object the handlers in this file read.
 * NOTE(review): presumably populated by the authentication middleware — confirm.
 */
interface AuthenticatedRequest extends Request {
// Optional: handlers fall back to other values when `user` is absent.
user?: { companyCode: string; userId: string };
}
/**
 * Lists crawl configurations for the caller's company.
 *
 * When the request has no user (or an empty company code) the "*" code is
 * passed to the service instead.
 */
export async function getCrawlConfigs(req: AuthenticatedRequest, res: Response) {
  try {
    const scope = req.user?.companyCode || "*";
    const data = await CrawlService.getConfigs(scope);
    return res.json({ success: true, data });
  } catch (error: any) {
    logger.error("크롤링 설정 조회 실패:", error);
    const message = error.message;
    return res.status(500).json({ success: false, message });
  }
}
/**
 * Returns a single crawl configuration by id.
 *
 * Responds 404 when the configuration does not exist or belongs to another
 * company. Callers whose company code is "*" may read any configuration,
 * matching the "no company filter" meaning "*" has in the list endpoint.
 */
export async function getCrawlConfig(req: AuthenticatedRequest, res: Response) {
  try {
    const companyCode = req.user?.companyCode || "*";
    const config = await CrawlService.getConfigById(req.params.id);
    // Report another company's configuration as missing so ids cannot be probed across tenants.
    if (!config || (companyCode !== "*" && config.company_code !== companyCode)) {
      return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
    }
    return res.json({ success: true, data: config });
  } catch (error: any) {
    logger.error("크롤링 설정 상세 조회 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}
/**
 * Creates a crawl configuration from the request body.
 *
 * `company_code` is taken from the authenticated user when available (the body
 * value is only a fallback) and `writer` is set to the user's id.
 * Responds 400 when `name`, `url` or `target_table` is missing, instead of
 * letting the INSERT run without them.
 */
export async function createCrawlConfig(req: AuthenticatedRequest, res: Response) {
  try {
    const body = req.body ?? {};
    const missing = ["name", "url", "target_table"].filter((field) => !body[field]);
    if (missing.length > 0) {
      return res
        .status(400)
        .json({ success: false, message: `필수 항목이 누락되었습니다: ${missing.join(", ")}` });
    }
    const data = {
      ...body,
      company_code: req.user?.companyCode || body.company_code,
      writer: req.user?.userId,
    };
    const config = await CrawlService.createConfig(data);
    return res.json({ success: true, data: config });
  } catch (error: any) {
    logger.error("크롤링 설정 생성 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}
/**
 * Updates a crawl configuration by id with the fields in the request body.
 *
 * Responds 404 when the configuration does not exist or belongs to another
 * company (company code "*" may update any configuration).
 */
export async function updateCrawlConfig(req: AuthenticatedRequest, res: Response) {
  try {
    const companyCode = req.user?.companyCode || "*";
    const notFound = () => res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });

    // Check ownership before writing so one company cannot modify another's configuration.
    const existing = await CrawlService.getConfigById(req.params.id);
    if (!existing || (companyCode !== "*" && existing.company_code !== companyCode)) {
      return notFound();
    }

    const config = await CrawlService.updateConfig(req.params.id, req.body);
    // Still possible if the row was deleted between the lookup and the update.
    if (!config) return notFound();
    return res.json({ success: true, data: config });
  } catch (error: any) {
    logger.error("크롤링 설정 수정 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}
/**
 * Deletes a crawl configuration by id.
 *
 * Responds 404 when the configuration does not exist or belongs to another
 * company (company code "*" may delete any configuration).
 */
export async function deleteCrawlConfig(req: AuthenticatedRequest, res: Response) {
  try {
    const companyCode = req.user?.companyCode || "*";
    // Check ownership before deleting so one company cannot remove another's configuration.
    const existing = await CrawlService.getConfigById(req.params.id);
    if (!existing || (companyCode !== "*" && existing.company_code !== companyCode)) {
      return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
    }
    await CrawlService.deleteConfig(req.params.id);
    return res.json({ success: true });
  } catch (error: any) {
    logger.error("크롤링 설정 삭제 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}
/**
 * Fetches a URL and returns a preview of the rows the given selectors extract.
 *
 * Responds 400 when `url` is missing, is not a string, cannot be parsed as an
 * absolute URL, or does not use http/https.
 *
 * NOTE(review): the server fetches a caller-supplied URL. The protocol check
 * below does not stop requests to internal hosts (SSRF); add an allow/deny
 * list if this endpoint is reachable by untrusted users.
 */
export async function previewCrawl(req: AuthenticatedRequest, res: Response) {
  try {
    const { url, row_selector, column_mappings, method, headers, request_body } = req.body;
    if (!url || typeof url !== "string") {
      return res.status(400).json({ success: false, message: "URL은 필수입니다." });
    }

    let protocol = "";
    try {
      protocol = new URL(url).protocol;
    } catch {
      // Unparseable URL: handled by the protocol check below.
    }
    if (protocol !== "http:" && protocol !== "https:") {
      return res.status(400).json({ success: false, message: "http 또는 https URL만 지원합니다." });
    }

    const result = await CrawlService.preview(url, row_selector, column_mappings || [], method, headers, request_body);
    return res.json({ success: true, data: result });
  } catch (error: any) {
    logger.error("크롤링 미리보기 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}
/**
 * Analyzes a URL and returns the table structures detected on the page.
 *
 * Responds 400 when `url` is missing, is not a string, cannot be parsed as an
 * absolute URL, or does not use http/https.
 *
 * NOTE(review): the server fetches a caller-supplied URL. The protocol check
 * below does not stop requests to internal hosts (SSRF); add an allow/deny
 * list if this endpoint is reachable by untrusted users.
 */
export async function analyzeUrl(req: AuthenticatedRequest, res: Response) {
  try {
    const { url } = req.body;
    if (!url || typeof url !== "string") {
      return res.status(400).json({ success: false, message: "URL은 필수입니다." });
    }

    let protocol = "";
    try {
      protocol = new URL(url).protocol;
    } catch {
      // Unparseable URL: handled by the protocol check below.
    }
    if (protocol !== "http:" && protocol !== "https:") {
      return res.status(400).json({ success: false, message: "http 또는 https URL만 지원합니다." });
    }

    const result = await CrawlService.analyzeUrl(url);
    return res.json({ success: true, data: result });
  } catch (error: any) {
    logger.error("URL 분석 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}
/**
 * Runs a crawl configuration immediately and returns the crawl result.
 *
 * Responds 404 when the configuration does not exist or belongs to another
 * company (company code "*" may run any configuration).
 */
export async function executeCrawl(req: AuthenticatedRequest, res: Response) {
  try {
    const companyCode = req.user?.companyCode || "*";
    const config = await CrawlService.getConfigById(req.params.id);
    // A crawl writes into the owning company's tables, so only that company may trigger it.
    if (!config || (companyCode !== "*" && config.company_code !== companyCode)) {
      return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
    }
    const result = await CrawlService.executeCrawl(config);
    return res.json({ success: true, data: result });
  } catch (error: any) {
    logger.error("크롤링 수동 실행 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}
/**
 * Returns the most recent execution logs of a crawl configuration.
 *
 * `?limit=` defaults to 20 when missing, non-numeric or not positive, and is
 * capped at 200 so a negative or huge value never reaches the SQL LIMIT.
 * Responds 404 when the configuration does not exist or belongs to another
 * company (company code "*" may read any configuration's logs).
 */
export async function getCrawlLogs(req: AuthenticatedRequest, res: Response) {
  const DEFAULT_LIMIT = 20;
  const MAX_LIMIT = 200;
  try {
    const companyCode = req.user?.companyCode || "*";
    const config = await CrawlService.getConfigById(req.params.id);
    if (!config || (companyCode !== "*" && config.company_code !== companyCode)) {
      return res.status(404).json({ success: false, message: "설정을 찾을 수 없습니다." });
    }

    const requested = Number.parseInt(String(req.query.limit ?? ""), 10);
    const limit = Number.isFinite(requested) && requested > 0 ? Math.min(requested, MAX_LIMIT) : DEFAULT_LIMIT;

    const logs = await CrawlService.getLogs(req.params.id, limit);
    return res.json({ success: true, data: logs });
  } catch (error: any) {
    logger.error("크롤링 로그 조회 실패:", error);
    return res.status(500).json({ success: false, message: error.message });
  }
}

View File

@ -0,0 +1,32 @@
import { Router } from "express";
import { authenticateToken } from "../middleware/authMiddleware";
import {
getCrawlConfigs,
getCrawlConfig,
createCrawlConfig,
updateCrawlConfig,
deleteCrawlConfig,
previewCrawl,
analyzeUrl,
executeCrawl,
getCrawlLogs,
} from "../controllers/crawlController";
const router = Router();

// Configuration CRUD — every endpoint requires an authenticated caller.
router
  .route("/configs")
  .get(authenticateToken, getCrawlConfigs)
  .post(authenticateToken, createCrawlConfig);

router
  .route("/configs/:id")
  .get(authenticateToken, getCrawlConfig)
  .put(authenticateToken, updateCrawlConfig)
  .delete(authenticateToken, deleteCrawlConfig);

// URL analysis, preview and manual execution
router.route("/analyze").post(authenticateToken, analyzeUrl);
router.route("/preview").post(authenticateToken, previewCrawl);
router.route("/execute/:id").post(authenticateToken, executeCrawl);

// Execution logs
router.route("/configs/:id/logs").get(authenticateToken, getCrawlLogs);

export default router;

View File

@ -0,0 +1,489 @@
import * as cheerio from "cheerio";
import axios from "axios";
import cron, { ScheduledTask } from "node-cron";
import { query } from "../database/db";
import { logger } from "../utils/logger";
/**
 * A crawl configuration as read from / written to the `crawl_configs` table.
 */
export interface CrawlConfig {
id: string;
// Owning company; copied onto every saved row and execution log entry.
company_code: string;
name: string;
// Page to fetch.
url: string;
// HTTP method passed to axios; "GET" is used when empty.
method: string;
// Extra request headers, merged over DEFAULT_HEADERS.
// NOTE(review): executeCrawl also accepts a JSON string here — presumably the
// DB driver may return the column unparsed; confirm.
headers: Record<string, string>;
// Optional request body sent with the HTTP request.
request_body?: string;
// Defaults to "css" on create; not otherwise read in this file.
selector_type: string;
// Selector matching one element per row. When empty, the column selectors are
// applied to the whole document and a single row is produced.
row_selector: string;
column_mappings: Array<{
// Selector evaluated inside each row element (or the document, see row_selector).
selector: string;
// Target column name the extracted value is stored under.
column: string;
// Only "number" is converted; "text" and "date" values are stored as the raw string.
type: "text" | "number" | "date";
attribute?: string; // attribute to read (e.g. href, src) instead of the element text
}>;
// Table the extracted rows are written to.
target_table: string;
// When set, rows are matched on this column (plus company_code) and updated instead of inserted.
upsert_key?: string;
// Cron expression; scheduled in the Asia/Seoul timezone when valid.
cron_schedule?: string;
// "Y" marks the configuration as active (eligible for scheduling).
is_active: string;
writer?: string;
}
/** Outcome of one crawl execution. */
export interface CrawlResult {
// Number of rows extracted from the page.
collected: number;
// Number of rows written to the target table without error.
saved: number;
// Messages for row-level save failures and for a failure of the crawl itself.
errors: string[];
}
/**
 * Browser-like request headers sent with every crawl request; per-config
 * headers are spread over these and therefore take precedence.
 */
const DEFAULT_HEADERS = {
  "User-Agent": [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/120.0.0.0",
    "Safari/537.36",
  ].join(" "),
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
};
export class CrawlService {
private static scheduledTasks: Map<string, ScheduledTask> = new Map();
// ─── 스케줄러 ───
/**
 * Loads every active configuration that has a cron schedule and registers it
 * with the scheduler. Failures are logged and swallowed so startup continues.
 */
static async initializeScheduler() {
  try {
    const activeConfigs = await query<CrawlConfig>(
      `SELECT * FROM crawl_configs WHERE is_active = 'Y' AND cron_schedule IS NOT NULL AND cron_schedule != ''`
    );
    logger.info(`크롤링 스케줄러: ${activeConfigs.length}개 설정 등록`);
    activeConfigs.forEach((cfg) => {
      this.scheduleConfig(cfg);
    });
  } catch (error) {
    logger.error("크롤링 스케줄러 초기화 실패:", error);
  }
}
/**
 * Registers (or replaces) the cron task for a configuration.
 *
 * Does nothing except log a warning when the configuration has no cron
 * expression or the expression is invalid. Tasks run in the Asia/Seoul
 * timezone and use the configuration object passed in here.
 */
static scheduleConfig(config: CrawlConfig) {
  if (!config.cron_schedule || !cron.validate(config.cron_schedule)) {
    logger.warn(`크롤링 [${config.name}]: 유효하지 않은 cron 표현식 - ${config.cron_schedule}`);
    return;
  }

  // Replace any task already registered for this configuration.
  this.unscheduleConfig(config.id);

  const task = cron.schedule(
    config.cron_schedule,
    async () => {
      logger.info(`크롤링 [${config.name}] 스케줄 실행 시작`);
      try {
        await this.executeCrawl(config);
      } catch (error) {
        // executeCrawl can reject (e.g. when creating the execution log row
        // fails); without this catch the rejection would escape the cron
        // callback without being logged.
        logger.error(`크롤링 [${config.name}] 스케줄 실행 실패:`, error);
      }
    },
    { timezone: "Asia/Seoul" }
  );
  this.scheduledTasks.set(config.id, task);
  logger.info(`크롤링 [${config.name}] 스케줄 등록: ${config.cron_schedule}`);
}
/** Stops and forgets the cron task registered for `configId`, if there is one. */
static unscheduleConfig(configId: string) {
  const registered = this.scheduledTasks.get(configId);
  if (!registered) return;
  registered.stop();
  this.scheduledTasks.delete(configId);
}
// ─── CRUD ───
/**
 * Returns configurations ordered by creation date, newest first.
 * The company code "*" disables the company filter.
 */
static async getConfigs(companyCode: string) {
  let condition = "WHERE company_code = $1";
  let params: string[] = [companyCode];
  if (companyCode === "*") {
    condition = "";
    params = [];
  }
  return query<CrawlConfig>(`SELECT * FROM crawl_configs ${condition} ORDER BY created_date DESC`, params);
}
/** Returns the configuration with the given id, or `null` when none exists. */
static async getConfigById(id: string) {
  const matches = await query<CrawlConfig>(`SELECT * FROM crawl_configs WHERE id = $1`, [id]);
  const [first] = matches;
  return first || null;
}
/**
 * Inserts a configuration and, when it is active and has a cron expression,
 * registers it with the scheduler.
 *
 * Defaults: method "GET", selector_type "css", is_active "Y", headers `{}`,
 * column_mappings `[]`; other optional fields are stored as NULL.
 */
static async createConfig(data: Partial<CrawlConfig>) {
  const insertSql = `INSERT INTO crawl_configs (company_code, name, url, method, headers, request_body, selector_type, row_selector, column_mappings, target_table, upsert_key, cron_schedule, is_active, writer)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14) RETURNING *`;
  // headers and column_mappings are stored as JSON text.
  const headersJson = JSON.stringify(data.headers || {});
  const mappingsJson = JSON.stringify(data.column_mappings || []);

  const inserted = await query<CrawlConfig>(insertSql, [
    data.company_code,
    data.name,
    data.url,
    data.method || "GET",
    headersJson,
    data.request_body || null,
    data.selector_type || "css",
    data.row_selector || null,
    mappingsJson,
    data.target_table,
    data.upsert_key || null,
    data.cron_schedule || null,
    data.is_active || "Y",
    data.writer || null,
  ]);

  const created = inserted[0];
  const shouldSchedule = created.is_active === "Y" && created.cron_schedule;
  if (shouldSchedule) {
    this.scheduleConfig(created);
  }
  return created;
}
/**
 * Updates a configuration and re-registers its schedule.
 *
 * name, url, method, headers, selector_type, column_mappings, target_table and
 * is_active keep their stored value when omitted (COALESCE). request_body,
 * row_selector, upsert_key and cron_schedule are always overwritten and become
 * NULL when omitted.
 *
 * @returns the updated row, or `undefined` when no row has this id.
 */
static async updateConfig(id: string, data: Partial<CrawlConfig>) {
  const headersJson = data.headers ? JSON.stringify(data.headers) : null;
  const mappingsJson = data.column_mappings ? JSON.stringify(data.column_mappings) : null;

  const rows = await query<CrawlConfig>(
    `UPDATE crawl_configs SET
name = COALESCE($2, name),
url = COALESCE($3, url),
method = COALESCE($4, method),
headers = COALESCE($5, headers),
request_body = $6,
selector_type = COALESCE($7, selector_type),
row_selector = $8,
column_mappings = COALESCE($9, column_mappings),
target_table = COALESCE($10, target_table),
upsert_key = $11,
cron_schedule = $12,
is_active = COALESCE($13, is_active),
updated_date = now()
WHERE id = $1 RETURNING *`,
    [
      id,
      data.name,
      data.url,
      data.method,
      headersJson,
      data.request_body ?? null,
      data.selector_type,
      data.row_selector ?? null,
      mappingsJson,
      data.target_table,
      data.upsert_key ?? null,
      data.cron_schedule ?? null,
      data.is_active,
    ]
  );

  const updated = rows[0];
  if (!updated) return updated;

  // Drop the old task first; it is only re-created when the row is still active and scheduled.
  this.unscheduleConfig(id);
  if (updated.is_active === "Y" && updated.cron_schedule) {
    this.scheduleConfig(updated);
  }
  return updated;
}
/** Stops the configuration's cron task (if any), then deletes its row. */
static async deleteConfig(id: string) {
  this.unscheduleConfig(id);
  const deleteSql = `DELETE FROM crawl_configs WHERE id = $1`;
  await query(deleteSql, [id]);
}
// ─── 크롤링 실행 ───
/**
 * Runs one crawl: fetches the page, extracts rows with the configured
 * selectors, writes them to the target table and records the outcome in the
 * execution log and on the configuration row.
 *
 * Row-level save failures do not abort the run. They are returned in
 * `errors`, logged, and stored (first 20) in the execution log's
 * error message while the run status stays "success".
 *
 * @returns counts of collected/saved rows plus all error messages.
 * @throws only when the initial execution-log row cannot be created; every
 *   later failure is reported through the returned `errors`.
 */
static async executeCrawl(config: CrawlConfig): Promise<CrawlResult> {
  const MAX_LOGGED_ROW_ERRORS = 20;
  const logId = await this.createLog(config.id, config.company_code);
  const errors: string[] = [];
  let collected = 0;
  let saved = 0;

  try {
    // 1. HTTP request — headers may arrive as a JSON string or as an object.
    const storedHeaders = typeof config.headers === "string" ? JSON.parse(config.headers) : config.headers || {};
    const response = await axios({
      method: (config.method || "GET") as any,
      url: config.url,
      headers: { ...DEFAULT_HEADERS, ...storedHeaders },
      data: config.request_body || undefined,
      timeout: 30000,
      responseType: "text",
    });
    const html = response.data;
    const htmlPreview = typeof html === "string" ? html.substring(0, 2000) : "";

    // 2. DOM parsing
    const $ = cheerio.load(html);
    const mappings: CrawlConfig["column_mappings"] =
      typeof config.column_mappings === "string" ? JSON.parse(config.column_mappings) : config.column_mappings || [];

    // Builds one row by resolving every column mapping through `find`.
    type Matched = { attr(name: string): string | undefined; text(): string };
    const extractRow = (find: (selector: string) => Matched): Record<string, any> => {
      const row: Record<string, any> = {};
      for (const mapping of mappings) {
        const matched = find(mapping.selector);
        const raw = mapping.attribute ? matched.attr(mapping.attribute) || "" : matched.text().trim();
        row[mapping.column] = this.castValue(raw, mapping.type);
      }
      return row;
    };

    // 3. Row extraction
    const rows: Record<string, any>[] = [];
    if (config.row_selector) {
      $(config.row_selector).each((_, el) => {
        rows.push(extractRow((selector) => $(el).find(selector)));
      });
    } else {
      // Without a row selector the column selectors run against the whole document (single row).
      rows.push(extractRow((selector) => $(selector)));
    }
    collected = rows.length;

    // 4. Persist rows; one failing row must not stop the others.
    for (const row of rows) {
      try {
        row.company_code = config.company_code;
        if (config.upsert_key) {
          await this.upsertRow(config.target_table, row, config.upsert_key, config.company_code);
        } else {
          await this.insertRow(config.target_table, row);
        }
        saved++;
      } catch (err: any) {
        errors.push(`행 저장 실패: ${err.message}`);
      }
    }

    // Keep row failures visible: scheduled runs have no caller to read `errors`.
    const rowErrorSummary = errors.length > 0 ? errors.slice(0, MAX_LOGGED_ROW_ERRORS).join("\n") : null;
    if (rowErrorSummary) {
      logger.warn(`크롤링 [${config.name}] 행 저장 실패: ${errors.length}건`);
    }

    // 5. Status update
    await this.updateLog(logId, "success", collected, saved, rowErrorSummary, htmlPreview);
    await query(
      `UPDATE crawl_configs SET last_executed_at = now(), last_status = 'success', last_error = null WHERE id = $1`,
      [config.id]
    );
    logger.info(`크롤링 [${config.name}] 완료: ${collected}건 수집, ${saved}건 저장`);
  } catch (error: any) {
    const errMsg = error?.message || "Unknown error";
    errors.push(errMsg);
    logger.error(`크롤링 [${config.name}] 실패:`, error);
    try {
      await this.updateLog(logId, "fail", collected, saved, errMsg, null);
      await query(
        `UPDATE crawl_configs SET last_executed_at = now(), last_status = 'fail', last_error = $2 WHERE id = $1`,
        [config.id, errMsg]
      );
    } catch (bookkeepingError) {
      // Recording the failure is best-effort; the caller still gets the result below.
      logger.error(`크롤링 [${config.name}] 실행 결과 기록 실패:`, bookkeepingError);
    }
  }

  return { collected, saved, errors };
}
// ─── URL 자동 분석 ───
/**
 * Fetches a page and describes every `<table>` found on it: a selector for the
 * table, its caption, header texts, data row count and up to three sample rows.
 *
 * Selector generation:
 * - `table#id` / `table.firstClass` when the id or class is a plain identifier;
 * - otherwise a child-combinator path of `tag:nth-of-type(k)` segments from
 *   `body` (or the nearest ancestor with a plain id) down to the table.
 *   `nth-of-type` counts siblings, so the document-wide table index cannot be
 *   used for it.
 *
 * @returns page title, number of reported tables, the table descriptions and
 *   the length of the response body.
 */
static async analyzeUrl(url: string) {
  // Ids/classes that can be embedded in a selector without escaping.
  const SIMPLE_IDENT = /^[A-Za-z_][\w-]*$/;

  const response = await axios({
    method: "GET",
    url,
    headers: DEFAULT_HEADERS,
    timeout: 15000,
    responseType: "text",
  });
  const $ = cheerio.load(response.data);

  const tables: Array<{
    index: number;
    selector: string;
    caption: string;
    headers: string[];
    rowCount: number;
    sampleRows: string[][];
  }> = [];

  $("table").each((i, tableEl) => {
    const $table = $(tableEl);

    // Header cells; fall back to the first row's cells when no header cells exist.
    const headers: string[] = [];
    $table.find("thead th, thead td, tr:first-child th").each((_, th) => {
      headers.push($(th).text().trim());
    });
    if (headers.length === 0) {
      $table.find("tr:first-child td").each((_, td) => {
        headers.push($(td).text().trim());
      });
    }

    // Data rows: tbody rows when present, otherwise all rows minus the header row.
    const bodyRows = $table.find("tbody tr");
    const allRows = bodyRows.length > 0 ? bodyRows : $table.find("tr").slice(headers.length > 0 ? 1 : 0);
    const rowCount = allRows.length;

    // Sample (at most 3 rows)
    const sampleRows: string[][] = [];
    allRows.slice(0, 3).each((_, tr) => {
      const cells: string[] = [];
      $(tr)
        .find("td, th")
        .each((__, cell) => {
          cells.push($(cell).text().trim());
        });
      sampleRows.push(cells);
    });

    // Skip tables with neither headers nor rows.
    if (headers.length === 0 && rowCount === 0) return;

    const id = $table.attr("id");
    const firstClass = ($table.attr("class") || "").split(/\s+/).find(Boolean);

    let selector: string;
    if (id && SIMPLE_IDENT.test(id)) {
      selector = `table#${id}`;
    } else if (firstClass && SIMPLE_IDENT.test(firstClass)) {
      selector = `table.${firstClass}`;
    } else {
      // Walk up from the table, recording each element's position among its
      // same-tag siblings, until an anchor (body/html or a plain id) is reached.
      const segments: string[] = [];
      let $node = $table;
      while ($node.length > 0) {
        const tag = $node.get(0)?.tagName.toLowerCase();
        if (!tag) break;
        if (tag === "body" || tag === "html") {
          segments.unshift(tag);
          break;
        }
        const anchorId = $node.attr("id");
        if (anchorId && SIMPLE_IDENT.test(anchorId)) {
          segments.unshift(`${tag}#${anchorId}`);
          break;
        }
        segments.unshift(`${tag}:nth-of-type(${$node.prevAll(tag).length + 1})`);
        $node = $node.parent();
      }
      selector = segments.join(" > ") || "table";
    }

    const caption = $table.find("caption").text().trim() || $table.attr("summary") || "";
    tables.push({ index: i, selector, caption, headers, rowCount, sampleRows });
  });

  return {
    title: $("title").text().trim(),
    tableCount: tables.length,
    tables,
    htmlLength: response.data.length,
  };
}
// ─── 미리보기 ───
/**
 * Fetches a URL and extracts a preview with the given selectors, without
 * writing anything to the database.
 *
 * With a row selector, at most the first 10 matching elements are extracted.
 * Without one, the column selectors are applied to the whole document and a
 * single row is returned — the same rule `executeCrawl` applies — provided at
 * least one column mapping is given.
 *
 * @returns `totalElements` (elements matched by the row selector, 0 without
 *   one), the preview rows, and the length of the response body.
 */
static async preview(
  url: string,
  rowSelector: string,
  columnMappings: CrawlConfig["column_mappings"],
  method = "GET",
  headers: Record<string, string> = {},
  requestBody?: string
) {
  const PREVIEW_ROW_LIMIT = 10;

  const response = await axios({
    method: method as any,
    url,
    headers: { ...DEFAULT_HEADERS, ...headers },
    data: requestBody || undefined,
    timeout: 15000,
    responseType: "text",
  });
  const $ = cheerio.load(response.data);

  // Builds one row by resolving every column mapping through `find`.
  type Matched = { attr(name: string): string | undefined; text(): string };
  const extractRow = (find: (selector: string) => Matched): Record<string, any> => {
    const row: Record<string, any> = {};
    for (const mapping of columnMappings) {
      const matched = find(mapping.selector);
      const raw = mapping.attribute ? matched.attr(mapping.attribute) || "" : matched.text().trim();
      row[mapping.column] = this.castValue(raw, mapping.type);
    }
    return row;
  };

  const rows: Record<string, any>[] = [];
  // Evaluate the row selector once and reuse it for both the rows and the count.
  const $matches = rowSelector ? $(rowSelector) : null;
  if ($matches) {
    $matches.slice(0, PREVIEW_ROW_LIMIT).each((_, el) => {
      rows.push(extractRow((selector) => $(el).find(selector)));
    });
  } else if (columnMappings.length > 0) {
    rows.push(extractRow((selector) => $(selector)));
  }

  return {
    totalElements: $matches ? $matches.length : 0,
    previewRows: rows,
    htmlLength: response.data.length,
  };
}
// ─── 유틸 ───
/**
 * Converts an extracted string to the value stored for its column type.
 *
 * Empty input becomes `null`. For "number", every character except digits,
 * "." and "-" is removed before parsing; an unparseable result becomes `null`.
 * All other types (including "date") return the raw string unchanged.
 */
private static castValue(raw: string, type: string): any {
  if (!raw) return null;
  if (type !== "number") return raw;

  const digitsOnly = raw.replace(/[^0-9.\-]/g, "");
  const parsed = parseFloat(digitsOnly);
  return isNaN(parsed) ? null : parsed;
}
/**
 * Inserts one row into `tableName`; the object's keys are the column names and
 * its values are bound as query parameters.
 *
 * Table and column names come from user-editable configuration, so they are
 * emitted as quoted identifiers with embedded double quotes doubled; a name
 * can therefore never terminate the identifier and inject SQL.
 *
 * NOTE(review): any table name is accepted — consider restricting the target
 * table to an allow-list.
 */
private static async insertRow(tableName: string, row: Record<string, any>) {
  const quoteIdent = (name: string) => `"${String(name).replace(/"/g, '""')}"`;

  const cols = Object.keys(row);
  const vals = Object.values(row);
  const placeholders = cols.map((_, i) => `$${i + 1}`).join(", ");
  const colNames = cols.map(quoteIdent).join(", ");
  await query(`INSERT INTO ${quoteIdent(tableName)} (${colNames}) VALUES (${placeholders})`, vals);
}
/**
 * Updates the row whose `upsertKey` column and company_code match, or inserts
 * the row when no such row exists.
 *
 * On update, every column except the key and company_code is overwritten and
 * `updated_date` is set to now(). Table and column names come from
 * user-editable configuration, so they are emitted as quoted identifiers with
 * embedded double quotes doubled.
 *
 * NOTE(review): the lookup and the write are separate statements, so two
 * concurrent runs can both insert the same key.
 */
private static async upsertRow(tableName: string, row: Record<string, any>, upsertKey: string, companyCode: string) {
  const quoteIdent = (name: string) => `"${String(name).replace(/"/g, '""')}"`;
  const table = quoteIdent(tableName);
  const keyColumn = quoteIdent(upsertKey);
  const keyValue = row[upsertKey];

  // `column = NULL` never matches, so a row without a key value can only be inserted.
  if (keyValue === null || keyValue === undefined) {
    await this.insertRow(tableName, row);
    return;
  }

  const existing = await query(
    `SELECT 1 FROM ${table} WHERE ${keyColumn} = $1 AND company_code = $2 LIMIT 1`,
    [keyValue, companyCode]
  );
  if (existing.length === 0) {
    await this.insertRow(tableName, row);
    return;
  }

  const updatable = Object.entries(row).filter(([column]) => column !== upsertKey && column !== "company_code");
  if (updatable.length === 0) return;

  const setClauses = updatable.map(([column], i) => `${quoteIdent(column)} = $${i + 1}`);
  // The key and company parameters follow the SET parameters.
  const keyIdx = updatable.length + 1;
  const vals = [...updatable.map(([, value]) => value), keyValue, companyCode];
  await query(
    `UPDATE ${table} SET ${setClauses.join(", ")}, updated_date = now() WHERE ${keyColumn} = $${keyIdx} AND company_code = $${keyIdx + 1}`,
    vals
  );
}
/** Inserts an execution log row with status 'running' and returns its id. */
private static async createLog(configId: string, companyCode: string): Promise<string> {
  const insertSql = `INSERT INTO crawl_execution_logs (config_id, company_code, status) VALUES ($1, $2, 'running') RETURNING id`;
  const inserted = await query<any>(insertSql, [configId, companyCode]);
  return inserted[0].id;
}
/**
 * Writes the final state of an execution log row: status, row counts, error
 * message, HTML preview, and the finish time (now()).
 */
private static async updateLog(
  logId: string,
  status: string,
  collected: number,
  saved: number,
  errorMessage: string | null,
  htmlPreview: string | null
) {
  const updateSql = `UPDATE crawl_execution_logs SET status = $2, rows_collected = $3, rows_saved = $4, error_message = $5, response_html_preview = $6, finished_at = now() WHERE id = $1`;
  const params = [logId, status, collected, saved, errorMessage, htmlPreview];
  await query(updateSql, params);
}
// ─── 로그 조회 ───
/** Returns up to `limit` execution logs of a configuration, most recently started first. */
static async getLogs(configId: string, limit = 20) {
  const selectSql = `SELECT * FROM crawl_execution_logs WHERE config_id = $1 ORDER BY started_at DESC LIMIT $2`;
  return query(selectSql, [configId, limit]);
}
}

View File

@ -0,0 +1,763 @@
"use client";
/**
 * Crawling management page (client component).
 */
import React, { useState, useEffect, useCallback } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Switch } from "@/components/ui/switch";
import { Label } from "@/components/ui/label";
import { Textarea } from "@/components/ui/textarea";
import { Badge } from "@/components/ui/badge";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
DialogFooter,
} from "@/components/ui/dialog";
import {
Plus,
Search,
Play,
Pencil,
Trash2,
RefreshCw,
Globe,
Eye,
Clock,
CheckCircle,
XCircle,
Loader2,
Check,
ChevronsUpDown,
} from "lucide-react";
import { toast } from "sonner";
import { apiClient } from "@/lib/api/client";
import { tableTypeApi } from "@/lib/api/screen";
import { useConfirmDialog } from "@/components/common/ConfirmDialog";
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
import { Command, CommandEmpty, CommandGroup, CommandInput, CommandItem, CommandList } from "@/components/ui/command";
import { cn } from "@/lib/utils";
/** One selector-to-column rule applied to each crawled row. */
interface CrawlColumnMapping {
  /** Selector evaluated per row (the form suggests e.g. `td:nth-child(1)`). */
  selector: string;
  /** Name of the column the extracted value is stored under. */
  column: string;
  /** Value type; the editor offers "text", "number" and "date". */
  type: string;
  /** Optional attribute name — NOTE(review): not editable in this page; semantics live in the backend. */
  attribute?: string;
}

/** A crawl configuration as exchanged with the `/crawl/configs` endpoints. */
interface CrawlConfig {
  id: string;
  company_code: string;
  /** Display name shown in the list and used by the search box. */
  name: string;
  /** Page address to crawl; also matched by the search box. */
  url: string;
  /** HTTP method; new configurations default to "GET". */
  method: string;
  headers: Record<string, string>;
  request_body?: string;
  /** Selector dialect; new configurations default to "css". */
  selector_type: string;
  /** Selector that yields one element per data row. */
  row_selector: string;
  column_mappings: CrawlColumnMapping[];
  /** Name of the table that receives the collected rows. */
  target_table: string;
  /** Column chosen as the upsert key; empty means none is selected. */
  upsert_key?: string;
  /** Cron expression (e.g. "0 9 * * 1-5"); when empty the page labels the config as manually executed. */
  cron_schedule?: string;
  /** "Y" when active; any other value is rendered as inactive. */
  is_active: string;
  last_executed_at?: string;
  /** Last run status; "success" gets a green icon, anything else a red one. */
  last_status?: string;
  /** Error text of the last run, shown in the detail panel when present. */
  last_error?: string;
}
/** One execution-log row as returned by `/crawl/configs/:id/logs`. */
interface CrawlLog {
id: string;
/** Run status; the log table styles "success" and "running" specially and treats anything else as a failure. */
status: string;
/** Number of rows collected by the run (shown in the log table). */
rows_collected: number;
/** Number of rows saved by the run (shown in the log table). */
rows_saved: number;
/** Error text for the run; the table shows "-" when absent. */
error_message?: string;
/** Start timestamp; parsed with `new Date(...)` for display. */
started_at: string;
finished_at?: string;
}
/**
 * Initial form values for the "add" modal: a GET request using CSS selectors,
 * no mappings, no schedule, and the configuration marked active ("Y").
 * Callers shallow-copy this object before putting it into form state.
 */
const EMPTY_CONFIG: Partial<CrawlConfig> = {
name: "",
url: "",
method: "GET",
headers: {},
selector_type: "css",
row_selector: "",
column_mappings: [],
target_table: "",
upsert_key: "",
cron_schedule: "",
is_active: "Y",
};
export default function CrawlingManagementPage() {
const [configs, setConfigs] = useState<CrawlConfig[]>([]);
const [selectedId, setSelectedId] = useState<string | null>(null);
const [searchText, setSearchText] = useState("");
const [loading, setLoading] = useState(false);
// 모달
const [modalOpen, setModalOpen] = useState(false);
const [modalMode, setModalMode] = useState<"add" | "edit">("add");
const [form, setForm] = useState<Partial<CrawlConfig>>(EMPTY_CONFIG);
const [saving, setSaving] = useState(false);
// 테이블/컬럼 목록
const [allTables, setAllTables] = useState<Array<{ tableName: string; displayName: string }>>([]);
const [targetColumns, setTargetColumns] = useState<Array<{ columnName: string; columnLabel?: string }>>([]);
const [tablePopoverOpen, setTablePopoverOpen] = useState(false);
// URL 분석
const [analyzing, setAnalyzing] = useState(false);
const [analyzedTables, setAnalyzedTables] = useState<Array<{
index: number; selector: string; caption: string; headers: string[]; rowCount: number; sampleRows: string[][];
}>>([]);
const [selectedAnalyzedIdx, setSelectedAnalyzedIdx] = useState<number | null>(null);
// 컬럼 매핑 시각적 폼
const [mappingRows, setMappingRows] = useState<Array<{ selector: string; column: string; type: string }>>([]);
// 미리보기
const [previewOpen, setPreviewOpen] = useState(false);
const [previewData, setPreviewData] = useState<any>(null);
const [previewing, setPreviewing] = useState(false);
// 실행 로그
const [logs, setLogs] = useState<CrawlLog[]>([]);
const [logsLoading, setLogsLoading] = useState(false);
// 실행 중
const [executing, setExecuting] = useState<string | null>(null);
const { confirm, ConfirmDialogComponent } = useConfirmDialog();
// ─── 데이터 로드 ───
/**
 * Loads every crawl configuration from `/crawl/configs` into `configs`,
 * toggling the `loading` flag around the request. A failed request only
 * raises an error toast; the current list is left as it was.
 */
const loadConfigs = useCallback(async () => {
  setLoading(true);
  try {
    const response = await apiClient.get("/crawl/configs");
    const rows = response.data.data || [];
    setConfigs(rows);
  } catch {
    toast.error("크롤링 설정 로드 실패");
  } finally {
    setLoading(false);
  }
}, []);
useEffect(() => {
loadConfigs();
tableTypeApi.getTables().then((t) => setAllTables(t || [])).catch(() => {});
}, [loadConfigs]);
// Reload the selectable column list whenever the chosen target table changes.
useEffect(() => {
  if (!form.target_table) {
    setTargetColumns([]);
    return;
  }
  // Set by the cleanup below once this effect run is superseded (table changed
  // or component unmounted). Without it, a slow response for a previously
  // selected table could overwrite the columns of the current one.
  let cancelled = false;
  tableTypeApi
    .getColumns(form.target_table)
    .then((cols) => {
      if (cancelled) return;
      // The API may return either camelCase or snake_case keys; accept both.
      setTargetColumns(
        cols.map((c: any) => ({
          columnName: c.columnName || c.column_name,
          columnLabel: c.displayName || c.columnLabel || c.column_label,
        }))
      );
    })
    .catch(() => {
      // Best effort: on failure the upsert-key picker simply offers no columns.
      if (!cancelled) setTargetColumns([]);
    });
  return () => {
    cancelled = true;
  };
}, [form.target_table]);
/**
 * Loads the 20 most recent execution logs of one configuration into `logs`,
 * toggling `logsLoading` around the request. On failure the log list is
 * cleared rather than left stale.
 *
 * @param configId - Id of the configuration whose logs are requested.
 */
const loadLogs = useCallback(async (configId: string) => {
  setLogsLoading(true);
  try {
    const response = await apiClient.get(`/crawl/configs/${configId}/logs?limit=20`);
    const rows = response.data.data || [];
    setLogs(rows);
  } catch {
    setLogs([]);
  } finally {
    setLogsLoading(false);
  }
}, []);
useEffect(() => {
if (selectedId) loadLogs(selectedId);
else setLogs([]);
}, [selectedId, loadLogs]);
// ─── 필터링 ───
// Configurations whose name or URL contains the search text (case-insensitive);
// an empty search text keeps every configuration.
const filteredConfigs = configs.filter((candidate) => {
  if (!searchText) return true;
  const needle = searchText.toLowerCase();
  if (candidate.name.toLowerCase().includes(needle)) return true;
  return candidate.url.toLowerCase().includes(needle);
});
const selectedConfig = configs.find((c) => c.id === selectedId);
// ─── CRUD ───
/** Opens the editor modal in "add" mode with a blank form and no analysis results. */
const openAddModal = () => {
  // Start from a fresh copy of the defaults so edits never touch EMPTY_CONFIG itself.
  const blankForm = { ...EMPTY_CONFIG };
  setModalMode("add");
  setForm(blankForm);
  // Clear anything left over from a previous editing session.
  setMappingRows([]);
  setAnalyzedTables([]);
  setSelectedAnalyzedIdx(null);
  setModalOpen(true);
};
/**
 * Opens the editor modal in "edit" mode, pre-filled from an existing
 * configuration.
 *
 * `column_mappings` is typed as an array but is also accepted as a JSON string
 * (the API may hand back the raw column value). A malformed string or a
 * non-array value no longer throws — the modal opens with an empty mapping
 * list and the user is told. A mapping's `attribute` field is carried through
 * so that saving an edited configuration does not silently drop it.
 *
 * @param config - The configuration to edit.
 */
const openEditModal = (config: CrawlConfig) => {
  setModalMode("edit");
  setForm({ ...config });

  let rawMappings: unknown = config.column_mappings;
  if (typeof rawMappings === "string") {
    try {
      rawMappings = JSON.parse(rawMappings);
    } catch {
      rawMappings = [];
      toast.error("컬럼 매핑을 불러오지 못했습니다.");
    }
  }
  const mappings: any[] = Array.isArray(rawMappings) ? rawMappings : [];

  setMappingRows(
    mappings.map((m: any) => ({
      selector: m?.selector || "",
      column: m?.column || "",
      type: m?.type || "text",
      // Not editable in this form, but must survive a load/save round trip.
      ...(m?.attribute ? { attribute: m.attribute } : {}),
    }))
  );
  setAnalyzedTables([]);
  setSelectedAnalyzedIdx(null);
  setModalOpen(true);
};
// URL 분석
/**
 * Asks the backend to analyze the form's URL and stores the tables it detects.
 *
 * A successful analysis replaces the detected-table list, so any previously
 * selected table index is cleared first; otherwise the highlight would land on
 * whichever new table happens to share the old index. On failure the previous
 * results and selection are left untouched and an error toast is shown.
 */
const handleAnalyze = async () => {
  if (!form.url) {
    toast.error("URL을 입력하세요.");
    return;
  }
  setAnalyzing(true);
  try {
    const res = await apiClient.post("/crawl/analyze", { url: form.url });
    const data = res.data.data;
    const tables = data.tables || [];
    // The old index refers to the previous result set; drop it.
    setSelectedAnalyzedIdx(null);
    setAnalyzedTables(tables);
    if (tables.length > 0) {
      toast.success(`${tables.length}개 테이블 감지됨`);
    } else {
      toast.info("페이지에서 테이블을 찾지 못했습니다.");
    }
  } catch (err: any) {
    // Prefer the server-provided message when the request itself failed.
    toast.error(err.response?.data?.message || "URL 분석 실패");
  } finally {
    setAnalyzing(false);
  }
};
// 분석된 테이블 선택 시 자동 매핑 생성
/**
 * Selects one of the analyzed tables and seeds the form from it: the row
 * selector becomes `<table selector> tbody tr`, and one text mapping is
 * generated per header cell, addressed by column position.
 *
 * Column names are derived from the header text: trimmed, whitespace runs
 * turned into underscores, characters other than ASCII letters/digits,
 * underscore and Hangul removed, then lower-cased. Headers that reduce to
 * nothing fall back to `col_<n>`, and repeated names get a numeric suffix so
 * two mappings never target the same column.
 *
 * @param idx - Index into `analyzedTables`; out-of-range values are ignored.
 */
const handleSelectAnalyzedTable = (idx: number) => {
  const table = analyzedTables[idx];
  if (!table) return;
  setSelectedAnalyzedIdx(idx);
  setForm((p) => ({ ...p, row_selector: `${table.selector} tbody tr` }));

  const usedColumns = new Set<string>();
  const newMappings = table.headers.map((h, i) => {
    const base =
      h.trim().replace(/\s+/g, "_").replace(/[^a-zA-Z0-9_가-힣]/g, "").toLowerCase() || `col_${i + 1}`;
    // Disambiguate duplicate headers: name, name_2, name_3, ...
    let column = base;
    for (let n = 2; usedColumns.has(column); n++) {
      column = `${base}_${n}`;
    }
    usedColumns.add(column);
    return {
      selector: `td:nth-child(${i + 1})`,
      column,
      type: "text",
    };
  });
  setMappingRows(newMappings);
};
/**
 * Validates the form and persists it: POST for a new configuration, PUT for an
 * existing one. Only mapping rows with both a selector and a column are sent.
 * On success the modal closes and the list is reloaded; on failure the
 * server's message (or a generic one) is shown and the modal stays open.
 */
const handleSave = async () => {
  const missingRequired = !form.name || !form.url || !form.target_table;
  if (missingRequired) {
    toast.error("이름, URL, 대상 테이블은 필수입니다.");
    return;
  }

  setSaving(true);
  try {
    // Half-filled mapping rows are dropped rather than rejected.
    const completeMappings = mappingRows.filter((m) => m.selector && m.column);
    const payload = {
      ...form,
      column_mappings: completeMappings,
      headers: form.headers || {},
    };

    const creating = modalMode === "add";
    if (creating) {
      await apiClient.post("/crawl/configs", payload);
    } else {
      await apiClient.put(`/crawl/configs/${form.id}`, payload);
    }
    toast.success(creating ? "크롤링 설정이 생성되었습니다." : "크롤링 설정이 수정되었습니다.");

    setModalOpen(false);
    loadConfigs();
  } catch (err: any) {
    toast.error(err.response?.data?.message || "저장 실패");
  } finally {
    setSaving(false);
  }
};
/**
 * Deletes a configuration after the user confirms. If the deleted item was the
 * selected one the selection is cleared, then the list is reloaded.
 *
 * @param id - Id of the configuration to delete.
 */
const handleDelete = async (id: string) => {
  const confirmed = await confirm("정말 삭제하시겠습니까?", {
    variant: "destructive",
    confirmText: "삭제",
  });
  if (!confirmed) {
    return;
  }

  try {
    await apiClient.delete(`/crawl/configs/${id}`);
    toast.success("삭제되었습니다.");
    const wasSelected = selectedId === id;
    if (wasSelected) {
      setSelectedId(null);
    }
    loadConfigs();
  } catch {
    toast.error("삭제 실패");
  }
};
// ─── 실행 & 미리보기 ───
/**
 * Runs a crawl for the given configuration and reports the collected/saved
 * counts. While the request is in flight `executing` holds the id so the run
 * button can show a spinner. Afterwards the list is reloaded, and the log
 * panel too when the executed configuration is the selected one.
 *
 * @param id - Id of the configuration to execute.
 */
const handleExecute = async (id: string) => {
  setExecuting(id);
  try {
    const response = await apiClient.post(`/crawl/execute/${id}`);
    const outcome = response.data.data;
    toast.success(`수집 ${outcome.collected}건, 저장 ${outcome.saved}`);
    loadConfigs();
    const showingItsLogs = selectedId === id;
    if (showingItsLogs) {
      loadLogs(id);
    }
  } catch (err: any) {
    toast.error(err.response?.data?.message || "실행 실패");
  } finally {
    setExecuting(null);
  }
};
/**
 * Sends the current (unsaved) form to `/crawl/preview` and opens the preview
 * dialog with the response. Only mapping rows with both a selector and a
 * column are included. Failures surface as an error toast.
 */
const handlePreview = async () => {
  setPreviewing(true);
  try {
    const completeMappings = mappingRows.filter((m) => m.selector && m.column);
    const requestBody = {
      url: form.url,
      row_selector: form.row_selector,
      column_mappings: completeMappings,
      method: form.method,
      headers: form.headers || {},
      request_body: form.request_body,
    };
    const response = await apiClient.post("/crawl/preview", requestBody);
    setPreviewData(response.data.data);
    setPreviewOpen(true);
  } catch (err: any) {
    toast.error(err.response?.data?.message || "미리보기 실패");
  } finally {
    setPreviewing(false);
  }
};
// ─── 렌더링 ───
return (
<div className="flex h-full gap-4 p-4">
{/* 좌측: 설정 목록 */}
<div className="flex w-[340px] shrink-0 flex-col rounded-lg border bg-card">
<div className="flex items-center justify-between border-b p-3">
<h2 className="text-sm font-semibold"> </h2>
<div className="flex gap-1">
<Button variant="outline" size="sm" onClick={loadConfigs} disabled={loading}>
<RefreshCw className={`h-3.5 w-3.5 ${loading ? "animate-spin" : ""}`} />
</Button>
<Button size="sm" onClick={openAddModal}>
<Plus className="mr-1 h-3.5 w-3.5" />
</Button>
</div>
</div>
<div className="border-b p-2">
<div className="relative">
<Search className="absolute left-2 top-1/2 h-3.5 w-3.5 -translate-y-1/2 text-muted-foreground" />
<Input
value={searchText}
onChange={(e) => setSearchText(e.target.value)}
placeholder="검색..."
className="h-8 pl-8 text-xs"
/>
</div>
</div>
<div className="flex-1 overflow-auto">
{filteredConfigs.length === 0 ? (
<div className="p-4 text-center text-xs text-muted-foreground">
{loading ? "로딩 중..." : "설정이 없습니다."}
</div>
) : (
filteredConfigs.map((config) => (
<div
key={config.id}
className={`cursor-pointer border-b p-3 transition-colors hover:bg-muted/50 ${
selectedId === config.id ? "bg-muted" : ""
}`}
onClick={() => setSelectedId(config.id)}
>
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<Globe className="h-4 w-4 text-primary" />
<span className="text-sm font-medium">{config.name}</span>
</div>
<Badge variant={config.is_active === "Y" ? "default" : "secondary"} className="text-[10px]">
{config.is_active === "Y" ? "활성" : "비활성"}
</Badge>
</div>
<div className="mt-1 truncate text-[11px] text-muted-foreground">{config.url}</div>
<div className="mt-1 flex items-center gap-2 text-[10px] text-muted-foreground">
{config.cron_schedule && (
<span className="flex items-center gap-0.5">
<Clock className="h-3 w-3" /> {config.cron_schedule}
</span>
)}
{config.last_status && (
<span className="flex items-center gap-0.5">
{config.last_status === "success" ? (
<CheckCircle className="h-3 w-3 text-green-500" />
) : (
<XCircle className="h-3 w-3 text-red-500" />
)}
{config.last_status}
</span>
)}
</div>
</div>
))
)}
</div>
</div>
{/* 우측: 상세 + 로그 */}
<div className="flex flex-1 flex-col gap-4">
{selectedConfig ? (
<>
{/* 상세 정보 */}
<div className="rounded-lg border bg-card p-4">
<div className="mb-3 flex items-center justify-between">
<h3 className="text-sm font-semibold">{selectedConfig.name}</h3>
<div className="flex gap-1">
<Button
variant="outline"
size="sm"
onClick={() => handleExecute(selectedConfig.id)}
disabled={executing === selectedConfig.id}
>
{executing === selectedConfig.id ? (
<Loader2 className="mr-1 h-3.5 w-3.5 animate-spin" />
) : (
<Play className="mr-1 h-3.5 w-3.5" />
)}
</Button>
<Button variant="outline" size="sm" onClick={() => openEditModal(selectedConfig)}>
<Pencil className="mr-1 h-3.5 w-3.5" />
</Button>
<Button variant="ghost" size="sm" onClick={() => handleDelete(selectedConfig.id)}>
<Trash2 className="h-3.5 w-3.5 text-destructive" />
</Button>
</div>
</div>
<div className="grid grid-cols-2 gap-3 text-xs">
<div>
<span className="text-muted-foreground">URL</span>
<div className="mt-0.5 truncate font-mono">{selectedConfig.url}</div>
</div>
<div>
<span className="text-muted-foreground"> </span>
<div className="mt-0.5 font-mono">{selectedConfig.target_table}</div>
</div>
<div>
<span className="text-muted-foreground"> </span>
<div className="mt-0.5 font-mono">{selectedConfig.row_selector || "-"}</div>
</div>
<div>
<span className="text-muted-foreground"></span>
<div className="mt-0.5">{selectedConfig.cron_schedule || "수동 실행"}</div>
</div>
<div>
<span className="text-muted-foreground">UPSERT </span>
<div className="mt-0.5">{selectedConfig.upsert_key || "-"}</div>
</div>
<div>
<span className="text-muted-foreground"> </span>
<div className="mt-0.5">{(selectedConfig.column_mappings || []).length}</div>
</div>
</div>
{selectedConfig.last_error && (
<div className="mt-3 rounded bg-destructive/10 p-2 text-xs text-destructive">
{selectedConfig.last_error}
</div>
)}
</div>
{/* 실행 로그 */}
<div className="flex-1 rounded-lg border bg-card">
<div className="flex items-center justify-between border-b p-3">
<h3 className="text-sm font-semibold"> </h3>
<Button variant="ghost" size="sm" onClick={() => loadLogs(selectedConfig.id)} disabled={logsLoading}>
<RefreshCw className={`h-3.5 w-3.5 ${logsLoading ? "animate-spin" : ""}`} />
</Button>
</div>
<div className="max-h-[400px] overflow-auto">
{logs.length === 0 ? (
<div className="p-4 text-center text-xs text-muted-foreground"> .</div>
) : (
<table className="w-full text-xs">
<thead className="bg-muted/50">
<tr>
<th className="px-3 py-2 text-left"></th>
<th className="px-3 py-2 text-left"></th>
<th className="px-3 py-2 text-right"></th>
<th className="px-3 py-2 text-right"></th>
<th className="px-3 py-2 text-left"></th>
</tr>
</thead>
<tbody>
{logs.map((log) => (
<tr key={log.id} className="border-b hover:bg-muted/30">
<td className="px-3 py-2">
<Badge
variant={
log.status === "success" ? "default" : log.status === "running" ? "secondary" : "destructive"
}
className="text-[10px]"
>
{log.status}
</Badge>
</td>
<td className="px-3 py-2">{new Date(log.started_at).toLocaleString("ko-KR")}</td>
<td className="px-3 py-2 text-right">{log.rows_collected}</td>
<td className="px-3 py-2 text-right">{log.rows_saved}</td>
<td className="max-w-[200px] truncate px-3 py-2 text-destructive">
{log.error_message || "-"}
</td>
</tr>
))}
</tbody>
</table>
)}
</div>
</div>
</>
) : (
<div className="flex flex-1 items-center justify-center rounded-lg border bg-card text-sm text-muted-foreground">
.
</div>
)}
</div>
{/* 추가/수정 모달 */}
<Dialog open={modalOpen} onOpenChange={setModalOpen}>
<DialogContent className="max-h-[85vh] max-w-3xl overflow-auto">
<DialogHeader>
<DialogTitle>{modalMode === "add" ? "크롤링 설정 추가" : "크롤링 설정 수정"}</DialogTitle>
</DialogHeader>
<div className="space-y-4">
{/* STEP 1: 기본 정보 */}
<div className="rounded-lg border p-3 space-y-3">
<h4 className="text-xs font-semibold text-muted-foreground">1. </h4>
<div className="grid grid-cols-2 gap-3">
<div className="space-y-1">
<Label className="text-xs"> *</Label>
<Input value={form.name || ""} onChange={(e) => setForm((p) => ({ ...p, name: e.target.value }))} placeholder="예: 철강 시세 수집" className="h-8 text-xs" />
</div>
<div className="grid grid-cols-2 gap-2">
<div className="space-y-1">
<Label className="text-xs"> (cron)</Label>
<Input value={form.cron_schedule || ""} onChange={(e) => setForm((p) => ({ ...p, cron_schedule: e.target.value }))} placeholder="0 9 * * 1-5" className="h-8 text-xs font-mono" />
</div>
<div className="flex items-end gap-2 pb-0.5">
<Switch checked={form.is_active === "Y"} onCheckedChange={(v) => setForm((p) => ({ ...p, is_active: v ? "Y" : "N" }))} />
<Label className="text-xs"></Label>
</div>
</div>
</div>
</div>
{/* STEP 2: URL 입력 + 분석 */}
<div className="rounded-lg border p-3 space-y-3">
<h4 className="text-xs font-semibold text-muted-foreground">2. </h4>
<div className="flex gap-2">
<Input value={form.url || ""} onChange={(e) => setForm((p) => ({ ...p, url: e.target.value }))} placeholder="https://example.com/prices" className="h-8 flex-1 text-xs font-mono" />
<Button variant="outline" size="sm" onClick={handleAnalyze} disabled={analyzing || !form.url}>
{analyzing ? <Loader2 className="mr-1 h-3.5 w-3.5 animate-spin" /> : <Search className="mr-1 h-3.5 w-3.5" />}
</Button>
</div>
{/* 분석 결과: 감지된 테이블 목록 */}
{analyzedTables.length > 0 && (
<div className="space-y-2">
<Label className="text-xs text-muted-foreground">{analyzedTables.length} </Label>
<div className="space-y-2 max-h-[200px] overflow-auto">
{analyzedTables.map((t, idx) => (
<div
key={idx}
className={cn(
"cursor-pointer rounded border p-2 text-xs transition-colors hover:bg-muted/50",
selectedAnalyzedIdx === idx && "border-primary bg-primary/5"
)}
onClick={() => handleSelectAnalyzedTable(idx)}
>
<div className="flex items-center justify-between">
<span className="font-medium">{t.caption || `테이블 ${idx + 1}`}</span>
<Badge variant="secondary" className="text-[10px]">{t.rowCount} · {t.headers.length}</Badge>
</div>
{t.headers.length > 0 && (
<div className="mt-1 text-[10px] text-muted-foreground truncate">
: {t.headers.join(", ")}
</div>
)}
{t.sampleRows.length > 0 && (
<div className="mt-1 text-[10px] text-muted-foreground truncate">
: {t.sampleRows[0].join(" | ")}
</div>
)}
</div>
))}
</div>
</div>
)}
</div>
{/* STEP 3: 컬럼 매핑 (시각적 폼) */}
<div className="rounded-lg border p-3 space-y-3">
<div className="flex items-center justify-between">
<h4 className="text-xs font-semibold text-muted-foreground">3. </h4>
<Button variant="ghost" size="sm" className="h-6 text-[10px]" onClick={() => setMappingRows((p) => [...p, { selector: "", column: "", type: "text" }])}>
<Plus className="mr-0.5 h-3 w-3" />
</Button>
</div>
{mappingRows.length === 0 ? (
<div className="text-center text-xs text-muted-foreground py-3">
"페이지 분석" .
</div>
) : (
<div className="space-y-1">
<div className="grid grid-cols-[1fr_1fr_80px_32px] gap-1.5 text-[10px] text-muted-foreground px-1">
<span>CSS </span>
<span> </span>
<span></span>
<span></span>
</div>
{mappingRows.map((row, i) => (
<div key={i} className="grid grid-cols-[1fr_1fr_80px_32px] gap-1.5">
<Input value={row.selector} onChange={(e) => { const n = [...mappingRows]; n[i] = { ...n[i], selector: e.target.value }; setMappingRows(n); }} placeholder="td:nth-child(1)" className="h-7 text-xs font-mono" />
<Input value={row.column} onChange={(e) => { const n = [...mappingRows]; n[i] = { ...n[i], column: e.target.value }; setMappingRows(n); }} placeholder="item_name" className="h-7 text-xs" />
<Select value={row.type} onValueChange={(v) => { const n = [...mappingRows]; n[i] = { ...n[i], type: v }; setMappingRows(n); }}>
<SelectTrigger className="h-7 text-[10px]"><SelectValue /></SelectTrigger>
<SelectContent>
<SelectItem value="text"></SelectItem>
<SelectItem value="number"></SelectItem>
<SelectItem value="date"></SelectItem>
</SelectContent>
</Select>
<Button variant="ghost" size="sm" className="h-7 w-7 p-0" onClick={() => setMappingRows((p) => p.filter((_, j) => j !== i))}>
<Trash2 className="h-3 w-3 text-muted-foreground" />
</Button>
</div>
))}
</div>
)}
{form.row_selector && (
<div className="text-[10px] text-muted-foreground">
: <code className="bg-muted px-1 rounded">{form.row_selector}</code>
</div>
)}
</div>
{/* STEP 4: 저장 대상 */}
<div className="rounded-lg border p-3 space-y-3">
<h4 className="text-xs font-semibold text-muted-foreground">4. </h4>
<div className="grid grid-cols-2 gap-3">
<div className="space-y-1">
<Label className="text-xs"> *</Label>
<Popover open={tablePopoverOpen} onOpenChange={setTablePopoverOpen}>
<PopoverTrigger asChild>
<Button variant="outline" role="combobox" className="h-8 w-full justify-between text-xs font-normal">
{form.target_table ? allTables.find((t) => t.tableName === form.target_table)?.displayName || form.target_table : "테이블 선택"}
<ChevronsUpDown className="ml-1 h-3 w-3 shrink-0 opacity-50" />
</Button>
</PopoverTrigger>
<PopoverContent className="w-[280px] p-0">
<Command>
<CommandInput placeholder="테이블 검색..." className="text-xs" />
<CommandEmpty> .</CommandEmpty>
<CommandGroup className="max-h-[200px] overflow-auto">
{allTables.map((t) => (
<CommandItem key={t.tableName} value={`${t.displayName || ""} ${t.tableName}`} onSelect={() => { setForm((p) => ({ ...p, target_table: t.tableName, upsert_key: "" })); setTablePopoverOpen(false); }}>
<Check className={cn("mr-2 h-3 w-3", form.target_table === t.tableName ? "opacity-100" : "opacity-0")} />
<span className="text-xs">{t.displayName || t.tableName}</span>
{t.displayName && <span className="ml-1 text-[10px] text-muted-foreground">({t.tableName})</span>}
</CommandItem>
))}
</CommandGroup>
</Command>
</PopoverContent>
</Popover>
</div>
<div className="space-y-1">
<Label className="text-xs"> </Label>
<Select value={form.upsert_key || "__none__"} onValueChange={(v) => setForm((p) => ({ ...p, upsert_key: v === "__none__" ? "" : v }))}>
<SelectTrigger className="h-8 text-xs"><SelectValue placeholder="없음 (항상 추가)" /></SelectTrigger>
<SelectContent>
<SelectItem value="__none__"> ( )</SelectItem>
{targetColumns.map((col) => (
<SelectItem key={col.columnName} value={col.columnName}>{col.columnLabel || col.columnName}</SelectItem>
))}
</SelectContent>
</Select>
</div>
</div>
</div>
</div>
<DialogFooter className="gap-2">
<Button variant="outline" size="sm" onClick={handlePreview} disabled={previewing || !form.url || mappingRows.length === 0}>
{previewing ? <Loader2 className="mr-1 h-3.5 w-3.5 animate-spin" /> : <Eye className="mr-1 h-3.5 w-3.5" />}
</Button>
<Button size="sm" onClick={handleSave} disabled={saving}>
{saving ? <Loader2 className="mr-1 h-3.5 w-3.5 animate-spin" /> : null}
{modalMode === "add" ? "생성" : "저장"}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
{/* 미리보기 모달 */}
<Dialog open={previewOpen} onOpenChange={setPreviewOpen}>
<DialogContent className="max-h-[70vh] max-w-3xl overflow-auto">
<DialogHeader>
<DialogTitle> </DialogTitle>
</DialogHeader>
{previewData && (
<div className="space-y-3">
<div className="flex gap-4 text-xs">
<span>
: <strong>{previewData.totalElements}</strong>
</span>
<span>
HTML : <strong>{(previewData.htmlLength / 1024).toFixed(1)}KB</strong>
</span>
<span>
: <strong>{previewData.previewRows?.length || 0}</strong>
</span>
</div>
{previewData.previewRows?.length > 0 ? (
<div className="overflow-auto rounded border">
<table className="w-full text-xs">
<thead className="bg-muted/50">
<tr>
{Object.keys(previewData.previewRows[0]).map((key) => (
<th key={key} className="px-3 py-2 text-left font-medium">
{key}
</th>
))}
</tr>
</thead>
<tbody>
{previewData.previewRows.map((row: any, i: number) => (
<tr key={i} className="border-t">
{Object.values(row).map((val: any, j: number) => (
<td key={j} className="max-w-[200px] truncate px-3 py-1.5">
{val != null ? String(val) : "-"}
</td>
))}
</tr>
))}
</tbody>
</table>
</div>
) : (
<div className="p-4 text-center text-xs text-muted-foreground">
. .
</div>
)}
</div>
)}
</DialogContent>
</Dialog>
{ConfirmDialogComponent}
</div>
);
}

View File

@ -89,6 +89,7 @@ const ADMIN_PAGE_REGISTRY: Record<string, React.ComponentType<any>> = {
// 자동화 관리
"/admin/automaticMng/flowMgmtList": dynamic(() => import("@/app/(main)/admin/automaticMng/flowMgmtList/page"), { ssr: false, loading: LoadingFallback }),
"/admin/automaticMng/batchmngList": dynamic(() => import("@/app/(main)/admin/automaticMng/batchmngList/page"), { ssr: false, loading: LoadingFallback }),
"/admin/automaticMng/crawlingList": dynamic(() => import("@/app/(main)/admin/automaticMng/crawlingList/page"), { ssr: false, loading: LoadingFallback }),
// 설계 관리 (커스텀 페이지)
"/design/task-management": dynamic(() => import("@/app/(main)/design/task-management/page"), { ssr: false, loading: LoadingFallback }),