{"id":11547,"date":"2022-09-25T17:34:42","date_gmt":"2022-09-25T08:34:42","guid":{"rendered":"https:\/\/prodskill.com\/?p=11547"},"modified":"2022-10-10T20:55:55","modified_gmt":"2022-10-10T11:55:55","slug":"word-extractor-source-code-1","status":"publish","type":"post","link":"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/","title":{"rendered":"Wortextraktionstool(4): Beschreibung des Quellcodes des Wortextraktionstools(1)"},"content":{"rendered":"\n<p>Python\uc73c\ub85c \uad6c\ud604\ud55c \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc5d0 \ub300\ud574 \uc0b4\ud3b4\ubcf8\ub2e4.<\/p>\n\n\n\n<p>\uc774\uc804 \uae00\uc5d0\uc11c \uc774\uc5b4\uc9c0\ub294 \ub0b4\uc6a9\uc774\ub2e4.<\/p>\n\n\n\n<p><a href=\"https:\/\/prodskill.com\/word-extractor-run-and-check-result\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(3): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc2e4\ud589 \ubc29\ubc95\uacfc \uacb0\uacfc \ud655\uc778 \ubc29\ubc95<\/a><\/p>\n\n\n\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_82_2 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">&lt;&lt;\ubaa9\ucc28&gt;&gt;<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" 
height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#4_%EB%8B%A8%EC%96%B4_%EC%B6%94%EC%B6%9C_%EB%8F%84%EA%B5%AC_%EC%86%8C%EC%8A%A4%EC%BD%94%EB%93%9C\" >4. \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#41_%EA%B0%9C%EC%9A%94\" >4.1. \uac1c\uc694<\/a><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#411_%EC%86%8C%EC%8A%A4%EC%BD%94%EB%93%9C_%EC%9D%BC%EB%9F%AC%EB%91%90%EA%B8%B0\" >4.1.1. \uc18c\uc2a4\ucf54\ub4dc \uc77c\ub7ec\ub450\uae30<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#412%EB%8B%A8%EC%96%B4_%EC%B6%94%EC%B6%9C_%EB%8F%84%EA%B5%AC_%ED%95%A8%EC%88%98_%ED%98%B8%EC%B6%9C_%EA%B4%80%EA%B3%84\" >4.1.2.\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \ud568\uc218 \ud638\ucd9c \uad00\uacc4<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#42_main_%ED%95%A8%EC%88%98\" >4.2. 
main \ud568\uc218<\/a><ul class='ez-toc-list-level-4' ><li class='ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#421_argument_parsing\" >4.2.1. argument parsing<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#422_%EC%B2%98%EB%A6%AC%ED%95%A0_%ED%8C%8C%EC%9D%BC_%EB%AA%A9%EB%A1%9D_%EC%B6%94%EC%B6%9C\" >4.2.2. \ucc98\ub9ac\ud560 \ud30c\uc77c \ubaa9\ub85d \ucd94\ucd9c<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#423_Multi_processing%EC%9C%BC%EB%A1%9C_get_file_text_%EC%8B%A4%ED%96%89\" >4.2.3. Multi processing\uc73c\ub85c get_file_text \uc2e4\ud589<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#424_Multi_processing%EC%9C%BC%EB%A1%9C_get_word_list_%EC%8B%A4%ED%96%89\" >4.2.4. Multi processing\uc73c\ub85c get_word_list \uc2e4\ud589<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#425_%EB%8B%A8%EC%96%B4_%EB%B9%88%EB%8F%84%EB%A5%BC_%EA%B5%AC%ED%95%98%EA%B3%A0_make_word_cloud_%EC%8B%A4%ED%96%89\" >4.2.5. 
\ub2e8\uc5b4 \ube48\ub3c4\ub97c \uad6c\ud558\uace0 make_word_cloud \uc2e4\ud589<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-4'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/#426_%EC%B6%94%EC%B6%9C%EB%90%9C_%EB%8B%A8%EC%96%B4_%EB%AA%A9%EB%A1%9D%EA%B3%BC_%EB%8B%A8%EC%96%B4_%EB%B9%88%EB%8F%84_%EC%97%91%EC%85%80_%ED%8C%8C%EC%9D%BC%EB%A1%9C_%EC%A0%80%EC%9E%A5%ED%95%98%EA%B3%A0_%EC%8B%A4%ED%96%89%EC%8B%9C%EA%B0%84_%EC%B6%9C%EB%A0%A5_%EC%A2%85%EB%A3%8C\" >4.2.6. \ucd94\ucd9c\ub41c \ub2e8\uc5b4 \ubaa9\ub85d\uacfc \ub2e8\uc5b4 \ube48\ub3c4 \uc5d1\uc140 \ud30c\uc77c\ub85c \uc800\uc7a5\ud558\uace0 \uc2e4\ud589\uc2dc\uac04 \ucd9c\ub825, \uc885\ub8cc<\/a><\/li><\/ul><\/li><\/ul><\/li><\/ul><\/nav><\/div>\n<h2 class=\"wp-block-heading\" id=\"4._\ub2e8\uc5b4_\ucd94\ucd9c_\ub3c4\uad6c_\uc18c\uc2a4\ucf54\ub4dc\"><span class=\"ez-toc-section\" id=\"4_%EB%8B%A8%EC%96%B4_%EC%B6%94%EC%B6%9C_%EB%8F%84%EA%B5%AC_%EC%86%8C%EC%8A%A4%EC%BD%94%EB%93%9C\"><\/span>4. \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"4.1._\uac1c\uc694\"><span class=\"ez-toc-section\" id=\"41_%EA%B0%9C%EC%9A%94\"><\/span>4.1. \uac1c\uc694<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.1.1._\uc18c\uc2a4\ucf54\ub4dc_\uc77c\ub7ec\ub450\uae30\"><span class=\"ez-toc-section\" id=\"411_%EC%86%8C%EC%8A%A4%EC%BD%94%EB%93%9C_%EC%9D%BC%EB%9F%AC%EB%91%90%EA%B8%B0\"><\/span>4.1.1. \uc18c\uc2a4\ucf54\ub4dc \uc77c\ub7ec\ub450\uae30<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<p>\uc774 \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c\uc758 \uc18c\uc2a4\ucf54\ub4dc\ub294 \ub0b4\uac00 Python\uc73c\ub85c \ub9cc\ub4e0 \uc4f8\ub9cc\ud55c \ub3c4\uad6c\uc911\uc5d0 \uac70\uc758 \uccab\ubc88\uc9f8\ub85c \uc791\uc131\ud55c \ucf54\ub4dc\uc774\ub2e4. 
\uc544\uc9c1 \uc190\uc5d0 \uc775\uc9c0 \uc54a\uc558\uc744 \ub54c \ud544\uc694\ud55c \uae30\ub2a5\uc744 \uad6c\ud604\ud558\ub294\ub370\uc5d0\ub9cc \uc911\uc810\uc744 \ub450\ub2e4 \ubcf4\ub2c8 Python\uc758 \uc7a5\uc810\uc778 \uac04\uacb0\ud568\uacfc\ub294 \uac70\ub9ac\uac00 \uba40\ub2e4. Python \uc2a4\ud0c0\uc77c\uc774\ub77c\uae30 \ubcf4\ub2e4\ub294 C \uc2a4\ud0c0\uc77c\uc5d0 \uac00\uae5d\ub2e4.<\/p>\n\n\n\n<p>\ud14d\uc2a4\ud2b8 \ucd94\ucd9c \uacb0\uacfc, \ub2e8\uc5b4 \ucd94\ucd9c \uacb0\uacfc\ub97c \ubcc4\ub3c4\uc758 class\ub85c \uc791\uc131\ud560\uae4c \ud558\ub2e4\uac00, \uc2dc\ud5d8\uc0bc\uc544 pandas\uc758 DataFrame\uc744 \uc0ac\uc6a9\ud574 \ubd24\ub294\ub370 \uc0dd\uac01\ubcf4\ub2e4 \uc798 \ub3d9\uc791\ud574\uc11c \uadf8\ub0e5 DataFrame\uc744 \uc0ac\uc6a9\ud588\ub2e4. \ub364\uc73c\ub85c DataFrame\uc5d0\uc11c \uc81c\uacf5\ud558\ub294 groupby, to_excel \ud568\uc218\ub97c \uc0ac\uc6a9\ud558\uc5ec \uad6c\ud604\ud558\ub294\ub370 \uc2dc\uac04\uc744 \ub9ce\uc774 \uc904\uc600\ub2e4.<\/p>\n\n\n\n<p>&#8220;<a href=\"https:\/\/prodskill.com\/ko\/word-extractor-config-runtime-environment\/#212_%ED%98%95%ED%83%9C%EC%86%8C_%EB%B6%84%EC%84%9D%EA%B8%B0_%EC%84%A0%ED%83%9D_Mecab\">2.1.2. \ud615\ud0dc\uc18c \ubd84\uc11d\uae30 \uc120\ud0dd: Mecab<\/a>&#8220;\uc5d0\uc11c \uc5b8\uae09\ud588\ub4ef\uc774, \ub2e8\uc5b4 \ucd94\ucd9c\uc5d0 \uc790\uc5f0\uc5b4 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30 Mecab\uc744 \uc0ac\uc6a9\ud588\ub2e4. 
\ub2e4\ub978 \ud615\ud0dc\uc18c \ubd84\uc11d\uae30\ub97c \uc0ac\uc6a9\ud558\ub824\uba74 get_word_list \ud568\uc218\ub97c \uace0\uccd0 \uc0ac\uc6a9\ud558\uae30 \ubc14\ub780\ub2e4.<\/p>\n\n\n\n<p>\ubcf8\ubb38\uc5d0 \uc0bd\uc785\ud55c \ucf54\ub4dc\uc758 \ud589 \ubc88\ud638\ub294 github\uc5d0 \uc5c5\ub85c\ub4dc\ud55c \uc18c\uc2a4\ucf54\ub4dc\uc758 \ud589\ubc88\ud638\uc640 \uac19\uac8c \uc124\uc815\ud558\uc600\uace0, \uc8fc\uc11d\ub3c4 \uac00\uae09\uc801 \uc81c\uc678\ud558\uc9c0 \uc54a\uace0 \ubaa8\ub450 \ud3ec\ud568\uc2dc\ucf30\ub2e4.<\/p>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.1.2.\ub2e8\uc5b4_\ucd94\ucd9c_\ub3c4\uad6c_\ud568\uc218_\ud638\ucd9c_\uad00\uacc4\"><span class=\"ez-toc-section\" id=\"412%EB%8B%A8%EC%96%B4_%EC%B6%94%EC%B6%9C_%EB%8F%84%EA%B5%AC_%ED%95%A8%EC%88%98_%ED%98%B8%EC%B6%9C_%EA%B4%80%EA%B3%84\"><\/span>4.1.2.\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \ud568\uc218 \ud638\ucd9c \uad00\uacc4<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n<div class=\"wp-block-image\">\n<figure class=\"aligncenter size-full\"><a href=\"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png\"><img loading=\"lazy\" decoding=\"async\" width=\"386\" height=\"141\" src=\"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png\" alt=\"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \ud568\uc218 \ud638\ucd9c \uad00\uacc4\" class=\"wp-image-11548\" srcset=\"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png 386w, https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150-300x110.png 300w, https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150-18x7.png 18w, https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150-24x9.png 24w, https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150-36x13.png 36w, https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150-48x18.png 48w\" sizes=\"auto, (max-width: 386px) 100vw, 386px\" \/><\/a><figcaption>\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \ud568\uc218 
\ud638\ucd9c \uad00\uacc4<\/figcaption><\/figure>\n<\/div>\n\n\n<p>\ud568\uc218 \uc804\ubc18\uc801\uc778 \ud638\ucd9c \uad00\uacc4\ub294 \uc704 \ub3c4\uc2dd\uacfc \uc544\ub798 \ub0b4\uc6a9\uacfc \uac19\uc774 \uc694\uc57d\ud560 \uc218 \uc788\ub2e4.<\/p>\n\n\n\n<ul class=\"wp-block-list\"><li>main \ud568\uc218\uc5d0\uc11c get_file_text \ud568\uc218\ub97c \ud638\ucd9c\ud574\uc11c \uac01 \ud30c\uc77c\ub85c\ubd80\ud130 \ud589\ub2e8\uc704, \ubb38\ub2e8(paragraph) \ub2e8\uc704\uc758 \ud14d\uc2a4\ud2b8\ub97c \ucd94\ucd9c\ud55c\ub2e4.<\/li><li>get_file_text \ud568\uc218 \ub0b4\uc5d0\uc11c \ud30c\uc77c \ud655\uc7a5\uc790\uc5d0 \ub530\ub77c get_doc_text, get_ppt_text, get_txt_text, get_db_comment_text \ud568\uc218\ub97c \ud638\ucd9c\ud55c\ub2e4.<\/li><li>get_hwp_text, get_pdf_text \ud568\uc218\ub294 \uc544\uc9c1 \uad6c\ud604\ud558\uc9c0 \uc54a\uc558\uace0 \ub098\uc911\uc5d0 \ud544\uc694\ud55c \uc2dc\uc810\uc5d0 \uad6c\ud604\ud560 \uc608\uc815\uc774\ub2e4. (\ud639\uc2dc \uad6c\ud604\ud55c \uacbd\ud5d8\uc774 \uc788\uac70\ub098 \uad6c\ud604\ud55c \ucf54\ub4dc\ub97c \uc54c\uace0 \uc788\ub2e4\uba74 \ub313\uae00\ub85c \ub0a8\uaca8\uc8fc\uae30 \ubc14\ub780\ub2e4.)<\/li><li>get_file_text \ud568\uc218 \uc2e4\ud589\uacb0\uacfc\ub97c get_word_list \ud568\uc218\uc5d0 \uc804\ub2ec\ud558\uc5ec \ub2e8\uc5b4 \ud6c4\ubcf4\uad70\uc744 \ucd94\ucd9c\ud55c\ub2e4.<\/li><li>get_file_text \ud568\uc218\uc640 get_word_list \ud568\uc218\ub294 multiprocessing\uc73c\ub85c \ucc98\ub9ac\ud55c\ub2e4.<\/li><li>make_word_cloud \ud568\uc218\ub97c \ud638\ucd9c\ud558\uc5ec word cloud \uc774\ubbf8\uc9c0\ub97c \uc0dd\uc131\ud55c\ub2e4.<\/li><\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"4.2._main_\ud568\uc218\"><span class=\"ez-toc-section\" id=\"42_main_%ED%95%A8%EC%88%98\"><\/span>4.2. main \ud568\uc218<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.2.1._argument_parsing\"><span class=\"ez-toc-section\" id=\"421_argument_parsing\"><\/span>4.2.1. 
argument parsing<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"369\" data-enlighter-title=\"\" data-enlighter-group=\"\">def main():\n    \"\"\"\n    \uc9c0\uc815\ud55c \uacbd\ub85c \ud558\uc704 \ud3f4\ub354\uc758 File\ub4e4\uc5d0\uc11c Text\ub97c \ucd94\ucd9c\ud558\uace0 \uac01 Text\uc758 \uba85\uc0ac\ub97c \ucd94\ucd9c\ud558\uc5ec \uc5d1\uc140\ud30c\uc77c\ub85c \uc800\uc7a5\n    :return: \uc5c6\uc74c\n    \"\"\"\n\n    # region Args Parse &amp; Usage set-up -------------------------------------------------------------\n    # parser = argparse.ArgumentParser(usage='usage test', description='description test')\n    usage_description = \"\"\"--- Description ---\n  * db_comment_file\uacfc in_path\uc911 \ud558\ub098\ub294 \ud544\uc218\ub85c \uc785\ub825\n\n  * \uc2e4\ud589 \uc608\uc2dc\n    1. File\uc5d0\uc11c text, \ub2e8\uc5b4 \ucd94\ucd9c: in_path, out_path \uc9c0\uc815\n       python word_extractor.py --multi_process_count 4 --in_path .\\\\test_files --out_path .\\out\n\n    2. DB comment\uc5d0\uc11c text, \ub2e8\uc5b4 \ucd94\ucd9c: db_comment_file, out_path \uc9c0\uc815\n       python word_extractor.py --db_comment_file \"table,column comments.xlsx\" --out_path .\\out\n\n    3. 
File, DB comment \uc5d0\uc11c text, \ub2e8\uc5b4 \ucd94\ucd9c: db_comment_file, in_path, out_path \uc9c0\uc815\n       python word_extractor.py --db_comment_file \"table,column comments.xlsx\" --in_path .\\\\test_files --out_path .\\out\n\n  * DB Table, Column comment \ud30c\uc77c \ud615\uc2dd\n    - \uccab\ubc88\uc9f8 sheet(Table comment): DBName, SchemaName, Tablename, TableComment\n    - \ub450\ubc88\uc9f8 sheet(Column comment): DBName, SchemaName, Tablename, ColumnName, ColumnComment\"\"\"\n\n    # ToDo: \uc635\uc158\ucd94\uac00: \ubcf5\ud569\uc5b4 \ucd94\ucd9c\ud560\uc9c0 \uc5ec\ubd80, \uc601\ubb38\uc790 \ucd94\ucd9c\ud560\uc9c0 \uc5ec\ubd80, \uc601\ubb38\uc790 \uae38\uc774 1\uc790\ub9ac \uc81c\uc678\uc5ec\ubd80, ...\n    parser = argparse.ArgumentParser(description=usage_description, formatter_class=argparse.RawTextHelpFormatter)\n    # name argument \ucd94\uac00\n    parser.add_argument('--multi_process_count', required=False, type=int,\n                        help='text \ucd94\ucd9c, \ub2e8\uc5b4 \ucd94\ucd9c\uc744 \ub3d9\uc2dc\uc5d0 \uc2e4\ud589\ud560 multi process \uac1c\uc218(\uc9c0\uc815\ud558\uc9c0 \uc54a\uc73c\uba74 (logical)cpu \uac1c\uc218\ub85c \uc124\uc815\ub428)')\n    parser.add_argument('--db_comment_file', required=False,\n                        help='DB Table, Column comment \uc815\ubcf4 \ud30c\uc77c\uba85(\uc608: comment.xlsx)')\n    parser.add_argument('--in_path', required=False, help='\uc785\ub825\ud30c\uc77c(ppt, doc, txt) \uacbd\ub85c\uba85(\uc608: .\\in) ')\n    parser.add_argument('--out_path', required=True, help='\ucd9c\ub825\ud30c\uc77c(xlsx, png) \uacbd\ub85c\uba85(\uc608: .\\out)')\n\n    args = parser.parse_args()\n\n    if args.multi_process_count:\n        multi_process_count = int(args.multi_process_count)\n    else:\n        multi_process_count = multiprocessing.cpu_count()\n\n    db_comment_file = args.db_comment_file\n    if db_comment_file is not None and not os.path.isfile(db_comment_file):\n        
print('db_comment_file not found: %s' % db_comment_file)\n        exit(-1)\n\n    in_path = args.in_path\n    out_path = args.out_path\n    print('------------------------------------------------------------')\n    print('Word Extractor v%s start --- %s' % (_version_, get_current_datetime()))\n    print('##### arguments #####')\n    print('multi_process_count: %d' % multi_process_count)\n    print('db_comment_file: %s' % db_comment_file)\n    print('in_path: %s' % in_path)\n    print('out_path: %s' % out_path)\n    print('------------------------------------------------------------')<\/pre>\n\n\n\n<ul class=\"wp-block-list\"><li>395\ud589: argparse package\uc758 ArgumentParser \uac1d\uccb4\ub97c \uc0dd\uc131\ud55c\ub2e4.<\/li><li>397~404\ud589: \ud544\uc694\ud55c argument\ub97c \ucd94\uac00\ud558\uace0 \uc2e4\ud589\uc2dc \uc9c0\uc815\ud55c argument\ub97c parsing\ud55c\ub2e4.<\/li><li>406~425\ud589: argument\ub97c \ub0b4\ubd80 \ubcc0\uc218\ub85c \uc124\uc815\ud558\uace0, \uc124\uc815\ub41c \uac12\uc744 \ucd9c\ub825\ud55c\ub2e4.<\/li><\/ul>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.2.2._\ucc98\ub9ac\ud560_\ud30c\uc77c_\ubaa9\ub85d_\ucd94\ucd9c\"><span class=\"ez-toc-section\" id=\"422_%EC%B2%98%EB%A6%AC%ED%95%A0_%ED%8C%8C%EC%9D%BC_%EB%AA%A9%EB%A1%9D_%EC%B6%94%EC%B6%9C\"><\/span>4.2.2. \ucc98\ub9ac\ud560 \ud30c\uc77c \ubaa9\ub85d \ucd94\ucd9c<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"436,437-444,451-452\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"432\" data-enlighter-title=\"\" data-enlighter-group=\"\">    file_list = []\n    if in_path is not None and in_path.strip() != '':\n        print('[%s] Start Get File List...' 
% get_current_datetime())\n        in_abspath = os.path.abspath(in_path)  # os.path.abspath('.') + '\\\\test_files'\n        file_types = ('.ppt', '.pptx', '.doc', '.docx', '.txt')\n        for root, dir, files in os.walk(in_abspath):\n            for file in sorted(files):\n                # \uc81c\uc678\ud560 \ud30c\uc77c\n                if file.startswith('~'):\n                    continue\n                # \ud3ec\ud568\ud560 \ud30c\uc77c\n                if file.endswith(file_types):\n                    file_list.append(root + '\\\\' + file)\n\n        print('[%s] Finish Get File List.' % get_current_datetime())\n        print('--- File List ---')\n        print('\\n'.join(file_list))\n\n\n    if db_comment_file is not None:\n        file_list.append(db_comment_file)<\/pre>\n\n\n\n<ul class=\"wp-block-list\"><li>436\ud589: \ucc98\ub9ac \ub300\uc0c1 \ud30c\uc77c\uc5d0 \ud574\ub2f9\ud558\ub294 \ud30c\uc77c \ud655\uc7a5\uc790 \ubaa9\ub85d\uc744 \uc815\uc758\ud55c\ub2e4.<\/li><li>437~444\ud589: \uc2e4\ud589\uc2dc \uc9c0\uc815\ud55c argument\uc911 in_path \ud558\uc704\uc758 \ud3f4\ub354 \uc804\uccb4\ub97c \uc7ac\uadc0 \ud0d0\uc0c9\ud558\uba74\uc11c \uac01 \ud30c\uc77c\uc774 \ub300\uc0c1 \ud30c\uc77c\uc778\uc9c0 \ud310\ub2e8\ud558\uace0 \ub300\uc0c1 \ud30c\uc77c\uc774\uba74 file_list\uc5d0 \ucd94\uac00\ud55c\ub2e4.<\/li><li>451~452\ud589: \uc2e4\ud589\uc2dc \uc9c0\uc815\ud55c argument\uc911 db_comment_file\uc774 \uc788\uc73c\uba74 file_list\uc5d0 \ucd94\uac00\ud55c\ub2e4.<\/li><\/ul>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.2.3._Multi_processing\uc73c\ub85c_get_file_text_\uc2e4\ud589\"><span class=\"ez-toc-section\" id=\"423_Multi_processing%EC%9C%BC%EB%A1%9C_get_file_text_%EC%8B%A4%ED%96%89\"><\/span>4.2.3. 
Multi processing\uc73c\ub85c get_file_text \uc2e4\ud589<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"455-456\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"454\" data-enlighter-title=\"\" data-enlighter-group=\"\">    print('[%s] Start Get File Text...' % get_current_datetime())\n    with multiprocessing.Pool(processes=multi_process_count) as pool:\n        mp_text_result = pool.map(get_file_text, file_list)\n    df_text = pd.concat(mp_text_result, ignore_index=True)\n    print('[%s] Finish Get File Text.' % get_current_datetime())\n    # \uc5ec\uae30\uae4c\uc9c0 text \ucd94\ucd9c\uc644\ub8cc. \uc544\ub798\uc5d0 \ub2e8\uc5b4 \ucd94\ucd9c \uc2dc\uc791<\/pre>\n\n\n\n<ul class=\"wp-block-list\"><li>455~456\ud589: \uc2e4\ud589\uc2dc \uc9c0\uc815\ud55c multi_process_count \ub9cc\ud07c process\ub97c \uc2e4\ud589\ud558\uc5ec \uac01 process\uc5d0\uc11c file_list\ub97c \uc785\ub825\uc73c\ub85c get_file_text \ud568\uc218\ub97c \uc2e4\ud589\ud558\uace0 \uadf8 \uacb0\uacfc\ub97c mp_text_result\uc5d0 \ub2f4\ub294\ub2e4.<\/li><li>457\ud589: DataFrame\uc758 list \ud615\ud0dc\uc778 mp_text_result\uc758 \uac01 list item\uc744 \ud569\uccd0\uc11c(concat) \ud558\ub098\uc758 DataFrame\uc778 df_text\ub85c \ub9cc\ub4e0\ub2e4.<\/li><\/ul>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.2.4._Multi_processing\uc73c\ub85c_get_word_list_\uc2e4\ud589\"><span class=\"ez-toc-section\" id=\"424_Multi_processing%EC%9C%BC%EB%A1%9C_get_word_list_%EC%8B%A4%ED%96%89\"><\/span>4.2.4. 
Multi processing\uc73c\ub85c get_word_list \uc2e4\ud589<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"465-466\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"461\" data-enlighter-title=\"\" data-enlighter-group=\"\">    # ---------- \ubcd1\ub82c \uc2e4\ud589 ----------\n    print('[%s] Start Get Word from File Text...' % get_current_datetime())\n    df_text_split = np.array_split(df_text, multi_process_count)\n    # mp_result = []\n    with multiprocessing.Pool(processes=multi_process_count) as pool:\n        mp_result = pool.map(get_word_list, df_text_split)\n\n    df_result = pd.concat(mp_result, ignore_index=True)\n    if 'DB' not in df_result.columns:\n        df_result['DB'] = ''\n        df_result['Schema'] = ''\n        df_result['Table'] = ''\n        df_result['Column'] = ''\n\n    print('[%s] Finish Get Word from File Text.' % get_current_datetime())\n    # ------------------------------<\/pre>\n\n\n\n<ul class=\"wp-block-list\"><li>463\ud589: df_text\uc758 \ud589\uc744 multi_process_count\ub85c \ubd84\ud560\ud558\uc5ec \uac01 \ubd84\ud560\ub41c DataFrame\uc744 df_text_split(list type)\uc5d0 \ub2f4\ub294\ub2e4.<ul><li>\uc608\ub97c \ub4e4\uc5b4, df_text\uc5d0 1000\uac1c\uc758 \ud589\uc774 \uc788\uace0 multi_process_count\uac00 4\uc778 \uacbd\uc6b0\ub77c\uba74, \uac01\uac01 250\uac1c \ud589\uc744 \uac00\uc9c4 4\uac1c\uc758 DataFrame\uc774 \ub9cc\ub4e4\uc5b4\uc9c0\uace0 \uc774 4\uac1c\uc758 DataFrame\uc744 item\uc73c\ub85c \uac00\uc9c0\ub294 df_text_split \ubcc0\uc218\uac00 \ub9cc\ub4e4\uc5b4\uc9c4\ub2e4.<\/li><\/ul><\/li><li>465~466\ud589: \uc2e4\ud589\uc2dc \uc9c0\uc815\ud55c multi_process_count \ub9cc\ud07c process\ub97c \uc2e4\ud589\ud558\uc5ec \uac01 process\uc5d0\uc11c df_text_split\uc744 \uc785\ub825\uc73c\ub85c get_word_list \ud568\uc218\ub97c \uc2e4\ud589\ud558\uace0 \uadf8 \uacb0\uacfc\ub97c mp_result\uc5d0 
\ub2f4\ub294\ub2e4.<\/li><li>468\ud589: DataFrame\uc758 list \ud615\ud0dc\uc778 mp_result\uc758 \uac01 list item\uc744 \ud569\uccd0\uc11c(concat) \ud558\ub098\uc758 DataFrame\uc778 df_result\ub85c \ub9cc\ub4e0\ub2e4.<\/li><li>469~473\ud589: df_result.columns\uc5d0 &#8216;DB&#8217;\uac00 \uc5c6\ub294 \uacbd\uc6b0, \ub2e4\uc2dc \ub9d0\ud558\uc5ec db_comment_file \uc774 \uc9c0\uc815\ub418\uc9c0 \uc54a\uc740 \uacbd\uc6b0 \ud6c4\uc18d \ucc98\ub9ac \ub85c\uc9c1\uc744 \ub2e8\uc21c\ud654\ud558\uace0 \uc624\ub958\ub97c \ubc29\uc9c0\ud558\uae30 \uc704\ud558\uc5ec &#8216;DB&#8217;, &#8216;Schema&#8217;, &#8216;Table&#8217;, &#8216;Column&#8217;\uc758 \uc774\ub984\uc744 \uac00\uc9c4 \uc5f4(column)\uc744 \ube48 \uac12\uc73c\ub85c \ucd94\uac00\ud55c\ub2e4.<\/li><\/ul>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.2.5._\ub2e8\uc5b4_\ube48\ub3c4\ub97c_\uad6c\ud558\uace0_make_word_cloud_\uc2e4\ud589\"><span class=\"ez-toc-section\" id=\"425_%EB%8B%A8%EC%96%B4_%EB%B9%88%EB%8F%84%EB%A5%BC_%EA%B5%AC%ED%95%98%EA%B3%A0_make_word_cloud_%EC%8B%A4%ED%96%89\"><\/span>4.2.5. \ub2e8\uc5b4 \ube48\ub3c4\ub97c \uad6c\ud558\uace0 make_word_cloud \uc2e4\ud589<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\" data-enlighter-theme=\"\" data-enlighter-highlight=\"482\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"478\" data-enlighter-title=\"\" data-enlighter-group=\"\">    print('[%s] Start Get Word Frequency...' 
% get_current_datetime())\n    # df_group = pd.DataFrame(df_result.groupby(by='Word').size().sort_values(ascending=False))\n    df_result_subset = df_result[['Word', 'Source']]  # \ube48\ub3c4\uc218\ub97c \uad6c\ud558\uae30 \uc704\ud574 \ud544\uc694\ud55c column\ub9cc \ucd94\ucd9c\n    # df_group = df_result_subset.groupby(by='Word').agg(['count', lambda x: list(x)])\n    df_group = df_result_subset.groupby(by='Word').agg(['count', lambda x: '\\n'.join(list(x)[:10])])\n    df_group.index.name = 'Word'  # index\uba85 \uc7ac\uc9c0\uc815\n    df_group.columns = ['Freq', 'Source']  # column\uba85 \uc7ac\uc9c0\uc815\n    df_group = df_group.sort_values(by='Freq', ascending=False)\n    print('[%s] Finish Get Word Frequency.' % get_current_datetime())\n    # df_group['Len'] = df_group['Word'].str.len()\n    # df_group['Len'] = df_group['Word'].apply(lambda x: len(x))\n    print('[%s] Start Make Word Cloud...' % get_current_datetime())\n    now_dt = datetime.datetime.now().strftime(\"%Y%m%d%H%M%S\")\n    make_word_cloud(df_group, now_dt, out_path)\n    print('[%s] Finish Make Word Cloud.' 
% get_current_datetime())<\/pre>\n\n\n\n<ul class=\"wp-block-list\"><li>480\ud589: df_result\uc5d0\uc11c &#8216;Word&#8217;, &#8216;Source&#8217; \uceec\ub7fc\ub9cc \uace8\ub77c df_result_subset DataFrame\uc744 \ub9cc\ub4e0\ub2e4.<\/li><li>482\ud589: df_result_subset\uc5d0 &#8216;Word&#8217; \uceec\ub7fc\uc73c\ub85c grouping\ud558\uc5ec count\ub97c \uad6c\ud558\uace0, &#8216;Source&#8217;\uc911 \ucc98\uc74c 10\uac1c\uc758 \uac12\uc744 \ucd94\ucd9c\ud558\uc5ec \ud589\ubd84\ub9ac \uae30\ud638\ub85c \uc5f0\uacb0\ud558\uc5ec df_group DataFrame\uc744 \ub9cc\ub4e0\ub2e4.<\/li><li>483~484\ud589: df_group DataFrame\uc758 index\uba85\uc744 &#8216;Word&#8217;\ub85c, column\uba85\uc744 \uac01\uac01 &#8216;Freq&#8217;, &#8216;Source&#8217;\ub85c \uc9c0\uc815\ud55c\ub2e4.<\/li><li>485\ud589: df_group\uc744 &#8216;Freq&#8217;(\ub2e8\uc5b4 \ube48\ub3c4)\ub85c \uc5ed\uc21c\uc815\ub82c\ud55c\ub2e4.<\/li><li>491\ud589: df_group\uc744 make_word_cloud \ud568\uc218\uc5d0 \uc804\ub2ec\ud558\uc5ec word cloud \uc774\ubbf8\uc9c0\ub97c \uc0dd\uc131\ud558\uace0 \uc800\uc7a5\ud55c\ub2e4.<\/li><\/ul>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"4.2.6._\ucd94\ucd9c\ub41c_\ub2e8\uc5b4_\ubaa9\ub85d\uacfc_\ub2e8\uc5b4_\ube48\ub3c4_\uc5d1\uc140_\ud30c\uc77c\ub85c_\uc800\uc7a5\ud558\uace0_\uc2e4\ud589\uc2dc\uac04_\ucd9c\ub825,_\uc885\ub8cc\"><span class=\"ez-toc-section\" id=\"426_%EC%B6%94%EC%B6%9C%EB%90%9C_%EB%8B%A8%EC%96%B4_%EB%AA%A9%EB%A1%9D%EA%B3%BC_%EB%8B%A8%EC%96%B4_%EB%B9%88%EB%8F%84_%EC%97%91%EC%85%80_%ED%8C%8C%EC%9D%BC%EB%A1%9C_%EC%A0%80%EC%9E%A5%ED%95%98%EA%B3%A0_%EC%8B%A4%ED%96%89%EC%8B%9C%EA%B0%84_%EC%B6%9C%EB%A0%A5_%EC%A2%85%EB%A3%8C\"><\/span>4.2.6. 
\ucd94\ucd9c\ub41c \ub2e8\uc5b4 \ubaa9\ub85d\uacfc \ub2e8\uc5b4 \ube48\ub3c4 \uc5d1\uc140 \ud30c\uc77c\ub85c \uc800\uc7a5\ud558\uace0 \uc2e4\ud589\uc2dc\uac04 \ucd9c\ub825, \uc885\ub8cc<span class=\"ez-toc-section-end\"><\/span><\/h4>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"504-517\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"494\" data-enlighter-title=\"\" data-enlighter-group=\"\">    print('[%s] Start Save the Extract result to Excel File...' % get_current_datetime())\n    df_result.index += 1\n    excel_style = {\n        'font-size': '10pt'\n    }\n    df_result = df_result.style.set_properties(**excel_style)\n    df_group = df_group.style.set_properties(**excel_style)\n    out_file_name = '%s\\\\extract_result_%s.xlsx' % (out_path, now_dt)  # 'out\\\\extract_result_%s.xlsx' % now_dt\n\n    print('start writing excel file...')\n    with pd.ExcelWriter(path=out_file_name, engine='xlsxwriter') as writer:\n        df_result.to_excel(writer,\n                           header=True,\n                           sheet_name='\ub2e8\uc5b4\ucd94\ucd9c\uacb0\uacfc',\n                           index=True,\n                           index_label='No',\n                           freeze_panes=(1, 0),\n                           columns=['Word', 'FileName', 'FileType', 'Page', 'Text', 'DB', 'Schema', 'Table', 'Column'])\n        df_group.to_excel(writer,\n                          header=True,\n                          sheet_name='\ub2e8\uc5b4\ube48\ub3c4',\n                          index=True,\n                          index_label='\ub2e8\uc5b4',\n                          freeze_panes=(1, 0))\n        workbook = writer.book\n        worksheet = writer.sheets['\ub2e8\uc5b4\ube48\ub3c4']\n        wrap_format = workbook.add_format({'text_wrap': True})\n        worksheet.set_column(\"C:C\", None, wrap_format)\n\n    # print('finished writing excel file')\n    print('[%s] 
Finish Save the Extract result to Excel File...' % get_current_datetime())\n\n    end_time = time.time()\n    # elapsed_time = end_time - start_time\n    elapsed_time = str(datetime.timedelta(seconds=end_time - start_time))\n    print('------------------------------------------------------------')\n    print('[%s] Finished.' % get_current_datetime())\n    print('overall elapsed time: %s' % elapsed_time)\n    print('------------------------------------------------------------')\n<\/pre>\n\n\n\n<ul class=\"wp-block-list\"><li>495~501\ud589: \uc5d1\uc140 \uae00\uaf34 \ud06c\uae30\ub97c 10 point\ub85c \uc9c0\uc815\ud558\uace0, \uc800\uc7a5\ud560 \uc5d1\uc140 \ud30c\uc77c\uc758 \uacbd\ub85c\uc640 \ud30c\uc77c\uba85\uc744 \uc124\uc815\ud55c\ub2e4.<\/li><li>504~521\ud589: pandas ExcelWriter\ub97c \uc774\uc6a9\ud558\uc5ec df_result, df_group DataFrame\uc744 \uc5d1\uc140\ud30c\uc77c\ub85c \uc800\uc7a5\ud55c\ub2e4.<\/li><li>526~532\ud589: \uc2e4\ud589\uc5d0 \uac78\ub9b0 \uc2dc\uac04\uc744 \uacc4\uc0b0\ud558\uc5ec \ucd9c\ub825\ud558\uace0 \uc885\ub8cc\ud55c\ub2e4.<\/li><\/ul>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity is-style-dots\"\/>\n\n\n\n<p>\ub0b4\uc6a9\uc774 \uae38\uc5b4\uc838\uc11c \uae00\uc744 \ub450 \uac1c\ub85c \ub098\ub204\uc5b4 \uc62c\ub9b0\ub2e4.\u00a0\ub2e4\uc74c \uae00\uc5d0 \uacc4\uc18d\ub41c\ub2e4.<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p>&lt;&lt;&nbsp;<strong>\uad00\ub828 \uae00 \ubaa9\ub85d<\/strong>&nbsp;&gt;&gt;<\/p>\n\n\n\n<ul class=\"wp-block-list\"><li><a href=\"https:\/\/prodskill.com\/word-extractor-overview\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(1): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uac1c\uc694<\/a><\/li><li><a href=\"https:\/\/prodskill.com\/word-extractor-config-runtime-environment\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(2): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc2e4\ud589\ud658\uacbd \uad6c\uc131<\/a><\/li><li><a 
href=\"https:\/\/prodskill.com\/word-extractor-run-and-check-result\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(3): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc2e4\ud589 \ubc29\ubc95\uacfc \uacb0\uacfc \ud655\uc778 \ubc29\ubc95<\/a><\/li><li><a href=\"https:\/\/prodskill.com\/word-extractor-source-code-1\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1)<\/a><\/li><li><a href=\"https:\/\/prodskill.com\/word-extractor-source-code-2\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(5): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(2)<\/a><\/li><li><a href=\"https:\/\/prodskill.com\/word-extractor-additional-information\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(6): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \ubd80\uac00 \uc124\uba85<\/a><\/li><li><a href=\"https:\/\/prodskill.com\/word-extractor-toc\/\">\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc124\uba85\uae00 \uc804\uccb4 \ubaa9\ucc28 , \ub2e4\uc6b4\ub85c\ub4dc<\/a><\/li><\/ul>\n","protected":false},"excerpt":{"rendered":"<p>Untersuchen Sie den Quellcode eines in Python implementierten Wortextraktionstools. Dies ist eine Fortsetzung des vorherigen Artikels. Word Extraction Tool (3): So f\u00fchren Sie das Word Extraction Tool aus und \u00fcberpr\u00fcfen die Ergebnisse 4. Quellcode des Word Extraction Tool 4.1. \u00dcberblick 4.1.1. 
Beachten Sie den Quellcode Der Quellcode dieses Wortextraktionstools ist ...<\/p>","protected":false},"author":1,"featured_media":11548,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[21],"tags":[15,83,84,85,86,87],"class_list":["post-11547","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-word-extractor","tag-python","tag-83","tag-word-extractor","tag-nlp","tag-86","tag-87"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.3 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1) - \uc0dd\uc0b0\uc131 Skill<\/title>\n<meta name=\"description\" content=\"Python\uc73c\ub85c \uad6c\ud604\ud55c \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc5d0 \ub300\ud574 \uc0b4\ud3b4\ubcf8\ub2e4. \ud568\uc218\uac04 \ud638\ucd9c \uad00\uacc4, main \ud568\uc218\uc758 \uc8fc\uc694 \uae30\ub2a5\uc778 argument parsing, \ud30c\uc77c \ubaa9\ub85d \ucd94\ucd9c, \ud14d\uc2a4\ud2b8 \ucd94\ucd9c, \ub2e8\uc5b4 \ucd94\ucd9c \uc18c\uc2a4\ucf54\ub4dc\uc5d0 \ub300\ud574 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.\" \/>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/\" \/>\n<meta property=\"og:locale\" content=\"de_DE\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1) - \uc0dd\uc0b0\uc131 Skill\" \/>\n<meta property=\"og:description\" content=\"Python\uc73c\ub85c \uad6c\ud604\ud55c \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc5d0 \ub300\ud574 
\uc0b4\ud3b4\ubcf8\ub2e4. \ud568\uc218\uac04 \ud638\ucd9c \uad00\uacc4, main \ud568\uc218\uc758 \uc8fc\uc694 \uae30\ub2a5\uc778 argument parsing, \ud30c\uc77c \ubaa9\ub85d \ucd94\ucd9c, \ud14d\uc2a4\ud2b8 \ucd94\ucd9c, \ub2e8\uc5b4 \ucd94\ucd9c \uc18c\uc2a4\ucf54\ub4dc\uc5d0 \ub300\ud574 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/\" \/>\n<meta property=\"og:site_name\" content=\"\uc0dd\uc0b0\uc131 Skill\" \/>\n<meta property=\"article:published_time\" content=\"2022-09-25T08:34:42+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2022-10-10T11:55:55+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png\" \/>\n\t<meta property=\"og:image:width\" content=\"386\" \/>\n\t<meta property=\"og:image:height\" content=\"141\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"author\" content=\"Zerom\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Verfasst von\" \/>\n\t<meta name=\"twitter:data1\" content=\"Zerom\" \/>\n\t<meta name=\"twitter:label2\" content=\"Gesch\u00e4tzte Lesezeit\" \/>\n\t<meta name=\"twitter:data2\" content=\"7\u00a0Minuten\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#article\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/\"},\"author\":{\"name\":\"Zerom\",\"@id\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/#\\\/schema\\\/person\\\/bbad0870c78008c82edbe0960fe768bd\"},\"headline\":\"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc 
\uc124\uba85(1)\",\"datePublished\":\"2022-09-25T08:34:42+00:00\",\"dateModified\":\"2022-10-10T11:55:55+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/\"},\"wordCount\":229,\"commentCount\":4,\"publisher\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/#\\\/schema\\\/person\\\/bbad0870c78008c82edbe0960fe768bd\"},\"image\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/image-150.png\",\"keywords\":[\"python\",\"\ub2e8\uc5b4 \ucd94\ucd9c\",\"word-extractor\",\"nlp\",\"\ud615\ud0dc\uc18c \ubd84\uc11d\uae30\",\"\uc790\uc5f0\uc5b4 \ucc98\ub9ac\"],\"articleSection\":[\"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c\"],\"inLanguage\":\"de\",\"potentialAction\":[{\"@type\":\"CommentAction\",\"name\":\"Comment\",\"target\":[\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#respond\"]}]},{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/\",\"url\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/\",\"name\":\"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1) - \uc0dd\uc0b0\uc131 Skill\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#primaryimage\"},\"image\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/image-150.png\",\"datePublished\":\"2022-09-25T08:34:42+00:00\",\"dateModified\":\"2022-10-10T11:55:55+00:00\",\"description\":\"Python\uc73c\ub85c \uad6c\ud604\ud55c \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc5d0 \ub300\ud574 \uc0b4\ud3b4\ubcf8\ub2e4. 
\ud568\uc218\uac04 \ud638\ucd9c \uad00\uacc4, main \ud568\uc218\uc758 \uc8fc\uc694 \uae30\ub2a5\uc778 argument parsing, \ud30c\uc77c \ubaa9\ub85d \ucd94\ucd9c, \ud14d\uc2a4\ud2b8 \ucd94\ucd9c, \ub2e8\uc5b4 \ucd94\ucd9c \uc18c\uc2a4\ucf54\ub4dc\uc5d0 \ub300\ud574 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.\",\"breadcrumb\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#breadcrumb\"},\"inLanguage\":\"de\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"de\",\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#primaryimage\",\"url\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/image-150.png\",\"contentUrl\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/image-150.png\",\"width\":386,\"height\":141},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/prodskill.com\\\/word-extractor-source-code-1\\\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1)\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/#website\",\"url\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/\",\"name\":\"\uc0dd\uc0b0\uc131 Skill\",\"description\":\"Meta Thinking, Meta 
Working\",\"publisher\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/#\\\/schema\\\/person\\\/bbad0870c78008c82edbe0960fe768bd\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"de\"},{\"@type\":[\"Person\",\"Organization\"],\"@id\":\"https:\\\/\\\/prodskill.com\\\/ko\\\/#\\\/schema\\\/person\\\/bbad0870c78008c82edbe0960fe768bd\",\"name\":\"Zerom\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"de\",\"@id\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/productivity_clockgear.png\",\"url\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/productivity_clockgear.png\",\"contentUrl\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/productivity_clockgear.png\",\"width\":512,\"height\":512,\"caption\":\"Zerom\"},\"logo\":{\"@id\":\"https:\\\/\\\/prodskill.com\\\/wp-content\\\/uploads\\\/2022\\\/09\\\/productivity_clockgear.png\"},\"url\":\"https:\\\/\\\/prodskill.com\\\/de\\\/author\\\/proda\\\/\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"Wortextraktionstool (4): Beschreibung des Quellcodes des Wortextraktionstools (1) - Produktivit\u00e4tskompetenz","description":"Werfen wir einen Blick auf den Quellcode eines in Python implementierten Tools zur Wortextraktion. 
Sie erfahren mehr \u00fcber die Aufrufbeziehungen zwischen Funktionen, die Hauptfunktion der Hauptfunktion (Argumentanalyse), die Dateilistenextraktion, die Textextraktion und den Quellcode zur Wortextraktion.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/","og_locale":"de_DE","og_type":"article","og_title":"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1) - \uc0dd\uc0b0\uc131 Skill","og_description":"Python\uc73c\ub85c \uad6c\ud604\ud55c \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc5d0 \ub300\ud574 \uc0b4\ud3b4\ubcf8\ub2e4. \ud568\uc218\uac04 \ud638\ucd9c \uad00\uacc4, main \ud568\uc218\uc758 \uc8fc\uc694 \uae30\ub2a5\uc778 argument parsing, \ud30c\uc77c \ubaa9\ub85d \ucd94\ucd9c, \ud14d\uc2a4\ud2b8 \ucd94\ucd9c, \ub2e8\uc5b4 \ucd94\ucd9c \uc18c\uc2a4\ucf54\ub4dc\uc5d0 \ub300\ud574 \ud655\uc778\ud560 \uc218 \uc788\ub2e4.","og_url":"https:\/\/prodskill.com\/de\/word-extractor-source-code-1\/","og_site_name":"\uc0dd\uc0b0\uc131 Skill","article_published_time":"2022-09-25T08:34:42+00:00","article_modified_time":"2022-10-10T11:55:55+00:00","og_image":[{"width":386,"height":141,"url":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png","type":"image\/png"}],"author":"Zerom","twitter_card":"summary_large_image","twitter_misc":{"Verfasst von":"Zerom","Gesch\u00e4tzte Lesezeit":"7\u00a0Minuten"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/#article","isPartOf":{"@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/"},"author":{"name":"Zerom","@id":"https:\/\/prodskill.com\/ko\/#\/schema\/person\/bbad0870c78008c82edbe0960fe768bd"},"headline":"\ub2e8\uc5b4 \ucd94\ucd9c 
\ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1)","datePublished":"2022-09-25T08:34:42+00:00","dateModified":"2022-10-10T11:55:55+00:00","mainEntityOfPage":{"@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/"},"wordCount":229,"commentCount":4,"publisher":{"@id":"https:\/\/prodskill.com\/ko\/#\/schema\/person\/bbad0870c78008c82edbe0960fe768bd"},"image":{"@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/#primaryimage"},"thumbnailUrl":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png","keywords":["python","\ub2e8\uc5b4 \ucd94\ucd9c","word-extractor","nlp","\ud615\ud0dc\uc18c \ubd84\uc11d\uae30","\uc790\uc5f0\uc5b4 \ucc98\ub9ac"],"articleSection":["\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c"],"inLanguage":"de","potentialAction":[{"@type":"CommentAction","name":"Comment","target":["https:\/\/prodskill.com\/word-extractor-source-code-1\/#respond"]}]},{"@type":"WebPage","@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/","url":"https:\/\/prodskill.com\/word-extractor-source-code-1\/","name":"Wortextraktionstool (4): Beschreibung des Quellcodes des Wortextraktionstools (1) - Produktivit\u00e4tskompetenz","isPartOf":{"@id":"https:\/\/prodskill.com\/ko\/#website"},"primaryImageOfPage":{"@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/#primaryimage"},"image":{"@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/#primaryimage"},"thumbnailUrl":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png","datePublished":"2022-09-25T08:34:42+00:00","dateModified":"2022-10-10T11:55:55+00:00","description":"Werfen wir einen Blick auf den Quellcode eines in Python implementierten Tools zur Wortextraktion. 
Sie erfahren mehr \u00fcber die Aufrufbeziehungen zwischen Funktionen, die Hauptfunktion der Hauptfunktion (Argumentanalyse), die Dateilistenextraktion, die Textextraktion und den Quellcode zur Wortextraktion.","breadcrumb":{"@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/#breadcrumb"},"inLanguage":"de","potentialAction":[{"@type":"ReadAction","target":["https:\/\/prodskill.com\/word-extractor-source-code-1\/"]}]},{"@type":"ImageObject","inLanguage":"de","@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/#primaryimage","url":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png","contentUrl":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/image-150.png","width":386,"height":141},{"@type":"BreadcrumbList","@id":"https:\/\/prodskill.com\/word-extractor-source-code-1\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/prodskill.com\/ko\/"},{"@type":"ListItem","position":2,"name":"\ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c(4): \ub2e8\uc5b4 \ucd94\ucd9c \ub3c4\uad6c \uc18c\uc2a4\ucf54\ub4dc \uc124\uba85(1)"}]},{"@type":"WebSite","@id":"https:\/\/prodskill.com\/ko\/#website","url":"https:\/\/prodskill.com\/ko\/","name":"Produktivit\u00e4tsf\u00e4higkeiten","description":"Meta-Denken, 
Meta-Arbeiten","publisher":{"@id":"https:\/\/prodskill.com\/ko\/#\/schema\/person\/bbad0870c78008c82edbe0960fe768bd"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/prodskill.com\/ko\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"de"},{"@type":["Person","Organization"],"@id":"https:\/\/prodskill.com\/ko\/#\/schema\/person\/bbad0870c78008c82edbe0960fe768bd","name":"Zerom","image":{"@type":"ImageObject","inLanguage":"de","@id":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/productivity_clockgear.png","url":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/productivity_clockgear.png","contentUrl":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/productivity_clockgear.png","width":512,"height":512,"caption":"Zerom"},"logo":{"@id":"https:\/\/prodskill.com\/wp-content\/uploads\/2022\/09\/productivity_clockgear.png"},"url":"https:\/\/prodskill.com\/de\/author\/proda\/"}]}},"_links":{"self":[{"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/posts\/11547","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/comments?post=11547"}],"version-history":[{"count":0,"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/posts\/11547\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/media\/11548"}],"wp:attachment":[{"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/media?parent=11547"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/categories?post=11547"},{"taxonomy":"post
_tag","embeddable":true,"href":"https:\/\/prodskill.com\/de\/wp-json\/wp\/v2\/tags?post=11547"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}