{"id":368,"date":"2024-10-03T15:20:54","date_gmt":"2024-10-03T07:20:54","guid":{"rendered":"https:\/\/www.xuzhe.tj.cn\/?p=368"},"modified":"2025-05-03T00:48:04","modified_gmt":"2025-05-02T16:48:04","slug":"scikit-learn","status":"publish","type":"post","link":"https:\/\/www.xuzhe.tj.cn\/index.php\/2024\/10\/03\/scikit-learn\/","title":{"rendered":"scikit-learn\u4e2d\u7684\u6811\u7b97\u6cd5 \u4e0d\u80fd\u76f4\u63a5\u4f7f\u7528\u5206\u7c7b\u53d8\u91cf"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">scikit-learn\u4e2d\u7684\u7b97\u6cd5\u90fd\u4e0d\u76f4\u63a5\u652f\u6301\u5206\u7c7b\u53d8\u91cf<\/h2>\n\n\n<p>\u4e0d\u53ea\u662fscikit-learn\u4e2d\u7684\u6811\u7b97\u6cd5\u4e0d\u80fd\u76f4\u63a5\u4f7f\u7528\u5206\u7c7b\u53d8\u91cf\uff0cscikit-learn\u4e2d\u7684\u7b97\u6cd5\u90fd\u4e0d\u76f4\u63a5\u652f\u6301\u5206\u7c7b\u53d8\u91cf\u3002\u5728\u4f7f\u7528\u8fd9\u4e9b\u7b97\u6cd5\u4e4b\u524d\uff0c\u9700\u8981\u5c06\u5206\u7c7b\u53d8\u91cf\u8f6c\u6362\u4e3a\u72ec\u70ed\u7f16\u7801\uff08one-hot\uff09\u6216\u6574\u6570\u7c7b\u578b\u3002<\/p>\n\n<!--more-->\n\n<p>\u4e0b\u9762\u8fd9\u5f20\u622a\u56fe\u6765\u81ea scikit-learn \u7684\u793e\u533a\uff0c\u89e3\u91ca\u4e86\u4e3a\u4ec0\u4e48 scikit-learn \u4e0d\u80fd\u76f4\u63a5\u4f7f\u7528\u5206\u7c7b\u53d8\u91cf\uff0c\u800c\u9700\u8981\u5728\u5efa\u6a21\u524d\u5c06\u5176\u9884\u5904\u7406\u4e3a\u6570\u503c\u578b\u7279\u5f81\u3002\u622a\u56fe\u6307\u51fa\uff0c\u8fd9\u662f\u56e0\u4e3a scikit-learn \u4f7f\u7528 NumPy \u6570\u7ec4\u6216 SciPy \u7684\u7a00\u758f\u77e9\u9635\u6765\u8868\u793a\u6570\u636e\u96c6\uff0c\u8fd9\u4e24\u79cd\u6570\u636e\u7ed3\u6784\u90fd\u65e0\u6cd5\u76f4\u63a5\u8868\u793a\u7c7b\u522b\u53d8\u91cf\u3002<\/p>\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img loading=\"lazy\" decoding=\"async\" width=\"1270\" height=\"404\" src=\"https:\/\/www.xuzhe.tj.cn\/wp-content\/uploads\/2024\/10\/DraggedImage.png\" class=\"wp-image-367\" 
srcset=\"https:\/\/www.xuzhe.tj.cn\/wp-content\/uploads\/2024\/10\/DraggedImage.png 1270w, https:\/\/www.xuzhe.tj.cn\/wp-content\/uploads\/2024\/10\/DraggedImage-300x95.png 300w, https:\/\/www.xuzhe.tj.cn\/wp-content\/uploads\/2024\/10\/DraggedImage-1024x326.png 1024w, https:\/\/www.xuzhe.tj.cn\/wp-content\/uploads\/2024\/10\/DraggedImage-768x244.png 768w, https:\/\/www.xuzhe.tj.cn\/wp-content\/uploads\/2024\/10\/DraggedImage-624x199.png 624w\" sizes=\"auto, (max-width: 1270px) 100vw, 1270px\" \/><\/figure><\/div>\n\n\n<p><a href=\"https:\/\/scikit-learn.org.cn\/lists\/94.html?glarity%5C_translate=1#%E4%B8%8E%E5%85%B6%E4%BB%96%E5%B7%A5%E5%85%B7%E7%9B%B8%E6%AF%94%EF%BC%8C%E4%B8%BA%E4%BB%80%E4%B9%88%E5%88%86%E7%B1%BB%E5%8F%98%E9%87%8F%E9%9C%80%E8%A6%81%E5%9C%A8scikit-learn%E4%B8%AD%E8%BF%9B%E8%A1%8C%E9%A2%84%E5%A4%84%E7%90%86%EF%BC%9F\" title=\"\u56fe\u7247\u6765\u6e90\"> \u56fe\u7247\u6765\u6e90 <\/a><\/p>\n\n\n<p>\u4e3e\u4f8b\u6765\u8bf4\uff0c\u5728\u5929\u6c14\u8fd9\u4e2a\u7c7b\u522b\u53d8\u91cf\u7279\u5f81\u4e2d\uff0c\u662f\u4e0d\u80fd\u7528&#8221;Rain&#8221;\u3001&#8221;Sunny&#8221;\u3001&#8221;Cloudy&#8221;\u8fd9\u6837\u7684\u5b57\u7b26\u4e32\u5f62\u5f0f\u7684\uff0c\u800c\u662f\u5e94\u8be5\u4f7f\u7528[1, 0, 0]\u3001[0, 1, 0]\u3001[0, 0, 1] \u8fd9\u6837\u7684\u72ec\u70ed\u7f16\u7801\u6765\u5206\u522b\u8868\u793a &#8220;Rain&#8221;\u3001 &#8220;Sunny&#8221;\u3001 &#8220;Cloudy&#8221;\u3002 
<\/p>\n\n\n<p>\u9664one-hot\u7f16\u7801\u4e4b\u5916\uff0c\u4e5f\u53ef\u4ee5\u8f6c\u6210\u6574\u6570\u7684\u7f16\u7801\uff0c\u5982\u4f7f\u75280\u30011\u30012\u8fd9\u6837\u7684\u6574\u6570\u8868\u793a&#8221;Rain&#8221;\u3001&#8221;Sunny&#8221;\u3001&#8221;Cloudy&#8221;\u3002\u4f46\u8fd9\u79cd\u8868\u793a\u65b9\u5f0f\u4f1a\u9519\u8bef\u5730\u8d4b\u4e88\u5206\u7c7b\u503c\u5927\u5c0f\u5173\u7cfb\uff0c\u800c\u8fd9\u5e76\u975e\u5206\u7c7b\u53d8\u91cf\u672c\u8eab\u6240\u5177\u6709\u7684\u7279\u6027\uff0c\u6240\u4ee5<strong>\u5e94\u907f\u514d\u5c06\u7c7b\u522b\u53d8\u91cf\u8f6c\u6362\u6210\u6574\u6570\u5f62\u5f0f<\/strong>\uff0c\u800c\u5e94\u4f7f\u7528\u72ec\u70ed\u7f16\u7801\u5f62\u5f0f\u3002<\/p>\n\n\n<p>\u53ef\u4ee5\u770b\u51fa\uff0c<strong>scikit-learn \u53ea\u80fd\u5904\u7406\u6570\u503c\u578b\u7279\u5f81\uff0c\u5982\u6574\u6570\u6216\u6d6e\u70b9\u6570<\/strong>\u3002\u5b83\u65e0\u6cd5\u76f4\u63a5\u5904\u7406\u6587\u672c\u5f62\u5f0f\u7684\u5206\u7c7b\u53d8\u91cf\u3002\u56e0\u6b64\uff0c\u9700\u8981\u5c06\u5206\u7c7b\u53d8\u91cf\u8f6c\u6362\u4e3a\u72ec\u70ed\u7f16\u7801\u5f62\u5f0f\uff0c\u5373\u5305\u542b {0, 1} \u4e24\u4e2a\u6570\u503c\u3002\u8fd9\u91cc\u7684 0 \u548c 1 \u867d\u7136\u662f\u6570\u503c\u578b\u53d8\u91cf\uff0c\u4f46\u5b83\u4eec\u5b9e\u9645\u4e0a\u8868\u793a\u67d0\u4e2a\u7279\u5f81\u7684\u5b58\u5728\u4e0e\u5426\u3002\u672c\u8d28\u4e0a\uff0c\u8fd9\u662f\u4f7f\u7528\u6574\u6570 0 \u548c 1 \u6765\u8868\u793a\u903b\u8f91\u5047\u548c\u903b\u8f91\u771f\u3002<\/p>\n\n\n<h2 class=\"wp-block-heading\">\u5206\u7c7b\u53d8\u91cf\u7684\u8f6c\u6362\u4f8b\u7a0b<\/h2>\n\n\n<p>\u4e0b\u9762\u770b\u4e00\u4e2a\u4f8b\u5b50\uff0c\u8fd9\u662f\u4e00\u4e2a\u4f7f\u7528OneHotEncoder\u5c06\u5206\u7c7b\u53d8\u91cf\u8f6c\u6362\u4e3a\u72ec\u70ed\u7f16\u7801\u5f62\u5f0f\u7684\u793a\u4f8b\u4ee3\u7801\uff1a<\/p>\n\n\n<pre class=\"wp-block-code\"><code>import pandas as pd\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.model_selection import train_test_split\nfrom 
sklearn.metrics import accuracy_score, classification_report\nfrom sklearn.preprocessing import OneHotEncoder\nfrom sklearn.compose import ColumnTransformer\n\n# \u521b\u5efa\u4e00\u4e2a\u5305\u542b\u79bb\u6563\u7279\u5f81\u7684\u793a\u4f8b\u6570\u636e\u96c6\ndata = {\n    '\u989c\u8272': ['\u7ea2', '\u84dd', '\u7eff', '\u7ea2', '\u84dd', '\u7eff', '\u7ea2', '\u84dd'],\n    '\u5927\u5c0f': ['\u5927', '\u4e2d', '\u5c0f', '\u5927', '\u4e2d', '\u5c0f', '\u4e2d', '\u5c0f'],\n    '\u5f62\u72b6': ['\u5706', '\u65b9', '\u4e09\u89d2', '\u5706', '\u65b9', '\u4e09\u89d2', '\u65b9', '\u5706'],\n    '\u7c7b\u522b': ['A', 'B', 'A', 'A', 'B', 'B', 'A', 'B']\n}\n\n# \u521b\u5efaDataFrame\ndf = pd.DataFrame(data)\n\n# \u5206\u79bb\u7279\u5f81\u548c\u76ee\u6807\u53d8\u91cf\nX = df.drop('\u7c7b\u522b', axis=1)\ny = df['\u7c7b\u522b']\n\n# \u521b\u5efaColumnTransformer\u5bf9\u8c61\u6765\u5e94\u7528One-Hot\u7f16\u7801\nct = ColumnTransformer([('encoder', OneHotEncoder(sparse=False), ['\u989c\u8272', '\u5927\u5c0f', '\u5f62\u72b6'])], remainder='passthrough')\n\n# \u5bf9\u7279\u5f81\u8fdb\u884cOne-Hot\u7f16\u7801\nX_encoded = ct.fit_transform(X)\n\n# \u83b7\u53d6\u7f16\u7801\u540e\u7684\u7279\u5f81\u540d\u79f0\nfeature_names = ct.named_transformers_['encoder'].get_feature_names_out(['\u989c\u8272', '\u5927\u5c0f', '\u5f62\u72b6'])\n\n# \u521b\u5efa\u5305\u542b\u7f16\u7801\u540e\u7279\u5f81\u7684DataFrame\nX_encoded_df = pd.DataFrame(X_encoded, columns=feature_names)\n\n# \u6253\u5370 X_encoded_df \u7684\u5185\u5bb9\nprint(\"X_encoded_df \u7684\u5185\u5bb9\uff1a\")\nprint(X_encoded_df)\n\n# \u53ef\u4ee5\u6dfb\u52a0\u4ee5\u4e0b\u884c\u6765\u9650\u5236\u8f93\u51fa\u7684\u884c\u6570\uff0c\u5982\u679c\u6570\u636e\u96c6\u5f88\u5927\u7684\u8bdd\n# print(X_encoded_df.head(10))  # \u53ea\u6253\u5370\u524d10\u884c\n\n# \u6253\u5370 X_encoded_df \u7684\u57fa\u672c\u4fe1\u606f\nprint(\"\\nX_encoded_df \u7684\u57fa\u672c\u4fe1\u606f\uff1a\")\nprint(X_encoded_df.info())\n\n# \u6253\u5370 
X_encoded_df \u7684\u7edf\u8ba1\u6458\u8981\nprint(\"\\nX_encoded_df \u7684\u7edf\u8ba1\u6458\u8981\uff1a\")\nprint(X_encoded_df.describe())\n\n# \u5206\u5272\u6570\u636e\u96c6\u4e3a\u8bad\u7ec3\u96c6\u548c\u6d4b\u8bd5\u96c6\nX_train, X_test, y_train, y_test = train_test_split(X_encoded_df, y, test_size=0.2, random_state=42)\n\n# \u521b\u5efa\u5e76\u8bad\u7ec3CART\u51b3\u7b56\u6811\ncart = DecisionTreeClassifier(random_state=42)\ncart.fit(X_train, y_train)\n\n# \u5728\u6d4b\u8bd5\u96c6\u4e0a\u8fdb\u884c\u9884\u6d4b\ny_pred = cart.predict(X_test)\n\n# \u8ba1\u7b97\u51c6\u786e\u7387\naccuracy = accuracy_score(y_test, y_pred)\nprint(f\"\u6a21\u578b\u51c6\u786e\u7387: {accuracy:.2f}\")\n\n# \u6253\u5370\u5206\u7c7b\u62a5\u544a\nprint(\"\\n\u5206\u7c7b\u62a5\u544a:\")\nprint(classification_report(y_test, y_pred))\n\n# \u6253\u5370\u7279\u5f81\u91cd\u8981\u6027\nprint(\"\\n\u7279\u5f81\u91cd\u8981\u6027:\")\nfor feature, importance in zip(X_encoded_df.columns, cart.feature_importances_):\n    print(f\"{feature}: {importance:.4f}\")\n\n# \u53ef\u89c6\u5316\u51b3\u7b56\u6811\nfrom sklearn.tree import plot_tree\nimport matplotlib.pyplot as plt\n\nplt.figure(figsize=(20,10))\nplot_tree(cart, \n          feature_names=list(X_encoded_df.columns),\n          class_names=list(cart.classes_),\n          filled=True, \n          rounded=True)\nplt.show()\n<\/code><\/pre>\n\n\n<p>\u8be5\u4ee3\u7801\u5c55\u793a\u4e86\u5982\u4f55\u4f7f\u7528\u51b3\u7b56\u6811\u6a21\u578b\uff08CART\uff09\u5bf9\u79bb\u6563\u7c7b\u522b\u53d8\u91cf\u8fdb\u884c\u5904\u7406\uff0c\u5e76\u4e14\u901a\u8fc7 <code>One-Hot Encoding<\/code> \u7684\u65b9\u5f0f\u5c06\u7c7b\u522b\u53d8\u91cf\u8f6c\u6362\u4e3a\u6570\u503c\u578b\u7279\u5f81\u3002\u4e0b\u9762\u6211\u5c06\u9010\u6b65\u89e3\u91ca\u4ee3\u7801\u7684\u5404\u4e2a\u90e8\u5206\uff0c\u5e76\u8be6\u7ec6\u8bf4\u660e\u5982\u4f55\u5c06\u7c7b\u522b\u53d8\u91cf\u8f6c\u53d8\u6210 <code>One-Hot<\/code> \u53d8\u91cf\u3002<\/p>\n\n\n<h3 class=\"wp-block-heading\">1. 
\u521b\u5efa\u793a\u4f8b\u6570\u636e\u96c6<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>data = {\n    '\u989c\u8272': ['\u7ea2', '\u84dd', '\u7eff', '\u7ea2', '\u84dd', '\u7eff', '\u7ea2', '\u84dd'],\n    '\u5927\u5c0f': ['\u5927', '\u4e2d', '\u5c0f', '\u5927', '\u4e2d', '\u5c0f', '\u4e2d', '\u5c0f'],\n    '\u5f62\u72b6': ['\u5706', '\u65b9', '\u4e09\u89d2', '\u5706', '\u65b9', '\u4e09\u89d2', '\u65b9', '\u5706'],\n    '\u7c7b\u522b': ['A', 'B', 'A', 'A', 'B', 'B', 'A', 'B']\n}\ndf = pd.DataFrame(data)\n\n<\/code><\/pre>\n\n\n<p>\u8fd9\u6bb5\u4ee3\u7801\u521b\u5efa\u4e86\u4e00\u4e2a\u5305\u542b 8 \u6761\u8bb0\u5f55\u7684\u5c0f\u578b\u6570\u636e\u96c6\u3002\u6570\u636e\u96c6\u4e2d\u7684\u7279\u5f81\u53d8\u91cf\u662f <strong>\u989c\u8272<\/strong>\u3001<strong>\u5927\u5c0f<\/strong> \u548c <strong>\u5f62\u72b6<\/strong>\uff0c\u76ee\u6807\u53d8\u91cf\u662f <strong>\u7c7b\u522b<\/strong>\u3002\u6bcf\u4e2a\u7279\u5f81\u53d8\u91cf\u90fd\u662f\u79bb\u6563\u7c7b\u522b\u53d8\u91cf\uff0c\u5e76\u4e14\u76ee\u6807\u53d8\u91cf\u201c\u7c7b\u522b\u201d\u5305\u542b <code>A<\/code> \u548c <code>B<\/code> \u4e24\u4e2a\u7c7b\u522b\u3002<\/p>\n\n\n<h3 class=\"wp-block-heading\">2. \u7279\u5f81\u4e0e\u76ee\u6807\u53d8\u91cf\u5206\u79bb<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>X = df.drop('\u7c7b\u522b', axis=1)  # \u7279\u5f81\ny = df['\u7c7b\u522b']               # \u76ee\u6807\u53d8\u91cf\n<\/code><\/pre>\n\n\n<p>\u5c06\u7279\u5f81\u548c\u76ee\u6807\u53d8\u91cf\u5206\u79bb\uff0c<code>X<\/code> \u4e2d\u5305\u542b\u4e86\u6240\u6709\u7279\u5f81\uff08\u989c\u8272\u3001\u5927\u5c0f\u3001\u5f62\u72b6\uff09\uff0c\u800c <code>y<\/code> \u5305\u542b\u4e86\u76ee\u6807\u53d8\u91cf\u201c\u7c7b\u522b\u201d\u3002<\/p>\n\n\n<h3 class=\"wp-block-heading\">3. 
\u521b\u5efa <code>ColumnTransformer<\/code> \u5bf9\u8c61\u5e76\u8fdb\u884c <code>One-Hot Encoding<\/code><\/h3>\n\n\n<pre class=\"wp-block-code\"><code>ct = ColumnTransformer([('encoder', OneHotEncoder(sparse=False), ['\u989c\u8272', '\u5927\u5c0f', '\u5f62\u72b6'])], remainder='passthrough')\n<\/code><\/pre>\n\n\n<p>\u8fd9\u6bb5\u4ee3\u7801\u521b\u5efa\u4e86\u4e00\u4e2a <code>ColumnTransformer<\/code> \u5bf9\u8c61\uff0c\u8be5\u5bf9\u8c61\u7528\u4e8e\u5c06 <code>One-Hot Encoding<\/code> \u5e94\u7528\u4e8e\u6307\u5b9a\u7684\u5217\uff08<code>\u989c\u8272<\/code>\u3001<code>\u5927\u5c0f<\/code>\u3001<code>\u5f62\u72b6<\/code>\uff09\u3002<code>ColumnTransformer<\/code> \u662f <code>sklearn<\/code> \u4e2d\u7528\u4e8e\u5c06\u4e0d\u540c\u7684\u9884\u5904\u7406\u64cd\u4f5c\u5e94\u7528\u4e8e\u4e0d\u540c\u5217\u7684\u5de5\u5177\u3002\u5b83\u7684\u4e3b\u8981\u53c2\u6570\u5982\u4e0b\uff1a<\/p>\n\n\n<ul class=\"wp-block-list\">\n    <li><strong>[(&#8216;encoder&#8217;, OneHotEncoder(sparse=False), [&#8216;\u989c\u8272&#8217;, &#8216;\u5927\u5c0f&#8217;, &#8216;\u5f62\u72b6&#8217;])]<\/strong>\uff1a<br><ul>\n            <li><code>encoder<\/code> \u662f\u4e00\u4e2a\u522b\u540d\uff0c\u7528\u4e8e\u6807\u8bc6\u8be5 <code>ColumnTransformer<\/code> \u4e2d\u7684 <code>OneHotEncoder<\/code> \u7f16\u7801\u5668\u3002<\/li>\n            <li><code>OneHotEncoder(sparse=False)<\/code> \u6307\u5b9a\u4f7f\u7528 <code>One-Hot<\/code> \u7f16\u7801\u5668\u6765\u5c06\u79bb\u6563\u7c7b\u522b\u7279\u5f81\u8f6c\u6362\u4e3a <code>One-Hot<\/code> \u683c\u5f0f\uff0c\u5e76\u4e14 <code>sparse=False<\/code> \u8868\u793a\u8fd4\u56de\u7684\u7f16\u7801\u7ed3\u679c\u662f\u4e00\u4e2a\u5bc6\u96c6\u77e9\u9635\uff08<code>Dense Matrix<\/code>\uff09\uff0c\u800c\u4e0d\u662f\u7a00\u758f\u77e9\u9635\uff08<code>Sparse Matrix<\/code>\uff09\u3002<\/li>\n            <li><code>['\u989c\u8272', '\u5927\u5c0f', '\u5f62\u72b6']<\/code> \u6307\u5b9a\u9700\u8981\u8fdb\u884c <code>One-Hot Encoding<\/code> 
\u7684\u5217\u3002<\/li>\n        <\/ul><\/li>\n    <li><strong><code>remainder='passthrough'<\/code><\/strong>\uff1a<br><ul>\n            <li>\u8868\u793a\u5176\u4ed6\u672a\u88ab\u5217\u51fa\u7528\u4e8e <code>One-Hot Encoding<\/code> \u7684\u5217\uff08\u5728\u672c\u4f8b\u4e2d\u4e0d\u5b58\u5728\uff09\u5c06\u4e0d\u4f5c\u4efb\u4f55\u5904\u7406\uff0c\u76f4\u63a5\u4fdd\u7559\u5728\u8f93\u51fa\u4e2d\u3002<br><\/li>\n        <\/ul><\/li>\n<\/ul>\n\n\n<h3 class=\"wp-block-heading\">4. \u5c06\u7279\u5f81\u8fdb\u884c <code>One-Hot Encoding<\/code><\/h3>\n\n\n<pre class=\"wp-block-code\"><code>X_encoded = ct.fit_transform(X)\n<\/code><\/pre>\n\n\n<p>\u8fd9\u6bb5\u4ee3\u7801\u5c06 <code>One-Hot Encoding<\/code> \u5e94\u7528\u4e8e\u7279\u5f81 <code>X<\/code>\u3002<code>fit_transform<\/code> \u65b9\u6cd5\u4f1a\uff1a<\/p>\n\n\n<ol class=\"wp-block-list\">\n    <li>\u8ba1\u7b97\u6bcf\u4e2a\u7279\u5f81\u4e2d\u6240\u6709\u7c7b\u522b\u503c\u7684 <code>One-Hot<\/code> \u7f16\u7801\u6620\u5c04\u5173\u7cfb\u3002<\/li>\n    <li>\u5c06\u6240\u6709\u7c7b\u522b\u53d8\u91cf\u8f6c\u6362\u4e3a <code>One-Hot<\/code> \u5f62\u5f0f\u7684\u6570\u503c\u77e9\u9635\u3002<br><\/li>\n<\/ol>\n\n\n<h3 class=\"wp-block-heading\">5. 
\u83b7\u53d6 <code>One-Hot Encoding<\/code> \u540e\u7684\u7279\u5f81\u540d\u79f0<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>feature_names = ct.named_transformers_['encoder'].get_feature_names_out(['\u989c\u8272', '\u5927\u5c0f', '\u5f62\u72b6'])\n\n<\/code><\/pre>\n\n\n<p>\u4f7f\u7528 <code>ct.named_transformers_['encoder'].get_feature_names_out()<\/code> \u6765\u83b7\u53d6\u7f16\u7801\u540e\u7684\u7279\u5f81\u540d\u79f0\u3002\u6bcf\u4e2a\u7279\u5f81\u4f1a\u6839\u636e\u5176\u539f\u59cb\u7279\u5f81\u540d\u79f0\u548c\u7c7b\u522b\u503c\u751f\u6210\u76f8\u5e94\u7684 <code>One-Hot<\/code> \u7279\u5f81\u540d\u3002\u4f8b\u5982\uff0c\u989c\u8272\u4e2d\u7684\u7ea2\u3001\u84dd\u3001\u7eff\u4f1a\u751f\u6210\u7279\u5f81\u540d\uff1a<code>\u989c\u8272_\u7ea2<\/code>, <code>\u989c\u8272_\u84dd<\/code>, <code>\u989c\u8272_\u7eff<\/code>\u3002<\/p>\n\n\n<h3 class=\"wp-block-heading\">6. \u521b\u5efa\u5305\u542b <code>One-Hot<\/code> \u7f16\u7801\u540e\u7684 DataFrame<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>X_encoded_df = pd.DataFrame(X_encoded, columns=feature_names)\n\n<\/code><\/pre>\n\n\n<p>\u5c06 <code>One-Hot<\/code> \u7f16\u7801\u540e\u7684\u77e9\u9635\u8f6c\u6362\u4e3a <code>DataFrame<\/code> \u683c\u5f0f\uff0c\u5e76\u8d4b\u4e88\u5bf9\u5e94\u7684\u5217\u540d\u3002\u6253\u5370\u8f93\u51fa\u65f6\u53ef\u4ee5\u770b\u5230\u5982\u4e0b\u5f62\u5f0f\u7684\u8868\u683c\uff1a<\/p>\n\n\n<figure class=\"wp-block-table\">\n<table>\n    <thead>\n        <tr>\n            <th>\n                \u989c\u8272_\u7ea2\n            <\/th>\n            <th>\n                \u989c\u8272_\u84dd\n            <\/th>\n            <th>\n                \u989c\u8272_\u7eff\n            <\/th>\n            <th>\n                \u5927\u5c0f_\u5927\n            <\/th>\n            <th>\n                \u5927\u5c0f_\u4e2d\n            <\/th>\n            <th>\n                \u5927\u5c0f_\u5c0f\n            <\/th>\n            <th>\n                \u5f62\u72b6_\u5706\n            <\/th>\n        
    <th>\n                \u5f62\u72b6_\u65b9\n            <\/th>\n            <th>\n                \u5f62\u72b6_\u4e09\u89d2\n            <\/th>\n        <\/tr>\n    <\/thead>\n    <tbody>\n        <tr>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n        <\/tr>\n        <tr>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n        <\/tr>\n        <tr>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                0.0\n            <\/td>\n            <td>\n                1.0\n            <\/td>\n        <\/tr>\n        <tr>\n            <td>\n                &#8230;\n            <\/td>\n            <td>\n                &#8230;\n     
       <\/td>\n            <td>\n                &#8230;\n            <\/td>\n            <td>\n                &#8230;\n            <\/td>\n            <td>\n                &#8230;\n            <\/td>\n            <td>\n                &#8230;\n            <\/td>\n            <td>\n                &#8230;\n            <\/td>\n            <td>\n                &#8230;\n            <\/td>\n            <td>\n                &#8230;\n            <\/td>\n        <\/tr>\n    <\/tbody>\n<\/table>\n<\/figure>\n\n\n<h3 class=\"wp-block-heading\">7. \u6570\u636e\u96c6\u5212\u5206\u3001\u6a21\u578b\u8bad\u7ec3\u4e0e\u6d4b\u8bd5<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>X_train, X_test, y_train, y_test = train_test_split(X_encoded_df, y, test_size=0.2, random_state=42)\ncart = DecisionTreeClassifier(random_state=42)\ncart.fit(X_train, y_train)\n\n<\/code><\/pre>\n\n\n<ul class=\"wp-block-list\">\n    <li>\u4f7f\u7528 <code>train_test_split<\/code> \u5c06\u6570\u636e\u96c6\u5212\u5206\u4e3a\u8bad\u7ec3\u96c6\u548c\u6d4b\u8bd5\u96c6\uff0c\u6bd4\u4f8b\u4e3a 80% \u8bad\u7ec3\u96c6\u548c 20% \u6d4b\u8bd5\u96c6\u3002<\/li>\n    <li>\u521b\u5efa\u4e00\u4e2a <code>DecisionTreeClassifier<\/code> \u5b9e\u4f8b\uff0c\u5e76\u4f7f\u7528\u8bad\u7ec3\u96c6 <code>X_train<\/code> \u548c <code>y_train<\/code> \u8fdb\u884c\u6a21\u578b\u8bad\u7ec3\u3002<br><\/li>\n<\/ul>\n\n\n<h3 class=\"wp-block-heading\">8. \u9884\u6d4b\u4e0e\u8bc4\u4f30<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>y_pred = cart.predict(X_test)\naccuracy = accuracy_score(y_test, y_pred)\n\n<\/code><\/pre>\n\n\n<ul class=\"wp-block-list\">\n    <li>\u4f7f\u7528\u8bad\u7ec3\u597d\u7684\u6a21\u578b\u5728\u6d4b\u8bd5\u96c6\u4e0a\u8fdb\u884c\u9884\u6d4b\uff0c\u5e76\u8ba1\u7b97\u6a21\u578b\u7684\u51c6\u786e\u7387\uff08Accuracy\uff09\u3002<br><\/li>\n<\/ul>\n\n\n<h3 class=\"wp-block-heading\">9. 
\u5206\u7c7b\u62a5\u544a\u548c\u7279\u5f81\u91cd\u8981\u6027<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>print(classification_report(y_test, y_pred))\n\n<\/code><\/pre>\n\n\n<ul class=\"wp-block-list\">\n    <li>\u6253\u5370\u5206\u7c7b\u62a5\u544a\uff0c\u663e\u793a\u6a21\u578b\u5728\u4e0d\u540c\u7c7b\u522b\u4e0a\u7684\u7cbe\u786e\u5ea6\uff08Precision\uff09\u3001\u53ec\u56de\u7387\uff08Recall\uff09\u548c F1 \u503c\u3002<br><\/li>\n<\/ul>\n\n\n<pre class=\"wp-block-code\"><code>for feature, importance in zip(X_encoded_df.columns, cart.feature_importances_):\n    print(f\"{feature}: {importance:.4f}\")\n\n<\/code><\/pre>\n\n\n<ul class=\"wp-block-list\">\n    <li>\u6253\u5370\u6bcf\u4e2a <code>One-Hot<\/code> \u7f16\u7801\u540e\u7684\u7279\u5f81\u5728\u51b3\u7b56\u6811\u6a21\u578b\u4e2d\u7684\u91cd\u8981\u6027\uff08Feature Importance\uff09\u3002<br><\/li>\n<\/ul>\n\n\n<h3 class=\"wp-block-heading\">10. \u53ef\u89c6\u5316\u51b3\u7b56\u6811<\/h3>\n\n\n<pre class=\"wp-block-code\"><code>plot_tree(cart, feature_names=list(X_encoded_df.columns), class_names=list(cart.classes_), filled=True, rounded=True)\n\n<\/code><\/pre>\n\n\n<ul class=\"wp-block-list\">\n    <li>\u4f7f\u7528 <code>plot_tree<\/code> \u51fd\u6570\u53ef\u89c6\u5316\u51b3\u7b56\u6811\u3002<code>feature_names<\/code> \u53c2\u6570\u7528\u4e8e\u6307\u5b9a\u7279\u5f81\u540d\u79f0\uff0c<code>class_names<\/code> \u7528\u4e8e\u6307\u5b9a\u7c7b\u522b\u540d\u79f0\uff0c<code>filled=True<\/code> \u4f7f\u8282\u70b9\u6839\u636e\u7c7b\u522b\u586b\u5145\u989c\u8272\uff0c<code>rounded=True<\/code> \u4f7f\u8282\u70b9\u8fb9\u6846\u53d8\u4e3a\u5706\u89d2\u3002<br><\/li>\n<\/ul>\n\n\n<h3 class=\"wp-block-heading\">\u603b\u7ed3<\/h3>\n\n\n<ul class=\"wp-block-list\">\n    <li>\u8be5\u4ee3\u7801\u5c55\u793a\u4e86\u5982\u4f55\u4f7f\u7528 <code>One-Hot Encoding<\/code> \u5c06\u79bb\u6563\u7c7b\u522b\u7279\u5f81\u8f6c\u6362\u4e3a\u6570\u503c\u578b\u7279\u5f81\uff0c\u5e76\u4f7f\u7528 <code>DecisionTreeClassifier<\/code> 
\u8fdb\u884c\u5efa\u6a21\u3002<\/li>\n    <li><code>One-Hot Encoding<\/code> \u662f\u5c06\u6bcf\u4e2a\u7c7b\u522b\u7279\u5f81\u8f6c\u5316\u4e3a\u591a\u4e2a\u4e8c\u8fdb\u5236\u7279\u5f81\uff080 \u6216 1\uff09\uff0c\u4ece\u800c\u4f7f\u5f97\u6a21\u578b\u80fd\u591f\u5904\u7406\u8fd9\u4e9b\u7c7b\u522b\u578b\u53d8\u91cf\u3002<\/li>\n    <li>\u901a\u8fc7 <code>ColumnTransformer<\/code> \u7ed3\u5408 <code>OneHotEncoder<\/code> \u7684\u65b9\u5f0f\uff0c\u53ef\u4ee5\u5728\u4e00\u4e2a\u6b65\u9aa4\u4e2d\u5b8c\u6210\u591a\u4e2a\u5217\u7684 <code>One-Hot<\/code> \u7f16\u7801\uff0c\u5e76\u6700\u7ec8\u5c06\u7f16\u7801\u7ed3\u679c\u7528\u4e8e\u6a21\u578b\u8bad\u7ec3\u548c\u8bc4\u4f30\u3002<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>scikit-learn\u4e2d\u7684\u7b97\u6cd5\u90fd\u4e0d\u76f4\u63a5\u652f\u6301\u5206\u7c7b\u53d8\u91cf \u4e0d\u53ea\u662f&#8230;<\/p>\n","protected":false},"author":1,"featured_media":495,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[15],"tags":[29,31,23],"class_list":["post-368","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-machine-learning","tag-decision-tree","tag-feature","tag-machine-learning"],"_links":{"self":[{"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/posts\/368","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/comments?post=368"}],"version-history":[{"count":3,"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/posts\/368\/revisions"}],"predecessor-version":[{"id":388,"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-j
son\/wp\/v2\/posts\/368\/revisions\/388"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/media\/495"}],"wp:attachment":[{"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/media?parent=368"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/categories?post=368"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.xuzhe.tj.cn\/index.php\/wp-json\/wp\/v2\/tags?post=368"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}