diff --git a/convert_voc_to_txt.ipynb b/convert_voc_to_txt.ipynb
new file mode 100644
index 00000000..9a9a10ac
--- /dev/null
+++ b/convert_voc_to_txt.ipynb
@@ -0,0 +1,152 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.6"
+    },
+    "colab": {
+      "name": "convert_voc_to_txt.ipynb",
+      "provenance": [],
+      "collapsed_sections": [],
+      "include_colab_link": true
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "\"Open"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "tviGOxXhz6nP"
+      },
+      "source": [
+        "# generate txt from xml"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kEEDpFfK_HId",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "3befd898-e40b-4faf-f5d1-da64cd1a71d1"
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Mounted at /content/drive\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nUf9w9CZ_a0D"
+      },
+      "source": [
+        "FOLDER_PATH = '/content/drive/MyDrive/class/緯育醫學影像/Day5/keras-yolo3'"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "VeJo9ZEwz6nU"
+      },
+      "source": [
+        "import xml.etree.ElementTree as ET\n",
+        "import os\n",
+        "from os import getcwd\n",
+        "from glob import glob\n",
+        "\n",
+        "# Class names; an object's class id is its index in this list.\n",
+        "classes = ['WBC', 'Platelets', 'RBC']\n",
+        "\n",
+        "# One output line per image: <image path> <xmin,ymin,xmax,ymax,class_id> ...\n",
+        "# NOTE: relative path, so it resolves against the current working directory.\n",
+        "anno_path = os.path.join('drive/My Drive', 'anno.txt') # target .txt file\n",
+        "xml_paths = sorted(glob(os.path.join(FOLDER_PATH, 'dataset/train_anno/*.xml')))\n",
+        "\n",
+        "# `with` closes (and flushes) the output file even if an annotation fails to parse.\n",
+        "with open(anno_path, 'w', encoding='utf-8') as list_file:\n",
+        "    for path in xml_paths:\n",
+        "        # splitext keeps dots inside the stem ('a.b.xml' -> 'a.b'); splitting on '.'\n",
+        "        # and re-joining dropped them ('ab') and pointed at a non-existent image.\n",
+        "        file_id = os.path.splitext(os.path.basename(path))[0]\n",
+        "        print(file_id)\n",
+        "        # Parsing by path lets ElementTree open and close the file itself, so no\n",
+        "        # file handle is left open per annotation.\n",
+        "        root = ET.parse(path).getroot()\n",
+        "\n",
+        "        fields = [f'dataset/train_img/{file_id}.jpg']\n",
+        "        for obj in root.iter('object'):\n",
+        "            # Raises ValueError when the annotation names a class missing from `classes`.\n",
+        "            cls_id = classes.index(obj.find('name').text)\n",
+        "            xmlbox = obj.find('bndbox')\n",
+        "            b = [int(xmlbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]\n",
+        "            fields.append(','.join(str(a) for a in b) + ',' + str(cls_id))\n",
+        "        list_file.write(' '.join(fields) + '\\n')"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xYCAzMwTz6nX"
+      },
+      "source": [
+        "# .xml file count\n",
+        "len(glob(os.path.join(FOLDER_PATH, 'dataset/train_anno/*.xml')))"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0mfPpZ6Bz6na"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file