郵便番号のCSVデータを nodejsを使って適当にpgsqlに登録してみる

ありがちなんだけど 郵便番号のCSVデータを nodejsを使って登録してみる(手抜きですが)

データのダウンロードはこちら 郵便番号検索

コマンドは以下を参照

ロール(ユーザー)の作成

まずはDBを作成する

createdb --locale=C --encoding=UTF-8 --template=template0 -U postgres demo
create user demo with login password 'password'

とりあえず試してみるだけなので、テーブルも手抜き(笑

create table postal(
    postal   varchar(8)  null,
    pref     varchar(20) null,
    city     varchar(40) null,
    adrline1 varchar(40) null,
    adrline2 varchar(40) null,
    adrline4 varchar(40) null
);

環境設定

PS > npm init
This utility will walk you through creating a package.json file.
It only covers the most common items, and tries to guess sensible defaults.

See `npm help json` for definitive documentation on these fields
and exactly what they do.

Use `npm install <pkg>` afterwards to install a package and
save it as a dependency in the package.json file.

Press ^C at any time to quit.
package name: (nodedb)
version: (1.0.0)
description:
entry point: (nodepgpostal.js)
test command:
git repository:
keywords:
author:
license: (ISC)
About to write to \package.json:

{
  "name": "nodedb",
  "version": "1.0.0",
  "description": "",
  "main": "nodepgpostal.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC"
}


Is this ok? (yes)
PS > npm install async
npm notice created a lockfile as package-lock.json. You should commit this file.
npm WARN nodedb@1.0.0 No description
npm WARN nodedb@1.0.0 No repository field.

\+ async@2.5.0
added 2 packages in 12.392s
PS > npm install csv
npm WARN nodedb@1.0.0 No description
npm WARN nodedb@1.0.0 No repository field.

\+ csv@1.1.1
added 6 packages in 4.61s
PS > npm install pg
npm WARN nodedb@1.0.0 No description
npm WARN nodedb@1.0.0 No repository field.

\+ pg@7.1.2
added 17 packages in 5.001s
PS > npm install fs
npm WARN nodedb@1.0.0 No description
npm WARN nodedb@1.0.0 No repository field.

\+ fs@0.0.1-security
added 1 package in 2.406s
PS > npm install iconv-lite
npm WARN nodedb@1.0.0 No description
npm WARN nodedb@1.0.0 No repository field.

\+ iconv-lite@0.4.18
added 1 package in 2.298s
PS >

地元県だと名前感あるので 17ISHIKA.CSV を使う

var async = require('async');
var csv   = require('csv');
var fs    = require('fs');
var iconv = require('iconv-lite');
var pg    = require('pg');

var config = {
  user    : 'postgres',     //env var: PGUSER
  database: 'demo',         //env var: PGDATABASE
  password: 'password',     //env var: PGPASSWORD
  host    : 'localhost',    // Server hosting the postgres database
  port    : 5432,           //env var: PGPORT
  max     : 10,             // max number of clients in the pool
  idleTimeoutMillis: 30000, // how long a client is allowed to remain idle before being closed
};

var pool = new pg.Pool(config);
iconv.skipDecodeWarning = true;

async.forEachSeries( fs.readFileSync('17ISHIKA.CSV', 'binary').toString().split('\n'), function (line, cb) {
    line = iconv.decode(line, 'Shift_JIS');   // Shift_JIS -> utf8
    if (line.length > 0) {
        csv.parse(line, function(err, line) {
            csv.transform(line, function(line){
                //console.log(line);
                //console.log(line[2] + ':' + line[7] + ':' + line[8]);
                return line;
            }, function(err, line) {
                //console.log(line);
                //console.log(line[0][2] + ':' + line[0][7] + ':' + line[0][8]);
                if (err) {
                    console.error('error connecting: ' + err.stack);
                    return;
                }
                pool.connect(function(err, con, done) {
                    var sql = "insert into postal (postal, city, adrline1 ) values ('"+line[0][2]+"','"+line[0][7]+"','"+line[0][8]+"');"
                   // console.log( "insert:" +  sql);
                    con.query(sql, function (err, result) {
                        if (err) {
                            console.log("err:" +  sql);
                        }
                        done();
                        cb();
                    });
                });
            });
        });
    } else {
        console.log('END: ');
        pool.end();
        process.exit(0);
    }
}, function() { // ここには止まらない
    pool.end();
    console.log('end');
    process.exit(0);
});

select count(*) from postal -> 2552

KEN_ALL.CSV を9回分コピーして 120万行までは動くのを確認(遅いけど)

データを洗い出すロジックを作成して、ローダーに喰わせるのが正解だろうなぁ・・・・