about summary refs log tree commit diff
path: root/sources/languages
diff options
context:
space:
mode:
author Jochen Topf <jochen@topf.org> 2013-01-10 11:00:09 +0100
committer Jochen Topf <jochen@topf.org> 2013-01-10 11:00:13 +0100
commit 04f03cb543e0af844bc115b517a355a2de6abe47 (patch)
tree 31c4247257bb2eafd2f2949a7943cf1948a1f044 /sources/languages
parent 2d44735e2e2ae4514c7e2b70f516c8f1bbc0df0e (diff)
download taginfo-04f03cb543e0af844bc115b517a355a2de6abe47.tar
taginfo-04f03cb543e0af844bc115b517a355a2de6abe47.tar.gz
Add new source: Languages
Get IANA language subtag registry. Not yet used.
Diffstat (limited to 'sources/languages')
-rwxr-xr-x sources/languages/import_subtag_registry.rb 131
-rw-r--r-- sources/languages/post.sql 14
-rw-r--r-- sources/languages/pre.sql 22
-rwxr-xr-x sources/languages/update.sh 43
4 files changed, 210 insertions, 0 deletions
diff --git a/sources/languages/import_subtag_registry.rb b/sources/languages/import_subtag_registry.rb
new file mode 100755
index 0000000..636181e
--- /dev/null
+++ b/sources/languages/import_subtag_registry.rb
@@ -0,0 +1,131 @@
+#!/usr/bin/ruby
+#------------------------------------------------------------------------------
+#
+# Taginfo source: Languages
+#
+# import_subtag_registry.rb
+#
+#------------------------------------------------------------------------------
+#
+# Copyright (C) 2013 Jochen Topf <jochen@remote.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#------------------------------------------------------------------------------
+
+require 'rubygems'
+
+require 'sqlite3'
+
+# One record of the language subtag registry.
+#
+# Creating an instance appends it to a class-wide list that is available
+# through Subtag.entries, in creation order.
+#
+# The fields type, subtag, added, suppress_script and scope are plain
+# single-valued accessors. The description and prefix fields are
+# multi-valued: every call of the writer adds a value, and the reader returns
+# all values joined into one string.
+class Subtag
+
+    # Registry of all instances. This is a class-level instance variable
+    # rather than a @@class variable, which would be shared across the whole
+    # inheritance tree.
+    @entries = []
+
+    # Returns the array of all Subtag instances created so far.
+    def self.entries
+        @entries
+    end
+
+    attr_accessor :type, :subtag, :added, :suppress_script, :scope
+
+    # Registers the new instance in Subtag.entries and starts with empty
+    # description and prefix lists.
+    def initialize
+        Subtag.entries.push(self)
+        @descriptions = []
+        @prefixes = []
+    end
+
+    # Adds one description; earlier descriptions are kept.
+    def description=(value)
+        @descriptions.push(value)
+    end
+
+    # Returns all descriptions joined with '. ' (empty string if none).
+    def description
+        @descriptions.join('. ')
+    end
+
+    # Adds one prefix; earlier prefixes are kept.
+    def prefix=(value)
+        @prefixes.push(value)
+    end
+
+    # Returns all prefixes joined with ',' (empty string if none).
+    def prefix
+        @prefixes.join(',')
+    end
+
+end
+
+# Directory that holds both the registry file and the database. Defaults to
+# the current directory when no command line argument is given.
+dir = ARGV[0] || '.'
+
+db = SQLite3::Database.new(dir + '/taginfo-languages.db')
+
+registry_file = "#{dir}/language-subtag-registry"
+
+# Date taken from the "File-Date" line in front of the first record, if any.
+file_date = nil
+
+# Parse the registry file into Subtag objects (collected in Subtag.entries).
+#
+# Format as handled here:
+#  * a line consisting of '%%' starts a new record,
+#  * 'Key: value' lines set a field; the key is lowercased and every character
+#    outside a-z becomes '_' (so 'Suppress-Script' calls suppress_script=),
+#  * lines starting with whitespace continue the value of the previous field.
+#
+# Fields for which Subtag has no writer are ignored.
+begin
+    entry = nil
+
+    # Field currently being collected. It is only handed to the entry once
+    # it is complete, i.e. when the next field, the next record or the end of
+    # the file is reached, so that continuation lines can be unfolded into it.
+    pending_setter = nil
+    pending_value = nil
+
+    flush = lambda do
+        if entry && pending_setter && entry.respond_to?(pending_setter)
+            entry.send(pending_setter, pending_value)
+        end
+        pending_setter = nil
+        pending_value = nil
+    end
+
+    File.open(registry_file) do |file|
+        file.each do |line|
+            line.chomp!
+            if line == '%%'
+                flush.call
+                entry = Subtag.new
+            elsif entry.nil? && line =~ /^File-Date: ([0-9]{4}-[0-9]{2}-[0-9]{2})$/
+                file_date = $1
+            elsif line =~ /^\s+(.*)/
+                # Folded line: append to the value it continues, separated by
+                # a single space. A continuation without a preceding field
+                # is dropped.
+                continuation = $1.strip
+                if pending_setter && !continuation.empty?
+                    pending_value = "#{pending_value} #{continuation}"
+                end
+            else
+                flush.call
+                # Limit of 2 so that a value containing ': ' stays intact.
+                key, value = line.split(/: /, 2)
+                # Skip blank lines and lines without a 'Key: value' shape.
+                next if key.nil? || value.nil?
+                pending_setter = (key.downcase.gsub(/[^a-z]/, '_') + '=').to_sym
+                pending_value = value
+            end
+        end
+    end
+
+    # The last field of the file has no following line that would flush it.
+    flush.call
+end
+
+# Record types that are imported; records of any other type are skipped.
+SUBTAG_TYPES = %w( language script region variant ).freeze
+
+# Everything is written in one transaction. Database#transaction commits when
+# the block finishes and rolls back if it raises, so a failed import does not
+# leave a transaction open or half of the rows behind.
+db.transaction do
+
+    if file_date
+        db.execute("UPDATE source SET data_until=?", "#{file_date} 00:00:00")
+    end
+
+    Subtag.entries.each do |entry|
+        next unless SUBTAG_TYPES.include?(entry.type)
+
+        # Records whose only description is 'Private use'.
+        next if entry.description == 'Private use'
+
+        # Languages with scope 'special' or 'collection'.
+        next if entry.type == 'language' && (entry.scope == 'special' || entry.scope == 'collection')
+
+        # Scripts whose subtag starts with 'Z'.
+        next if entry.type == 'script' && entry.subtag =~ /^Z/
+
+        # Regions are only kept if the subtag is exactly two uppercase
+        # letters; anything else (for instance a numeric code) is skipped.
+        next if entry.type == 'region' && entry.subtag !~ /^[A-Z]{2}$/
+
+        db.execute("INSERT INTO subtags (stype, subtag, added, suppress_script, scope, description, prefix) VALUES (?, ?, ?, ?, ?, ?, ?)",
+            entry.type,
+            entry.subtag,
+            entry.added,
+            entry.suppress_script,
+            entry.scope,
+            entry.description,
+            entry.prefix
+        )
+    end
+
+end
+
+
+#-- THE END -------------------------------------------------------------------
diff --git a/sources/languages/post.sql b/sources/languages/post.sql
new file mode 100644
index 0000000..909bb9c
--- /dev/null
+++ b/sources/languages/post.sql
@@ -0,0 +1,14 @@
+--
+-- Taginfo source: Languages
+--
+-- post.sql
+--
+
+-- sqlite3 shell setting: stop at the first error instead of continuing.
+.bail ON
+
+
+
+-- Collect statistics for the query planner.
+ANALYZE;
+
+-- Record the end time of this update run. There is no WHERE clause, so every
+-- row of the source table is updated.
+UPDATE source SET update_end=datetime('now');
+
diff --git a/sources/languages/pre.sql b/sources/languages/pre.sql
new file mode 100644
index 0000000..c1a505c
--- /dev/null
+++ b/sources/languages/pre.sql
@@ -0,0 +1,22 @@
+--
+-- Taginfo source: Languages
+--
+-- pre.sql
+--
+
+-- sqlite3 shell setting: stop at the first error instead of continuing.
+.bail ON
+
+-- Register this source and record the start time of the update run.
+-- NOTE(review): the source table is not created in this file; presumably
+-- init.sql, which update.sh runs first, creates it -- confirm.
+INSERT INTO source (id, name, update_start) SELECT 'languages', 'Languages', datetime('now');
+
+-- Start from an empty subtags table on every run.
+DROP TABLE IF EXISTS subtags;
+
+-- One row per imported registry record. All columns are TEXT; the columns
+-- are filled by import_subtag_registry.rb.
+CREATE TABLE subtags (
+ stype TEXT,
+ subtag TEXT,
+ added TEXT,
+ suppress_script TEXT,
+ scope TEXT,
+ description TEXT,
+ prefix TEXT
+);
+
diff --git a/sources/languages/update.sh b/sources/languages/update.sh
new file mode 100755
index 0000000..26a374a
--- /dev/null
+++ b/sources/languages/update.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+#
+#  Taginfo source: Languages
+#
+#  update.sh DIR
+#
+#  Rebuilds DIR/taginfo-languages.db from the IANA language subtag registry,
+#  which is downloaded to DIR/language-subtag-registry.
+#
+#  The SQL files and the import script are referenced by relative path, so
+#  this script has to be started from the directory it lives in.
+#
+
+# Abort as soon as any command fails.
+set -e
+
+DIR=$1
+
+# Check the argument before any path is derived from it.
+if [ -z "$DIR" ]; then
+    echo "Usage: update.sh DIR" >&2
+    exit 1
+fi
+
+REGISTRY_URL="http://www.iana.org/assignments/language-subtag-registry"
+REGISTRY_FILE="$DIR/language-subtag-registry"
+DATABASE="$DIR/taginfo-languages.db"
+
+# Deliberately expanded unquoted below so that it splits into command and
+# format argument.
+DATECMD='date +%Y-%m-%dT%H:%M:%S'
+
+echo "`$DATECMD` Start languages..."
+
+rm -f "$DATABASE"
+
+echo "`$DATECMD` Running init.sql..."
+sqlite3 "$DATABASE" <../init.sql
+
+echo "`$DATECMD` Running pre.sql..."
+sqlite3 "$DATABASE" <pre.sql
+
+echo "`$DATECMD` Getting subtag registry..."
+# --time-cond FILE: only download if the remote copy is newer than the local one.
+# --fail:           exit non-zero on an HTTP error instead of saving the
+#                   error page as the registry file.
+# --location:       follow redirects.
+# --show-error:     still print errors although --silent is given.
+curl --silent --show-error --fail --location \
+    --time-cond "$REGISTRY_FILE" --output "$REGISTRY_FILE" "$REGISTRY_URL"
+
+echo "`$DATECMD` Running import..."
+./import_subtag_registry.rb "$DIR"
+
+echo "`$DATECMD` Running post.sql..."
+sqlite3 "$DATABASE" <post.sql
+
+echo "`$DATECMD` Done languages."
+