#!/usr/bin/env python
# SOSID - Switch On String Identifier for C/C++
#
# Copyright (C) 2007 Tim Janik <timj@gtk.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# If you have not received a copy of the GNU General Public License
# along with this program, see: http://www.gnu.org/licenses/

import sys, re, itertools

# Usage: sosid.py [output-filename] < c_or_cpp_source
VERSION="SOSID-0.4"

# create output file
if len (sys.argv) >= 2:
    fout_name = sys.argv[1]
    fout = open (fout_name, 'w')
else:
    fout = sys.stdout

# mark territory ;)
print >>fout, "/* Switch On String code generated by", VERSION, "*/"

# collect all string identifiers from input stream
string_list = re.findall (r'\bSOSID\s*\(\s*([a-zA-Z0-9_]+)\s*\)', sys.stdin.read())

# sort the keys so that a binary search can use 2-stage comparisons where the
# first order cmp is case insensitive, and the second order cmp can be case
# sensitive if needed. this allows us to use a single structure (SOSID_STRINGS)
# for case sensitive and case insensitive matches.
def case2cmp (a, b):    # make case a second order key
    r = cmp (a.lower(), b.lower())
    if r == 0:
        r = cmp (a, b)
    return r
string_list.sort (cmp = case2cmp)

# remove duplicate symbols that CPP would barf on
string_list = [k for k,g in itertools.groupby (string_list)]

# SOSID_STRINGS
print >>fout, r'#define SOSID_N_STRINGS                 (%u)' % len (string_list)
print >>fout, r'#define SOSID_STRINGS                   ("\000" ',
for s in string_list:
    print >>fout, '"' + s + r'\000"',
print >>fout, ")"

# SOSID_OFFSETS
ofs = 1
print >>fout, r'#define SOSID_OFFSETS                   { %u,' % ofs,
for s in string_list:
    ofs += len (s) + 1
    print >>fout, '%d,' % ofs,
print >>fout, '}'

# SOSID__*
ofs = 1
for s in string_list:
    print >>fout, '#define SOSID__%s' % s, ' ' * max (0, 23 - len (s)), '(%u)' % ofs
    ofs += len (s) + 1

# boilerplate
print >>fout, r"""
#define SOSID(identifier/**/)           SOSID__ ## identifier
#define SOSID_LOOKUP(STRING)            SOSID_LOOKUP__INTERNAL (STRING, 0)
#define SOSID_LOOKUP_ICASE(STRING)      SOSID_LOOKUP__INTERNAL (STRING, 1)
#define SOSID_LOOKUP__INTERNAL(STRING,ICASE)  ({                                \
  const char *sosid_haystack = SOSID_STRINGS, *sosid__needle = STRING;          \
  const int sosid_offsets[] = SOSID_OFFSETS;                                    \
  int sosid_n_elements = SOSID_N_STRINGS, sosid_offset = 0, sosid_result = 0;   \
  while (sosid_offset < sosid_n_elements)                                       \
    {                                                                           \
      int sosid_i = (sosid_offset + sosid_n_elements) >> 1;                     \
      const char *sosid_current = sosid_haystack + sosid_offsets[sosid_i];      \
      int sosid_cmp = strcasecmp (sosid__needle, sosid_current);                \
      if (!ICASE && sosid_cmp == 0)                                             \
        sosid_cmp = strcmp (sosid__needle, sosid_current);                      \
      if (sosid_cmp == 0)                                                       \
        { sosid_result = sosid_offsets[sosid_i]; break; }                       \
      else if (sosid_cmp < 0)                                                   \
        sosid_n_elements = sosid_i;                                             \
      else /* (sosid_cmp > 0) */                                                \
        sosid_offset = sosid_i + 1;                                             \
    }                                                                           \
  sosid_result;                                                                 \
})
"""

# clean up (and announce output file)
if fout != sys.stdout:
    fout.close()
    print fout_name
