1 files changed, 248 insertions, 140 deletions
diff --git a/tagging/utils.py b/tagging/utils.py
index af50bac..9bfd07f 100644
--- a/tagging/utils.py
+++ b/tagging/utils.py
@@ -1,140 +1,248 @@
-import math
-import re
-import types
-
-from django.db.models.query import QuerySet
-from django.utils.encoding import force_unicode, smart_unicode
-
-# Python 2.3 compatibility
-if not hasattr(__builtins__, 'set'):
-    from sets import Set as set
-
-find_tag_re = re.compile(r'[-\w]+', re.U)
-
-def get_tag_name_list(tag_names):
-    """
-    Finds tag names in the given string and return them as a list.
-    """
-    if tag_names is not None:
-        tag_names = force_unicode(tag_names)
-    results = find_tag_re.findall(tag_names or '')
-    return results
-
-def get_tag_list(tags):
-    """
-    Utility function for accepting tag input in a flexible manner.
-
-    If a ``Tag`` object is given, it will be returned in a list as
-    its single occupant.
-
-    If given, the tag names in the following will be used to create a
-    ``Tag`` ``QuerySet``:
-
-        * A string, which may contain multiple tag names.
-        * A list or tuple of strings corresponding to tag names.
-        * A list or tuple of integers corresponding to tag ids.
-
-    If given, the following will be returned as-is:
-
-        * A list or tuple of ``Tag`` objects.
-        * A ``Tag`` ``QuerySet``.
-    """
-    from tagging.models import Tag
-    if isinstance(tags, Tag):
-        return [tags]
-    elif isinstance(tags, QuerySet) and tags.model is Tag:
-        return tags
-    elif isinstance(tags, types.StringTypes):
-        return Tag.objects.filter(name__in=get_tag_name_list(tags))
-    elif isinstance(tags, (types.ListType, types.TupleType)):
-        if len(tags) == 0:
-            return tags
-        contents = set()
-        for item in tags:
-            if isinstance(item, types.StringTypes):
-                contents.add('string')
-            elif isinstance(item, Tag):
-                contents.add('tag')
-            elif isinstance(item, (types.IntType, types.LongType)):
-                contents.add('int')
-        if len(contents) == 1:
-            if 'string' in contents:
-                return Tag.objects.filter(name__in=[smart_unicode(tag) \
-                                                    for tag in tags])
-            elif 'tag' in contents:
-                return tags
-            elif 'int' in contents:
-                return Tag.objects.filter(id__in=tags)
-        else:
-            raise ValueError(u'If a list or tuple of tags is provided, they must all be tag names, Tag objects or Tag ids.')
-    else:
-        raise ValueError(u'The tag input given was invalid.')
-
-def get_tag(tag):
-    """
-    Utility function for accepting single tag input in a flexible
-    manner.
-
-    If a ``Tag`` object is given it will be returned as-is; if a
-    string or integer are given, they will be used to lookup the
-    appropriate ``Tag``.
-
-    If no matching tag can be found, ``None`` will be returned.
-    """
-    from tagging.models import Tag
-    if isinstance(tag, Tag):
-        return tag
-
-    try:
-        if isinstance(tag, types.StringTypes):
-            return Tag.objects.get(name=tag)
-        elif isinstance(tag, (types.IntType, types.LongType)):
-            return Tag.objects.get(id=tag)
-    except Tag.DoesNotExist:
-        pass
-
-    return None
-
-# Font size distribution algorithms
-LOGARITHMIC, LINEAR = 1, 2
-
-def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
-    """
-    Add a ``font_size`` attribute to each tag according to the
-    frequency of its use, as indicated by its ``count``
-    attribute.
-
-    ``steps`` defines the range of font sizes - ``font_size`` will
-    be an integer between 1 and ``steps`` (inclusive).
-
-    ``distribution`` defines the type of font size distribution
-    algorithm which will be used - logarithmic or linear. It must be
-    either ``tagging.utils.LOGARITHMIC`` or ``tagging.utils.LINEAR``.
-
-    The algorithm to scale the tags logarithmically is from a
-    blog post by Anders Pearson, 'Scaling tag clouds':
-    http://thraxil.com/users/anders/posts/2005/12/13/scaling-tag-clouds/
-    """
-    if len(tags) > 0:
-        thresholds = []
-        counts = [tag.count for tag in tags]
-        max_weight = float(max(counts))
-        min_weight = float(min(counts))
-
-        # Set up the appropriate thresholds
-        if distribution == LOGARITHMIC:
-            thresholds = [math.pow(max_weight - min_weight + 1, float(i) / float(steps)) \
-                          for i in range(1, steps + 1)]
-        elif distribution == LINEAR:
-            delta = (max_weight - min_weight) / float(steps)
-            thresholds = [min_weight + i * delta for i in range(1, steps + 1)]
-        else:
-            raise ValueError(u'Invalid font size distribution algorithm specified: %s.' % distribution)
-
-        for tag in tags:
-            font_set = False
-            for i in range(steps):
-                if not font_set and tag.count <= thresholds[i]:
-                    tag.font_size = i + 1
-                    font_set = True
-    return tags
+"""
+Tagging utilities - from user tag input parsing to tag cloud
+calculation.
+"""
+import math
+import types
+
+from django.db.models.query import QuerySet
+from django.utils.encoding import force_unicode
+from django.utils.translation import ugettext as _
+try:
+    set  # probe the name itself: ``__builtins__`` may be a dict, so hasattr() on it is unreliable
+except NameError:  # Python 2.3 compatibility - no builtin ``set`` type
+    from sets import Set as set
+
+def parse_tag_input(input):
+    """
+    Parses user-entered tag input into tag names. Text inside double
+    quotes is one tag (and may contain commas); the rest is split on
+    commas if any unquoted comma was seen, otherwise on spaces.
+
+    Returns a sorted list of unique tag names; empty input gives ``[]``.
+    """
+    if not input:
+        return []
+
+    input = force_unicode(input)
+
+    # Fast path: with no commas or double quotes anywhere in the input
+    # there is nothing to disambiguate, so the tag names are simply the
+    # space-separated words.
+    if u',' not in input and u'"' not in input:
+        words = list(set(split_strip(input, u' ')))
+        words.sort()
+        return words
+
+    words = []
+    buffer = []
+    # Unquoted sections are collected here and only split at the end,
+    # once we know whether any unquoted comma was seen.
+    to_be_split = []
+    saw_loose_comma = False
+    open_quote = False
+    i = iter(input)
+    try:
+        while 1:
+            c = i.next()
+            if c == u'"':
+                if buffer:
+                    to_be_split.append(u''.join(buffer))
+                    buffer = []
+                # Consume up to the matching quote as a single tag name
+                open_quote = True
+                c = i.next()
+                while c != u'"':
+                    buffer.append(c)
+                    c = i.next()
+                if buffer:
+                    word = u''.join(buffer).strip()
+                    if word:
+                        words.append(word)
+                    buffer = []
+                open_quote = False
+            else:
+                if not saw_loose_comma and c == u',':
+                    saw_loose_comma = True
+                buffer.append(c)
+    except StopIteration:
+        # Input exhausted. Text left over from a quote which was never
+        # closed is treated as unquoted, so its commas count as loose.
+        if buffer:
+            if open_quote and u',' in buffer:
+                saw_loose_comma = True
+            to_be_split.append(u''.join(buffer))
+    if to_be_split:
+        if saw_loose_comma:
+            delimiter = u','
+        else:
+            delimiter = u' '
+        for chunk in to_be_split:
+            words.extend(split_strip(chunk, delimiter))
+    words = list(set(words))
+    words.sort()
+    return words
+
+def split_strip(input, delimiter=u','):
+    """Cuts ``input`` at every ``delimiter``, trims whitespace from each
+    piece and returns the pieces which are not empty, as a list."""
+    pieces = []
+    if input:
+        for chunk in input.split(delimiter):
+            chunk = chunk.strip()
+            if chunk:
+                pieces.append(chunk)
+    return pieces
+
+def edit_string_for_tags(tags):
+    """
+    Given a list of ``Tag`` instances, creates a string representation
+    of the list suitable for editing by the user, such that submitting
+    the string back unchanged will give the same list of tags.
+
+    Tag names which contain commas will be double quoted. If any name
+    which isn't quoted contains a space, names are joined with ``, ``,
+    otherwise with a single space. A lone name containing a space is
+    double quoted too, since unquoted it would be parsed as several
+    one-word tags. Double quotes inside names are not escaped.
+    """
+    names = []
+    use_commas = False
+    for tag in tags:
+        name = tag.name
+        if u',' in name:
+            names.append('"%s"' % name)
+            continue
+        if u' ' in name:
+            use_commas = True
+        names.append(name)
+    if use_commas and len(names) == 1:
+        # With a single name no comma delimiter is ever emitted, so
+        # quoting is the only way to keep its words together.
+        return u'"%s"' % names[0]
+    if use_commas:
+        return u', '.join(names)
+    return u' '.join(names)
+
+def get_tag_list(tags):
+    """
+    Normalises the accepted forms of tag input into a list or
+    ``QuerySet`` of ``Tag`` objects.
+
+    Returned as-is:
+
+       * A ``Tag`` ``QuerySet``.
+       * A list or tuple of ``Tag`` objects.
+       * An empty list or tuple.
+
+    Converted:
+
+       * A ``Tag`` object - wrapped in a single-item list.
+       * A string, which may contain multiple tag names - a ``Tag``
+         ``QuerySet`` filtered on the parsed names.
+       * A list or tuple of strings or integers - a ``Tag``
+         ``QuerySet`` filtered on those names or ids respectively.
+
+    Other input raises ``ValueError``, as does mixing kinds of item.
+    """
+    from tagging.models import Tag
+    if isinstance(tags, Tag):
+        return [tags]
+    if isinstance(tags, QuerySet) and tags.model is Tag:
+        return tags
+    if isinstance(tags, types.StringTypes):
+        return Tag.objects.filter(name__in=parse_tag_input(tags))
+    if not isinstance(tags, (types.ListType, types.TupleType)):
+        raise ValueError(_('The tag input given was invalid.'))
+    if len(tags) == 0:
+        return tags
+
+    # Work out which recognised kinds of item the sequence holds
+    kinds = set()
+    for entry in tags:
+        if isinstance(entry, types.StringTypes):
+            kinds.add('string')
+        elif isinstance(entry, Tag):
+            kinds.add('tag')
+        elif isinstance(entry, (types.IntType, types.LongType)):
+            kinds.add('int')
+    if len(kinds) != 1:
+        raise ValueError(_('If a list or tuple of tags is provided, they must all be tag names, Tag objects or Tag ids.'))
+    if 'string' in kinds:
+        names = [force_unicode(entry) for entry in tags]
+        return Tag.objects.filter(name__in=names)
+    if 'tag' in kinds:
+        return tags
+    return Tag.objects.filter(id__in=tags)
+
+def get_tag(tag):
+    """
+    Resolves single tag input, given in any of several forms, to a
+    ``Tag``.
+
+    A ``Tag`` object is handed back untouched. A string is looked up
+    as a tag name and an integer as a tag id.
+
+    ``None`` is returned if the lookup finds no matching tag, or if
+    the input is of any other type.
+    """
+    from tagging.models import Tag
+    if isinstance(tag, Tag):
+        return tag
+    if isinstance(tag, types.StringTypes):
+        lookup = {'name': tag}
+    elif isinstance(tag, (types.IntType, types.LongType)):
+        lookup = {'id': tag}
+    else:
+        return None
+    try:
+        return Tag.objects.get(**lookup)
+    except Tag.DoesNotExist:
+        return None
+
+# Font size distribution algorithms accepted by ``calculate_cloud``
+LOGARITHMIC, LINEAR = 1, 2
+
+def _calculate_thresholds(min_weight, max_weight, steps):
+    band_width = (max_weight - min_weight) / float(steps)  # one equal slice of the range
+    return [min_weight + band * band_width for band in range(1, steps + 1)]
+
+def _calculate_tag_weight(weight, max_weight, distribution):
+    """
+    Weight used to rank a tag: ``weight`` itself, or for LOGARITHMIC a
+    log scaling adapted from Sven Fuchs' `Tag Cloud`_ Mephisto plugin.
+
+    .. _`Tag Cloud`: http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud
+    """
+    if distribution == LINEAR or max_weight == 1:
+        return weight
+    if distribution != LOGARITHMIC:
+        raise ValueError(_('Invalid distribution algorithm specified: %s.') % distribution)
+    return math.log(weight) * max_weight / math.log(max_weight)
+
+def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
+    """
+    Add a ``font_size`` attribute to each tag according to the
+    frequency of its use, as indicated by its ``count`` attribute.
+
+    ``steps`` defines the range of font sizes - ``font_size`` will
+    be an integer between 1 and ``steps`` (inclusive).
+
+    ``distribution`` must be one of ``tagging.utils.LOGARITHMIC`` or
+    ``tagging.utils.LINEAR``. Returns ``tags``.
+    """
+    if len(tags) > 0:
+        counts = [tag.count for tag in tags]
+        min_weight = float(min(counts))
+        max_weight = float(max(counts))
+        thresholds = _calculate_thresholds(min_weight, max_weight, steps)
+        for tag in tags:
+            tag_weight = _calculate_tag_weight(tag.count, max_weight, distribution)
+            # Default to the largest size: float rounding can leave the
+            # heaviest weight a hair above the final threshold.
+            tag.font_size = steps
+            for i in range(steps):
+                if tag_weight <= thresholds[i]:
+                    tag.font_size = i + 1
+                    break
+    return tags