aboutsummaryrefslogtreecommitdiff
path: root/tagging/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'tagging/utils.py')
-rw-r--r--tagging/utils.py511
1 files changed, 263 insertions, 248 deletions
diff --git a/tagging/utils.py b/tagging/utils.py
index 9bfd07f..e89bab0 100644
--- a/tagging/utils.py
+++ b/tagging/utils.py
@@ -1,248 +1,263 @@
-"""
-Tagging utilities - from user tag input parsing to tag cloud
-calculation.
-"""
-import math
-import types
-
-from django.db.models.query import QuerySet
-from django.utils.encoding import force_unicode
-from django.utils.translation import ugettext as _
-
-# Python 2.3 compatibility
-if not hasattr(__builtins__, 'set'):
- from sets import Set as set
-
-def parse_tag_input(input):
- """
- Parses tag input, with multiple word input being activated and
- delineated by commas and double quotes. Quotes take precedence, so
- they may contain commas.
-
- Returns a sorted list of unique tag names.
- """
- if not input:
- return []
-
- input = force_unicode(input)
-
- # Special case - if there are no commas or double quotes in the
- # input, we don't *do* a recall... I mean, we know we only need to
- # split on spaces.
- if u',' not in input and u'"' not in input:
- words = list(set(split_strip(input, u' ')))
- words.sort()
- return words
-
- words = []
- buffer = []
- # Defer splitting of non-quoted sections until we know if there are
- # any unquoted commas.
- to_be_split = []
- saw_loose_comma = False
- open_quote = False
- i = iter(input)
- try:
- while 1:
- c = i.next()
- if c == u'"':
- if buffer:
- to_be_split.append(u''.join(buffer))
- buffer = []
- # Find the matching quote
- open_quote = True
- c = i.next()
- while c != u'"':
- buffer.append(c)
- c = i.next()
- if buffer:
- word = u''.join(buffer).strip()
- if word:
- words.append(word)
- buffer = []
- open_quote = False
- else:
- if not saw_loose_comma and c == u',':
- saw_loose_comma = True
- buffer.append(c)
- except StopIteration:
- # If we were parsing an open quote which was never closed treat
- # the buffer as unquoted.
- if buffer:
- if open_quote and u',' in buffer:
- saw_loose_comma = True
- to_be_split.append(u''.join(buffer))
- if to_be_split:
- if saw_loose_comma:
- delimiter = u','
- else:
- delimiter = u' '
- for chunk in to_be_split:
- words.extend(split_strip(chunk, delimiter))
- words = list(set(words))
- words.sort()
- return words
-
-def split_strip(input, delimiter=u','):
- """
- Splits ``input`` on ``delimiter``, stripping each resulting string
- and returning a list of non-empty strings.
- """
- if not input:
- return []
-
- words = [w.strip() for w in input.split(delimiter)]
- return [w for w in words if w]
-
-def edit_string_for_tags(tags):
- """
- Given list of ``Tag`` instances, creates a string representation of
- the list suitable for editing by the user, such that submitting the
- given string representation back without changing it will give the
- same list of tags.
-
- Tag names which contain commas will be double quoted.
-
- If any tag name which isn't being quoted contains whitespace, the
- resulting string of tag names will be comma-delimited, otherwise
- it will be space-delimited.
- """
- names = []
- use_commas = False
- for tag in tags:
- name = tag.name
- if u',' in name:
- names.append('"%s"' % name)
- continue
- elif u' ' in name:
- if not use_commas:
- use_commas = True
- names.append(name)
- if use_commas:
- glue = u', '
- else:
- glue = u' '
- return glue.join(names)
-
-def get_tag_list(tags):
- """
- Utility function for accepting tag input in a flexible manner.
-
- If a ``Tag`` object is given, it will be returned in a list as
- its single occupant.
-
- If given, the tag names in the following will be used to create a
- ``Tag`` ``QuerySet``:
-
- * A string, which may contain multiple tag names.
- * A list or tuple of strings corresponding to tag names.
- * A list or tuple of integers corresponding to tag ids.
-
- If given, the following will be returned as-is:
-
- * A list or tuple of ``Tag`` objects.
- * A ``Tag`` ``QuerySet``.
-
- """
- from tagging.models import Tag
- if isinstance(tags, Tag):
- return [tags]
- elif isinstance(tags, QuerySet) and tags.model is Tag:
- return tags
- elif isinstance(tags, types.StringTypes):
- return Tag.objects.filter(name__in=parse_tag_input(tags))
- elif isinstance(tags, (types.ListType, types.TupleType)):
- if len(tags) == 0:
- return tags
- contents = set()
- for item in tags:
- if isinstance(item, types.StringTypes):
- contents.add('string')
- elif isinstance(item, Tag):
- contents.add('tag')
- elif isinstance(item, (types.IntType, types.LongType)):
- contents.add('int')
- if len(contents) == 1:
- if 'string' in contents:
- return Tag.objects.filter(name__in=[force_unicode(tag) \
- for tag in tags])
- elif 'tag' in contents:
- return tags
- elif 'int' in contents:
- return Tag.objects.filter(id__in=tags)
- else:
- raise ValueError(_('If a list or tuple of tags is provided, they must all be tag names, Tag objects or Tag ids.'))
- else:
- raise ValueError(_('The tag input given was invalid.'))
-
-def get_tag(tag):
- """
- Utility function for accepting single tag input in a flexible
- manner.
-
- If a ``Tag`` object is given it will be returned as-is; if a
- string or integer are given, they will be used to lookup the
- appropriate ``Tag``.
-
- If no matching tag can be found, ``None`` will be returned.
- """
- from tagging.models import Tag
- if isinstance(tag, Tag):
- return tag
-
- try:
- if isinstance(tag, types.StringTypes):
- return Tag.objects.get(name=tag)
- elif isinstance(tag, (types.IntType, types.LongType)):
- return Tag.objects.get(id=tag)
- except Tag.DoesNotExist:
- pass
-
- return None
-
-# Font size distribution algorithms
-LOGARITHMIC, LINEAR = 1, 2
-
-def _calculate_thresholds(min_weight, max_weight, steps):
- delta = (max_weight - min_weight) / float(steps)
- return [min_weight + i * delta for i in range(1, steps + 1)]
-
-def _calculate_tag_weight(weight, max_weight, distribution):
- """
- Logarithmic tag weight calculation is based on code from the
- `Tag Cloud`_ plugin for Mephisto, by Sven Fuchs.
-
- .. _`Tag Cloud`: http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud
- """
- if distribution == LINEAR or max_weight == 1:
- return weight
- elif distribution == LOGARITHMIC:
- return math.log(weight) * max_weight / math.log(max_weight)
- raise ValueError(_('Invalid distribution algorithm specified: %s.') % distribution)
-
-def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
- """
- Add a ``font_size`` attribute to each tag according to the
- frequency of its use, as indicated by its ``count``
- attribute.
-
- ``steps`` defines the range of font sizes - ``font_size`` will
- be an integer between 1 and ``steps`` (inclusive).
-
- ``distribution`` defines the type of font size distribution
- algorithm which will be used - logarithmic or linear. It must be
- one of ``tagging.utils.LOGARITHMIC`` or ``tagging.utils.LINEAR``.
- """
- if len(tags) > 0:
- counts = [tag.count for tag in tags]
- min_weight = float(min(counts))
- max_weight = float(max(counts))
- thresholds = _calculate_thresholds(min_weight, max_weight, steps)
- for tag in tags:
- font_set = False
- tag_weight = _calculate_tag_weight(tag.count, max_weight, distribution)
- for i in range(steps):
- if not font_set and tag_weight <= thresholds[i]:
- tag.font_size = i + 1
- font_set = True
- return tags
+"""
+Tagging utilities - from user tag input parsing to tag cloud
+calculation.
+"""
+import math
+import types
+
+from django.db.models.query import QuerySet
+from django.utils.encoding import force_unicode
+from django.utils.translation import ugettext as _
+
+# Python 2.3 compatibility
+try:
+ set
+except NameError:
+ from sets import Set as set
+
def parse_tag_input(input):
    """
    Turns a user-supplied tag string into a sorted list of unique tag
    names.

    Sections wrapped in double quotes are taken as single tag names, so
    they may contain commas. Everything outside of quotes is split on
    commas if at least one comma was seen there, and on spaces
    otherwise.

    An empty (falsy) ``input`` yields an empty list.
    """
    if not input:
        return []

    input = force_unicode(input)

    # Fast path: without any commas or double quotes the only possible
    # separator is the space character.
    if u',' not in input and u'"' not in input:
        simple = list(set(split_strip(input, u' ')))
        simple.sort()
        return simple

    tag_names = []
    pending = []
    # Unquoted chunks are collected here and only split once the whole
    # input has been scanned, as the delimiter depends on whether any
    # unquoted comma turns up.
    unquoted_chunks = []
    comma_outside_quotes = False
    inside_quote = False
    chars = iter(input)
    try:
        while True:
            char = chars.next()
            if char != u'"':
                # Ordinary character outside of a quoted section.
                if char == u',':
                    comma_outside_quotes = True
                pending.append(char)
                continue

            # Opening quote: whatever was gathered so far is an
            # unquoted chunk.
            if pending:
                unquoted_chunks.append(u''.join(pending))
                pending = []
            inside_quote = True
            # Consume characters up to the closing quote; running out
            # of input here raises StopIteration with ``inside_quote``
            # still set.
            char = chars.next()
            while char != u'"':
                pending.append(char)
                char = chars.next()
            if pending:
                quoted = u''.join(pending).strip()
                if quoted:
                    tag_names.append(quoted)
                pending = []
            inside_quote = False
    except StopIteration:
        # End of input. Text left over from a quote which was never
        # closed is handled as if it had not been quoted at all.
        if pending:
            if inside_quote and u',' in pending:
                comma_outside_quotes = True
            unquoted_chunks.append(u''.join(pending))

    if unquoted_chunks:
        delimiter = u' '
        if comma_outside_quotes:
            delimiter = u','
        for chunk in unquoted_chunks:
            tag_names.extend(split_strip(chunk, delimiter))

    unique_names = list(set(tag_names))
    unique_names.sort()
    return unique_names
+
def split_strip(input, delimiter=u','):
    """
    Breaks ``input`` apart at every ``delimiter`` and returns the
    pieces with surrounding whitespace removed, leaving out any piece
    which is empty after stripping.

    An empty (falsy) ``input`` yields an empty list.
    """
    pieces = []
    if input:
        for piece in input.split(delimiter):
            piece = piece.strip()
            if piece:
                pieces.append(piece)
    return pieces
+
def edit_string_for_tags(tags):
    """
    Given list of ``Tag`` instances, creates a string representation of
    the list suitable for editing by the user, such that submitting the
    given string representation back without changing it will give the
    same list of tags.

    Tag names which contain commas will be double quoted.

    If any tag name which isn't being quoted contains a space, the
    resulting string of tag names will be comma-delimited, otherwise
    it will be space-delimited.

    A lone tag name which contains a space is double quoted as well:
    with no second name there is no comma in the result, so left
    unquoted it would be split on spaces into several tag names when
    parsed again.
    """
    names = []
    use_commas = False
    for tag in tags:
        name = tag.name
        if u',' in name:
            # Quoted names never influence the choice of delimiter.
            names.append('"%s"' % name)
        else:
            if u' ' in name:
                use_commas = True
            names.append(name)
    if use_commas:
        glue = u', '
    else:
        glue = u' '
    result = glue.join(names)
    if use_commas and len(names) == 1:
        # The only name contains a space and the glue (with its comma)
        # was never used - quote it to keep it in one piece.
        result = '"%s"' % result
    return result
+
def get_queryset_and_model(queryset_or_model):
    """
    Normalises a ``QuerySet`` or ``Model`` argument into a two-tuple of
    (queryset, model).

    An argument which has a ``model`` attribute is treated as a
    ``QuerySet`` and returned together with that attribute. Anything
    else is treated as a ``Model``, for which the ``QuerySet`` is
    obtained from ``_default_manager.all()``.
    """
    try:
        model = queryset_or_model.model
    except AttributeError:
        # No ``model`` attribute - the argument is the model itself.
        model = queryset_or_model
        queryset = model._default_manager.all()
    else:
        queryset = queryset_or_model
    return queryset, model
+
def get_tag_list(tags):
    """
    Accepts tag input in a number of different forms.

    Returned unchanged:

       * A ``Tag`` ``QuerySet``.
       * A list or tuple of ``Tag`` objects.
       * An empty list or tuple.

    A single ``Tag`` object is returned wrapped in a list.

    Used to build a ``Tag`` ``QuerySet``:

       * A string, which is parsed into tag names.
       * A list or tuple of strings, taken as tag names.
       * A list or tuple of integers, taken as tag ids.

    A ``ValueError`` is raised for a list or tuple in which exactly one
    of the kinds above (string, ``Tag``, integer) could not be
    recognised, and for any other type of input.
    """
    from tagging.models import Tag

    if isinstance(tags, Tag):
        return [tags]
    if isinstance(tags, QuerySet) and tags.model is Tag:
        return tags
    if isinstance(tags, types.StringTypes):
        return Tag.objects.filter(name__in=parse_tag_input(tags))
    if not isinstance(tags, (types.ListType, types.TupleType)):
        raise ValueError(_('The tag input given was invalid.'))

    if len(tags) == 0:
        return tags

    # Note which kinds of item are present; items of any other type
    # are not recorded.
    kinds = set()
    for item in tags:
        if isinstance(item, types.StringTypes):
            kinds.add('string')
        elif isinstance(item, Tag):
            kinds.add('tag')
        elif isinstance(item, (types.IntType, types.LongType)):
            kinds.add('int')

    if len(kinds) != 1:
        raise ValueError(_('If a list or tuple of tags is provided, they must all be tag names, Tag objects or Tag ids.'))

    if 'string' in kinds:
        names = [force_unicode(tag) for tag in tags]
        return Tag.objects.filter(name__in=names)
    if 'tag' in kinds:
        return tags
    # The single recorded kind can only be 'int' at this point.
    return Tag.objects.filter(id__in=tags)
+
def get_tag(tag):
    """
    Accepts single tag input in a number of different forms.

    A ``Tag`` object is returned unchanged. A string is used to look a
    ``Tag`` up by name and an integer to look one up by id.

    ``None`` is returned when the lookup finds no ``Tag`` or when the
    input is of any other type.
    """
    from tagging.models import Tag

    if isinstance(tag, Tag):
        return tag

    # Work out which field to look the tag up by.
    if isinstance(tag, types.StringTypes):
        lookup = {'name': tag}
    elif isinstance(tag, (types.IntType, types.LongType)):
        lookup = {'id': tag}
    else:
        return None

    try:
        return Tag.objects.get(**lookup)
    except Tag.DoesNotExist:
        return None
+
# Font size distribution algorithms
LOGARITHMIC, LINEAR = 1, 2

def _calculate_thresholds(min_weight, max_weight, steps):
    """
    Returns a list of ``steps`` evenly spaced upper bounds which divide
    the range from ``min_weight`` to ``max_weight``; the last one is
    ``min_weight + steps * delta``, nominally ``max_weight``.
    """
    delta = (max_weight - min_weight) / float(steps)
    return [min_weight + i * delta for i in range(1, steps + 1)]

def _calculate_tag_weight(weight, max_weight, distribution):
    """
    Returns ``weight`` scaled according to ``distribution``.

    For ``LINEAR`` the weight is returned unchanged. The same goes for
    a ``max_weight`` of 1, as ``math.log(1)`` is zero and cannot be
    divided by.

    Raises ``ValueError`` if ``distribution`` is neither ``LINEAR`` nor
    ``LOGARITHMIC``.

    Logarithmic tag weight calculation is based on code from the
    `Tag Cloud`_ plugin for Mephisto, by Sven Fuchs.

    .. _`Tag Cloud`: http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud
    """
    if distribution == LINEAR or max_weight == 1:
        return weight
    elif distribution == LOGARITHMIC:
        return math.log(weight) * max_weight / math.log(max_weight)
    raise ValueError(_('Invalid distribution algorithm specified: %s.') % distribution)

def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
    """
    Add a ``font_size`` attribute to each tag according to the
    frequency of its use, as indicated by its ``count``
    attribute.

    ``steps`` defines the range of font sizes - ``font_size`` will
    be an integer between 1 and ``steps`` (inclusive).

    ``distribution`` defines the type of font size distribution
    algorithm which will be used - logarithmic or linear. It must be
    one of ``tagging.utils.LOGARITHMIC`` or ``tagging.utils.LINEAR``.

    Returns ``tags``, which is modified in place.
    """
    if len(tags) > 0:
        counts = [tag.count for tag in tags]
        min_weight = float(min(counts))
        max_weight = float(max(counts))
        thresholds = _calculate_thresholds(min_weight, max_weight, steps)
        for tag in tags:
            tag_weight = _calculate_tag_weight(tag.count, max_weight, distribution)
            # Both the weight and the top threshold are the result of
            # floating point arithmetic, so rounding can leave the
            # weight of the most used tags marginally above the top
            # threshold. Clamp it, so that every tag is given a
            # font size.
            if thresholds and tag_weight > thresholds[-1]:
                tag_weight = thresholds[-1]
            for i, threshold in enumerate(thresholds):
                if tag_weight <= threshold:
                    tag.font_size = i + 1
                    break
    return tags