diff --git a/taggit/contrib/suggest/README.txt b/taggit/contrib/suggest/README.txt index 5eab28e..5bdc3ce 100644 --- a/taggit/contrib/suggest/README.txt +++ b/taggit/contrib/suggest/README.txt @@ -4,17 +4,28 @@ taggit.contrib.suggest This add on module allows you to easily associate keywords and regular expressions with a Tag object. This is useful to help keep your database -getting filled up with several similar tags that really represent the same thing. +from getting filled up with several similar tags that really represent the +same thing. For example, if your site is a humor site you might want to collapse all of #fun, #funny, #funnies, #hilarious, #rofl, and #lol into one tag #funny. The suggest_tags() function in taggit.contrib.suggest.utils will give you a list of tags that seem appropriate for the text content given to it. +Usage +===== + +Put 'taggit.contrib.suggest' into INSTALLED_APPS and run a syncdb to create +the necessary models. This will add Keywords and Regular Expression inlines +to the default django-taggit admin. Once you've populated those based on your +site you can do a simple: + +from taggit.contrib.suggest.utils import suggest_tags + +tags = suggest_tags(content='Some textual content...') + TODO ==== -* Basic stemming of the keywords for you! Which will require the Python NLTK. - * In a later version I hope to a simple way to help determine keywords for you automatically, by learning from your past tags and content. 
diff --git a/taggit/contrib/suggest/models.py b/taggit/contrib/suggest/models.py index da3c261..09f50cb 100644 --- a/taggit/contrib/suggest/models.py +++ b/taggit/contrib/suggest/models.py @@ -4,15 +4,29 @@ from django.db import models from django.core.exceptions import ValidationError from taggit.models import Tag +HAS_PYSTEMMER = True +try: + import Stemmer +except ImportError: + HAS_PYSTEMMER = False class TagKeyword(models.Model): """ Model to associate simple keywords to a Tag """ tag = models.ForeignKey(Tag, related_name='keywords') keyword = models.CharField(max_length=30) + stem = models.CharField(max_length=30, blank=True) def __unicode__(self): return "Keyword '%s' for Tag '%s'" % (self.keyword, self.tag.name) + def save(self, *args, **kwargs): + """ Stem the keyword on first save when PyStemmer is installed; the language comes from the optional 'stemmer_language' kwarg (legacy spelling 'stemmer-language' still accepted), default 'english' """ + language = kwargs.pop('stemmer_language', kwargs.pop('stemmer-language', 'english')) + if not self.id and not self.stem and HAS_PYSTEMMER: + stemmer = Stemmer.Stemmer(language) + self.stem = stemmer.stemWord(self.keyword) + super(TagKeyword, self).save(*args, **kwargs) + def validate_regexp(value): """ Make sure we have a valid regular expression """ try: diff --git a/taggit/contrib/suggest/utils.py b/taggit/contrib/suggest/utils.py index 1bf05c6..a9b4210 100644 --- a/taggit/contrib/suggest/utils.py +++ b/taggit/contrib/suggest/utils.py @@ -4,12 +4,6 @@ from taggit.contrib.suggest.models import TagKeyword, TagRegExp from django.conf import settings -HAS_NLTK = True -try: - from nltk.stemmer.porter import PorterStemmer -except ImportError: - HAS_NLTK = False - def _suggest_keywords(content=None): """ Suggest by keywords """ suggested_keywords = set()