| 123456789101112131415161718192021222324252627 |
- import re
- import unicodedata
- from counter.models import Keyword
- # Analyse the reason of the reset and retrieve all the keyword objects that it
- # contains, creating the objects in the DB on-the-fly if necessary
- def parseSeumReason(reason):
- hashtags = re.findall(r'(?<=#)[^#\s]+', reason, re.I)
- hashtags = [removeAccentsToLowercase(u) for u in hashtags]
- keywords = []
- for hashtag in hashtags:
- try:
- keyword = Keyword.objects.get(text=hashtag)
- keywords.append(keyword)
- except Keyword.DoesNotExist:
- keyword = Keyword(text=hashtag)
- keyword.save()
- keywords.append(keyword)
- return keywords
- def removeAccentsToLowercase(input_str):
- nfkd_form = unicodedata.normalize('NFKD', input_str)
- return u"".join([c.lower() for c in nfkd_form
- if not unicodedata.combining(c)])
|