Keine Beschreibung

utils.py 885B

123456789101112131415161718192021222324252627
  1. import re
  2. import unicodedata
  3. from counter.models import Keyword
  4. # Analyse the reason of the reset and retrieve all the keyword objects that it
  5. # contains, creating the objects in the DB on-the-fly if necessary
  6. def parseSeumReason(reason):
  7. hashtags = re.findall(r'(?<=#)[^#\s]+', reason, re.I)
  8. hashtags = [removeAccentsToLowercase(u) for u in hashtags]
  9. keywords = []
  10. for hashtag in hashtags:
  11. try:
  12. keyword = Keyword.objects.get(text=hashtag)
  13. keywords.append(keyword)
  14. except Keyword.DoesNotExist:
  15. keyword = Keyword(text=hashtag)
  16. keyword.save()
  17. keywords.append(keyword)
  18. return keywords
  19. def removeAccentsToLowercase(input_str):
  20. nfkd_form = unicodedata.normalize('NFKD', input_str)
  21. return u"".join([c.lower() for c in nfkd_form
  22. if not unicodedata.combining(c)])