import re
import unicodedata
from counter.models import Keyword


def parseSeumReason(reason):
    """Return the Keyword objects for every hashtag found in ``reason``.

    Analyses the reason of a reset and retrieves all the keywords it
    contains, creating the missing ones in the database on the fly.

    A hashtag is the run of characters that directly follows a ``#`` and
    contains neither whitespace nor another ``#``. Each hashtag is
    normalised with ``removeAccentsToLowercase`` before being looked up.

    Args:
        reason: Text to scan for hashtags.

    Returns:
        A list of ``Keyword`` instances, in order of appearance. A hashtag
        that occurs several times yields one list entry per occurrence.
    """
    # The lookbehind keeps the leading '#' out of the captured text. The
    # pattern contains no letters, so no case-insensitive flag is needed.
    hashtags = re.findall(r'(?<=#)[^#\s]+', reason)
    keywords = []
    for hashtag in hashtags:
        text = removeAccentsToLowercase(hashtag)
        # get_or_create replaces the manual get / DoesNotExist / save
        # sequence with a single call that fetches or inserts the row.
        keyword, _created = Keyword.objects.get_or_create(text=text)
        keywords.append(keyword)
    return keywords


def removeAccentsToLowercase(input_str):
    """Return ``input_str`` lowercased with its combining marks removed.

    The text is first decomposed with Unicode NFKD normalisation, so an
    accented letter becomes a base character followed by combining marks.
    Those combining marks are dropped and every remaining character is
    lowercased individually.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    kept = []
    for char in decomposed:
        # A non-zero combining class marks an accent/diacritic: skip it.
        if unicodedata.combining(char):
            continue
        kept.append(char.lower())
    return u"".join(kept)