~brenthuisman/lastfmtagupdater

b4d9ba86e992dad59068e341e767a582c3f5d969 — Brent Huisman 2 years ago a561362 master
v3.0.1. Updates to tag handling, more robust capitalization and duplicate checking. New configuration option: skipTags. Helps unclutter tags retrieved from LastFM.
M README.md => README.md +1 -0
@@ 27,6 27,7 @@ The default configuration file is [lastfm_tagger.conf]. Assuming that the config

### Changelog

 * 2018-07-20: v3.0.1. Updates to tag handling, with more robust capitalization and duplicate checking. New configuration option `skipTags`, which helps unclutter tags retrieved from LastFM.
 * 2016-03-21: Renamed to lastfmtagupdater, bumped version number to v3. Finish port to Python 3, added setup.py. Cleaned up code, removed Gui.
 * 2016-01-28: Update writing mp4.
 * 2014-11-30: Fix: correctly ignore writeUntaggedTag. Change: write ID3v2.3 tags instead of ID3v2.4.

M lastfm_tagger.conf => lastfm_tagger.conf +1 -1
@@ 6,7 6,7 @@ lastFMAPI_key =
lastFMAPI_secret = 

verbose = true
mediaDir = d:/media/music
;mediaDir = d:/media/music
logFile = lastfmtagupdater.log
cacheFile = librarycache.xml                        ; The name of the XML cache file
    

M lastfmtagupdater/__init__.py => lastfmtagupdater/__init__.py +12 -12
@@ 5,22 5,22 @@ from .outputwrapper import OutputWrapper

def main(argv=None):
    config = LastFM_Config(argv)
    

    if (config.getboolean('delcache') and os.path.exists(config.get('cachefile'))):
        if (config.getboolean('verbose')):
            print('Removing existing cachefile')
        os.remove(config.get('cacheFile'))
    
    print(('Launching [' + os.path.basename(sys.argv[0]) + ']'))    
                      

    print(('Launching [' + os.path.basename(sys.argv[0]) + ']'))

    outputWrapper = OutputWrapper(config)

    try:                
    try:
        library = MediaLibrary(config, outputWrapper)
        if (not config.getboolean('skipscan')):        
        if (not config.getboolean('skipscan')):
            library.readMedia()
            library.writeCache()
    

        if (not config.getboolean('skipfetch')):
            try:
                library.fetchTags()


@@ 28,14 28,14 @@ def main(argv=None):
                library.writeCache()
                raise
            library.writeCache()
            

        if (not config.getboolean('skipupdate')):
            library.updateTags()
            library.writeCache()
        

        outputWrapper.logNormal('DONE')
        

    except:
        pass 
    finally:   
        pass
    finally:
        outputWrapper.close()
\ No newline at end of file

M lastfmtagupdater/common.py => lastfmtagupdater/common.py +7 -7
@@ 2,7 2,7 @@ def isempty(strng):
    ''' How is this not a part of the standard library? '''
    return strng is None or len(strng.strip()) == 0

 

def distinctTagSeq(tags):
    ''' This eliminates duplicate tags from the sequence (the highest weight version is kept). The returned list
    will be in sorted order, by weight. '''


@@ 25,7 25,7 @@ def distinctSeq(seq):
        return seq or []
    seen = set()
    result = []
    for val in seq:        
    for val in seq:
        if (isinstance(val, str) or isinstance(val, str)):
            key = val.lower()
        else:


@@ 36,14 36,14 @@ def distinctSeq(seq):
    return result

def sortWeightedTagTuples(tagtuples):
    '''
    Sort a list of tag tuples in place, using the integer weights stored in each tuple at position [1:].

    It is assumed that all tuples have the same length. Given a list of 3 item tuples, the result is ordered
    first by the values in position 1 and then, where position 1 values are equal, by the values in position 2.
    Sort order is descending and the sort is stable (entries with identical weights keep their relative order).

    Returns a reference to the same (now sorted) list as a convenience, or None when the input is None or empty.
    '''
    if (not tagtuples):
        return None
    # Negating every weight turns the default ascending sort into a descending one across all weight fields.
    tagtuples.sort(key=lambda tag: tuple(-weight for weight in tag[1:]))
    return tagtuples

M lastfmtagupdater/config.py => lastfmtagupdater/config.py +41 -40
@@ 3,11 3,11 @@ from . import common

class LastFM_Config:
    config_section = 'lastfm_tagger'
    
    usage_message = '''    
This utility updates media files with tags retrieved from LastFM. Please see the configuration file and 

    usage_message = '''
This utility updates media files with tags retrieved from LastFM. Please see the configuration file and
documentation for an explanation of the various operating parameters. You very likely do not want to use
the defaults. 
the defaults.

Usage:
    --cfg=file   the configuration file to use (default: lastfm_tagger.conf)


@@ 17,7 17,7 @@ Usage:
    --skipupdate do not update the media files (useful if you want to audit the cache file first)
'''

    

    defaults = dict(
        lastFMAPI_key='',
        lastFMAPI_secret='',


@@ 44,6 44,7 @@ Usage:
        artistSkipListFile='',
        capTagWords='true',
        skipExtensions='jpg,ini,m3u,db,nfo,par2',
        skipTags='http,www.,https,Arranged,Produced,single',
        writeUntaggedTag='no',
        artistField='both',
        artistTagFields='comment',


@@ 63,19 64,19 @@ Usage:
        id3v2DupeHeaderFix='false',
        tagStartDelim='',
        tagEndDelim='',
        tagSep=';')       
    
        tagSep=';')

    allowedMediaWriteFields = set(['genre', 'grouping', 'comment', ''])
    allowedSortOptions = set(['record', 'popularity', 'library'])
    

    def __init__(self, argv=None):
        self.config = self.parseargs(argv)

        

    def parseargs(self, argv=None):
        if argv is None:
            argv = sys.argv
        try:    
        try:
            # command line processing
            opts, args = getopt.getopt(argv[1:], 'h',
                ['help',


@@ 88,11 89,11 @@ Usage:
            for option, value in opts:
                if (option in ('-h', '--help')):
                    raise Exception()
                

                if (option in ('--cfg')):
                    self.defaults['cfg'] = value
                    print('Using config file [' + value + ']')                        
                                    
                    print('Using config file [' + value + ']')

                if (option == '--delcache'):
                    self.defaults['delcache'] = 'true'



@@ 101,18 102,18 @@ Usage:

                if (option == '--skipfetch'):
                    self.defaults['skipfetch'] = 'true'
                                     

                if (option == '--skipupdate'):
                    self.defaults['skipupdate'] = 'true'
                
                    


            # end command line parsing

                        

            # Validate the cfg file value and load it
            configfile = self.defaults['cfg']
            if (os.path.exists(configfile) and not os.path.isfile(configfile)):
                raise IOError('Config file already exists as a directory or other non-file type: ' + configfile)                
                raise IOError('Config file already exists as a directory or other non-file type: ' + configfile)
            elif (not os.access(configfile, os.R_OK)):
                raise IOError('Could not open config file for reading: ' + os.path.abspath(configfile))
            config = configparser.SafeConfigParser(self.defaults)


@@ 122,7 123,7 @@ Usage:
            config.set(self.config_section, 'tagSep', self.decode_string(config.get(self.config_section, 'tagSep')))
            config.set(self.config_section, 'tagStartDelim', self.decode_string(config.get(self.config_section, 'tagStartDelim')))
            config.set(self.config_section, 'tagEndDelim', self.decode_string(config.get(self.config_section, 'tagEndDelim')))
            

            # Sanity check various settings
            mediadir = config.get(self.config_section, 'mediaDir')
            if (not os.path.exists(mediadir) or not os.path.isdir(mediadir) or not os.access(mediadir, os.R_OK)):


@@ 130,52 131,52 @@ Usage:

            cachefile = config.get(self.config_section, 'cacheFile')
            if (os.path.exists(cachefile) and not os.path.isfile(cachefile)):
                raise IOError('Cache file already exists as a directory or other non-file type: ' + cachefile)                
                raise IOError('Cache file already exists as a directory or other non-file type: ' + cachefile)
            elif (not os.access(os.path.dirname(os.path.abspath(cachefile)), os.W_OK)):
                raise IOError('Could not open cachefile directory for writing: ' + cachefile)
            

            logFile = config.get(self.config_section, 'logFile')
            if (os.path.exists(logFile) and not os.path.isfile(logFile)):
                raise IOError('Log file already exists as a directory or other non-file type: ' + logFile)                
                raise IOError('Log file already exists as a directory or other non-file type: ' + logFile)
            elif (not os.access(os.path.dirname(os.path.abspath(logFile)), os.W_OK)):
                raise IOError('Could not open log file directory for writing: ' + logFile)

            skipscan = config.getboolean(self.config_section, 'skipscan')
            if (skipscan and not os.path.exists(cachefile)):
                raise Exception('NOOP: Cannot bypass media file scanning if the cachefile is empty')                        
            
                raise Exception('NOOP: Cannot bypass media file scanning if the cachefile is empty')

            for option in ['artistTagFields', 'trackTagFields', 'overwriteFields', 'forceOverwriteFields']:
                fields = set(map(str.strip, config.get(self.config_section, option).lower().split(',')))
                self.validFieldSet(option, fields, self.allowedMediaWriteFields)
            

            for option in ['genreSort', 'groupingSort', 'commentSort']:
                sorts = set(map(str.strip, config.get(self.config_section, option).lower().split(',')))
                self.validFieldSet(option, sorts, self.allowedSortOptions)
                         
            if (config.get(self.config_section, 'artistField').lower() not in ['artist', 'albumartist', 'both']):                                     

            if (config.get(self.config_section, 'artistField').lower() not in ['artist', 'albumartist', 'both']):
                raise Exception('An invalid artistField value was specified: ' + config.get(self.config_section, 'artistField'))

            if (config.get(self.config_section, 'writeUntaggedTag').lower() not in ['artist', 'track', 'both', 'no']):                                     
            if (config.get(self.config_section, 'writeUntaggedTag').lower() not in ['artist', 'track', 'both', 'no']):
                raise Exception('An invalid writeUntaggedTag value was specified: ' + config.get(self.config_section, 'writeUntaggedTag'))
            

            return config
            

        except Exception as err:
            sys.stderr.write(os.path.basename(sys.argv[0]) + ': ' + str(err))
            sys.stderr.write(self.usage_message)
            sys.exit(-1)

            

    def validFieldSet(self, option, configSet, validSet):
        '''
        Verify that every value configured for an option is one of the allowed values.

        option    -- name of the option being checked (only used in the error message)
        configSet -- set of values found in the configuration
        validSet  -- set of values that are permitted

        Nothing is checked when either set is None or when configSet is empty. Raises Exception, naming
        the offending values, when configSet contains anything that is not in validSet.
        '''
        if (configSet is None or validSet is None or len(configSet) == 0):
            return
        # Compute the offending values once; they drive both the check and the message.
        invalid = configSet.difference(validSet)
        if (len(invalid) > 0):
            raise Exception('One or more invalid fields were specified for option [' + option + ']: ' + str(invalid))

    def decode_string(self, str):
        if (common.isempty(str)):
            return ''
        

        if (str.lower() == 'space'):
            return ' '
        elif (str.lower() == 'semi'):


@@ 186,17 187,17 @@ Usage:
            return '%'
        else:
            return str
        

    def get(self, option):
        ''' Look up `option` in this tool's configuration section and return the parser's value for it. '''
        section = self.config_section
        return self.config.get(section, option)
    

    def getint(self, option):
        ''' Look up `option` in this tool's configuration section via the parser's getint(). '''
        section = self.config_section
        return self.config.getint(section, option)
    

    def getfloat(self, option):
        ''' Look up `option` in this tool's configuration section via the parser's getfloat(). '''
        return self.config.getfloat(self.config_section, option)

    def getboolean(self, option):
        ''' Look up `option` in this tool's configuration section via the parser's getboolean(). '''
        section = self.config_section
        return self.config.getboolean(section, option)

 


M lastfmtagupdater/lastfmwrapper.py => lastfmtagupdater/lastfmwrapper.py +20 -20
@@ 6,7 6,7 @@ class LastFM_Wrapper:
    # to have conditional logic, based on that. Weakly typed languages FTMFL.
    useNamedTuple = False;
    outputWrapper = None
    

    def __init__(self, config, outputWrapper):
        self.config = config
        self.outputWrapper = outputWrapper


@@ 16,16 16,16 @@ class LastFM_Wrapper:
        if pyver[1] >= 6 and pyver[0] < 3:
            self.useNamedTuple = True

                

    def fetchArtistTags(self, artist, maxTagsToFetch, minWeight, retries=2):
        ''' 
        '''
        Retrieve artist tags from LastFM, filtering out those tags that appear bunky (below the specified
        weight, longer than the maximum allowable distance, self-referential, etc.
        '''
        try:
            lastfm = pylast.get_lastfm_network(api_key=self.api_key, api_secret=self.api_secret)
            tags = self.processSeenTags(lastfm.get_artist(artist).get_top_tags(limit=maxTagsToFetch), minWeight)
            return [pair for pair in tags if pair[0].lower().replace('the', '').strip() != artist.lower().replace('the', '').strip()]        
            return [pair for pair in tags if pair[0].lower().replace('the', '').strip() != artist.lower().replace('the', '').strip()]
        except Exception as err:
            if ('not found' in str(err).lower() or 'not be found' in str(err).lower()): return []
            if (retries > 0):


@@ 36,9 36,9 @@ class LastFM_Wrapper:
                self.outputWrapper.logError('Problem retrieving artist tag information for [' + str(artist) + '], skipping: ' + str(err))
        return None

        

    def fetchTrackTags(self, artist, track, maxTagsToFetch, minWeight, retries=2):
        ''' 
        '''
        Retrieve track tags from LastFM, filtering out those tags that appear bunky (below the specified
        weight, longer than the maximum allowable distance, self-referential, etc.
        '''


@@ 58,13 58,13 @@ class LastFM_Wrapper:


    def processSeenTags(self, tags, minWeight):
        ''' 
        '''
        This method converts a lastFM tag stream into our internal format (a list of (tag, weight) pairs), while
        also filtering based on minimum weight. We convert the tag to a unicode type.
        '''
        if (tags is None or len(tags) == 0): 
        if (tags is None or len(tags) == 0):
            return []
        

        newtags = []
        for rawtag in tags:
            if (self.useNamedTuple):


@@ 76,18 76,18 @@ class LastFM_Wrapper:
            else:
                tag = str(rawtag['item'].name)
                weight = int(rawtag['weight'])
            # results are pre-sorted, so we can abort on this condition            
            # results are pre-sorted, so we can abort on this condition
            if (weight < minWeight): break
            if (len(tag) > self.config.getint('maxTagLength')): continue
            newtags.append((tag, weight))            
            newtags.append((tag, weight))
        return newtags
    
    


    def fetchTopTagStats(self, retries=2):
        '''
        LastFM provides a unified list tags/counts, for the top tags. By fetching these in one call, we can
        typically avoid a ton of unnecessary network calls for individual tags.
        ''' 
        '''
        tags = {}
        try:
            lastfm = pylast.get_lastfm_network(api_key=self.api_key, api_secret=self.api_secret)


@@ 98,7 98,7 @@ class LastFM_Wrapper:
                    count = int(lastTopTag.weight)
                elif type(lastTopTag) == pylast.TopItem:
                    key = str(lastTopTag.item.get_name()).lower()
                    count = int(lastTopTag.weight)                   
                    count = int(lastTopTag.weight)
                else:
                    key = str(lastTopTag['item'].name).lower()
                    count = int(lastTopTag['weight'])


@@ 119,12 119,12 @@ class LastFM_Wrapper:


    def fetchTagCount(self, tag, retries=2):
        ''' Return a count/weight, for the specified tag. The API no longer seems to return counts, though, so this is currently deprecated. ''' 
        ''' Return a count/weight, for the specified tag. The API no longer seems to return counts, though, so this is currently deprecated. '''
        try:
            lastfm = pylast.get_lastfm_network(api_key=self.api_key, api_secret=self.api_secret)
            results = lastfm.search_for_tag(tag)
            if (results.get_total_result_count() > 0):
                topmatch = results.get_next_page()[0]                
                topmatch = results.get_next_page()[0]
                if (topmatch is not None):
                    if (self.useNamedTuple):
                        return int(topmatch.weight)


@@ 135,7 135,7 @@ class LastFM_Wrapper:
                time.sleep(5)
                return self.fetchTopTagStats(retries - 1)
            else:
                self.outputWrapper.logError('Problem retrieving tag information, ' + str(retries) + ' skipping: ' + str(err))            
                self.outputWrapper.logError('Problem retrieving tag information, ' + str(retries) + ' skipping: ' + str(err))
        return None
    
    



M lastfmtagupdater/mediahelper.py => lastfmtagupdater/mediahelper.py +56 -44
@@ 17,14 17,15 @@ class MediaHelper:
    useBothArtistFields = False
    artistFieldPref = []
    outputWrapper = None
    

    formatFieldMap = dict(
          id3=dict(genre='TCON', grouping='TIT1', comment="COMM::'eng'", artist='TPE1', albumartist='TPE2', album='TALB', track='TIT2'),
          # id3=dict(genre='TCON', grouping='TIT1', comment="COMM::'eng'", artist='TPE1', albumartist='TPE2', album='TALB', track='TIT2'),
          id3=dict(genre='TCON', grouping='TIT1', comment="COMM::eng", artist='TPE1', albumartist='TPE2', album='TALB', track='TIT2'),
          mp4=dict(genre='\xa9gen', grouping='\xa9grp', comment='\xa9cmt', artist='\xa9ART', albumartist='aART', album='\xa9alb', track='\xa9nam'),
          oggvorbis=dict(genre='genre', grouping='grouping', comment='comment', artist='artist', albumartist='album artist', album='album', track='title'),
          flac=dict(genre='genre', grouping='grouping', comment='comment', artist='artist', albumartist='album artist', album='album', track='title')
    )
    

    id3FuncMap = dict(
        genre=lambda val: TCON(encoding=3, text=val),
        grouping=lambda val: TIT1(encoding=3, text=val),


@@ 34,33 35,33 @@ class MediaHelper:
        album=lambda val: TALB(encoding=3, text=val),
        track=lambda val: TIT2(encoding=3, text=val)
    )
    

    meaninglessArtists = frozenset(['various artists', 'soundtrack', 'soundtracks', 'original soundtrack', 'ost', 'compilation'])


    def __init__(self, config, outputWrapper):
        self.config = config
        self.outputWrapper = outputWrapper        
        self.outputWrapper = outputWrapper
        self.tagSep = self.config.get('tagSep')
        if (len(self.tagSep.strip()) > 0):
            self.tagSep += ' '
        # if (len(self.tagSep.strip()) > 0):
        #     self.tagSep += ' '
        self.maxTags = dict(genre=self.config.getint('genreMaxTags'),
                            grouping=self.config.getint('groupingMaxTags'),
                            comment=self.config.getint('commentMaxTags'))
        self.tagSkipCounts = dict(genre=self.config.getint('genreTagSkipCount'),
                                  grouping=self.config.getint('groupingTagSkipCount'),
                                  comment=self.config.getint('commentTagSkipCount'))        
                                  comment=self.config.getint('commentTagSkipCount'))
        self.overwriteFields = set(map(str.strip, self.config.get('overwriteFields').lower().split(',')))
        self.forceOverwriteFields = set(map(str.strip, self.config.get('forceOverwriteFields').lower().split(',')))        
        self.forceOverwriteFields = set(map(str.strip, self.config.get('forceOverwriteFields').lower().split(',')))
        self.id3v1Handling = self.config.getint('id3v1Handling')
        self.ignoreCase = self.config.getboolean('ignoreCase')
        self.id3v2DupeHeaderFix = self.config.getboolean('id3v2DupeHeaderFix')
        

        self.artistFieldPref = ['albumartist', 'artist']
        if (self.config.get('artistField').lower() == 'both'):      self.useBothArtistFields = True
        elif (self.config.get('artistField').lower() == 'artist'):  self.artistFieldPref.reverse()
        
        


    def getMediawrapper(self, filename):
        root, ext = os.path.splitext(filename.lower())
        if (ext == '.mp3'):     mediawrapper = ID3(filename)


@@ 69,8 70,8 @@ class MediaHelper:
        elif (ext == '.flac'):  mediawrapper = FLAC(filename)
        else:                   mediawrapper = mutagen.File(filename)
        return mediawrapper
        
    


    def extractMetadata(self, filename):
        try:
            mediawrapper = self.getMediawrapper(filename)


@@ 80,12 81,12 @@ class MediaHelper:
            elif (isinstance(mediawrapper, OggVorbis)): return self.extractMetadataHelper(mediawrapper, self.formatFieldMap['oggvorbis'], filename)
            elif (isinstance(mediawrapper, FLAC)):      return self.extractMetadataHelper(mediawrapper, self.formatFieldMap['flac'], filename)
            else:
                if (self.config.getboolean('verbose')):                                       
                if (self.config.getboolean('verbose')):
                    self.outputWrapper.logNormal('\tSkipping unknown/incompatible media file type [' + filename + ']')
        except Exception as err:
            self.outputWrapper.logError('Error seen during media reading: ' + str(err))
        return None
            


    def extractMetadataHelper(self, mediawrapper, fieldMap, filename):
        ''' Retrieves artist, album, and track data, forcing it to unicode '''


@@ 101,23 102,23 @@ class MediaHelper:
                    continue
                break
        artists = set(artists).difference(self.meaninglessArtists)
        if (len(artists) == 0): 
        if (len(artists) == 0):
            self.outputWrapper.logError('No artist info found for [' + filename + ']')
            return None
        
        # album     

        # album
        album = '-unknown-'
        if (fieldMap['album'] in mediawrapper):
            tmpalbum = mediawrapper[fieldMap['album']][0]  
            tmpalbum = mediawrapper[fieldMap['album']][0]
            if (not common.isempty(tmpalbum)):
                if (self.ignoreCase):
                    tmpalbum = tmpalbum.lower()
                album = str(tmpalbum)
    

        # track
        track = None
        if (fieldMap['track'] in mediawrapper):
            tmptrack = mediawrapper[fieldMap['track']][0] 
            tmptrack = mediawrapper[fieldMap['track']][0]
            if (not common.isempty(tmptrack)):
                if (self.ignoreCase):
                    tmptrack = tmptrack.lower()


@@ 126,31 127,31 @@ class MediaHelper:
            self.outputWrapper.logError('No track title found for [' + filename + ']')
            return None
        return {'artists':artists, 'album':album, 'track':track}
            
    


    def updateTags(self, filename, tagPayload):
        '''
        Write the supplied tags to a media file, dispatching on the wrapper type returned by getMediawrapper().

        filename   -- path of the media file to update
        tagPayload -- dict mapping a bucket name to a list of tag strings. NOTE: the dict is modified in
                      place; each list is replaced by a single string holding the selected tags joined
                      with self.tagSep.

        For every bucket, the first self.tagSkipCounts[bucket] tags are skipped and at most
        self.maxTags[bucket] of the following tags are kept.

        Returns whatever the format-specific helper returns. Returns False when the file type is not
        recognised (a normal log line is written) or when any exception occurs (the error is logged
        once and not re-raised).
        '''
        try:
            mediawrapper = self.getMediawrapper(filename)

            for bucket in tagPayload:
                firstTag = self.tagSkipCounts[bucket]
                lastTag = firstTag + self.maxTags[bucket]
                tagPayload[bucket] = self.tagSep.join(tagPayload[bucket][firstTag:lastTag])

            if (isinstance(mediawrapper, ID3)):         return self.updateTagsHelperID3(mediawrapper, tagPayload, self.formatFieldMap['id3'])
            elif (isinstance(mediawrapper, MP4)):       return self.updateTagsHelper(mediawrapper, tagPayload, self.formatFieldMap['mp4'])
            elif (isinstance(mediawrapper, OggVorbis)): return self.updateTagsHelper(mediawrapper, tagPayload, self.formatFieldMap['oggvorbis'])
            elif (isinstance(mediawrapper, FLAC)):      return self.updateTagsHelper(mediawrapper, tagPayload, self.formatFieldMap['flac'])
            else:                                       self.outputWrapper.logNormal('Skipping unknown/incompatible media file type [' + filename + ']')
        except Exception as err:
            # Best effort: a failure on one file is reported and must not abort the whole run.
            self.outputWrapper.logError('Error seen during update processing: ' + str(err))
        return False

    

    def updateTagsHelper(self, mediawrapper, tagPayload, fieldMap):
        ''' This version saves the tag data in Unicode encoding '''
        retVal = False
        for bucket in tagPayload:
            if (bucket not in fieldMap): raise Exception('Unknown field type requested [' + bucket + ']')
            curField = fieldMap[bucket]            
            curField = fieldMap[bucket]
            # If we're not required to overwrite, check if we actually need to and should
            if (bucket not in self.forceOverwriteFields):
                # Is the payload empty? Don't update.


@@ 170,26 171,37 @@ class MediaHelper:
                        mediawrapper.update_to_v23()
                        mediawrapper.save(v2_version=3)
                    else:
                        mediawrapper.save()                        
                    break                    
                        mediawrapper.save()
                    break
                except Exception as err:
                    retryCount += 1
                    if (retryCount > 2):
                        raise
                    self.outputWrapper.logError('Problem updating media file - retrying (' + str(err) + ')')
                    time.sleep(1)                   
                    time.sleep(1)
        return retVal

        

    def updateTagsHelperID3(self, mediawrapper, tagPayload, fieldMap):
        ''' 
        ID3 requires uniquely encoded values, so this custom method is necessary to properly save the updated tags. 
        If the comments field is used, values will be saved with an empty description and lang=eng.  
        '''
        ID3 requires uniquely encoded values, so this custom method is necessary to properly save the updated tags.
        If the comments field is used, values will be saved with an empty description and lang=eng.
        '''
        retVal = False
        for bucket in tagPayload:
            if (bucket not in fieldMap): raise Exception('Unknown field type requested [' + bucket + ']')
            curField = fieldMap[bucket]            
            curField = fieldMap[bucket]

            # debug: multiple values in one tag is not a solved problem in ID3...
            # print(curField)
            # for key, value in mediawrapper.items():
            #     print(key,value)
            # try:
            #     print(mediawrapper[curField])
            # except Exception as e:
            #     print("Error gevangen:",e)
            # input("verder?")

            # self.outputWrapper.logNormal(u'\t\t'+unicode(curField)+'\t'+unicode(tagPayload[bucket]))
            # If we're not required to overwrite, check if we actually need to and should
            if (bucket not in self.forceOverwriteFields):


@@ 205,8 217,8 @@ class MediaHelper:
        if (retVal == True):
            # There's an odd bug somewhere in the interaction between some set of Mutagen, iTunes, and/or WMP that causes
            # duplicate ID3v2 headers. This tends to break playback at least in iTunes. The following pre-save block makes a
            # copy of whatever the 'current' header is, deletes 'all' v2 headers, and then re-adds the current header frames. 
            # We seem to end up with some unnecessary blank padding between the frames and content, though. 
            # copy of whatever the 'current' header is, deletes 'all' v2 headers, and then re-adds the current header frames.
            # We seem to end up with some unnecessary blank padding between the frames and content, though.
            if (self.id3v2DupeHeaderFix):
                curFrames = {}
                for key in list(mediawrapper.keys()):


@@ 215,13 227,13 @@ class MediaHelper:
                while True:
                    try:
                        mediawrapper.delete(delete_v2=True)
                        break                    
                        break
                    except Exception as err:
                        retryCount += 1
                        if (retryCount > 2):
                            raise
                        self.outputWrapper.logError('Problem updating media file - retrying (' + str(err) + ')')
                        time.sleep(1)                   
                        time.sleep(1)
                for key in curFrames:
                    mediawrapper[key] = curFrames[key]
            retryCount = 0


@@ 229,11 241,11 @@ class MediaHelper:
                try:
                    mediawrapper.update_to_v23()
                    mediawrapper.save(v2_version=3,v1=self.id3v1Handling)
                    break                    
                    break
                except Exception as err:
                    retryCount += 1
                    if (retryCount > 2):
                        raise
                    self.outputWrapper.logError('Problem updating media file - retrying (' + str(err) + ')')
                    time.sleep(1)                   
                    time.sleep(1)
        return retVal

M lastfmtagupdater/medialibrary.py => lastfmtagupdater/medialibrary.py +132 -126
@@ 1,4 1,4 @@
import fileinput,os,string,time,sys
import fileinput,os,time,sys
from xml.etree.ElementTree import Element, SubElement, ElementTree
from . import common
from .lastfmwrapper import LastFM_Wrapper


@@ 20,10 20,10 @@ class MediaLibrary:
    # even when they may be different (e.g. by track number), as lastFM itself only uses simple artist/album/track filenames as keys.
    # In otherwords, the library is not intended to be a representation of file system objects so much as distinct lastFM entities. As
    # such, we don't persist other fields, like comments, genres, etc., which may differ from file to file (even with the same key metadata).
    # This probably isn't an important distinction for the vast majority of files.    
    
    # This probably isn't an important distinction for the vast majority of files.

    mediaLibrary = {}
    

    # LastFM Tag Library object:
    # dict(string (lowercase lastfm tag) -> int (number of hits/count reported by lastFM)
    lastTagLibrary = {}


@@ 43,53 43,53 @@ class MediaLibrary:
    cacheBackedUp = False
    artistSkipList = None

        

    def __init__(self, config, outputWrapper):
        '''
        Store the configuration and output wrapper, create the MediaHelper, load any
        existing cache via readCache() and read the artist skip list named by the
        'artistSkipListFile' setting.
        '''
        self.config, self.outputWrapper = config, outputWrapper
        self.mediaHelper = MediaHelper(config, outputWrapper)

        # Seed the library from the cache before the skip list is loaded
        self.readCache()

        skipListFile = self.config.get('artistSkipListFile')
        self.artistSkipList = self.loadSkipList(skipListFile)

       
             


    def readCache(self):
        '''
        Parse the XML cache named by the 'cacheFile' setting and hand its root element
        to fromXml(). Does nothing when the cache file does not exist.
        '''
        cachePath = os.path.normpath(str(self.config.get('cacheFile')))
        if (os.path.exists(cachePath)):
            cacheRoot = ElementTree().parse(cachePath)
            self.fromXml(cacheRoot)
    
    


    def writeCache(self):
        '''
        Serialize the library with toXml() and write it, UTF-8 encoded, to the file
        named by the 'cacheFile' setting. Logs 'Saving cache' before doing so.
        '''
        self.outputWrapper.logNormal('Saving cache')
        cachePath = os.path.normpath(str(self.config.get('cacheFile')))
        libraryTree = ElementTree(self.toXml())
        libraryTree.write(cachePath, encoding='UTF-8')
    
    


    def readMedia(self):
        '''
        Walk the directory named by the 'mediaDir' setting and add every artist/album/track
        combination returned by the media helper to the library, with isInScanset=True.
        Files whose extension is listed in 'skipExtensions' are ignored, as are files for
        which no artist, album or track could be extracted.
        '''
        mediaRoot = os.path.normpath(str(self.config.get('mediaDir')))
        beVerbose = self.config.getboolean('verbose')
        ignoredExtensions = ['.' + entry.lower().strip() for entry in self.config.get('skipExtensions').split(',')]

        self.outputWrapper.logNormal('Reading existing metadata from [' + mediaRoot + ']')
        processed = 0
        for folder, _subdirs, filenames in os.walk(mediaRoot):
            for filename in filenames:
                extension = os.path.splitext(filename)[1]
                if (extension.lower() in ignoredExtensions):
                    continue

                mediaPath = os.path.join(folder, filename)
                metadata = self.mediaHelper.extractMetadata(mediaPath)
                if (metadata is None):
                    continue
                if (len(metadata['artists']) == 0 or metadata['album'] is None or metadata['track'] is None):
                    continue

                # A file may list several artists; each one gets its own library entry
                for artist in metadata['artists']:
                    self.addToMediaLibrary(artist, metadata['album'], metadata['track'], isInScanset=True)
                processed += 1
                if (beVerbose):
                    self.outputWrapper.logNormal('\tProcessed: ' + mediaPath)
        self.outputWrapper.logNormal('Read [' + str(processed) + '] media files')


    def addToMediaLibrary(self, artist, album, track, artistTags=None, trackTags=None, isInScanset=False):
        if (common.isempty(artist)):
            raise Exception('No artist info provided')


@@ 97,64 97,64 @@ class MediaLibrary:
            raise Exception('No track title provided')
        elif (common.isempty(album)):
            raise Exception('No album title provided')
                

        if (artist not in self.mediaLibrary):
            self.mediaLibrary[artist] = { 'albums':{}, 'tags':artistTags }           
            
            self.mediaLibrary[artist] = { 'albums':{}, 'tags':artistTags }

        if (album not in self.mediaLibrary[artist]['albums']):
            self.mediaLibrary[artist]['albums'][album] = { 'tracks':{} }
            

        if (track not in self.mediaLibrary[artist]['albums'][album]['tracks']):
            self.mediaLibrary[artist]['albums'][album]['tracks'][track] = { 'tags':trackTags }
        

        if (isInScanset):
            self.mediaLibrary[artist]['scanned'] = True
            self.mediaLibrary[artist]['albums'][album]['scanned'] = True
            self.mediaLibrary[artist]['albums'][album]['tracks'][track]['scanned'] = True
   
              


    def printLibrary(self):
        '''
        Log the whole library as an indented tree: one line per artist, album and track,
        with the tag names of artists and tracks appended in parentheses.
        '''
        log = self.outputWrapper.logNormal

        def tagNames(tagpairs):
            # tags may be None (no tag info); treat that the same as an empty list
            return ', '.join([pair[0] for pair in tagpairs or []])

        for artist, artistEntry in self.mediaLibrary.items():
            log(artist + ' (' + tagNames(artistEntry['tags']) + ')')
            for album, albumEntry in artistEntry['albums'].items():
                log('\t' + album)
                for track, trackEntry in albumEntry['tracks'].items():
                    log('\t\t' + track + ' (' + tagNames(trackEntry['tags']) + ')')


    def toXml(self):
        numartists = 0
        numalbums = 0
        numtracks = 0

        try:        
            libraryElement = Element('library')            
        try:
            libraryElement = Element('library')
            artistsElement = SubElement(libraryElement, 'artists')
            for artist in sorted(self.mediaLibrary):
                artistDict = self.mediaLibrary[artist]
                artistElement = SubElement(artistsElement, 'artist')
                

                SubElement(artistElement, 'name').text = artist
                      

                if (artistDict['tags'] is not None):
                    if (len(artistDict['tags']) == 0):
                        SubElement(artistElement, 'notags')
                    else:
                        for tagpair in sorted(artistDict['tags']):
                            SubElement(artistElement, 'tag', weight=str(tagpair[1])).text = tagpair[0]
                

                for album in sorted(artistDict['albums']):
                    albumDict = artistDict['albums'][album]            
                    albumDict = artistDict['albums'][album]
                    albumElement = SubElement(artistElement, 'album')
                    

                    SubElement(albumElement, 'name').text = album
                    

                    for track in sorted(albumDict['tracks']):
                        trackDict = albumDict['tracks'][track]
                        trackElement = SubElement(albumElement, 'track')
                        

                        SubElement(trackElement, 'name').text = track
    

                        if (trackDict['tags'] is not None):
                            if (len(trackDict['tags']) == 0):
                                SubElement(trackElement, 'notags')


@@ 166,7 166,7 @@ class MediaLibrary:
                    numalbums += 1
                numartists += 1
            self.outputWrapper.logNormal('Serialized [' + str(numartists) + '] artists, [' + str(numalbums) + '] albums, and [' + str(numtracks) + '] tracks to XML')
            

            localTagsElement = SubElement(libraryElement, 'localTags')
            for tagpair in sorted(list(self.localTagLibrary.items()), key=lambda x: x [1]['localhits'], reverse=True):
                tag = tagpair[0]


@@ 183,27 183,27 @@ class MediaLibrary:
                numtags += 1

            self.outputWrapper.logNormal('Serialized [' + str(numtags) + '] lastFM tags to XML')
    

            return libraryElement
        except Exception as err:
            raise Exception('Could not serialize the XML cache data: ' + str(err)).with_traceback(sys.exc_info()[2])
    
    


    def fromXml(self, rootElement):
        numartists = 0
        numalbums = 0
        numtracks = 0
        ignoreCase = self.config.getboolean('ignoreCase')
        

        try:
            artistsElement = rootElement.find('artists')
            for artistElement in artistsElement.findall('artist'):
                nameElement = artistElement.find('name')
                if (nameElement is None):
                    self.outputWrapper.logError('Missing name element on [' + artistElement.tag + ']')
                    continue                
                    continue
                artist = str(nameElement.text.lower() if ignoreCase else nameElement.text)
                

                # tags = None means there is no tag info, tags = [] means we know it's an empty list
                artistTags = None
                artistTagElements = artistElement.findall('tag')


@@ 213,21 213,21 @@ class MediaLibrary:
                        artistTags.append((str(artistTagElement.text), int(artistTagElement.get('weight'))))
                elif (artistElement.find('notags') is not None):
                    artistTags = []
        

                for albumElement in artistElement.findall('album'):
                    nameElement = albumElement.find('name')
                    if (nameElement is None):
                        self.outputWrapper.logError('Missing name element on [' + albumElement.tag + ']')
                        continue
                    album = str(nameElement.text.lower() if ignoreCase else nameElement.text)
                    

                    for trackElement in albumElement.findall('track'):
                        nameElement = trackElement.find('name')
                        if (nameElement is None):
                            self.outputWrapper.logError('Missing name element on [' + trackElement.tag + ']')
                            continue
                        track = str(nameElement.text.lower() if ignoreCase else nameElement.text)                        
                            
                        track = str(nameElement.text.lower() if ignoreCase else nameElement.text)

                        # tags = None means there is no tag info, tags = [] means we know it's an empty list
                        trackTags = None
                        trackTagElements = trackElement.findall('tag')


@@ 239,12 239,12 @@ class MediaLibrary:
                            trackTags = []

                        self.addToMediaLibrary(artist, album, track, artistTags, trackTags)
                        

                        numtracks += 1
                    numalbums += 1
                numartists += 1
            self.outputWrapper.logNormal('Loaded [' + str(numartists) + '] artists, [' + str(numalbums) + '] albums, and [' + str(numtracks) + '] cached tracks')
            

            lastTagsElement = rootElement.find('lastFmTags')
            for lastTagElement in lastTagsElement.findall('tag'):
                self.addToLastFMTagLibrary(str(lastTagElement.text), int(lastTagElement.get('hits')))


@@ 253,14 253,14 @@ class MediaLibrary:


    def fetchTags(self):
        '''
        Create one LastFM wrapper and run the three fetch stages with it, in order:
        artist tags, track tags and finally the tag statistics.
        '''
        lastfm = LastFM_Wrapper(self.config, self.outputWrapper)
        for fetchStage in (self.fetchArtistTags, self.fetchTrackTags, self.fetchTagStats):
            fetchStage(lastfm)
        #if (self.config.getboolean('verbose')):
        #    self.printDistinctLastTags()

        

    def fetchArtistTags(self, lastfm):
        verbose = self.config.getboolean('verbose')
        refetch = self.config.getboolean('refetchCachedTags')


@@ 269,7 269,7 @@ class MediaLibrary:
        maxTagsToSave = self.config.getint('getArtistTags')
        if (maxTagsToSave <= 0):
            return
        

        self.outputWrapper.logNormal('Fetching artist tags from LastFM')
        for artist in sorted(self.mediaLibrary):
            if (artist in self.artistSkipList):


@@ 285,8 285,8 @@ class MediaLibrary:
            if (verbose):
                self.outputWrapper.logNormal('\tFetched [' + artist + '] (' + (', '.join([pair[0] for pair in tagpairs]) if tagpairs is not None else '') + ')')
            time.sleep(niceness)
                    
                    


    def fetchTrackTags(self, lastfm):
        verbose = self.config.getboolean('verbose')
        refetch = self.config.getboolean('refetchCachedTags')


@@ 295,7 295,7 @@ class MediaLibrary:
        maxTagsToSave = self.config.getint('getTrackTags')
        if (maxTagsToSave <= 0):
            return
        

        self.outputWrapper.logNormal('Fetching track tags from LastFM')
        for artist in sorted(self.mediaLibrary):
            if (artist in self.artistSkipList):


@@ 305,7 305,7 @@ class MediaLibrary:
                    if ('scanned' not in self.mediaLibrary[artist]['albums'][album]['tracks'][track]):
                        continue
                    tagpairs = self.mediaLibrary[artist]['albums'][album]['tracks'][track]['tags']
                    if (tagpairs is not None and refetch is False):                    
                    if (tagpairs is not None and refetch is False):
                        continue
                    self.mediaLibrary[artist]['albums'][album]['tracks'][track]['tags'] = tagpairs = lastfm.fetchTrackTags(artist, track, maxTagsToSave, minWeight)
                    if (tagpairs is not None):


@@ 314,7 314,7 @@ class MediaLibrary:
                        self.outputWrapper.logNormal('\tFetched [' + artist + ':' + track + '] (' + (', '.join([pair[0] for pair in tagpairs]) if tagpairs is not None else '') + ')')
                    time.sleep(niceness)

    

    def fetchTagStats(self, lastfm):
        ''' Fetch overall/LastFM-wide tag counts. Currently only works for LastFM's 'top tracks' (they don't syndicate counts for arbitrary tags) '''
        toptags = lastfm.fetchTopTagStats()


@@ 326,16 326,16 @@ class MediaLibrary:
        for lasttag in self.lastTagLibrary:
            if (lasttag in toptags):
                self.lastTagLibrary[lasttag] = toptags[lasttag]
                        


    def addToLastFMTagLibrary(self, lasttag, hits=0):
        '''
        Register a fetched tag in the lastFM tag library under its lowercase form, with the
        given hit count. A tag that is already registered keeps its existing count.
        '''
        self.lastTagLibrary.setdefault(lasttag.lower(), hits)


    def updateTags(self):


@@ 351,14 351,14 @@ class MediaLibrary:
        skipExtensions = ['.' + x.lower().strip() for x in self.config.get('skipExtensions').split(',')]
        writeUntaggedArtist = (self.config.get('writeUntaggedTag').lower() == 'artist' or self.config.get('writeUntaggedTag').lower() == 'both')
        writeUntaggedTrack = (self.config.get('writeUntaggedTag').lower() == 'track' or self.config.get('writeUntaggedTag').lower() == 'both')
        

        if (touchedFields is None or len(touchedFields) == 0):
            self.outputWrapper.logError('Perhaps you should configure a destination field...')
            return
        

        self.loadSynonyms()
        self.generateLocalTags()
        

        self.outputWrapper.logNormal('Updating tags in [' + mediadir + ']')
        numfiles = 0
        for root, dirs, files in os.walk(mediadir):


@@ 367,17 367,17 @@ class MediaLibrary:
                    fname, ext = os.path.splitext(filename)
                    if (ext is not None and ext.lower() in skipExtensions):
                        continue
    

                    metadata = self.mediaHelper.extractMetadata(os.path.join(root, filename))
                    if (metadata is None or len(metadata['artists']) == 0 or metadata['album'] is None or metadata['track'] is None):
                        continue
                    album = metadata['album'].lower() if ignoreCase else metadata['album']
                    track = metadata['track'].lower() if ignoreCase else metadata['track']                
                    
                    track = metadata['track'].lower() if ignoreCase else metadata['track']

                    artistTags = []
                    trackTags = []
                    for artist in list(map(str.lower, metadata['artists'])) if ignoreCase else metadata['artists']:                                        
                        if (artist in self.artistSkipList): 
                    for artist in list(map(str.lower, metadata['artists'])) if ignoreCase else metadata['artists']:
                        if (artist in self.artistSkipList):
                            continue
                        if (artist not in self.mediaLibrary or
                            album not in self.mediaLibrary[artist]['albums'] or


@@ 386,14 386,14 @@ class MediaLibrary:
                            continue
                        artistTags.extend(self.mediaLibrary[artist]['tags'] or [])
                        trackTags.extend(self.mediaLibrary[artist]['albums'][album]['tracks'][track]['tags'] or [])
                                     

                    localArtistTags = self.lastTagsToLocalTags(artistTags)
                    localTrackTags = self.lastTagsToLocalTags(trackTags)
                    

                    # Use untagged tags, if requested and appropriate
                    if (len(localArtistTags) == 0 and writeUntaggedArtist): localArtistTags = [('untagged artist', 0)]
                    if (len(localTrackTags) == 0 and writeUntaggedTrack):   localTrackTags = [('untagged track', 0)]
                    

                    tagPayload = {}
                    for touchedField in touchedFields:
                        if (touchedField in artistTagFields and touchedField in trackTagFields):


@@ 402,32 402,32 @@ class MediaLibrary:
                            fieldTags = localArtistTags
                        else:
                            fieldTags = localTrackTags
    
                        if (fieldTags is None or len(fieldTags) == 0) : 

                        if (fieldTags is None or len(fieldTags) == 0) :
                            continue
    

                        # The following section is mostly to deal with multi-column sorting
                        

                        # Store the record weights somewhere we can look them up (the list should already be distinct)
                        recordWeights = {}
                        for tagpair in fieldTags:
                            recordWeights[tagpair[0].lower()] = tagpair[1]
    
                        # Pull out just the tag names as singleton tuples, we'll tack on sort weights next                                        

                        # Pull out just the tag names as singleton tuples, we'll tack on sort weights next
                        tagWeightsList = [(tuple[0],) for tuple in fieldTags]
                                            

                        # Pull out the list of sort rules (e.g. record, library) and append each appropriate weight to the tuple list, in succession
                        sortRules = list(map(str.strip, self.config.get(touchedField + 'Sort').lower().split(',')))                    
                        sortRules = list(map(str.strip, self.config.get(touchedField + 'Sort').lower().split(',')))
                        for sortRule in sortRules:
                            if (sortRule == 'record'):      tagWeightsList = [tagtuple + (recordWeights[tagtuple[0].lower()],) for tagtuple in tagWeightsList]
                            elif (sortRule == 'library'):   tagWeightsList = [tagtuple + (self.getLibraryWeight(tagtuple[0].lower()),) for tagtuple in tagWeightsList]
                            elif (sortRule == 'popularity'):tagWeightsList = [tagtuple + (self.getPopularityWeight(tagtuple[0].lower()),) for tagtuple in tagWeightsList]
                        

                        common.sortWeightedTagTuples(tagWeightsList)
                        

                        tagPayload[touchedField] = self.formattedTagList(tagWeightsList, startDelim, endDelim)
                            
                    if (self.mediaHelper.updateTags(os.path.join(root, filename), tagPayload)):                                                                      

                    if (self.mediaHelper.updateTags(os.path.join(root, filename), tagPayload)):
                        numfiles += 1
                        if (verbose):
                            self.outputWrapper.logNormal('\tUpdated: ' + os.path.join(root, filename))


@@ 438,7 438,7 @@ class MediaLibrary:
                    pass
        self.outputWrapper.logNormal('Updated [' + str(numfiles) + '] media files')
        #if (verbose):
        #    self.printDistinctLocalTags()            
        #    self.printDistinctLocalTags()


    def loadSynonyms(self):


@@ 447,10 447,10 @@ class MediaLibrary:
            return
        if (not os.path.exists(synfile) or not os.access(synfile, os.R_OK)):
            self.outputWrapper.logError('Synonyms file either does not exist or cannot be accessed [' + synfile + ']')
        

        # Read the synonyms file. The expected format is:
        # original token(tab)replacement token[,replacement token]...
        # e.g. 
        # e.g.
        # rnb    rhythm and blues, r&b
        # This would replace any instance of 'rnb' seen in the LastFM tag set with both 'rhythm and blues' and 'r&b'
        # We preserve order, for the replacement values (so you can order them as you would like them to be replaced)


@@ 461,7 461,7 @@ class MediaLibrary:
            line = line.strip()
            if (common.isempty(line)):
                continue
            if (isinstance(line, str)):                
            if (isinstance(line, str)):
                pass#line = str(line, 'latin1')
            synline = line.split('\t')
            if (len(synline) < 2):


@@ 478,8 478,8 @@ class MediaLibrary:
        #for syn in sorted(self.synonyms):
        #    self.outputWrapper.logNormal(u'Synonyms: '+ syn + ' :: '+ ', '.join(sorted(self.synonyms[syn])))
        if (self.config.getboolean('verbose')):
            self.outputWrapper.logNormal('Loaded [' + str(len(list(self.synonyms.keys()))) + '] tag synonyms')           
        
            self.outputWrapper.logNormal('Loaded [' + str(len(list(self.synonyms.keys()))) + '] tag synonyms')


    def loadSkipList(self, strInFile):
        if (common.isempty(strInFile)):


@@ 488,7 488,7 @@ class MediaLibrary:
            self.outputWrapper.logError('SkipList file either does not exist or cannot be accessed [' + strInFile + ']')

        ignoreCase = self.config.getboolean('ignoreCase')
        tmpSet = set()        
        tmpSet = set()
        for line in fileinput.input(strInFile):
            # Allow inline comments
            if ('#' in line):


@@ 496,33 496,33 @@ class MediaLibrary:
            line = line.strip()
            if (common.isempty(line)):
                continue
            if (isinstance(line, str)):                
            if (isinstance(line, str)):
                pass#line = str(line, 'latin1')
            if (ignoreCase): 
            if (ignoreCase):
                line = line.lower()
            tmpSet.add(line)
        if (self.config.getboolean('verbose')):
            self.outputWrapper.logNormal('Loaded [' + str(len(tmpSet)) + '] skip list entries from [' + strInFile + ']')
        return tmpSet           
        return tmpSet


    
    def generateLocalTags(self):
        '''
        This method goes through the media library and pulls out each distinct token, storing it in
        the localTagLibrary object. At the end of processing, this object will contain counters for the
        number of times each tag is referenced in the local library and a canonical (display) form of the tag
        '''
        

        # These are dummy tags which may optionally be used to indicate an absence of tags
        self.localTagLibrary['untagged artist'] = dict(disp='Untagged Artist', lastfmkeys=[], localhits=0, lasthits=0)
        self.localTagLibrary['untagged track'] = dict(disp='Untagged Track', lastfmkeys=[], localhits=0, lasthits=0)
        

        for artist in self.mediaLibrary:
            self.generateLocalTagsHelper(self.mediaLibrary[artist]['tags'], 'untagged artist')
            for album in self.mediaLibrary[artist]['albums']:
                for track in self.mediaLibrary[artist]['albums'][album]['tracks']:
                    self.generateLocalTagsHelper(self.mediaLibrary[artist]['albums'][album]['tracks'][track]['tags'], 'untagged track')
                    

        # Move or merge the lastFM tag counts to the local tag object
        for localtag in self.localTagLibrary:
            lastcount = 0


@@ 532,31 532,31 @@ class MediaLibrary:


    def generateLocalTagsHelper(self, tagpairs, emptyTagKey):
        '''
        Process the tag pairs of a single record (either a track or an artist): replace each
        tag by its synonyms where lookupSynonyms() provides them, register the results in the
        local tag library, and increment 'localhits' for every tag that common.distinctTagSeq()
        returns. When the record has no tags, before or after processing, the 'localhits'
        counter of emptyTagKey is incremented instead.
        '''
        if (tagpairs is None or len(tagpairs) == 0):
            self.localTagLibrary[emptyTagKey]['localhits'] += 1
            return

        expanded = []
        for tagpair in tagpairs:
            synonyms = self.lookupSynonyms(tagpair[0])
            # An empty synonym collection is valid (it means: delete the tag)
            replacements = [tagpair[0]] if synonyms is None else synonyms
            for replacement in replacements:
                self.addToLocalTagLibrary(replacement, tagpair[0])
                expanded.append((replacement.lower(), tagpair[1]))

        distinct = common.distinctTagSeq(expanded)
        if (len(distinct) == 0):
            self.localTagLibrary[emptyTagKey]['localhits'] += 1
            return
        # Keep track of distinct library hits for the local tags
        for localtag in distinct:
            self.localTagLibrary[localtag[0]]['localhits'] += 1
                     

    def addToLocalTagLibrary(self, localtag, lasttag):
        ''' 
        '''
        Ensures that the specified tag is in the local tag library, with a back reference to the original
        lastFM tag. We also seed the 'disp' value with a canonical tag representation. In general, this is the
        first case-form of the tag seen (so you don't end up with genres 'indie' and 'Indie'), but may optionally


@@ 565,44 565,50 @@ class MediaLibrary:
        localkey = localtag.lower()
        lastkey = lasttag.lower()
        if (localkey not in self.localTagLibrary):
            if (self.config.getboolean('capTagWords')): disptag = string.capwords(localtag)
            if (self.config.getboolean('capTagWords')): disptag = localtag.title()
            else:                                       disptag = localtag
            self.localTagLibrary[localkey] = dict(disp=disptag, lastfmkeys=set([lastkey]), localhits=0)
        elif (lastkey not in self.localTagLibrary[localkey]['lastfmkeys']):
            self.localTagLibrary[localkey]['lastfmkeys'].add(lastkey)
        


    def lastTagsToLocalTags(self, tagpairs):
        '''
        Translate a record's lastFM tag pairs into local tag pairs.

        Each tag is replaced by its synonyms when lookupSynonyms() returns a collection (an
        empty collection deletes the tag). Tags whose local tag library entry has fewer
        'localhits' than the 'minLibraryCount' setting, or fewer 'lasthits' than the
        'minLastFMCount' setting, are filtered out. The remaining (lowercase tag, weight)
        pairs are passed through common.distinctTagSeq() and returned.

        Returns an empty list when tagpairs is None or empty.
        '''
        if (tagpairs is None or len(tagpairs) == 0):
            return []

        # The thresholds are the same for every tag, so read them once per call
        # instead of once per candidate tag.
        minLibraryCount = self.config.getint('minLibraryCount')
        minLastFMCount = self.config.getint('minLastFMCount')

        newtags = []
        for tagpair in tagpairs:
            synlist = self.lookupSynonyms(tagpair[0])
            # an empty list is valid (means delete the tag)
            tmplist = synlist if synlist is not None else [tagpair[0]]
            for tmptag in tmplist:
                key = tmptag.lower()
                libraryEntry = self.localTagLibrary[key]
                if (libraryEntry['localhits'] < minLibraryCount): continue
                if (libraryEntry['lasthits'] < minLastFMCount): continue
                newtags.append((key, tagpair[1]))
        return common.distinctTagSeq(newtags)
           


    def lookupSynonyms(self, tag):
        '''
        Look up the replacement tags configured for the given tag.

        Returns None when the tag is empty (per common.isempty), when its lowercase form
        occurs inside one of the comma-separated entries of the 'skipTags' setting, or when
        the synonym table has no entry for it. Otherwise returns the synonym table entry
        for the lowercase tag.
        '''
        if (common.isempty(tag)):
            return None
        key = tag.lower()

        # A missing or unset skipTags option must not break tag processing, so fall back to
        # an empty skip list. Catch Exception rather than everything, so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            skipTags = [x.lower().strip() for x in self.config.get('skipTags').split(',')]
        except Exception:
            skipTags = []

        # This is a substring test: the tag matches when it appears anywhere inside an entry.
        # NOTE(review): callers in this file treat None as "no synonyms, keep the tag as is";
        # if skipTags is meant to drop the tag, an empty list would be needed - confirm intent.
        if any(key in skipTag for skipTag in skipTags):
            return None
        if (key in self.synonyms):
            return self.synonyms[key]
        return None

        

    def printDistinctLocalTags(self):
        if (len(self.localTagLibrary) == 0):
            return


@@ 622,28 628,28 @@ class MediaLibrary:


    def formattedTagList(self, tagpairs, startDelim, endDelim):
        '''
        Turn tag pairs into display strings: the first element of each pair is looked up in
        the local tag library, its 'disp' form is wrapped in startDelim/endDelim, and the
        resulting list is passed through common.distinctSeq() before being returned.
        '''
        formatted = []
        for pair in tagpairs:
            displayForm = self.localTagLibrary[pair[0]]['disp']
            formatted.append(startDelim + displayForm + endDelim)
        return common.distinctSeq(formatted)


    
    def getLibraryWeight(self, tag):
        ''' Returns the library weight ('localhits') for the given tag, or 0 if the tag is empty or not present '''
        if (common.isempty(tag)):
            return 0
        # Use get() so that an unknown tag yields 0, as documented, instead of raising KeyError
        entry = self.localTagLibrary.get(tag.lower())
        if (entry is None):
            return 0
        return entry['localhits']

        

    def getPopularityWeight(self, tag):
        ''' Returns the popularity weight ('lasthits') for the given tag, or 0 if the tag is empty or not present '''
        if (common.isempty(tag)):
            return 0
        # Use get() so that an unknown tag yields 0, as documented, instead of raising KeyError
        entry = self.localTagLibrary.get(tag.lower())
        if (entry is None):
            return 0
        return entry['lasthits']

M lastfmtagupdater/outputwrapper.py => lastfmtagupdater/outputwrapper.py +7 -7
@@ 6,10 6,10 @@ This helper class facilitates output to either the console or a GUI. If GUI mode
as a background thread.
'''
class OutputWrapper():
    

    def __init__(self, config):
        self.config = config
            

        logFile = self.config.get('logFile')
        if (logFile is not None):
            self.logFileHandle = codecs.open(logFile, 'w+', 'utf_8_sig')


@@ 18,13 18,13 @@ class OutputWrapper():
    def logNormal(self, msg):
        ''' Pass msg to _logHelper with sys.stdout as the console stream. '''
        consoleStream = sys.stdout
        self._logHelper(msg, consoleStream)

        

    def logError(self, msg):
        ''' Pass msg to _logHelper with sys.stderr as the console stream. '''
        consoleStream = sys.stderr
        self._logHelper(msg, consoleStream)


    def _logHelper(self, msg, consoleStream):    
        
    def _logHelper(self, msg, consoleStream):

        if (self.logFileHandle is not None):
            self.logFileHandle.write(msg + '\n')
            self.logFileHandle.flush()


@@ 38,7 38,7 @@ class OutputWrapper():
            pass
        consoleStream.flush()

        

    def close(self):
        ''' Close the log file handle, if there is one. '''
        handle = self.logFileHandle
        if (handle is None):
            return
        handle.close()

M setup.py => setup.py +2 -2
@@ 1,7 1,7 @@
#! /usr/bin/env python
from setuptools import setup

VERSION = '3.0'
VERSION = '3.0.1'

with open("README.md", "rb") as f:
    long_descr = f.read()


@@ 27,7 27,7 @@ def main():
          license='GPL',
          include_package_data=True,
          zip_safe=False,
          install_requires=['pylast','mutagen'],
          install_requires=['pylast<2.0.0','mutagen'],
          packages=['lastfmtagupdater'],
          entry_points={
              "console_scripts": ['lastfmtagupdater = lastfmtagupdater:main'],