# match_counter.py # Implements class MatchCounter. # # Copyright (C) 2012-2016 Red Hat, Inc. # # This copyrighted material is made available to anyone wishing to use, # modify, copy, or redistribute it subject to the terms and conditions of # the GNU General Public License v.2, or (at your option) any later version. # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY expressed or implied, including the implied warranties of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. You should have received a copy of the # GNU General Public License along with this program; if not, write to the # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. Any Red Hat trademarks that are incorporated in the # source code or documentation are not subject to the GNU General Public # License and may only be used or replicated with the express permission of # Red Hat, Inc. # from __future__ import absolute_import from __future__ import print_function from __future__ import unicode_literals from functools import reduce WEIGHTS = { 'name' : 7, 'summary' : 4, 'description' : 2, 'url' : 1, } def _canonize_string_set(sset, length): """ Ordered sset with empty strings prepended. """ current = len(sset) l = [''] * (length - current) + sorted(sset) return l class MatchCounter(dict): """Map packages to which of their attributes matched in a search against what values. The mapping is: ``package -> [(key, needle), ... ]``. """ @staticmethod def _eval_weights(pkg, matches): # how much is each match worth and return their sum: def weight(match): key = match[0] needle = match[1] haystack = getattr(pkg, key) if key == "name" and haystack == needle: # if package matches exactly by name, increase weight return 2 * WEIGHTS[key] return WEIGHTS[key] return sum(map(weight, matches)) def _key_func(self): """Get the key function used for sorting matches. It is not enough to only look at the matches and order them by the sum of their weighted hits. In case this number is the same we have to ensure that the same matched needles are next to each other in the result. Returned function is: pkg -> (weights_sum, canonized_needles_set, -distance) """ def get_key(pkg): return ( # use negative value to make sure packages with the highest weight come first - self._eval_weights(pkg, self[pkg]), # then order packages alphabetically pkg.name, ) return get_key def _max_needles(self): """Return the max count of needles of all packages.""" if self: return max(len(self.matched_needles(pkg)) for pkg in self) return 0 def add(self, pkg, key, needle): self.setdefault(pkg, []).append((key, needle)) def dump(self): for pkg in self: print('%s\t%s' % (pkg, self[pkg])) def matched_haystacks(self, pkg): return set(getattr(pkg, m[0]) for m in self[pkg]) def matched_keys(self, pkg): # return keys in the same order they appear in the list result = [] for i in self[pkg]: if i[0] in result: continue result.append(i[0]) return result def matched_needles(self, pkg): return set(m[1] for m in self[pkg]) def sorted(self, reverse=False, limit_to=None): keys = limit_to if limit_to else self.keys() return sorted(keys, key=self._key_func()) def total(self): return reduce(lambda total, pkg: total + len(self[pkg]), self, 0)