From 92406a8cc7d3513e907e12ac7bcf1eed32eb78a9 Mon Sep 17 00:00:00 2001 From: Adam Sampson Date: Wed, 28 Jul 2004 12:11:45 +0000 Subject: [PATCH] Add upgrade tool. --- NEWS | 13 +++++ rawdoglib/rawdog.py | 11 +++- rawdoglib/upgrade_1_2.py | 115 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 rawdoglib/upgrade_1_2.py diff --git a/NEWS b/NEWS index d56de05..58dc8ba 100644 --- a/NEWS +++ b/NEWS @@ -5,11 +5,24 @@ internals; state files from old versions will no longer work with rawdog 2.0 (and external programs that manipulate rawdog state files will also be broken). +rawdog now has a --upgrade option to allow it to import state from +rawdog 1.x state files into rawdog 2.x; to upgrade from 1.x to 2.x, +you'll need to perform the following steps: +- cp -R ~/.rawdog ~/.rawdog-old +- rm ~/.rawdog/state +- rawdog -u (with the new rawdog, to add and fetch all the feeds) +- rawdog --upgrade ~/.rawdog-old ~/.rawdog (to copy the state) +- rawdog -w +- rm -r ~/.rawdog-old (once you're happy with the new version) + rawdog now keeps track of a version number in the state file, and will complain if you use a state file from an incompatible version. The old option syntax ("rawdog update write") is no longer supported. +The state file is now saved in the binary pickle format, and cPickle is +used instead of pickle so it can be read and written more rapidly. + - rawdog 1.13 Handle OverflowError with parsed dates (patch from Matthew Scott). 
diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py index a54d178..6c8cca0 100644 --- a/rawdoglib/rawdog.py +++ b/rawdoglib/rawdog.py @@ -851,18 +851,27 @@ Actions (performed in order given): -t, --show-template Print the template currently in use -T, --show-itemtemplate Print the item template currently in use +Special actions (all other options are ignored if one of these is specified): +--upgrade OLDDIR NEWDIR Import feed state from rawdog 1.x directory + OLDDIR into rawdog 2.x directory NEWDIR + Report bugs to .""" def main(argv): """The command-line interface to the aggregator.""" try: - (optlist, args) = getopt.getopt(argv, "ulwf:c:tTd:v", ["update", "list", "write", "update-feed=", "help", "config=", "show-template", "dir=", "show-itemtemplate", "verbose"]) + (optlist, args) = getopt.getopt(argv, "ulwf:c:tTd:v", ["update", "list", "write", "update-feed=", "help", "config=", "show-template", "dir=", "show-itemtemplate", "verbose", "upgrade"]) except getopt.GetoptError, s: print s usage() return 1 + for o, a in optlist: + if o == "--upgrade" and len(args) == 2: + import upgrade_1_2 + return upgrade_1_2.upgrade(args[0], args[1]) + if len(args) != 0: usage() return 1 diff --git a/rawdoglib/upgrade_1_2.py b/rawdoglib/upgrade_1_2.py new file mode 100644 index 0000000..a35cab8 --- /dev/null +++ b/rawdoglib/upgrade_1_2.py @@ -0,0 +1,115 @@ +# upgrade_1_2: import state from rawdog 1.x state files to rawdog 2.x +# Copyright 2003 Adam Sampson +# +# rawdog is free software; you can redistribute and/or modify it +# under the terms of that license as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# rawdog is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with rawdog; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA, or see http://www.gnu.org/.
+
+import sys, os, time, difflib
+import cPickle as pickle
+import feedparser
+from rawdog import Rawdog, Article
+from persister import Persister
+
+def format_time(secs):
+    """Turn a Unix time into a human-readable string (local time,
+    formatted as YYYY-MM-DD HH:MM:SS)."""
+    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(secs))
+
+def approximately_equal(a, b):
+    """Return whether two strings are approximately equal.
+
+    Identical strings always match; otherwise the strings match if
+    difflib.get_close_matches accepts them at a cutoff of 0.6."""
+    if a == b:
+        return True
+    return difflib.get_close_matches(a, [b], 1, 0.6) != []
+
+def _match_score(oldarticle, newarticle):
+    """Count how many of link, title and description agree between a
+    rawdog 1.x article and a rawdog 2.x article, giving a score from 0
+    to 3. Links must be identical; titles and descriptions only need to
+    be approximately equal. A field that is None on either side never
+    counts as agreeing."""
+    info = newarticle.entry_info
+    score = 0
+
+    olink = oldarticle.link
+    nlink = info.get("link")
+    if olink is not None and nlink is not None and olink == nlink:
+        score += 1
+
+    fuzzy_pairs = (
+        (oldarticle.title, info.get("title")),
+        (oldarticle.description, info.get("description")),
+    )
+    for ovalue, nvalue in fuzzy_pairs:
+        if ovalue is not None and nvalue is not None and approximately_equal(ovalue, nvalue):
+            score += 1
+
+    return score
+
+def upgrade(olddir, newdir):
+    """Given a rawdog 1.x state directory and a rawdog 2.x state directory,
+    copy the ordering information from the old one into the new one. Since
+    rawdog 2.0 mangles articles in a slightly different way, this needs to
+    do approximate matching to find corresponding articles.
+
+    olddir and newdir are paths of directories that each contain a file
+    called "state". Progress is reported on standard output. As a side
+    effect, the current directory is changed to newdir."""
+    print "Importing state from " + olddir + " into " + newdir
+
+    print "Loading old state"
+    # Unpickling can execute arbitrary code, so this must only be pointed
+    # at a state file you trust (i.e. one written by your own rawdog 1.x).
+    # The file is deliberately opened in text mode, exactly as before.
+    f = open(os.path.join(olddir, "state"))
+    try:
+        oldrawdog = pickle.load(f)
+    finally:
+        # Don't leak the file handle, even if the old state is unreadable.
+        f.close()
+
+    print "Loading new state"
+    os.chdir(newdir)
+    persister = Persister("state", Rawdog)
+    newrawdog = persister.load()
+
+    print "Copying feed state"
+    # Only feeds present in both states take part in article matching;
+    # these map feed URL -> {article hash: article}.
+    oldfeeds = {}
+    newfeeds = {}
+    for url, oldfeed in oldrawdog.feeds.items():
+        if newrawdog.feeds.has_key(url):
+            last_update = oldfeed.last_update
+            print "Setting feed", url, "last update time to", format_time(last_update)
+            newrawdog.feeds[url].last_update = last_update
+            oldfeeds[url] = {}
+            newfeeds[url] = {}
+        else:
+            print "Old feed", url, "not in new state"
+
+    print "Copying article state"
+
+    # Separate out the articles by feed.
+    for oldhash, oldarticle in oldrawdog.articles.items():
+        if oldarticle.feed in oldfeeds:
+            oldfeeds[oldarticle.feed][oldhash] = oldarticle
+    for newhash, newarticle in newrawdog.articles.items():
+        if newarticle.feed in newfeeds:
+            newfeeds[newarticle.feed][newhash] = newarticle
+
+    # Now fuzzily match articles.
+    for url, oldarticles in oldfeeds.items():
+        for newhash, newarticle in newfeeds[url].items():
+            matches = [(_match_score(oldarticle, newarticle), oldhash)
+                       for oldhash, oldarticle in oldarticles.items()]
+
+            # The best candidate is the greatest (score, hash) tuple, so
+            # ties on score go to the greatest hash. At least two of the
+            # three fields must agree for it to count as a match.
+            best = None
+            if matches:
+                best = max(matches)
+            if best is not None and best[0] > 1:
+                oldhash = best[1]
+                oldarticle = oldarticles[oldhash]
+                newarticle.sequence = oldarticle.sequence
+                newarticle.last_seen = oldarticle.last_seen
+                newarticle.added = oldarticle.added
+                print "Matched new", newhash, "to old", oldhash, "in", url
+            else:
+                print "No match for", newhash, "in", url
+
+    print "Saving new state"
+    newrawdog.modified()
+    persister.save()
+
+    print "Done"
+
-- 
2.35.1