From 92406a8cc7d3513e907e12ac7bcf1eed32eb78a9 Mon Sep 17 00:00:00 2001
From: Adam Sampson <ats@offog.org>
Date: Wed, 28 Jul 2004 12:11:45 +0000
Subject: [PATCH] Add upgrade tool.

---
 NEWS                     |  13 +++++
 rawdoglib/rawdog.py      |  11 +++-
 rawdoglib/upgrade_1_2.py | 115 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 rawdoglib/upgrade_1_2.py

diff --git a/NEWS b/NEWS
index d56de05..58dc8ba 100644
--- a/NEWS
+++ b/NEWS
@@ -5,11 +5,24 @@ internals; state files from old versions will no longer work with rawdog
 2.0 (and external programs that manipulate rawdog state files will also
 be broken).
 
+rawdog now has a --upgrade option to allow it to import state from
+rawdog 1.x state files into rawdog 2.x; to upgrade from 1.x to 2.x,
+you'll need to perform the following steps:
+- cp -R ~/.rawdog ~/.rawdog-old
+- rm ~/.rawdog/state
+- rawdog -u (with the new rawdog, to add and fetch all the feeds)
+- rawdog --upgrade ~/.rawdog-old ~/.rawdog (to copy the state)
+- rawdog -w
+- rm -r ~/.rawdog-old (once you're happy with the new version)
+
 rawdog now keeps track of a version number in the state file, and will
 complain if you use a state file from an incompatible version.
 
 The old option syntax ("rawdog update write") is no longer supported.
 
+The state file is now saved in the binary pickle format, and cPickle is
+used instead of pickle so it can be read and written more rapidly.
+
 - rawdog 1.13
 
 Handle OverflowError with parsed dates (patch from Matthew Scott).
diff --git a/rawdoglib/rawdog.py b/rawdoglib/rawdog.py
index a54d178..6c8cca0 100644
--- a/rawdoglib/rawdog.py
+++ b/rawdoglib/rawdog.py
@@ -851,18 +851,27 @@ Actions (performed in order given):
 -t, --show-template          Print the template currently in use
 -T, --show-itemtemplate      Print the item template currently in use
 
+Special actions (all other options are ignored if one of these is specified):
+--upgrade OLDDIR NEWDIR      Import feed state from rawdog 1.x directory
+                             OLDDIR into rawdog 2.x directory NEWDIR
+
 Report bugs to <azz@us-lot.org>."""
 
 def main(argv):
 	"""The command-line interface to the aggregator."""
 
 	try:
-		(optlist, args) = getopt.getopt(argv, "ulwf:c:tTd:v", ["update", "list", "write", "update-feed=", "help", "config=", "show-template", "dir=", "show-itemtemplate", "verbose"])
+		(optlist, args) = getopt.getopt(argv, "ulwf:c:tTd:v", ["update", "list", "write", "update-feed=", "help", "config=", "show-template", "dir=", "show-itemtemplate", "verbose", "upgrade"])
 	except getopt.GetoptError, s:
 		print s
 		usage()
 		return 1
 
+	for o, a in optlist:
+		if o == "--upgrade" and len(args) == 2:
+			import upgrade_1_2
+			return upgrade_1_2.upgrade(args[0], args[1])
+
 	if len(args) != 0:
 		usage()
 		return 1
diff --git a/rawdoglib/upgrade_1_2.py b/rawdoglib/upgrade_1_2.py
new file mode 100644
index 0000000..a35cab8
--- /dev/null
+++ b/rawdoglib/upgrade_1_2.py
@@ -0,0 +1,115 @@
+# upgrade_1_2: import state from rawdog 1.x state files to rawdog 2.x
+# Copyright 2003 Adam Sampson <azz@us-lot.org>
+#
+# rawdog is free software; you can redistribute and/or modify it
+# under the terms of that license as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# rawdog is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with rawdog; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA, or see http://www.gnu.org/.
+
+import sys, os, time, difflib
+import cPickle as pickle
+import feedparser
+from rawdog import Rawdog, Article
+from persister import Persister
+
+def format_time(secs):
+	"""Turn a Unix time into a local-time string like 2004-07-28 12:11:45."""
+	return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(secs))
+
+def approximately_equal(a, b):
+	"""Return whether two strings are identical or a close difflib match."""
+	if a != b:
+		return difflib.get_close_matches(a, [b], 1, 0.6) != []
+	return True
+
+def upgrade(olddir, newdir):
+	"""Copy feed and article state from the rawdog 1.x state directory
+	olddir into the rawdog 2.x state directory newdir, then save it.
+	Feeds are paired by URL; since rawdog 2.0 mangles articles in a
+	slightly different way, articles are paired by approximate matching.
+	Changes the working directory to newdir and returns None."""
+	print "Importing state from " + olddir + " into " + newdir
+
+	print "Loading old state"
+	# Always release the old state file, even if unpickling fails.
+	f = open(olddir + "/state")
+	try:
+		oldrawdog = pickle.load(f)
+	finally:
+		f.close()
+
+	print "Loading new state"
+	os.chdir(newdir)
+	persister = Persister("state", Rawdog)
+	newrawdog = persister.load()
+
+	print "Copying feed state"
+	oldfeeds = {}
+	newfeeds = {}
+	for url, oldfeed in oldrawdog.feeds.items():
+		if newrawdog.feeds.has_key(url):
+			last_update = oldfeed.last_update
+			print "Setting feed", url, "last update time to", format_time(last_update)
+			newrawdog.feeds[url].last_update = last_update
+			oldfeeds[url] = {}
+			newfeeds[url] = {}
+		else:
+			print "Old feed", url, "not in new state"
+
+	print "Copying article state"
+	# Separate out the articles by feed.
+	for oldhash, oldarticle in oldrawdog.articles.items():
+		if oldfeeds.has_key(oldarticle.feed):
+			oldfeeds[oldarticle.feed][oldhash] = oldarticle
+	for newhash, newarticle in newrawdog.articles.items():
+		if newfeeds.has_key(newarticle.feed):
+			newfeeds[newarticle.feed][newhash] = newarticle
+
+	# Now fuzzily match articles, scoring one point per agreeing field.
+	for url, oldarticles in oldfeeds.items():
+		for newhash, newarticle in newfeeds[url].items():
+			matches = []
+			for oldhash, oldarticle in oldarticles.items():
+				score = 0
+				olink = oldarticle.link
+				nlink = newarticle.entry_info.get("link")
+				if olink is not None and nlink is not None and olink == nlink:
+					score += 1
+
+				otitle = oldarticle.title
+				ntitle = newarticle.entry_info.get("title")
+				if otitle is not None and ntitle is not None and approximately_equal(otitle, ntitle):
+					score += 1
+
+				odesc = oldarticle.description
+				ndesc = newarticle.entry_info.get("description")
+				if odesc is not None and ndesc is not None and approximately_equal(odesc, ndesc):
+					score += 1
+				matches.append((score, oldhash))
+			# Best score sorts last; accept it only if at least two fields agreed.
+			matches.sort()
+			if matches != [] and matches[-1][0] > 1:
+				oldhash = matches[-1][1]
+				oldarticle = oldarticles[oldhash]
+				newarticle.sequence = oldarticle.sequence
+				newarticle.last_seen = oldarticle.last_seen
+				newarticle.added = oldarticle.added
+				print "Matched new", newhash, "to old", oldhash, "in", url
+			else:
+				print "No match for", newhash, "in", url
+
+	print "Saving new state"
+	newrawdog.modified()
+	persister.save()
+	print "Done"
+
-- 
2.35.1