#!/usr/bin/env python
# http://www.python.org/doc/current/lib/dom-node-objects.html

def bookAugment(doc,outfd):
    """Augment each book of a bookcase document with Amazon search results.

    For every element child of the document's <collection> element a power
    search query is built from the book's existing title, authors and isbn
    and passed to amazon.searchByPower.  When the reply contains exactly one
    "Details" element, the book's title, subtitle, authors and isbn are
    replaced (or inserted) from that reply.  When the query fails or the
    reply is not unique, XML comments recording the query and the problem
    are appended to the book instead.  Finally the <bookcase> element is
    pretty-printed to outfd.

    doc   -- DOM document containing a <bookcase> and a <collection>
             element; it is modified in place.
    outfd -- writable file object that receives the pretty-printed output.
    """
    import amazon
    import time
    # Imported locally so the function does not depend on the script's
    # __main__ section having bound PrettyPrint as a module global.
    from xml.dom.ext import PrettyPrint

    def _childrenElements(node):
        """Return the children of node that are element nodes."""
        return [n for n in node.childNodes
                if n.nodeType == n.ELEMENT_NODE]

    def _normalizeWhitespace(text):
        """Collapse every run of whitespace in text to a single space."""
        return ' '.join(text.split())

    def _normalizeIsbn(chars):
        """Strip the hyphens out of an ISBN string."""
        return chars.replace('-', '')

    def getText(node):
        """Return the whitespace-normalized data of node's first child,
        or None when there is no first child or it carries no data."""
        node.normalize()
        try:
            return _normalizeWhitespace(node.firstChild.data)
        except AttributeError:
            return None

    def buildQuery(query, prop, value):
        """Return query extended with "prop: value", joined with " and ";
        a value of None or "" leaves the query unchanged."""
        if value not in [None, ""]:
            if query != "":
                query += " and "
            query += "%s: %s" % (prop, value)
        return query

    def _get_feature(prop, book):
        """Return the first element child of book whose localName equals
        prop, or None when the book has no such child."""
        for feature in _childrenElements(book):
            if feature.localName == prop:
                return feature
        return None

    def _splitTitle(text):
        """Split "Title: Subtitle" text at the first ": ".

        Returns (title, subtitle); subtitle is "" when there is no
        separator, and both are "" when text is None.
        """
        if text is None:
            return "", ""
        try:
            title, subtitle = text.split(": ", 1)
        except ValueError:
            title, subtitle = text, ""
        return title, subtitle

    def _replaceFeature(book, replacement, feature):
        """Put replacement in place of feature inside book, or append it
        to book when feature is None."""
        if feature is not None:
            book.replaceChild(replacement, feature)
        else:
            book.appendChild(replacement)

    def _attachComments(book, comments):
        """Append every string in comments to book as an XML comment."""
        for comment in comments:
            book.appendChild(doc.createComment(comment))

    book_ns = "http://periapsis.org/bookcase/"
    # Properties that get augmented, in the order they are processed.  The
    # full bookcase DTD order, for future extension, is: title, subtitle,
    # authors, binding, pur_date, pur_price, publisher, edition, cr_years,
    # pub_year, isbn, lccn, pages, languages, genres, keywords, series,
    # series_num, condition, signed, read, gift, loaned, rating, comments.
    book_dtd = ("title", "subtitle", "authors", "isbn")

    bookcase = doc.getElementsByTagName("bookcase").pop()
    collection = doc.getElementsByTagName("collection").pop()
    for book in _childrenElements(collection):
        query = ""          # the query to pass to Amazon searchByPower
        comments = []

        # Build the query from existing title, isbn, and author
        for feature in _childrenElements(book):
            if feature.localName == "title":
                query = buildQuery(query, "title", getText(feature))
            if feature.localName == "authors":
                for author in _childrenElements(feature):
                    query = buildQuery(query, "author", getText(author))
            if feature.localName == "isbn":
                isbn = getText(feature)
                if isbn is not None:    # an empty <isbn/> has no text
                    isbn = _normalizeIsbn(isbn)
                query = buildQuery(query, "isbn", isbn)

        # Perform the query
        comments.append("query = %s" %query.encode('utf-8'))
        results = None
        try:
            results,url = amazon.searchByPower(query,return_xml=1)
            comments.append("query='%s'" %url)
        except amazon.AmazonError as e:
            comments.append("ERROR %s" %e)
        # Amazon only permits one query per second.  Pause after every
        # attempt, failed ones included, so that a failure is not followed
        # immediately by the next book's request.
        time.sleep(.7)
        if results is None:
            _attachComments(book, comments)
            continue

        # Augment the book with the results of the query by stepping
        # through book_dtd and replacing/inserting elements.
        Details = results.getElementsByTagName("Details")
        if len(Details) != 1:   # not a unique entry
            comments.append("There is not a single return")
            _attachComments(book, comments)
            continue

        Detail = Details[0]
        product_names = results.getElementsByTagName("ProductName")
        if len(product_names) > 0:
            product_text = getText(product_names[0])
        else:
            product_text = None
        title, subtitle = _splitTitle(product_text)

        for prop in book_dtd:
            feature = _get_feature(prop, book)
            if prop == "title":
                # Keep the existing title when the reply has no usable one.
                if title != "":
                    r_title = doc.createElementNS(book_ns,"title")
                    r_title.appendChild(doc.createTextNode(title))
                    _replaceFeature(book, r_title, feature)

            elif prop == "subtitle":
                if subtitle != "":
                    r_subtitle = doc.createElementNS(book_ns,"subtitle")
                    r_subtitle.appendChild(doc.createTextNode(subtitle))
                    _replaceFeature(book, r_subtitle, feature)

            elif prop == "authors":
                # Snapshot the result list: appendChild below moves the
                # nodes out of the result document.
                r_authors = list(Detail.getElementsByTagName("Author"))
                if feature is None:
                    if len(r_authors) == 0:
                        continue    # nothing to add, nothing to replace
                    feature = doc.createElementNS(book_ns,"authors")
                    book.appendChild(feature)
                # remove the existing authors and add the ones from the
                # Amazon result
                # dbg: could use dom.importNode(node,True) from cDomlette
                for author in _childrenElements(feature):
                    feature.removeChild(author)
                for r_author in r_authors:
                    r_author.tagName = "author"
                    feature.appendChild(r_author)

            elif prop == "isbn":
                r_isbns = Detail.getElementsByTagName("Isbn")
                if len(r_isbns) > 0:    # in rare cases there is no ISBN
                    r_isbn = r_isbns[0]
                    r_isbn.tagName = "isbn"
                    _replaceFeature(book, r_isbn, feature)

    PrettyPrint(bookcase,outfd)


def print_usage():
    """Print the three-line command-line usage summary to standard output."""
    # Each call passes exactly one parenthesised string, which produces the
    # same output under the print statement and the print function.
    print("pybookcase infile.xml outfile.xml")
    print("pybookcase will augment a bookcase XML file with other information from ")
    print("the python interface")

if __name__ == "__main__":
    # Script entry point:  pybookcase [-h] [infile.xml [outfile.xml]]
    # A missing infile defaults to standard input, a missing outfile to
    # standard output.

    import getopt, sys

    try:
        (options,files) = getopt.getopt (sys.argv[1:],"h")
    except getopt.error:
        # Unrecognised option: show the usage text and stop, rather than
        # carrying on with `options` and `files` unbound.
        print_usage()
        sys.exit(2)
    for (option,value) in options:
        if option == '-h':
            print_usage()
            sys.exit(0)

    from xml.dom import minidom
    # Bound at module level so PrettyPrint is also reachable as a global.
    from xml.dom.ext import PrettyPrint

    if len(files) > 0:
        infd = open(files[0])
    else:
        infd = sys.stdin
    try:
        doc = minidom.parse(infd)
    finally:
        if infd is not sys.stdin:
            infd.close()

    # The output file is opened only after the input has been parsed, so a
    # parse failure, or using the same path for input and output, does not
    # truncate the data before it has been read.
    if len(files) > 1:
        outfd = open(files[1], 'w')
    else:
        outfd = sys.stdout
    try:
        bookAugment(doc,outfd)
    finally:
        if outfd is not sys.stdout:
            outfd.close()

