#!/usr/bin/env bash
# Cync -- Cyworld Synchronizer
# Author: Jaeho Shin <netj@ropas.snu.ac.kr>
# Created: 2004-12-02

# Recursion depth handed to wget's --level option when the diary pages are
# mirrored for "cync diary.rss".
Level=5

# Seed the cookie jar that get() loads into wget.
# Globals: tmp (read) -- scratch directory; the jar is "$tmp/cookies.txt".
# An already existing jar is left untouched, so only the first call writes it.
prep_cookies() {
    local jar="$tmp/cookies.txt"

    if [ -f "$jar" ]; then
        return 0
    fi

    # A single tab-separated cookie line: cookie "pcid" for .nate.com whose
    # value is "cync_" followed by this machine's host name.
    printf '.nate.com\tTRUE\t/\tFALSE\t2000000000\tpcid\tcync_%s\n' \
        "$(hostname)" >"$jar"
}

clean_page() {
    iconv -f cp949 -t utf-8 \
    |sed -e 's#
$##g' \
    |sed -e 's#&#\&amp;#g' \
    |sed -e 's#&amp;nbsp;\?# #g' \
    |sed -e 's#ks_c_5601-1987#utf-8#g' \
    |sed -e 's# <br> #<br>#g' \
    |sed -e 's#  *</td>#</td>#g' \
    |perl -e '
    $a=join "", <>;
    $a =~ s:<script[^>]*>.*?</script>::gs;
    $a =~ s:<!--.*?-->::gs;
    print $a' \
    |xmllint --recover --html --xmlout --encode utf-8 - 2>/dev/null
}

# Apply the diary-page2items.xsl stylesheet to the XML document on stdin
# and write the transformation result to stdout.
# Globals: BaseDir (read) -- directory in which the stylesheet is looked up.
convert_page() {
    local stylesheet="$BaseDir/diary-page2items.xsl"
    local -a opts=(--novalid)

    xsltproc "${opts[@]}" "$stylesheet" -
}

# Apply the diary-items2rss.xsl stylesheet to the XML document on stdin
# and write the result to stdout.
# Globals:   BaseDir (read) -- directory in which the stylesheet is looked up.
# Arguments: $1 - value for the stylesheet parameter "owner"
#            $2 - value for the stylesheet parameter "url"
merge_items() {
    # --stringparam hands the values over as plain strings.  Building an
    # XPath literal by hand ("'$1'") breaks as soon as a value contains a
    # single quote.
    xsltproc --novalid \
        --stringparam owner "$1" \
        --stringparam url "$2" \
        "$BaseDir/diary-items2rss.xsl" -
}

# Re-indent the XML document on stdin with xmllint and write it to stdout,
# encoded as UTF-8.
format_xml() {
    local -a opts=(--format --encode utf-8)

    xmllint "${opts[@]}" -
}


# Run wget with the Cync user agent and the cookie jar from prep_cookies.
# Globals:   tmp (read) -- scratch directory holding cookies.txt.
# Arguments: any further wget options and URLs; passed through unchanged.
# Returns:   the exit status of wget.
get() {
    (
        # Make sure the cookie jar exists before wget tries to load it.
        prep_cookies

        fetch=(
            wget
            --user-agent="Cync/1.0"
            --cookies=on
            --load-cookies="$tmp/cookies.txt"
            --quiet
        )
        "${fetch[@]}" "$@"
    )
}


# Directory that holds this script; the XSL stylesheets are looked up here.
BaseDir=$(cd "$(dirname "$0")" && pwd)

set -e

# Private scratch directory: cookie jar plus the mirrored diary pages.
tmp=$(mktemp -d /tmp/cync.XXXXXX)

# Remove the scratch directory; installed as the EXIT handler below.
cleanup() {
    rm -rf -- "$tmp"
}
trap cleanup EXIT
# A signal has to end the run.  Exiting from the handler triggers the EXIT
# trap, so the directory is removed exactly once and the script does not
# keep going without its scratch directory.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Print the command-line help on stderr and exit with status 2.
usage() {
    cat >&2 <<'USAGE'
Cync -- Cyworld Synchronizer 1.0
Usage: cync diary.rss <homepage address name>
       cync get [wget option] <URL> ...

Examples:
    cync diary.rss netj
    cync get http://img.cyworld.nate.com/common/file_down.asp?redirect=%2Fc7601%2F2004%2F10%2F2%2F98%2Fcy%2EJPG

USAGE
    exit 2
}

case "$1" in
    get)
    # "cync get ARGS...": call the get function with the remaining arguments.
    "$@"
    ;;

    diary.rss)
    shift
    id=$1
    [ -n "$id" ] || usage

    # Find the numeric "?tid=NNN" on the owner's page.  In a basic regular
    # expression a bare "?" is a literal question mark.  Only the first
    # match is used so that tid is always a single number.
    tid=$(get -O- "http://cyworld.nate.com/$id" \
        | sed -n -e 's/.*?tid=\([0-9][0-9]*\).*/\1/p' \
        | head -n 1)
    if [ -z "$tid" ]; then
        echo "cync: could not find a tid for '$id'" >&2
        exit 1
    fi

    # Mirror the diary list pages into the scratch directory.  The subshell
    # keeps the directory change local; nothing but the RSS document may be
    # written to stdout.
    (
        cd "$tmp"
        get --recursive --level "$Level" --span-hosts -np -nH --cut-dirs=2 \
            -I x/common/ -I /pims/diary/ \
            "http://minihp.cyworld.nate.com/pims/diary/diary_list_na.asp?tid=$tid"
    )

    # Collect the downloaded pages first.  NUL-delimited names are immune to
    # word splitting and globbing (the file names contain "?").
    pages=()
    while IFS= read -r -d '' page; do
        pages+=("$page")
    done < <(find "$tmp" -name 'diary_list_na.asp*' ! -name '*.cync' -print0)

    {
        echo "<items-set>"
        for page in "${pages[@]}"; do
            # The cleaned page is also kept next to the download as *.cync.
            clean_page <"$page" | tee "$page.cync" | convert_page
        done
        echo "</items-set>"
    } \
    | merge_items "$id" \
        "http://minihp.cyworld.nate.com/pims/main/pims_main.asp?tid=$tid" \
    | format_xml
    ;;

    *)
    usage
    ;;
esac
