makaleler / Python Programlama / Zargan İngilizce-Türkçe Sözlük.

Zargan İngilizce-Türkçe Sözlük.

25.11.2011 09:54:45

Zargan, Türkçe İngilizce Konsol dan çeviri.


#!/usr/bin/python
# -*- coding:utf-8 -*-

"""
    file    : zargan.py
    author      : emrah
    usage       : zargan.py word

    This script is a console based Zargan interface. Zargan is a
    Turkish-English dictionary web site: www.zargan.com
"""

import re
import sys
import httplib
import urllib
import socket
import HTMLParser

# Debug mode. 0 for production, 1 for testing.
DEBUG = 0
# Color mode for print out. 0 for no color, 1 for colored print out.
COLORED = 1
# Server address and web page.
SERVER = 'www.zargan.com'
PAGE = '/mobil/sozluk.asp'
# Regex pattern to parse word list block.
P_WORD_BLOCK = re.compile('<div id="listResults">(.*?)</div>', re.DOTALL)
# Regex pattern to parse (word, meaning) pair.
P_WORD_PAIR  = '.*?<span class="listString">(.*?)</span>'
P_WORD_PAIR += '.*?<a href=".*?">(.*?)</a>(.*)'
P_WORD_PAIR  = re.compile(P_WORD_PAIR, re.DOTALL)

# -----------------------------------------------------------------------------
def get_response_from_zargan(word):
    """
    Get response from Zargan web site for the given word(s). This is
    an unparsed HTML text.
    """
    try:
        header = {'Content-type'   : 'application/x-www-form-urlencoded',
                  'Accept'         : 'text:plain' }
        param = urllib.urlencode({'sozcuk': word})
        cnn = httplib.HTTPConnection(SERVER)
        socket.setdefaulttimeout(5)
        cnn.request('GET', PAGE, param, header)
        res = cnn.getresponse().read().decode('utf-8')
        cnn.close()

        result = HTMLParser.HTMLParser().unescape(res)
    except Exception, err:
        if DEBUG:
            raise
        else:
            sys.stderr.write('%s\n' % err)

        result = None

    return result

# -----------------------------------------------------------------------------
def get_dict_pairs(html):
    """
    Parse the unparsed HTML text to get (word, meaning) pairs.
    """
    try:
        result = []

        # Parse word list block.
        g = P_WORD_BLOCK.search(html)
        if not g:
            raise NameError('Word list block not found')
        html = g.group(1)

        # Parse (word, meaning) pairs
        while True:
            g = P_WORD_PAIR.search(html)
            if not g:
                break
            result.append((g.group(1).strip(), g.group(2).strip()))
            html = g.group(3)
    except Exception, err:
        if DEBUG:
            raise
        else:
            sys.stderr.write('%s\n' % err)

    return result

# -----------------------------------------------------------------------------
def print_dict_pairs(pairs):
    """
    Print out the (word, meaning) pairs.
    """
    try:
        # If no result found, print out :(
        if not pairs:
            sys.stderr.write(':(\n')

        for key, value in pairs:
            if COLORED:
                out =  "\033[35m%s\033[0m : " % (key.strip().encode('utf-8'))
                out += "%s\n" % (value.strip().encode('utf-8'))
            else:
                out =  "%s : " % (key.strip().encode('utf-8'))
                out += "%s\n"    % (value.strip().encode('utf-8'))

            sys.stdout.write(out)

        result = True
    except Exception, err:
        if DEBUG:
            raise
        else:
            sys.stderr.write('%s\n' % err)

        result = False

    return result

# -----------------------------------------------------------------------------
if __name__ == '__main__':
    try:
        # Get words as argument.
        if len(sys.argv) < 2:
            sys.stderr.write('Usage: %s word [word...]\n' % sys.argv[0])
            sys.exit(1)
        word = ' '.join(sys.argv[1:])

        # Get unparsed html response from zargan.com
        html = get_response_from_zargan(word)
        # Parse and get (word, meaning) pairs from html output.
        pairs = get_dict_pairs(html)
        # Print (word, meaning) pairs.
        print_dict_pairs(pairs)

        sys.exit(0)
    except Exception, err:
        if DEBUG:
            raise
        else:
            sys.stderr.write('%s\n' % err)

        sys.exit(1)
yazar husonet

Yorumlar

Bu içerik için sizde yorum yapabilirsiniz!
anasayfa | makaleler | haberler | dosyalar | linkler | hakkımızda