# -*- coding: utf-8 -*-
"""
Script to resolve double redirects, and to delete broken redirects.
Requires access to MediaWiki's maintenance pages or to a SQL dump file. Delete function requires
adminship.

Syntax:

    python redirect.py action [argument]

where action can be one of these:

* double - fix redirects which point to other redirects
* broken - delete redirects where targets don\'t exist. Requires adminship.

and argument can be:

* sql - retrieve information from a local dump (http://download.wikimedia.org).

if this argument isn't given, info will be loaded from the maintenance page of
the live wiki.
argument can also be given as "-sql:filename.sql".

NOTE: For resolving redirects, please use solve_disambiguation.py -redir.
"""
#
# (C) Daniel Herding, 2004
#
# Distributed under the terms of the PSF license.
#
__version__='$Id: redirect.py,v 1.12 2004/10/11 12:28:15 jeedo Exp $'
#
from __future__ import generators
import wikipedia, config
import re, sys

# Summary message for fixing double redirects
msg_double={
    'en':u'Robot: Fixing double redirect',
    'de':u'Bot: Korrigiere doppelten Redirect',
    'is':u'Vélmenni: Lagfæri tvöfalda tilvísun',
    'ar':u'روبوت: اصلاح ازدواجية في اعادة التحويل',
    }

# Reason for deleting broken redirects
reason_broken={
    'en':u'Robot: Redirect target doesn\'t exist',
    'de':u'Bot: Weiterleitungsziel existiert nicht',
    }

    
def get_hamza_normalized_titles(sqlfilename):
    '''
    Loads a local SQL dump file, looks at all pages whose title contains a
    hamza form of alif (أ, إ or آ) and creates a normalized name for each,
    e.g. أصول -> اصول.
    Returns a dictionary where the normalized names are the keys and the
    original titles (the redirect targets) are the values.
    NOTE: if the page isn't in the main namespace, the returned key will be
    prefixed by the default namespace identifier. See full_title() in dump.py.
    '''
    normalized_titles = {}
    # open sql dump and read page titles out of it
    dump = sqldump.SQLdump(sqlfilename, wikipedia.myencoding())
    for entry in dump.entries():
        fullArticleName = entry.full_title()
        # replace the hamza forms of alif with a plain alif
        newArticleName = fullArticleName.replace(u'أ', u'ا')
        newArticleName = newArticleName.replace(u'إ', u'ا')
        newArticleName = newArticleName.replace(u'آ', u'ا')
        if newArticleName != fullArticleName:
            normalized_titles[newArticleName] = fullArticleName
    return normalized_titles

def get_harakat_normalized_titles(sqlfilename):
    '''
    Loads a local SQL dump file, looks at all pages whose title contains
    harakat (Arabic vowel and other diacritical marks) and creates a
    normalized name for each by stripping those marks.
    Returns a dictionary where the normalized names are the keys and the
    original titles (the redirect targets) are the values.
    NOTE: if the page isn't in the main namespace, the returned key will be
    prefixed by the default namespace identifier. See full_title() in dump.py.
    '''
    normalized_titles = {}
    # open sql dump and read page titles out of it
    dump = sqldump.SQLdump(sqlfilename, wikipedia.myencoding())
    for entry in dump.entries():
        fullArticleName = entry.full_title()
        # strip fathatan, fatha, damma, shadda, kasra, sukun and the tilde;
        # note that dammatan (ٌ) and kasratan (ٍ) are not in this pattern
        newArticleName = re.sub(u'[ًَُّ~ِْ]', u'', fullArticleName)
        if newArticleName != fullArticleName:
            wikipedia.output(u'%s %s' % (newArticleName, fullArticleName))
            normalized_titles[newArticleName] = fullArticleName
    return normalized_titles
                
def create_normalized_hamza_redirects(source):
    for article_title, articlefullTitle in get_hamza_normalized_titles(source).iteritems():
        pl = wikipedia.PageLink('ar', article_title)
        # only create the redirect in the main namespace (no colon in the title)
        if u':' not in article_title and not pl.exists():
            text = u'#تحويل [[%s]]' % articlefullTitle
            pl.put(text, text)
            wikipedia.output(u'%s --> %s' % (article_title, articlefullTitle))
            wikipedia.put_throttle()

def create_normalized_harakat_redirects(source):
    for article_title, articlefullTitle in get_harakat_normalized_titles(source).iteritems():
        pl = wikipedia.PageLink('ar', article_title)
        # only create the redirect in the main namespace (no colon in the title)
        if u':' not in article_title:
            if not pl.exists():
                text = u'#تحويل [[%s]]' % articlefullTitle
                pl.put(text, text)
                wikipedia.output(u'%s --> %s' % (article_title, articlefullTitle))
                wikipedia.put_throttle()
            else:
                wikipedia.output(u'%s already exists' % article_title)
                if pl.isRedirectPage():
                    if unicode(pl.getRedirectTo(), 'utf-8') != articlefullTitle:
                        wikipedia.output(u'WARNING: %s already redirects to a different target.' % article_title)

def fix_double_redirects(source):
    for redir_name in retrieve_double_redirects(source):
        print ''
        redir = wikipedia.PageLink(wikipedia.mylang, redir_name)
        try:
            target = redir.getRedirectTo()
        except wikipedia.IsNotRedirectPage:
            wikipedia.output(u'%s is not a redirect.' % redir.linkname())
        except wikipedia.NoPage:
            wikipedia.output(u'%s doesn\'t exist.' % redir.linkname())
        except wikipedia.LockedPage:
            wikipedia.output(u'%s is locked, skipping.' % redir.linkname())
        else:
            try:
                second_redir = wikipedia.PageLink(wikipedia.mylang, target)
                second_target = second_redir.getRedirectTo(read_only = True)
            except wikipedia.IsNotRedirectPage:
                wikipedia.output(u'%s is not a redirect.' % second_redir.linkname())
            except wikipedia.NoPage:
                wikipedia.output(u'%s doesn\'t exist.' % second_redir.linkname())
            else:
                txt = "#تحويل [[%s]]" % second_target
                redir.put(txt)
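
# Helper for the 'double' action. retrieve_double_redirects() is called by
# fix_double_redirects() above but was not defined in this file; the version
# below is a minimal sketch that only handles a local SQL dump. It assumes
# that every dump entry exposes its wikitext as entry.text (compare
# full_title() in dump.py) and that redirects start with "#REDIRECT [[...]]"
# or the Arabic "#تحويل [[...]]"; no further title normalization is applied.
def retrieve_double_redirects(source):
    if source is None:
        wikipedia.output(u'Reading double redirects from the live wiki is not supported here; please use -sql:filename.sql')
        return []
    redir_pattern = re.compile(ur'#(?:REDIRECT|تحويل)\s*\[\[(.*?)\]\]', re.IGNORECASE)
    dump = sqldump.SQLdump(source, wikipedia.myencoding())
    # first pass: map every redirect title to its target
    targets = {}
    for entry in dump.entries():
        m = redir_pattern.match(entry.text)
        if m:
            targets[entry.full_title()] = m.group(1)
    # keep only the redirects whose target is itself a redirect
    double_redirects = []
    for title, target in targets.iteritems():
        if target in targets:
            double_redirects.append(title)
    return double_redirects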

# read command line parameters
# what the bot should do (create normalized redirects, or resolve double redirects)
action = None
# where the bot should get its information from (either None to load the
# maintenance special page from the live wiki, or the filename of a local SQL dump file)
source = None
for arg in sys.argv[1:]:
    arg = wikipedia.argHandler(arg)
    if arg:
        if arg == 'normalize':
            action = 'normalize'
        elif arg == 'normalize_hamza':
            action = 'normalize_hamza'
        elif arg == 'normalize_harakat':
            action = 'normalize_harakat'
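        # 'double' fixes redirects that point to other redirects (see
        # fix_double_redirects above); it relies on the hedged
        # retrieve_double_redirects() sketch defined near that function.
        elif arg == 'double':
            action = 'double'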
        elif arg.startswith('-sql'):
            if len(arg) == 4:
                sqlfilename = wikipedia.input(u'Please enter the SQL dump\'s filename: ')
            else:
                sqlfilename = arg[5:]
            import sqldump
            source = sqlfilename
        else:
            print 'Unknown argument: %s' % arg

if action == 'normalize_hamza':
    # get summary text
    wikipedia.setAction(wikipedia.translate('ar', msg_double))
    create_normalized_hamza_redirects(source)
elif action == 'normalize_harakat':
    create_normalized_harakat_redirects(source)
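elif action == 'double':
    # get summary text
    wikipedia.setAction(wikipedia.translate(wikipedia.mylang, msg_double))
    fix_double_redirects(source)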
else:
    wikipedia.output(__doc__, 'utf-8')