PHP Classes

File: profiles/basic.php

Recommend this page to a friend!
  Classes of Jill Lingoff  >  Sweeper  >  profiles/basic.php  >  Download  
File: profiles/basic.php
Role: Auxiliary script
Content type: text/plain
Description: Auxiliary script
Class: Sweeper
Clean HTML to remove unwanted tags and attributes
Author: By Jill Lingoff
Last change:
Date: 1 month ago
Size: 3,033 bytes


Class file image Download

// Pull in the mapping/replacement definition files this profile depends on.
// The paths are relative, so PHP resolves them via include_path, then the
// calling script's directory and the current working directory.
// NOTE(review): presumably these files define the variables referenced in the
// array returned below ($CLF2Array, $CLF2RxpArray, $basicTypicalArray,
// $basicTypicalArrayRxp, $basicwingding, $basicEnglishArray, ...) -- confirm
// against the included files.
include('mappings' . DIRECTORY_SEPARATOR . 'CLF2.php');
include('basic' . DIRECTORY_SEPARATOR . 'typical-mod.php');
include('basic' . DIRECTORY_SEPARATOR . 'typical-rxp-mod.php');
include('basic' . DIRECTORY_SEPARATOR . 'wingding.php');
include('basic' . DIRECTORY_SEPARATOR . 'language.php');

return array(

// execution macro: this defines the methods you want to call; each and every one of these "cleans" something
    // call the methods as many times as you want, and change the order as needed
'macro' => array(
// 'undoublyencodeentities', // possibly unnecessary now (2012-01-16) but kept here for insurance (which is low risk since we have never done a document whose content was about HTML character entities)
'post_dom', //'post_dom_stripme',
//'mark_TOC', // needs to be rethought
'combine_inline', // was disabled since in using brute force to ensure proper nesting this could take an extremely long time to run
    //'unmark_TOC', // needs to be rethought
    //'extra_space', // this is currently (2009-07-10) too aggressive for vanilla sweeper
    // do clean inline instead of some of these?
    //'fix_inline', // risky although I have not seen it be destructive; also its orange message could scare somebody.

'non_breaking_type' => 'nbsp', // nbsp, noWrap
'use_local_DTD' => true,
'local_DTD' => 'DTD' . DIRECTORY_SEPARATOR . 'xhtml1-strict.dtd',
'basictypical' => $basicTypicalArray,
'basictypicalrxp' => $basicTypicalArrayRxp,
'basicwingding' => $basicwingding,
'basicEnglishArray' => $basicEnglishArray,
'basicFrenchArray' => $basicFrenchArray,
'basicFrenchRxpArray' => $basicFrenchRxpArray,

'CLF2_replace' => $CLF2Array,
'CLF2_regex' => $CLF2RxpArray,
'french_footnote_reference_anchor_text' => 'Lien la note ',
'french_footnote_anchor_text' => 'Lien &agrave; la r&eacute;f&eacute;rence de la note ',
'english_footnote_reference_anchor_text' => 'Link to note ',
'english_footnote_anchor_text' => 'Link to note reference ',
'footnote_anchor_name' => 'note',
'footnote_reference_anchor_name' => 'noteref',
'french_endnote_reference_anchor_text' => 'Lien la note de bas ',
'french_endnote_anchor_text' => 'Lien &agrave; la r&eacute;f&eacute;rence de la note de bas ',
'english_endnote_reference_anchor_text' => 'Link to endnote ',
'english_endnote_anchor_text' => 'Link to endnote reference ',
'endnote_anchor_name' => 'nnote',
'endnote_reference_anchor_name' => 'nnoteref',

'character_entity_encoding_type' => 'hexadecimal', // we might like to use 'named' but DOM which is currently (2015-06-09) being used doesn't like named entities