Pig-Latin-ifier
PHP Code Examples
[Download]
<?php

include '../util.inc';

// Pig Latin page redisplay.

// Translate a single token matched by spit_out() and return the replacement.
//
// $match is the match array handed over by preg_replace_callback(); only
// $match[0] is used.  It is either a complete HTML entity (starts with '&')
// or a maximal run of ASCII letters.
//
//  - Entities are returned unchanged.
//  - A word with no vowel (y counts as a vowel) is returned unchanged.
//  - Otherwise the leading consonants move to the end, followed by "ay".
//    A word that began "Xy..." (capital then lower case) stays capitalised:
//    "Hello" becomes "Ellohay".
function spit_out_token($match) {
	$token = $match[0];

	// HTML entity: changing it would break the markup.
	if($token[0] == '&') return $token;

	// Split into leading consonants and the rest.  The token is all
	// letters, so this only fails when it contains no vowel at all.
	if(!preg_match('/^([bcdfghjklmnpqrstvwxz]*)([aeiouy][a-z]*)$/i',
				$token, $parts))
		return $token;

	$head = $parts[1];
	$tail = $parts[2];

	// Fix up caps.
	if(preg_match('/^[A-Z][a-z]/', $token)) {
		$head = strtolower($head);
		$tail = ucfirst($tail);
	}

	return $tail . $head . 'ay';
}

// Convert the words in $text to pig-latin and echo the result.
//
// Everything that is not an ASCII letter (white space, digits, punctuation)
// is echoed verbatim.  HTML entities -- named (&amp;, &frac12;), decimal
// (&#169;) and hex (&#xAE;) -- are recognised as a whole and left alone
// wherever they appear, including directly after other punctuation.
//
// The text is processed in one preg_replace_callback() pass, so cost is
// linear in the length of $text.  If PCRE reports an error the text is
// echoed untranslated rather than dropped.
function spit_out($text) {
	$out = preg_replace_callback(
		'/&(?:#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);|[A-Za-z]+/',
		'spit_out_token', $text);

	// preg_replace_callback() returns NULL on failure.
	echo ($out === NULL) ? $text : $out;
}

// Vet the query string.  The whole query string is taken as the URL of the
// page to translate.  whap() comes from util.inc; the code below presumably
// relies on it not returning -- TODO confirm.
$target = isset($_SERVER["QUERY_STRING"]) ? $_SERVER["QUERY_STRING"] : '';
if(!$target) whap('No URL', "Please specify a URL to look at.</body></html>");

// Accept remote schemes only.  Matching any "scheme://" would also let the
// caller reach PHP's local stream wrappers (file://, php://, ...) through
// fopen() and read files off this server.
if(!preg_match('#^(?:https?|ftp)://#', $target)) 
	whap('Bad URL', "Specify an absolute URL.</body></html>");

// Okay, try to open the beast.  Failure is reported through whap(), so the
// fopen() warning itself is suppressed.
$in = @fopen($target, 'r');
if(!$in) {
	// $target is caller-supplied; escape it before it goes into HTML.
	whap('Inaccessible URL', "Cannot read " .
		htmlspecialchars($target, ENT_QUOTES) . ".</body></html>");
}

// The copied page needs a <base> tag so its relative links keep working.
// It is emitted right after <head>, or, when no <head> has been seen, just
// before the first <body>, <a> or <img> tag.  It's not really correct HTML
// if it comes before the HTML tag content it belongs in.  Oh well.
// $target comes from the query string, so it is escaped for use inside a
// double-quoted attribute.
$base = "<base href=\"" . htmlspecialchars($target, ENT_QUOTES) . "\">\n";

// Scan the page one character at a time.  Text between tags is collected in
// $stuff and translated when the next tag opens; tags are collected in $tag
// and echoed unchanged when they close.
$stuff = "";		// Accumulates blocks of text to translate.
$tag = "";		// Accumulates contents of HTML tag.
$depth = 0;		// Nesting depth of < > characters.
$inq = FALSE;		// We're inside quotes inside a tag.
$rebased = FALSE;	// We have generated a <base > tag.
$protect = FALSE;	// We're in a <style > or <script > -- don't change.
while(1) {
	// Next char.  fgetc() returns FALSE at end of input; the comparison
	// must be strict because the character "0" is falsy too.
	$inch = fgetc($in);
	if($inch === FALSE) {
		// Flush whatever is still buffered: text after the last tag,
		// and any tag that was never closed.
		if($protect) echo $stuff;
		else spit_out($stuff);
		echo $tag;
		fclose($in);
		exit;
	}

	// Quote mode.  Quotes inside < >.  The characters belong to the tag
	// being accumulated, so they are appended to it, not echoed early.
	if($inq) {
		if($inch === '"')
			$inq = FALSE;
		$tag .= $inch;
		continue;
	}

	// Possibly enter quote mode.  Only double quotes are tracked.
	if($inch === '"' && $depth) {
		$inq = TRUE;
		$tag .= $inch;
		continue;
	}

	// Check for opening <.
	if($inch == '<') {
		if($depth == 0) {
			// First <.  Spit.
			if($protect) echo $stuff;
			else spit_out($stuff);
			$stuff = '';
		}
		++$depth;
		// Falls through so the '<' itself is appended to $tag below.
	} else if($inch == '>' && $depth) {
		--$depth;
		$tag .= '>';
		if($depth == 0) {
			// This means we've reached the end of tag (or
			// close).  Usually, we just echo it, but there are
			// some things to check for.
			if(!$rebased && 
			   preg_match('/^\<\s*(body|a|img)[\s\>]/i', $tag)) {
				// We need a base and there's no head.
				echo $base;
				$rebased = TRUE;   
			}
			echo "$tag";
			if(!$rebased && 
			   preg_match('/^\<\s*head[\s\>]/i', $tag)) {
				// Add a <base> at start of head.
				echo $base;
				$rebased = TRUE;   
			}
			// Contents of <style> and <script> are passed through
			// untranslated until the matching close tag.
			if(preg_match('/^\<\s*(style|script)[\s\>]/i', $tag))
				$protect = TRUE;
			if(preg_match('#^\<\s*/(style|script)[\s\>]#i', $tag))
				$protect = FALSE;
			$tag = '';
		}
		continue;
	}
	if(!$depth) $stuff .= $inch;
	else $tag .= $inch;
}
?>