diff options
author | Angie Byron <webchick@24967.no-reply.drupal.org> | 2009-11-07 14:10:33 +0000 |
---|---|---|
committer | Angie Byron <webchick@24967.no-reply.drupal.org> | 2009-11-07 14:10:33 +0000 |
commit | 35ff7005f4bb751e19509a3d9887d2fc455fada9 (patch) | |
tree | dd6e0cc1f7309883be0a1c0686fbb0075f3e5a48 /includes | |
parent | 74722b8a1467740d0fafaf431b6a326cfbe0d9eb (diff) | |
download | brdo-35ff7005f4bb751e19509a3d9887d2fc455fada9.tar.gz brdo-35ff7005f4bb751e19509a3d9887d2fc455fada9.tar.bz2 |
#600032 by c960657: Removed browser.inc. Please see http://drupal.org/project/browser if you'd like to help with a more fleshed-out abstract browser for possible inclusion in D8.
Diffstat (limited to 'includes')
-rw-r--r-- | includes/browser.inc | 1145 |
1 files changed, 0 insertions, 1145 deletions
diff --git a/includes/browser.inc b/includes/browser.inc deleted file mode 100644 index 9b1a22922..000000000 --- a/includes/browser.inc +++ /dev/null @@ -1,1145 +0,0 @@ -<?php -// $Id$ - -/** - * @file - * Browser API class. - */ - -/** - * @defgroup browser Browser - * @{ - * Provides a powerful text based browser through a class based API. - * The browser supports two HTTP backends natively: 1) PHP streams, and - * 2) curl. The browser also supports arbitrary HTTP request types in addtion - * to GET and POST, given that the backend supports them. - * - * The browser can be used to make a simple GET request to example.com as - * shown below. - * @code - * $browser = new Browser(); - * $browser->get('http://example.com'); - * @endcode - * The result of the GET request can be accessed in two ways: 1) the get() - * method returns an array defining the result of the request, or 2) the - * individual properties can be accessed from the browser instance via their - * respective access methods. The following demonstrates the properties that - * are avaialable and how to access them. - * @code - * $browser->getUrl(); - * $browser->getResponseHeaders(); - * $browser->getContent(); - * @endcode - * - * When performing a POST request the following format is used. - * @code - * $browser = new Browser(); - * $post = array( - * 'field_name1' => 'foo', - * 'checkbox1' => TRUE, - * 'multipleselect1[]' => array( - * 'value1', - * 'value2', - * ), - * ); - * $browser->post('http://example.com/form', $post, 'Submit button text'); - * @endcode - * To submit a multi-step form or to post to the current page the URL passed to - * post() may be set to NULL. If there were two steps on the form shown in the - * example above with the mutliple select field on the second page and a submit - * button with the title "Next" on the first page the code be as follows. - * @code - * $browser = new Browser(); - * $post = array( - * 'field_name1' => 'foo', - * 'checkbox1' => TRUE, - * ); - * $browser->post('http://example.com/form', $post, 'Next'); - * - * $post = array( - * 'multipleselect1[]' => array( - * 'value1', - * 'value2', - * ), - * ); - * $browser->post(NULL, $post, 'Final'); - * @endcode - */ - -/** - * Browser API class. - * - * All browser functionality is provided by this main class which manages the - * various aspects of the browser. - */ -class Browser { - - /** - * Flag indicating if curl is available. - * - * @var boolean - */ - protected $curl; - - /** - * The handle of the current curl connection. - * - * @var resource - */ - protected $handle; - - /** - * The current cookie file used by curl. - * - * Cookies are not reused so they can be stored in memory instead of a file. - * - * @var mixed - */ - protected $cookieFile = NULL; - - /** - * The request headers. - * - * @var array - */ - protected $requestHeaders = array(); - - /** - * The URL of the current page. - * - * @var string - */ - protected $url; - - /** - * The response headers of the current page. - * - * @var Array - */ - protected $headers = array(); - - /** - * The raw content of the current page. - * - * @var string - */ - protected $content; - - /** - * The BrowserPage class representing to the current page. - * - * @var BrowserPage - */ - protected $page; - - /** - * Initialize the browser. - * - * @param $force_stream - * Force the use of the PHP stream wrappers insead of CURL. This is used - * during testing to force the use of the stream wrapper so it can be - * tested. - */ - public function __construct($force_stream = FALSE) { - $this->curl = $force_stream ? FALSE : function_exists('curl_init'); - $this->setUserAgent('Drupal (+http://drupal.org/)'); - - if ($this->curl) { - $this->handle = curl_init(); - curl_setopt_array($this->handle, $this->curlOptions()); - } - else { - $this->handle = stream_context_create(); - } - } - - /** - * Check the the method is supported by the backend. - * - * @param $method - * The method string identifier. - */ - public function isMethodSupported($method) { - return $method == 'GET' || $method == 'POST'; - } - - /** - * Get the request headers. - * - * The request headers are sent in every request made by the browser with a - * few changes made the the individual request methods. - * - * @return - * Associative array of request headers. - */ - public function getRequestHeaders() { - return $this->requestHeaders; - } - - /** - * Set the request headers. - * - * @param $headers - * Associative array of request headers. - */ - public function setRequestHeaders(array $headers) { - $this->requestHeaders = $headers; - } - - /** - * Get the user-agent that the browser is identifying itself as. - * - * @return - * Browser user-agent. - */ - public function getUserAgent() { - return $this->requestHeaders['User-Agent']; - } - - /** - * Set the user-agent that the browser will identify itself as. - * - * @param $agent - * User-agent to to identify as. - */ - public function setUserAgent($agent) { - $this->requestHeaders['User-Agent'] = $agent; - } - - /** - * Get HTTP authentication information. - * - * @return - * Authentication information in the format, username:password. - */ - public function getHttpAuthentication() { - if (isset($this->requestHeaders['Authorization'])) { - return base64_decode($this->requestHeaders['Authorization']); - } - return NULL; - } - - /** - * Set HTTP authentication information. - * - * @param $username - * HTTP authentication username, which cannot contain a ":". - * @param $password - * HTTP authentication password. - */ - public function setHttpAuthentication($username, $password) { - $this->requestHeaders['Authorization'] = base64_encode("$username:$password"); - } - - /** - * Get the URL of the current page. - * - * @return - * The URL of the current page. - */ - public function getUrl() { - return $this->url; - } - - /** - * Get the response headers of the current page. - * - * @return - * The response headers of the current page. - */ - public function getResponseHeaders() { - return $this->headers; - } - - /** - * Get the raw content of the current page. - * - * @return - * The raw content for the current page. - */ - public function getContent() { - return $this->content; - } - - /** - * Get the BrowserPage instance for the current page. - * - * If the raw content is new and the page has not yet been parsed then parse - * the content and ensure that it is valid. - * - * @return - * BrowserPage instance for the current page. - */ - public function getPage() { - if (!isset($this->page)) { - $this->page = new BrowserPage($this->url, $this->headers, $this->content); - } - return $this->page; - } - - /** - * Get the current state of the browser. - * - * @return - * An associative array containing state information, including: 1) url, 2) - * headers, 3) content. - * @see getUrl() - * @see getResponseHeaders() - * @see getContent() - */ - public function getState() { - return array( - 'url' => $this->url, - 'headers' => $this->headers, - 'content' => $this->content, - ); - } - - /** - * Set the state of the browser. - * - * @param $url - * The URL of the current page. - * @param $headers - * The response headers of the current page. - * @param $content - * The raw content of the current page. - */ - public function setState($url, $headers, $content) { - $this->url = $url; - $this->headers = $headers; - $this->content = $content; - - // Clear the page variable since the content has change. - unset($this->page); - - $this->refreshCheck(); - } - - /** - * Perform a GET request. - * - * @param $url - * Absolute URL to request. - * @return - * Associative array of state information, as returned by getState(). - * @see getState(). - */ - public function get($url) { - if ($this->curl) { - $this->curlExecute(array( - CURLOPT_HTTPGET => TRUE, - CURLOPT_URL => $url, - CURLOPT_NOBODY => FALSE, - )); - } - else { - $this->streamExecute($url, array( - 'method' => 'GET', - 'header' => array( - 'Content-Type' => 'application/x-www-form-urlencoded', - ), - )); - } - - $this->refreshCheck(); - - return $this->getState(); - } - - /** - * Perform a POST request. - * - * @param $url - * Absolute URL to request, or NULL to submit the current page. - * @param $fields - * Associative array of fields to submit as POST variables. - * @param $submit - * Text contained in 'value' properly of submit button of which to press. - * @return - * Associative array of state information, as returned by - * browser_state_get(). - * @see browser_state_get() - */ - public function post($url, array $fields, $submit) { - // If URL is set then request the page, otherwise use the current page. - if ($url) { - $this->get($url); - } - else { - $url = $this->url; - } - - if (($page = $this->getPage()) === FALSE) { - return FALSE; - } - - if (($form = $this->findForm($fields, $submit)) === FALSE) { - return FALSE; - } - - // If form specified action then use that for the post url. - if ($form['action']) { - $url = $page->getAbsoluteUrl($form['action']); - } - - if ($this->curl) { - $this->curlExecute(array( - CURLOPT_POST => TRUE, - CURLOPT_URL => $url, - CURLOPT_POSTFIELDS => http_build_query($form['post'], NULL, '&'), - )); - } - else { - $this->streamExecute($url, array( - 'method' => 'POST', - 'header' => array( - 'Content-Type' => 'application/x-www-form-urlencoded', - ), - 'content' => http_build_query($form['post'], NULL, '&'), - )); - } - - $this->refreshCheck(); - - return $this->getState(); - } - - /** - * Find the the form that patches the conditions. - * - * @param $fields - * Associative array of fields to submit as POST variables. - * @param $submit - * Text contained in 'value' properly of submit button of which to press. - * @return - * Form action and the complete post array containing default values if not - * overridden, or FALSE if no form matching the conditions was found. - */ - protected function findForm(array $fields, $submit) { - $page = $this->getPage(); - - $forms = $page->getForms(); - foreach ($forms as $form) { - if (($post = $this->processForm($form, $fields, $submit)) !== FALSE) { - $action = (isset($form['action']) ? (string) $form['action'] : FALSE); - return array( - 'action' => $action, - 'post' => $post, - ); - } - } - return FALSE; - } - - /** - * Check the conditions against the specified form and process values. - * - * @param $form - * Form SimpleXMLElement object. - * @param $fields - * Associative array of fields to submit as POST variables. - * @param $submit - * Text contained in 'value' properly of submit button of which to press. - * @return - * The complete post array containing default values if not overridden, or - * FALSE if no form matching the conditions was found. - */ - protected function processForm($form, $fields, $submit) { - $page = $this->getPage(); - - $post = array(); - $submit_found = FALSE; - $inputs = $page->getInputs($form); - foreach ($inputs as $input) { - $name = (string) $input['name']; - $html_value = isset($input['value']) ? (string) $input['value'] : ''; - - // Get type from input vs textarea and select. - $type = isset($input['type']) ? (string) $input['type'] : $input->getName(); - - if (isset($fields[$name])) { - if ($type == 'file') { - // Make sure the file path is the absolute path. - $file = realpath($fields[$name]); - if ($file && is_file($file)) { - // Signify that the post field is a file in case backend needs to - // perform additional processing. - $post[$name] = '@' . $file; - } - // Known type, field processed. - unset($fields[$name]); - } - elseif (($processed_value = $this->processField($input, $type, $fields[$name], $html_value)) !== NULL) { - // Value may be ommitted (checkbox). - if ($processed_value !== FALSE) { - if (is_array($processed_value)) { - $post += $processed_value; - } - else { - $post[$name] = $processed_value; - } - } - // Known type, field processed. - unset($fields[$name]); - } - } - - // No post value for the field means that: no post field value specified, - // the value does not match the field (checkbox, radio, select), or the - // field is of an unknown type. - if (!isset($post[$name])) { - // No value specified so use default value (value in HTML). - if (($default_value = $this->getDefaultFieldValue($input, $type, $html_value)) !== NULL) { - $post[$name] = $default_value; - unset($fields[$name]); - } - } - - // Check if the - if (($type == 'submit' || $type == 'image') && $submit == $html_value) { - $post[$name] = $html_value; - $submit_found = TRUE; - } - } - - if ($submit_found) { - return $post; - } - return FALSE; - } - - /** - * Get the value to be sent for the specified field. - * - * @param $input - * Input SimpleXMLElement object. - * @param $type - * Input type: text, textarea, password, radio, checkbox, or select. - * @param $new_value - * The new value to be assigned to the input. - * @param $html_value - * The cleaned default value for the input from the HTML value. - */ - protected function processField($input, $type, $new_value, $html_value) { - switch ($type) { - case 'text': - case 'textarea': - case 'password': - return $new_value; - case 'radio': - if ($new_value == $html_value) { - return $new_value; - } - return NULL; - case 'checkbox': - // If $new_value is set to FALSE then ommit checkbox value, otherwise - // pass original value. - if ($new_value === FALSE) { - return FALSE; - } - return $html_value; - case 'select': - // Remove the ending [] from multi-select element name. - $key = preg_replace('/\[\]$/', '', (string) $input['name']); - - $options = $page->getSelectOptions($input); - $index = 0; - $out = array(); - foreach ($options as $value => $text) { - if (is_array($value)) { - if (in_array($value, $new_value)) { - $out[$key . '[' . $index++ . ']'] = $value; - } - } - elseif ($new_value == $value) { - return $new_value; - } - } - return ($out ? $out : NULL); - default: - return NULL; - } - } - - /** - * Get the cleaned default value for the input from the HTML value. - * - * @param $input - * Input SimpleXMLElement object. - * @param $type - * Input type: text, textarea, password, radio, checkbox, or select. - * @param $html_value - * The default value for the input, as specified in the HTML. - */ - protected function getDefaultFieldValue($input, $type, $html_value) { - switch ($type) { - case 'textarea': - return (string) $input; - case 'select': - // Remove the ending [] from multi-select element name. - $key = preg_replace('/\[\]$/', '', (string) $input['name']); - $single = empty($input['multiple']); - - $options = $page->getSelectOptionElements($input); - $first = TRUE; - $index = 0; - $out = array(); - foreach ($options as $option) { - // For single select, we load the first option, if there is a - // selected option that will overwrite it later. - if ($option['selected'] || ($first && $single)) { - $first = FALSE; - if ($single) { - $out[$key] = (string) $option['value']; - } - else { - $out[$key . '[' . $index++ . ']'] = (string) $option['value']; - } - } - return ($single ? $out[$key] : $out); - } - break; - case 'file': - return NULL; - case 'radio': - case 'checkbox': - if (!isset($input['checked'])) { - return NULL; - } - // Deliberately no break. - default: - return $html_value; - } - } - - /** - * Perform a request of arbitrary type. - * - * Please use get() and post() for GET and POST requests respectively. - * - * @param $method - * The method string identifier. - * @param $url - * Absolute URL to request. - * @param $additional - * Additional parameters related to the particular request method. - * @return - * Associative array of state information, as returned by getState(). - * @see getState(). - */ - public function request($method, $url, array $additional) { - if (!$this->isMethodSupported($method)) { - return FALSE; - } - - // TODO - } - - /** - * Perform the request using the PHP stream wrapper. - * - * @param $url - * The url to request. - * @param $options - * The HTTP stream context options to be passed to - * stream_context_set_params(). - */ - protected function streamExecute($url, array $options) { - // Global variable provided by PHP stream wapper. - global $http_response_header; - - if (!isset($options['header'])) { - $options['header'] = array(); - } - - // Merge default request headers with the passed headers and generate - // header string to be sent in http request. - $headers = $this->requestHeaders + $options['header']; - $options['header'] = $this->headerString($headers); - - // Update the handler options. - stream_context_set_params($this->handle, array( - 'options' => array( - 'http' => $options, - ) - )); - - // Make the request. - $this->content = file_get_contents($url, FALSE, $this->handle); - $this->url = $url; - $this->headers = $this->headerParseAll($http_response_header); - unset($this->page); - } - - - /** - * Perform curl_exec() with the specified option changes. - * - * @param $options - * Curl options to set, any options not set will maintain their previous - * value. - */ - function curlExecute(array $options) { - // Headers need to be reset since callback appends. - $this->headers = array(); - - // Ensure that request headers are up to date. - if ($this->getHttpAuthentication()) { - curl_setopt($this->handle, CURLOPT_USERPWD, $this->getHttpAuthentication()); - } - curl_setopt($this->handle, CURLOPT_USERAGENT, $this->requestHeaders['User-Agent']); - curl_setopt($this->handle, CURLOPT_HTTPHEADER, $this->requestHeaders); - - curl_setopt_array($this->handle, $options); - $this->content = curl_exec($this->handle); - $this->url = curl_getinfo($this->handle, CURLINFO_EFFECTIVE_URL); - - // $this->headers should be filled by $this->curlHeaderCallback(). - unset($this->page); - } - - /** - * Get the default curl options to be used with each request. - * - * @return - * Default curl options. - */ - protected function curlOptions() { - return array( - CURLOPT_COOKIEJAR => $this->cookieFile, - CURLOPT_FOLLOWLOCATION => TRUE, - CURLOPT_HEADERFUNCTION => array($this, 'curlHeaderCallback'), - CURLOPT_HTTPHEADER => $this->requestHeaders, - CURLOPT_RETURNTRANSFER => TRUE, - CURLOPT_SSL_VERIFYPEER => FALSE, - CURLOPT_SSL_VERIFYHOST => FALSE, - CURLOPT_URL => '/', - CURLOPT_USERAGENT => $this->requestHeaders['User-Agent'], - ); - } - - /** - * Reads reponse headers and stores in $headers array. - * - * @param $curlHandler - * The curl handler. - * @param $header - * An header. - * @return - * The string length of the header. (required by curl) - */ - protected function curlHeaderCallback($handler, $header) { - // Ignore blank header lines. - $clean_header = trim($header); - if ($clean_header) { - $this->headers += $this->headerParse($clean_header); - } - - // Curl requires strlen() to be returned. - return strlen($header); - } - - /** - * Generate a header string given he associative array of headers. - * - * @param $headers - * Associative array of headers. - * @return - * Header string to be used with stream. - */ - protected function headerString(array $headers) { - $string = ''; - foreach ($headers as $key => $header) { - $string .= "$key: $header\r\n"; - } - return $string; - } - - /** - * Parse the response header array to create an associative array. - * - * @param $headers - * Array of headers. - * @return - * An associative array of headers. - */ - protected function headerParseAll(array $headers) { - $out = array(); - foreach ($headers as $header) { - $out += $this->headerParse($header); - } - return $out; - } - - /** - * Parse an individual header into name and value. - * - * @param $header - * A string header string. - * @return - * Parsed header as array($name => $value), or array() if parse failed. - */ - protected function headerParse($header) { - $parts = explode(':', $header, 2); - - // Ensure header line is valid. - if (count($parts) == 2) { - $name = $this->headerName(trim($parts[0])); - return array($name => trim($parts[1])); - } - return array(); - } - - /** - * Ensure that header name is formatted with all lowercase letters. - * - * @param $name - * Header name to format. - * @return - * Formatted header name. - */ - protected function headerName($name) { - return strtolower($name); - } - - /** - * Check for a refresh signifier. - * - * A refresh signifier can either be the 'Location' HTTP header or the meta - * tag 'http-equiv="Refresh"'. - */ - protected function refreshCheck() { - // If not handled by backend wrapper then go ahead and handle. - if (isset($this->headers['Location'])) { - // Expect absolute URL. - $this->get($this->headers['Location']); - } - - if (($page = $this->getPage()) !== FALSE && ($tag = $page->getMetaTag('Refresh', 'http-equiv'))) { - // Parse the content attribute of the meta tag for the format: - // "[delay]: URL=[path_to_redirect_to]". - if (preg_match('/\d+;\s*URL=(?P<url>.*)/i', $tag['content'], $match)) { - $this->get($page->getAbsoluteUrl(decode_entities($match['url']))); - } - } - } - - /** - * Close the wrapper connection. - */ - function __destruct() { - if (isset($this->handle)) { - if ($this->curl) { - curl_close($this->handle); - } - unset($this->handle); - } - } -} - - -/** - * Represents a page of content that has been fetched by the Browser. The class - * provides a number of convenience methods that relate to page content. - */ -class BrowserPage { - - /** - * The URL of the page. - * - * @var string - */ - protected $url; - - /** - * The response headers of the page. - * - * @var Array - */ - protected $headers; - - /** - * The root element of the page. - * - * @var SimpleXMLElement - */ - protected $root; - - /** - * Initialize the BrowserPage with the page state information. - * - * @param $url - * The URL of the page. - * @param $headers - * The response headers of the page. - * @param $content - * The raw content of the page. - */ - public function BrowserPage($url, $headers, $content) { - $this->url = $url; - $this->headers = $headers; - $this->root = $this->load($content); - } - - /** - * Attempt to parse the raw content using DOM and import it into SimpleXML. - * - * @param $content - * The raw content of the page. - * @return - * The root element of the page, or FALSE. - */ - protected function load($content) { - // Use DOM to load HTML soup, and hide warnings. - $document = @DOMDocument::loadHTML($content); - if ($document) { - return simplexml_import_dom($document); - } - return FALSE; - } - - /** - * Check if the raw content is valid and could be parse. - * - * @return - * TRUE if content is valid, otherwise FALSE. - */ - public function isValid() { - return ($this->root !== FALSE); - } - - /** - * Perform an xpath search on the contents of the page. - * - * The search is relative to the root element, usually the HTML tag, of the - * page. To perform a search using a different root element follow the - * example below. - * @code - * $parent = $page->xpath('.//parent'); - * $parent[0]->xpath('//children'); - * @endcode - * - * @param $xpath - * The xpath string. - * @return - * An array of SimpleXMLElement objects or FALSE in case of an error. - * @link http://us.php.net/manual/function.simplexml-element-xpath.php - */ - public function xpath($xpath) { - if ($this->isValid()) { - return $this->root->xpath($xpath); - } - return FALSE; - } - - /** - * Get all the meta tags. - * - * @return - * An array of SimpleXMLElement objects representing meta tags. - */ - public function getMetaTags() { - return $this->xpath('//meta'); - } - - /** - * Get a specific meta tag. - * - * @param $key - * The meta tag key. - * @param $type - * The type of meta tag, either: 'name' or 'http-equiv'. - * @return - * A SimpleXMLElement object representing the meta tag, or FALSE if not - * found. - */ - public function getMetaTag($key, $type = 'name') { - if ($tags = $this->getMetaTags()) { - foreach ($tags as $tag) { - if ($tag[$type] == $key) { - return $tag; - } - } - } - return FALSE; - } - - /** - * Get all the form elements. - * - * @return - * An array of SimpleXMLElement objects representing form elements. - */ - public function getForms() { - return $this->xpath('//form'); - } - - /** - * Get all the input elements, or only those nested within a parent element. - * - * @param $parent - * SimpleXMLElement representing the parent to search within. - * @return - * An array of SimpleXMLElement objects representing form elements. - */ - public function getInputs($parent = NULL) { - if ($parent) { - return $parent->xpath('.//input|.//textarea|.//select'); - } - return $this->xpath('.//input|.//textarea|.//select'); - } - - /** - * Get all the options contained by a select, including nested options. - * - * @param $select - * SimpleXMLElement representing the select to extract option from. - * @return - * Associative array where the keys represent each option value and the - * value is the text contained within the option tag. For example: - * @code - * array( - * 'option1' => 'Option 1', - * 'option2' => 'Option 2', - * ) - * @endcode - */ - public function getSelectOptions(SimpleXMLElement $select) { - $elements = $this->getSelectOptionElements($select); - - $options = array(); - foreach ($elements as $element) { - $options[(string) $element['value']] = $this->asText($element); - } - return $options; - } - - /** - * Get all selected options contained by a select, including nested options. - * - * @param $select - * SimpleXMLElement representing the select to extract option from. - * @return - * Associative array of selected items in the format described by - * BrowserPage->getSelectOptions(). - * @see BrowserPage->getSelectOptions() - */ - public function getSelectedOptions(SimpleXMLElement $select) { - $elements = getSelectOptionElements($select); - - $options = array(); - foreach ($elements as $element) { - if (isset($elements['selected'])) { - $options[(string) $element['value']] = asText($element); - } - } - return $options; - } - - /** - * Get all the options contained by a select, including nested options. - * - * @param $element - * SimpleXMLElement representing the select to extract option from. - * @return - * An array of SimpleXMLElement objects representing option elements. - */ - public function getSelectOptionElements(SimpleXMLElement $element) { - $options = array(); - - // Add all options items. - foreach ($element->option as $option) { - $options[] = $option; - } - - // Search option group children. - if (isset($element->optgroup)) { - foreach ($element->optgroup as $group) { - $options = array_merge($options, $this->getSelectOptionElements($group)); - } - } - return $options; - } - - /** - * Get the absolute URL for a given path, relative to the page. - * - * @param - * A path relative to the page or absolute. - * @return - * An absolute path. - */ - public function getAbsoluteUrl($path) { - $parts = @parse_url($path); - if (isset($parts['scheme'])) { - return $path; - } - - $base = $this->getBaseUrl(); - if ($path[0] == '/') { - // Lead / then use host as base. - $parts = parse_url($base); - $base = $parts['scheme'] . '://' . $parts['host']; - } - return $base . $path; - } - - /** - * Get the base URL of the page. - * - * If a 'base' HTML element is defined then the URL it defines is used as the - * base URL for the page, otherwise the page URL is used to determine the - * base URL. - * - * @return - * The base URL of the page. - */ - public function getBaseUrl() { - // Check for base element. - $elements = $this->xpath('.//base'); - if ($elements) { - // More than one may be specified. - foreach ($elements as $element) { - if (isset($element['href'])) { - $base = (string) $element['href']; - break; - } - } - } - else { - $base = $this->url; - if ($pos = strpos($base, '?')) { - // Remove query string. - $base = substr($base, 0, $pos); - } - - // Ignore everything after the last forward slash. - $base = substr($base, 0, strrpos($base, '/')); - } - - // Ensure that the last character is a forward slash. - if ($base[strlen($base) - 1] != '/') { - $base .= '/'; - } - return $base; - } - - /** - * Extract the text contained by the element. - * - * Strips all XML/HTML tags, decodes HTML entities, and trims the result. - * - * @param $element - * SimpleXMLElement to extract text from. - * @return - * Extracted text. - */ - public function asText(SimpleXMLElement $element) { - return trim(html_entity_decode(strip_tags($element->asXML()))); - } -} - -/** - * @} End of "defgroup browser". - */ |