• User

    Estrarre urls da una pagina

    Ho trovato questo script sulla rete, che promette di estrarre tutti gli url da un indirizzo internet, ma non funziona :arrabbiato:
    Chi di voi è in grado di correggerlo?

    <?php
    // Lists every URL found in a remote page. The page address is taken from the
    // "url" form field (POST) or from the ?url=... query parameter (GET).

    // Read the single "url" field; the whole superglobal array is not a usable value.
    $method = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : 'GET';
    $params = ($method === 'POST') ? $_POST : $_GET;
    $url = (isset($params['url']) && is_string($params['url'])) ? trim($params['url']) : '';

    // $PHP_SELF only exists with register_globals enabled; read it from $_SERVER
    // and escape it once, because it is echoed into HTML attributes below.
    $self = htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8');
    ?>

    <form action="<?php echo $self; ?>" method="POST">
    URL:<input type="text" name="url" value="<?php echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8'); ?>"/>

    <input type="submit">
    </form>

    <?php
    if ($url !== '') {
        // The address comes straight from the visitor, so only remote schemes are
        // fetched; otherwise fopen() would happily read local files as well.
        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
        $html = false;
        if (in_array($scheme, array('http', 'https', 'ftp'), true)) {
            $remote = fopen($url, 'r');
            if ($remote !== false) {
                // A single fread() on a network stream returns as soon as one chunk
                // is available; stream_get_contents() keeps reading up to the 1 MB cap.
                $html = stream_get_contents($remote, 1048576);
                fclose($remote);
            }
        }

        if ($html === false) {
            echo "Unable to read the requested URL<br>\n";
        } else {
            // Building blocks of the URL pattern. Single-quoted so that the regex
            // escapes (\b, \w, \-) reach PCRE untouched.
            $urls = '(https?|file|ftp)';   // accepted schemes
            $ltrs = '\w';                  // word characters
            $gunk = '/#~:.?+=&%@!\-';      // other characters allowed inside a URL
            $punc = '.:?\-';               // punctuation that may trail a URL in text
            $any = $ltrs . $gunk . $punc;

            // Word boundary, scheme, colon, then as few URL characters as possible
            // until the lookahead sees optional trailing punctuation followed by a
            // non-URL character, or the end of the text.
            $pattern = '{\b' . $urls . ':[' . $any . ']+?(?=[' . $punc . ']*[^' . $any . ']|$)}';
            $found = preg_match_all($pattern, $html, $matches) ? $matches[0] : array();

            printf("Output of URLs %d URLs<p>\n", count($found));
            foreach ($found as $u) {
                // Each result links back to this script so it can be crawled in turn.
                $link = $self . '?url=' . urlencode($u);
                $label = htmlspecialchars($u, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
                echo "<a href='$link'>$label</a><br>\n";
            }
        }
    }
    ?>

    grazie 🙂


  • Bannato User

    prova questo costruttore :

    
    /**
     * Collects the href targets of anchor (<a>) tags found in HTML text.
     *
     * Links accumulate across successive parse*() calls on the same instance and
     * are returned, HTML-escaped, by getLinks(). Each instance keeps its own list.
     */
    class LinkExtractor
    {
        /**
         * Patterns applied in order: single-quoted, double-quoted, then unquoted
         * href values. Capture group 2 is the href value; group 1 is the optional
         * attribute text that precedes href inside the same tag.
         *
         * @var string[]
         */
        public $linkReg = array(
            "/<a\\s([^>]*?\\s)?href\\s*=\\s*'([^']+)'/i",
            '/<a\s([^>]*?\s)?href\s*=\s*"([^"]+)"/i',
            '/<a\s([^>]*?\s)?href\s*=\s*([^\s"\'>]+)/i'
        );

        /**
         * Links collected so far, already passed through htmlspecialchars().
         *
         * @var string[]
         */
        protected $linkRecipient = array();

        /**
         * Callback for preg_replace_callback(): stores the href of one match.
         *
         * @param array $replacement Match groups of one pattern hit.
         * @return string Always '', so the matched tag start is removed from the
         *                working copy and later patterns cannot match it again.
         */
        public function __manageLinkRecipient($replacement)
        {
            if (isset($replacement[2])) {
                $this->linkRecipient[] = htmlspecialchars(
                    $replacement[2],
                    ENT_QUOTES | ENT_SUBSTITUTE,
                    'UTF-8'
                );
            }
            return '';
        }

        /**
         * Runs every pattern of $linkReg over the text, feeding each match to
         * __manageLinkRecipient(). The rewritten text itself is discarded.
         *
         * @param string $st Text to scan.
         * @return void
         */
        public function __callBackCaller($st)
        {
            preg_replace_callback($this->linkReg, array($this, '__manageLinkRecipient'), $st);
        }

        /**
         * Reads a remote page or local file and collects the links it contains.
         *
         * @param string $url Address or path accepted by fopen().
         * @return bool true when the resource was read, false otherwise.
         */
        public function parseUrl($url)
        {
            // Failure is reported through the return value, so the fopen() warning
            // stays suppressed as in the original.
            $fp = @fopen($url, 'r');
            if ($fp === false) {
                return false;
            }
            // Read to end of stream; a truthiness-driven fread() loop would stop
            // early on a chunk that is exactly "0".
            $st = stream_get_contents($fp);
            fclose($fp);
            if ($st === false) {
                return false;
            }
            $this->__callBackCaller($st);
            return true;
        }

        /**
         * Collects the links of a file; same behaviour as parseUrl().
         *
         * @param string $st Path of the file to read.
         * @return bool true when the file was read, false otherwise.
         */
        public function parseFile($st)
        {
            return $this->parseUrl($st);
        }

        /**
         * Collects the links contained in a string of HTML.
         *
         * @param string $st HTML text to scan.
         * @return void
         */
        public function parseString($st)
        {
            $this->__callBackCaller($st);
        }

        /**
         * Returns every link collected so far by this instance.
         *
         * @return string[] HTML-escaped href values, grouped by pattern order
         *                  within each parse call; empty when nothing was found.
         */
        public function getLinks()
        {
            return $this->linkRecipient;
        }
    }
    /**
     * Convenience wrapper around the LinkExtractor class.
     *
     * @param string $what HTML text to scan, or an address/path when $url is true.
     * @param bool   $url  When true, $what is opened and read instead of being
     *                     scanned directly.
     * @return array|false Collected links, or false when $what could not be read.
     */
    function linkExtractor($what, $url = false)
    {
        // Plain "new": the PHP 4 "&new" form is a syntax error on PHP 7 and later.
        $myLinks = new LinkExtractor();
        if ($url) {
            if (!$myLinks->parseUrl($what)) {
                return false;
            }
        } else {
            $myLinks->parseString($what);
        }
        return $myLinks->getLinks();
    }
    
    // EXAMPLE: print every link found on a remote page, one per line.
    $url = "http://www.php.net/"; // site to parse

    // linkExtractor() returns by value, so a plain assignment is used.
    $arrayLinks = linkExtractor($url, true);

    // false means the page could not be read; an empty array means no links.
    if ($arrayLinks !== false) {
        foreach ($arrayLinks as $link) {
            echo $link . "\n";
        }
    }
    
    ```[/img]