Default.php 5.38 KB
<?php
/*
 * This file is part of PHP Copy/Paste Detector (PHPCPD).
 *
 * (c) Sebastian Bergmann <sebastian@phpunit.de>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace SebastianBergmann\PHPCPD\Detector\Strategy;

use SebastianBergmann\PHPCPD\CodeClone;
use SebastianBergmann\PHPCPD\CodeCloneFile;
use SebastianBergmann\PHPCPD\CodeCloneMap;

class DefaultStrategy extends AbstractStrategy
{
    /**
     * Copy & Paste Detection (CPD).
     *
     * Tokenizes $file, condenses every non-ignored token into a fixed-width
     * signature and slides a window of $minTokens tokens over that signature.
     * Each window is hashed and looked up in $this->hashes:
     *
     *  - an unknown hash is registered together with this file and the line
     *    of the window's first token;
     *  - a run of consecutive known hashes is reported to $result as a clone
     *    once the run ends, provided it spans at least $minLines lines and is
     *    not simply the registered location itself.
     *
     * $this->hashes lives on the strategy instance, so windows registered by
     * earlier calls are matched by later ones.
     *
     * NOTE(review): $this->hashes and $this->tokensIgnoreList are not declared
     * in this class; presumably they come from AbstractStrategy.
     *
     * @param string       $file      path of the PHP file to analyse
     * @param int          $minLines  minimum number of lines a run must span to be reported
     * @param int          $minTokens number of tokens in each hashed window
     * @param CodeCloneMap $result    receives the added line count and any detected clones
     * @param bool         $fuzzy     when true, the text of every T_VARIABLE token is
     *                                replaced by 'variable' before hashing
     */
    public function processFile($file, $minLines, $minTokens, CodeCloneMap $result, $fuzzy = false)
    {
        $buffer = \file_get_contents($file);

        // An unreadable file contributes neither lines nor tokens; stop here
        // rather than feeding `false` into the tokenizer.
        if ($buffer === false) {
            return;
        }

        $currentTokenPositions     = [];
        $currentTokenRealPositions = [];
        $currentSignature          = '';
        $tokens                    = \token_get_all($buffer);
        $tokenNr                   = 0;
        $lastTokenLine             = 0;

        $result->setNumLines(
            $result->getNumLines() + \substr_count($buffer, "\n")
        );

        unset($buffer);

        foreach ($tokens as $token) {
            // Single-character tokens are plain strings without a line number.
            if (!\is_array($token)) {
                continue;
            }

            if (!isset($this->tokensIgnoreList[$token[0]])) {
                // Line distance to the previous array token (ignored or not).
                $lineDelta = $token[2] - $lastTokenLine;

                // Cumulative sum of those distances over the kept tokens.
                $currentTokenPositions[$tokenNr] = $tokenNr === 0
                    ? $lineDelta
                    : $currentTokenPositions[$tokenNr - 1] + $lineDelta;

                // Actual source line of the token, used for reporting.
                $currentTokenRealPositions[$tokenNr++] = $token[2];

                if ($fuzzy && $token[0] === T_VARIABLE) {
                    $token[1] = 'variable';
                }

                // Exactly five bytes per token: the low byte of the token id
                // followed by the 32-bit CRC of the token text. The window
                // arithmetic below relies on this fixed width.
                $currentSignature .= \chr($token[0] & 255) .
                                     \pack('N*', \crc32($token[1]));
            }

            $lastTokenLine = $token[2];
        }

        // The raw token list is no longer needed once the signature is built.
        unset($tokens);

        $count = \count($currentTokenPositions);

        // Start of the currently open run of matching windows, or null when no
        // run is open. An explicit null is required because a cumulative line
        // position of 0 is a legitimate value and must not be mistaken for
        // "no run started yet".
        $runStart = null;

        for ($tokenNr = 0; $tokenNr <= $count - $minTokens; $tokenNr++) {
            // First 8 bytes of the raw MD5 of the window's signature slice.
            $hash = \substr(
                \md5(
                    \substr($currentSignature, $tokenNr * 5, $minTokens * 5),
                    true
                ),
                0,
                8
            );

            if (isset($this->hashes[$hash])) {
                if ($runStart === null) {
                    $runStart = [
                        'line'     => $currentTokenPositions[$tokenNr],
                        'realLine' => $currentTokenRealPositions[$tokenNr],
                        'hash'     => $hash,
                        'token'    => $tokenNr,
                    ];
                }

                continue;
            }

            if ($runStart !== null) {
                // The previous window was the last matching one; it covers
                // tokens up to ($tokenNr - 1) + $minTokens - 1.
                $this->addCloneForRun(
                    $file,
                    $minLines,
                    $result,
                    $runStart,
                    ($tokenNr - 1) + $minTokens - 1,
                    $currentTokenPositions,
                    $currentTokenRealPositions
                );

                $runStart = null;
            }

            $this->hashes[$hash] = [$file, $currentTokenRealPositions[$tokenNr]];
        }

        // A run that is still open when the file ends has to be flushed too.
        // $tokenNr is one past the last window examined at this point.
        if ($runStart !== null) {
            $this->addCloneForRun(
                $file,
                $minLines,
                $result,
                $runStart,
                ($tokenNr - 1) + $minTokens - 1,
                $currentTokenPositions,
                $currentTokenRealPositions
            );
        }
    }

    /**
     * Reports a finished run of matching windows to $result as a clone.
     *
     * Nothing is added when the run spans fewer than $minLines lines or when
     * the registered location is this very file at the very same line.
     *
     * @param string       $file               file currently being processed
     * @param int          $minLines           minimum number of lines a run must span
     * @param CodeCloneMap $result             receives the clone
     * @param array        $runStart           'line', 'realLine', 'hash' and 'token' of the
     *                                         first matching window of the run
     * @param int          $lastToken          index of the last token covered by the run
     * @param array        $tokenPositions     cumulative line position per token
     * @param array        $tokenRealPositions source line per token
     */
    private function addCloneForRun($file, $minLines, CodeCloneMap $result, array $runStart, $lastToken, array $tokenPositions, array $tokenRealPositions)
    {
        $fileA      = $this->hashes[$runStart['hash']][0];
        $firstLineA = $this->hashes[$runStart['hash']][1];

        $numLines     = $tokenPositions[$lastToken] + 1 - $runStart['line'];
        $realNumLines = $tokenRealPositions[$lastToken] + 1 - $runStart['realLine'];

        if ($numLines < $minLines) {
            return;
        }

        // Strict comparison: loose `==` would treat numeric-looking path
        // strings (e.g. "1e3" and "1000") as the same file.
        if ($fileA === $file && $firstLineA === $runStart['realLine']) {
            return;
        }

        $result->addClone(
            new CodeClone(
                new CodeCloneFile($fileA, $firstLineA),
                new CodeCloneFile($file, $runStart['realLine']),
                $realNumLines,
                $lastToken + 1 - $runStart['token']
            )
        );
    }
}