Commit 766b23e0 by Bostjan Skufca

Initial code commit

parent 48785996
composer.phar
vendor/
### Composer-specific paths
#
/composer.phar
/vendor/
# Commit your application's lock file http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file
# You may choose to ignore a library lock file http://getcomposer.org/doc/02-libraries.md#lock-file
# composer.lock
### Sitemap-specific paths
#
/sitemap.yaml
# php-brute-force-sitemap-generator
Generate sitemaps by crawling your website for static pages and using hooks for dynamic content
# PHP Brute Force Sitemap Generator
Generate sitemaps by crawling your website for static pages and using hooks for
dynamic content. Intermediate sitemap URI list is stored in relative format and
served dynamically by generating final documents where relative URIs are
prefixed with configured base URI, resulting in final document that contains
full URIs.
Features:
* crawl your website for static content and generate list of URIs;
* seed crawler with existing URI list, or add URIs manually and recrawl
to avoid missed content in the future;
* store URIs in relative format;
* when serving sitemaps, convert relative URIs to absolute form with
configured prefix URI.
## Target users
Sitemaps should be as accurate as possible, which means that
your application needs to have infrastructure prepared for enumerating all
content it serves. Many applications do not support this, or support it only
partially.
Users that are stuck using such applications and who have to provide sitemaps
are usually left with the option of pre-generating sitemaps using public web
crawlers. This results in inaccurate and stale sitemaps.
This is where Brute Force Sitemap Generator (BFSG) steps in.
## Modes of operation
Definition of terms:
* **base URI**: URI under which the sitemap will reside, e.g. https://example.com/ (without the trailing "sitemap.*")
* **transData**: It stands for "transitional data" and represents sitemap data that do not
contain absolute URIs. Absolute URIs are generated at the very last stage,
where HTTP request for sitemap triggers generation of final sitemap by prefixing
all relative URIs with base URI prefix which is obtained dynamically.
BFSG implements the following operations:
* create transData by crawling existing website
** crawling may be seeded by base URI only
** may be seeded by existing transData (list of URIs that were previously encountered)
* augment transData generated by crawler by using callback (for dynamically generated pages)
* using transData cache to generate and output final sitemap.(xml|txt)(.gz)?
BFSG can be glued to your application in the following ways:
1. add BFSG to your project as git submodule:
** you need to create sitemap-glue.php file that returns needed configuration details from your project
** sitemap-glue.php must reside on the same path level as main BFSG directory (just outside of BFSG source tree)
** (the reasoning for this is that you will want to commit your glue code to your project repository instead of to BFSG's git repo)
1. install BFSG with composer - TODO
1. Symfony: add BFSG as bundle - TODO
## License
BFSG is released under MIT license. See LICENSE file at the root of repository for
additional info.
## Credits
Brute Force Sitemap Generator was created and is maintained by Bostjan Skufca & Teon d.o.o company.
<?php
/**
* Shared bootstrap script: registers the Composer autoloader and, when an
* integration ("glue") file is present, hands its return value to Config.
*
* Import classes
*/
use Teon\Sitemap\Generator\BruteForce\Config;
/**
* Engage composer
*
* The autoloader is expected in the vendor/ directory one level above the
* directory containing this file.
*/
require __DIR__.'/../vendor/autoload.php';
/**
* Try to load configuration from integration file
*
* The integration file is looked up two directory levels above this file.
* It is optional: when it does not exist, Config::setConfig() is simply not
* called from here.
*/
$integrationFilePath = __DIR__ .'/../../sitemap-glue.php';
if (file_exists($integrationFilePath)) {
// The glue file is executed in this scope and whatever it returns is used as
// the configuration data.
// NOTE(review): the expected shape of that data is defined by
// Config::setConfig(), which is not visible here - confirm against Config.
$configData = require $integrationFilePath;
Config::setConfig($configData);
}
#!/usr/bin/env php
<?php
/**
 * Command-line entry point: registers the sitemap commands with a Symfony
 * Console application and runs it.
 *
 * Class imports
 */
use Symfony\Component\Console\Application;
use Teon\Sitemap\Generator\BruteForce\Command\CrawlCommand;
use Teon\Sitemap\Generator\BruteForce\Command\GenerateTxtCommand;
use Teon\Sitemap\Generator\BruteForce\Command\GenerateXmlCommand;
/**
 * Load the shared bootstrap before anything else
 */
require __DIR__.'/../app/bootstrap.php';
/**
 * Build the console application, register every command, then run it
 */
$console = new Application();
$commands = array(
    new CrawlCommand(),
    new GenerateTxtCommand(),
    new GenerateXmlCommand(),
);
foreach ($commands as $command) {
    $console->add($command);
}
$console->run();
#!/bin/bash
#
# Convenience wrapper around ./console for the "sitemap:" command namespace.
#
# Usage: sitemap <command> [arguments...]
#   Runs "./console sitemap:<command> [arguments...]".
#   Without a command, or with an unknown one, the list of available
#   sitemap commands is printed instead.
#
# Exit status: that of ./console, or 1 when the command is unknown or the
# script directory cannot be entered.

### Init shell
#
# Work from the directory containing this script so that ./console resolves
# regardless of the caller's working directory. Quoting keeps paths that
# contain spaces intact.
cd "$(dirname "$0")" || exit 1

### Check if command exists
#
CMD="$1"
if [ -z "$CMD" ]; then
    # No command given: just show what is available.
    ./console list sitemap
    exit
fi

# Extract the command names that follow the "sitemap:" prefix from the
# listing (any amount of leading indentation is accepted) and look for the
# requested command among them. The match stays a substring match, as before,
# but -F makes it literal and "--" stops a command starting with "-" from
# being read as a grep option.
if ./console list sitemap \
    | sed -n 's/^[[:space:]]*sitemap:\([^[:space:]]*\).*/\1/p' \
    | grep -qF -- "$CMD"; then
    # "sitemap:$@" expands to "sitemap:<command>" followed by each remaining
    # argument as its own word, so arguments containing spaces survive.
    ./console "sitemap:$@"
else
    ./console list sitemap
    echo
    echo "ERROR: Unknown sitemap:... command, see help above or use ./console instead"
    echo
    # Signal the failure to callers and scripts.
    exit 1
fi
{
"name": "teon/brute-force-sitemap-generator",
"description": "Generate sitemaps by crawling your website for static pages and using hooks for dynamic content",
"type": "library",
"license": "MIT",
"authors": [
{
"name" : "Teon d.o.o",
"email" : "opensource@teon.si",
"homepage" : "http://teon.si"
}
],
"autoload": {
"psr-4": {
"Teon\\Sitemap\\Generator\\BruteForce\\": "src/"
}
},
"require": {
"ext-simplexml": "*",
"ext-dom": "*",
"symfony/console": "^2.7",
"symfony/http-foundation": "^2.7",
"symfony/yaml": "^2.7",
"zendframework/zend-http": "^2.5"
}
}
{
"_readme": [
"This file locks the dependencies of your project to a known state",
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"hash": "5103764d07bf372c111d0abf7603de89",
"content-hash": "63c7c409fec18b44a9e24aa5fed640b9",
"packages": [
{
"name": "symfony/console",
"version": "v2.7.6",
"source": {
"type": "git",
"url": "https://github.com/symfony/console.git",
"reference": "5efd632294c8320ea52492db22292ff853a43766"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/console/zipball/5efd632294c8320ea52492db22292ff853a43766",
"reference": "5efd632294c8320ea52492db22292ff853a43766",
"shasum": ""
},
"require": {
"php": ">=5.3.9"
},
"require-dev": {
"psr/log": "~1.0",
"symfony/event-dispatcher": "~2.1",
"symfony/process": "~2.1"
},
"suggest": {
"psr/log": "For using the console logger",
"symfony/event-dispatcher": "",
"symfony/process": ""
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\Console\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony Console Component",
"homepage": "https://symfony.com",
"time": "2015-10-20 14:38:46"
},
{
"name": "symfony/http-foundation",
"version": "v2.7.6",
"source": {
"type": "git",
"url": "https://github.com/symfony/http-foundation.git",
"reference": "7598eea151ae3d4134df1f9957364b17809eea75"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/http-foundation/zipball/7598eea151ae3d4134df1f9957364b17809eea75",
"reference": "7598eea151ae3d4134df1f9957364b17809eea75",
"shasum": ""
},
"require": {
"php": ">=5.3.9"
},
"require-dev": {
"symfony/expression-language": "~2.4"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\HttpFoundation\\": ""
},
"classmap": [
"Resources/stubs"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony HttpFoundation Component",
"homepage": "https://symfony.com",
"time": "2015-10-23 14:47:27"
},
{
"name": "symfony/yaml",
"version": "v2.7.6",
"source": {
"type": "git",
"url": "https://github.com/symfony/yaml.git",
"reference": "eca9019c88fbe250164affd107bc8057771f3f4d"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/yaml/zipball/eca9019c88fbe250164affd107bc8057771f3f4d",
"reference": "eca9019c88fbe250164affd107bc8057771f3f4d",
"shasum": ""
},
"require": {
"php": ">=5.3.9"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.7-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Component\\Yaml\\": ""
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Fabien Potencier",
"email": "fabien@symfony.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony Yaml Component",
"homepage": "https://symfony.com",
"time": "2015-10-11 09:39:48"
},
{
"name": "zendframework/zend-escaper",
"version": "2.5.1",
"source": {
"type": "git",
"url": "https://github.com/zendframework/zend-escaper.git",
"reference": "a4b227d8a477f4e7e9073f8e0a7ae7dbd3104a73"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/zendframework/zend-escaper/zipball/a4b227d8a477f4e7e9073f8e0a7ae7dbd3104a73",
"reference": "a4b227d8a477f4e7e9073f8e0a7ae7dbd3104a73",
"shasum": ""
},
"require": {
"php": ">=5.3.23"
},
"require-dev": {
"fabpot/php-cs-fixer": "1.7.*",
"phpunit/phpunit": "~4.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.5-dev",
"dev-develop": "2.6-dev"
}
},
"autoload": {
"psr-4": {
"Zend\\Escaper\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"homepage": "https://github.com/zendframework/zend-escaper",
"keywords": [
"escaper",
"zf2"
],
"time": "2015-06-03 14:05:37"
},
{
"name": "zendframework/zend-http",
"version": "2.5.1",
"source": {
"type": "git",
"url": "https://github.com/zendframework/zend-http.git",
"reference": "6cc6dee9a27fc07e0167d8779ab2258747108ed5"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/zendframework/zend-http/zipball/6cc6dee9a27fc07e0167d8779ab2258747108ed5",
"reference": "6cc6dee9a27fc07e0167d8779ab2258747108ed5",
"shasum": ""
},
"require": {
"php": ">=5.3.23",
"zendframework/zend-loader": "~2.5",
"zendframework/zend-stdlib": "~2.5",
"zendframework/zend-uri": "~2.5",
"zendframework/zend-validator": "~2.5"
},
"require-dev": {
"fabpot/php-cs-fixer": "1.7.*",
"phpunit/phpunit": "~4.0",
"zendframework/zend-config": "~2.5"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.5-dev",
"dev-develop": "2.6-dev"
}
},
"autoload": {
"psr-4": {
"Zend\\Http\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"description": "provides an easy interface for performing Hyper-Text Transfer Protocol (HTTP) requests",
"homepage": "https://github.com/zendframework/zend-http",
"keywords": [
"http",
"zf2"
],
"time": "2015-06-03 15:32:01"
},
{
"name": "zendframework/zend-loader",
"version": "2.5.1",
"source": {
"type": "git",
"url": "https://github.com/zendframework/zend-loader.git",
"reference": "c5fd2f071bde071f4363def7dea8dec7393e135c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/zendframework/zend-loader/zipball/c5fd2f071bde071f4363def7dea8dec7393e135c",
"reference": "c5fd2f071bde071f4363def7dea8dec7393e135c",
"shasum": ""
},
"require": {
"php": ">=5.3.23"
},
"require-dev": {
"fabpot/php-cs-fixer": "1.7.*",
"phpunit/phpunit": "~4.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.5-dev",
"dev-develop": "2.6-dev"
}
},
"autoload": {
"psr-4": {
"Zend\\Loader\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"homepage": "https://github.com/zendframework/zend-loader",
"keywords": [
"loader",
"zf2"
],
"time": "2015-06-03 14:05:47"
},
{
"name": "zendframework/zend-stdlib",
"version": "2.5.1",
"source": {
"type": "git",
"url": "https://github.com/zendframework/zend-stdlib.git",
"reference": "cc8e90a60dd5d44b9730b77d07b97550091da1ae"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/zendframework/zend-stdlib/zipball/cc8e90a60dd5d44b9730b77d07b97550091da1ae",
"reference": "cc8e90a60dd5d44b9730b77d07b97550091da1ae",
"shasum": ""
},
"require": {
"php": ">=5.3.23"
},
"require-dev": {
"fabpot/php-cs-fixer": "1.7.*",
"phpunit/phpunit": "~4.0",
"zendframework/zend-config": "~2.5",
"zendframework/zend-eventmanager": "~2.5",
"zendframework/zend-filter": "~2.5",
"zendframework/zend-inputfilter": "~2.5",
"zendframework/zend-serializer": "~2.5",
"zendframework/zend-servicemanager": "~2.5"
},
"suggest": {
"zendframework/zend-eventmanager": "To support aggregate hydrator usage",
"zendframework/zend-filter": "To support naming strategy hydrator usage",
"zendframework/zend-serializer": "Zend\\Serializer component",
"zendframework/zend-servicemanager": "To support hydrator plugin manager usage"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.5-dev",
"dev-develop": "2.6-dev"
}
},
"autoload": {
"psr-4": {
"Zend\\Stdlib\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"homepage": "https://github.com/zendframework/zend-stdlib",
"keywords": [
"stdlib",
"zf2"
],
"time": "2015-06-03 15:32:03"
},
{
"name": "zendframework/zend-uri",
"version": "2.5.1",
"source": {
"type": "git",
"url": "https://github.com/zendframework/zend-uri.git",
"reference": "fe6c7f4c8d9037fe551898a538a2b6d39483f572"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/zendframework/zend-uri/zipball/fe6c7f4c8d9037fe551898a538a2b6d39483f572",
"reference": "fe6c7f4c8d9037fe551898a538a2b6d39483f572",
"shasum": ""
},
"require": {
"php": ">=5.3.23",
"zendframework/zend-escaper": "~2.5",
"zendframework/zend-validator": "~2.5"
},
"require-dev": {
"fabpot/php-cs-fixer": "1.7.*",
"phpunit/phpunit": "~4.0"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.5-dev",
"dev-develop": "2.6-dev"
}
},
"autoload": {
"psr-4": {
"Zend\\Uri\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"description": "a component that aids in manipulating and validating » Uniform Resource Identifiers (URIs)",
"homepage": "https://github.com/zendframework/zend-uri",
"keywords": [
"uri",
"zf2"
],
"time": "2015-06-03 15:32:03"
},
{
"name": "zendframework/zend-validator",
"version": "2.5.1",
"source": {
"type": "git",
"url": "https://github.com/zendframework/zend-validator.git",
"reference": "f2c1a1fc786ff4533003cb7fac477495dc007120"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/zendframework/zend-validator/zipball/f2c1a1fc786ff4533003cb7fac477495dc007120",
"reference": "f2c1a1fc786ff4533003cb7fac477495dc007120",
"shasum": ""
},
"require": {
"php": ">=5.3.23",
"zendframework/zend-stdlib": "~2.5"
},
"require-dev": {
"fabpot/php-cs-fixer": "1.7.*",
"phpunit/phpunit": "~4.0",
"zendframework/zend-cache": "~2.5",
"zendframework/zend-config": "~2.5",
"zendframework/zend-db": "~2.5",
"zendframework/zend-filter": "~2.5",
"zendframework/zend-http": "~2.5",
"zendframework/zend-i18n": "~2.5",
"zendframework/zend-math": "~2.5",
"zendframework/zend-servicemanager": "~2.5",
"zendframework/zend-session": "~2.5",
"zendframework/zend-uri": "~2.5"
},
"suggest": {
"zendframework/zend-db": "Zend\\Db component",
"zendframework/zend-filter": "Zend\\Filter component, required by the Digits validator",
"zendframework/zend-i18n": "Zend\\I18n component to allow translation of validation error messages as well as to use the various Date validators",
"zendframework/zend-math": "Zend\\Math component",
"zendframework/zend-resources": "Translations of validator messages",
"zendframework/zend-servicemanager": "Zend\\ServiceManager component to allow using the ValidatorPluginManager and validator chains",
"zendframework/zend-session": "Zend\\Session component",
"zendframework/zend-uri": "Zend\\Uri component, required by the Uri and Sitemap\\Loc validators"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.5-dev",
"dev-develop": "2.6-dev"
}
},
"autoload": {
"psr-4": {
"Zend\\Validator\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"description": "provides a set of commonly needed validators",
"homepage": "https://github.com/zendframework/zend-validator",
"keywords": [
"validator",
"zf2"
],
"time": "2015-06-03 15:32:03"
}
],
"packages-dev": [],
"aliases": [],
"minimum-stability": "stable",
"stability-flags": [],
"prefer-stable": false,
"prefer-lowest": false,
"platform": {
"ext-simplexml": "*",
"ext-dom": "*"
},
"platform-dev": []
}
###
### Brute Force Sitemap Generator
###
#
# This is a sample .htaccess file that redirects requests for sitemap files
# to sitemap.php provided by Brute Force Sitemap Generator.
#
# By default, matching requests are rewritten even if a static sitemap file
# with the requested name already exists on disk.
#
RewriteEngine on
RewriteRule ^/?sitemap\.(txt|xml)(\.gz)?$ /sitemap.php
<?php
/**
 * HTTP entry point for sitemap requests.
 *
 * Determines the requested format (txt or xml) and whether gzip compression
 * is wanted from the request path, asks Generator for the sitemap data and
 * writes it out with a matching Content-Type header.
 *
 * Throws Exception when the request path does not name a supported sitemap
 * format.
 */
/**
 * Import classes
 */
use Teon\Sitemap\Generator\BruteForce\Generator;
/**
 * Bootstrap first
 */
require __DIR__.'/../app/bootstrap.php';
/**
 * Decide which format to throw out
 */
// REQUEST_URI is not set in every SAPI (e.g. CLI); fall back to an empty
// string so the "unsupported format" exception below is raised instead of a
// PHP notice.
$requestUri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
// REQUEST_URI includes the query string; only the path part decides the format.
$requestUriParts = explode('?', $requestUri, 2);
$requestPath = $requestUriParts[0];
// Detect format
if (!preg_match('#/sitemap\.([^.]+)(\.gz)?$#', $requestPath, $m)) {
    throw new Exception("Unsupported sitemap format: $requestUri");
}
$format = $m[1];
// Detect compression: the optional second group only matches a trailing ".gz"
$compressed = !empty($m[2]);
/*
 * Generate in appropriate format
 */
$Generator = new Generator();
switch ($format) {
    case "txt":
        // Plain text output
        $contentType = "text/plain";
        $sitemapData = $Generator->generateTxt();
        break;
    case "xml":
        // XML output
        $contentType = "application/xml";
        $sitemapData = $Generator->generateXml();
        break;
    default:
        throw new Exception("Unsupported sitemap format: $format");
}
/**
 * Manage output
 */
if ($compressed) {
    // The body is a gzip file, so it is announced as such rather than with
    // the content type of the data inside it.
    header('Content-Type: application/gzip');
    $sitemapDataCompressed = gzencode($sitemapData);
    echo $sitemapDataCompressed;
} else {
    header("Content-Type: $contentType");
    echo $sitemapData;
}
<?php
// Init your application
// ...
// Get current baseUri for your application instance