Configure Feeds
Feeds settings
Configuration settings related to Feeds need to be specified within the
[feeds]
section of the configuration file. The following settings are
supported.
useragent
The user agent string used for crawling.
[feeds]
useragent = feeds (+https://github.com/pyfeeds/pyfeeds)
spiders
Each spider listed in the spiders
setting will be crawled with each run.
List one spider per line.
[feeds]
spiders =
  tvthek.orf.at
  oe1.orf.at
Use feeds list
to get a list of all available spiders.
output_path
This is the path where the generated Atom feeds will be saved. You may serve this directory with any web server.
[feeds]
output_path = output
output_url
The URL of the target directory from which the feeds can be accessed. This is
an optional setting; it is used to generate the atom:link
element with the
rel="self"
attribute. See also:
https://validator.w3.org/feed/docs/warning/MissingSelf.html
[feeds]
output_url = https://example.com/feeds
truncate_words
Truncate content to the given number of words (10 in the example below) instead of including the full text. This can be useful if generated feeds should be made publicly available.
[feeds]
truncate_words = 10
remove_images
Remove images from output. This can be useful if generated feeds should be made publicly available.
[feeds]
remove_images = 1
cache_enabled
Feeds can be configured to use a cache for HTTP responses, which is highly
recommended to save bandwidth. The cache_enabled
setting controls whether
caching is used.
[feeds]
cache_enabled = 1
cache_dir
The path where cache data is stored.
[feeds]
cache_dir = ~/.cache/feeds
cache_expires
Expire (remove) entries from the cache after the given number of days (90 in the example below).
[feeds]
cache_expires = 90
Spider specific settings
Some spiders support additional settings. Head over to the Supported Websites section for more information on spider-specific settings.
Example configuration
Have a look at the Feeds example configuration when configuring Feeds to suit your needs.
# Feeds configuration.
[feeds]
# Useragent to use for crawling.
useragent = feeds (+https://github.com/pyfeeds/pyfeeds)
## List of spiders to run by default, one per line.
# spiders =
#   tvthek.orf.at
#   oe1.orf.at
## Target directory where the feeds will be saved.
# output_path = output
## URL of target directory from which the feeds can be accessed.
## Optional; used to generate atom:link element with rel="self" attribute.
## See also: https://validator.w3.org/feed/docs/warning/MissingSelf.html
# output_url = https://example.com/feeds
## Truncate content to 10 words instead of including the full text.
## This can be useful if generated feeds should be made publicly available.
# truncate_words = 10
## Remove images from output.
# remove_images = 1
## Enable caching of responses
# cache_enabled = 1
## Path to the cache.
# cache_dir = ~/.cache/feeds
## Expire (remove) entries from cache after 90 days
# cache_expires = 90
#[generic]
## A list of URLs to RSS/Atom feeds.
# urls =
## A list of URLs to RSS/Atom feeds that provide the full content in the "encoded" or
## "content" tag.
# fulltext_urls =
#[falter.at]
## falter.at has a paywall for certain articles.
## If you want to crawl paid articles, please provide abonr (subscription
## number) and password.
# abonr =
# password =
# blogs =
#   lingens
#   thinktank
#[konsument.at]
## KONSUMENT.AT has a paywall for certain articles.
## If you want to crawl paid articles, please provide username and password.
# username =
# password =
#[biblioweb.at]
## Location of your library that uses biblioweb.at.
# location =
#[lwn.net]
## LWN.net has paywalled articles.
## If you want to crawl them, please provide username and password.
# username =
# password =
#[vice.com]
#locales =
# de_at
# de
#[nachrichten.at]
## Nachrichten.at has paywalled articles.
## If you want to crawl them, please provide username and password.
#username =
#password =
#ressorts =
# wels
# linz
# nachrichten
#[uebermedien.de]
## uebermedien.de has a paywall for certain articles.
## If you want to crawl paid articles, please provide your Steady username
## and password.
# username =
# password =
#[orf.at]
#channels =
# news
# fm4
# science
# help
# sport
# oe3
# oesterreich
# burgenland
# wien
# noe
# ooe
# salzburg
# steiermark
# kaernten
# vorarlberg
# tirol
# religion
#authors =
# Erich Moechel
#[derstandard.at]
#ressorts =
# diskurs/kolumnen/rauscher
# inland/serienundblogs/standardabweichung
# etat
# immobilien
#users =
# 571924
#[arstechnica.com]
#channels =
# index
# features
# technology-lab
# gadgets
# business
# security
# tech-policy
# apple
# gaming
# science
# multiverse
# cars
# staff-blogs
# cardboard
# open-source
# microsoft
# software
# telecom
# web
#[momoxfashion.com]
#links =
# /de/herren?sortiertnach=neueste
#[kurier.at]
#channels =
# /chronik/wien
#articles =
# /meinung/pammesberger-2018-die-karikatur-zum-tag/309.629.015/slideshow
#authors =
# niki.glattauer
# guido.tartarotti
# florian.holzer
# barbara.kaufmann
#[spotify.com]
#market = AT
#shows =
# 6u7pI0o0CUBQq0T1fwPgbj
#[wienerzeitung.at]
#ressorts =
# nachrichten/politik/wien
# nachrichten/politik
# nachrichten/wirtschaft
# meinung
#[ft.com]
#ressorts =
# homepage
# the-big-read
#[economist.com]
#ressorts =
# finance-and-economics
# special-report
# leaders
#[gem2go]
#urls =
# http://yourlocalcommunity.tld/News
# https://mytown.tld/BUeRGERSERVICE/Neuigkeiten