#!/bin/csh -f
#
# Filter stdin to stdout, stripping out lines that look like httpd server
# log lines from bots and crawlers.
#
# Typical use is at the end of a live pipe, e.g. fed by "tail -F" on a
# server log.
#
# Why --line-buffered:
#   Without the --line-buffered option, grep buffers its output.  So when
#   used in a pipe, it may never actually emit anything out of the end of
#   the pipe until the data stream ends.  That's fine for batch processes,
#   and it's more efficient.  But it's a problem when feeding in live data
#   like tail -F and expecting to see output immediately, especially when
#   used with other pipe elements that buffer their output (grep, sed,
#   others?).  Until all of the buffers of the entire pipe fill up, you get
#   no output.  And if you kill the process with Ctrl-C (since tail -F
#   never ends), any buffered output is lost.
#
# Why a single grep with many -e options:
#   With -v, grep prints a line only if it matches none of the given
#   patterns.  That selects exactly the lines that a chain of one
#   "grep -v" per bot would, but with one process and one output buffer
#   instead of one per pattern.
#
# Why -F:
#   Every pattern below is meant as a literal substring.  -F stops the "."
#   in names such as openai.com or MegaIndex.ru from acting as a regular
#   expression wildcard.
#
# Historical note on aliases:
#   No alias is used here.  Aliasing grep to itself in this script, as in
#       alias grep grep --line-buffered
#       alias grep "grep --line-buffered"
#       alias grep 'grep --line-buffered'
#   reported an "Alias loop." error on Linux but not macOS.  It works fine
#   in .cshrc for interactive use, but not in a script; the reason is
#   unknown.  The alternative would have been to use a csh variable, as:
#       set grep = "grep --line-buffered"
#       $grep -v Googlebot

grep --line-buffered -F -v \
    -e Googlebot \
    -e bingbot \
    -e MJ12bot \
    -e openai.com \
    -e gptbot \
    -e ClaudeBot \
    -e amazonbot \
    -e GoogleOther \
    -e SemrushBot \
    -e TikTokSpider \
    -e DotBot \
    -e DomainStatsBot \
    -e AhrefsBot \
    -e YandexBot \
    -e Bytespider \
    -e AspiegelBot \
    -e SeznamBot \
    -e AlphaBot \
    -e PetalBot \
    -e MegaIndex.ru \
    -e Applebot \
    -e MauiBot \
    -e LivelapBot \
    -e PaperLiBot \
    -e Adsbot \
    -e Cocolyzebot \
    -e BLEXBot \
    -e "The Knowledge AI" \
    -e ZoominfoBot \
    -e megagrabber.ru \
    -e SeekportBot \
    -e crawler_eb_germany \
    -e AwarioBot \
    -e dataforseo-bot \
    -e Centurybot \
    -e Dataprovider.com \
    -e serpstatbot

# To filter another bot, append " \" to the "-e serpstatbot" line above and
# add a new "    -e <substring>" line directly after it.  Matching is
# case-sensitive, and only the last -e line may lack the trailing backslash.