• alecsargent@lemmy.zip · 12 days ago

    If you are using Hugo, use this robots.txt template, which updates automatically on every build:

    {{- $url := "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/robots.txt" -}}
    {{- $resource := resources.GetRemote $url -}}
    {{- with try $resource -}}
      {{ with .Err }}
        {{ errorf "%s" . }}
      {{ else with .Value }}
        {{- .Content -}}
      {{ else }}
        {{ errorf "Unable to get remote resource %q" $url }}
      {{ end }}
    {{ end -}}
    
    Sitemap: {{ "sitemap.xml" | absURL }}
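
    For Hugo to pick this up, the template normally lives at layouts/robots.txt, and robots.txt generation has to be switched on in the site config. A minimal sketch, assuming a TOML hugo.toml:

    # tell Hugo to build robots.txt from layouts/robots.txt
    enableRobotsTXT = true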
    

    Optionally, if you want to lead rogue bots to poisoned pages:

    {{- $url := "https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/refs/heads/main/robots.txt" -}}
    {{- $resource := resources.GetRemote $url -}}
    {{- with try $resource -}}
      {{ with .Err }}
        {{ errorf "%s" . }}
      {{ else with .Value }}
        {{- printf "%s\n%s\n\n" "User-Agent: *" "Disallow: /train-me" }}
        {{- .Content -}}
      {{ else }}
        {{ errorf "Unable to get remote resource %q" $url }}
      {{ end }}
    {{ end -}}
    
    Sitemap: {{ "sitemap.xml" | absURL }}
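
    Well-behaved crawlers respect the extra rule and stay out of /train-me (the agents named in the fetched list are blocked outright), while rogue bots that ignore robots.txt follow links straight into the poisoned pages. The generated file from this version starts out like this, with the fetched block list appended below:

    User-Agent: *
    Disallow: /train-me

    # ...entries from ai.robots.txt follow...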
    

    Check out how to poison your pages for rogue bots in this article.

    The repo was deleted and the Internet Archive was excluded.

    I use Quixotic and a Python script to poison the pages, and I've included both in my site update script.
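
    Purely for illustration (this is not Quixotic and not my actual script): a minimal sketch of the kind of Python helper you could hang off a site update script. It trains a tiny word-level Markov chain on the already-built pages and writes gibberish HTML under /train-me, the path the robots.txt template above disallows. All paths and parameters here are assumptions (Python 3.9+, Hugo's default public/ output directory).

    #!/usr/bin/env python3
    """Sketch of a page poisoner: reads built HTML, trains a tiny Markov
    chain on it, and dumps junk pages under public/train-me/.
    Not Quixotic, not the original article's code -- just the idea."""

    import html
    import random
    import re
    from collections import defaultdict
    from pathlib import Path

    SITE_DIR = Path("public")           # assumed Hugo output directory
    POISON_DIR = SITE_DIR / "train-me"  # matches the Disallow rule above
    PAGES = 20                          # how many junk pages to generate
    WORDS_PER_PAGE = 400

    def corpus_words(site_dir: Path) -> list[str]:
        """Crudely strip tags from the built HTML and return a flat word list."""
        words: list[str] = []
        for page in site_dir.rglob("*.html"):
            text = re.sub(r"<[^>]+>", " ", page.read_text(errors="ignore"))
            words.extend(text.split())
        return words

    def build_chain(words: list[str]) -> dict[str, list[str]]:
        """Word-level Markov chain: each word maps to its observed successors."""
        chain: dict[str, list[str]] = defaultdict(list)
        for current, nxt in zip(words, words[1:]):
            chain[current].append(nxt)
        return chain

    def babble(chain: dict[str, list[str]], length: int) -> str:
        """Walk the chain to produce plausible-looking nonsense text."""
        word = random.choice(list(chain))
        out = [word]
        for _ in range(length - 1):
            successors = chain.get(word)
            word = random.choice(successors) if successors else random.choice(list(chain))
            out.append(word)
        return " ".join(out)

    def main() -> None:
        words = corpus_words(SITE_DIR)
        if len(words) < 2:
            raise SystemExit("no built pages found to train on")
        chain = build_chain(words)
        POISON_DIR.mkdir(parents=True, exist_ok=True)
        for i in range(PAGES):
            body = html.escape(babble(chain, WORDS_PER_PAGE))
            (POISON_DIR / f"page-{i}.html").write_text(
                f"<!doctype html><title>notes {i}</title><p>{body}</p>\n"
            )

    if __name__ == "__main__":
        main()

    You'd still need something crawlers can follow into /train-me (for example an inconspicuous link in the site footer) and a step in the update script that runs this after Hugo builds the site.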

    It's all cobbled together in amateur fashion from the deleted article, but it's honest work.