~jheckt/GoofyStuff

8c35edc38e471ce35646ac812fddef2919b8ec74 — turnipsoup 1 year, 3 months ago 29719e6
Unique endpoints only, returns sitemaps to user as well
1 file changed, 9 insertions(+), 2 deletions(-)

M sleuthing/check_robots.py
M sleuthing/check_robots.py => sleuthing/check_robots.py +9 -2
@@ -24,7 +24,8 @@ def check_endpoint_list(endpoint_list):
    Checks all endpoints in the passed list
    """
    for endpoint in endpoint_list:
-        check_endpoint(endpoint)
+        if endpoint != "":
+            check_endpoint(endpoint)


def clean_list(endpoint_list):


@@ -53,7 +54,7 @@ if __name__ == "__main__":
    robots_txt = r.content.decode()

    # Sep endpoints
-    endpoints = [x.strip() for x in robots_txt.split("\n")]
+    endpoints = {x.strip() for x in robots_txt.split("\n")}

    # Get sitemaps
    sitemaps = [x for x in endpoints if "sitemap" in x.lower()]


@@ -76,6 +77,12 @@ if __name__ == "__main__":
    print("--------------------")

    print()
    print("Sitemaps")
    print("-----")
    for cs in clean_sitemaps:
        print(cs)

    print()
    print("Explicitly Allowed Somewhere")
    print("-----")
    check_endpoint_list(clean_allows)