From 8c35edc38e471ce35646ac812fddef2919b8ec74 Mon Sep 17 00:00:00 2001 From: turnipsoup Date: Sat, 4 Feb 2023 07:56:19 -0800 Subject: [PATCH] Unique endpoints only, returns sitemaps to user as well --- sleuthing/check_robots.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sleuthing/check_robots.py b/sleuthing/check_robots.py index 48f51b7..ccdada4 100644 --- a/sleuthing/check_robots.py +++ b/sleuthing/check_robots.py @@ -24,7 +24,8 @@ def check_endpoint_list(endpoint_list): Checks all endpoints in the passed list """ for endpoint in endpoint_list: - check_endpoint(endpoint) + if endpoint != "": + check_endpoint(endpoint) def clean_list(endpoint_list): @@ -53,7 +54,7 @@ if __name__ == "__main__": robots_txt = r.content.decode() # Sep endpoints - endpoints = [x.strip() for x in robots_txt.split("\n")] + endpoints = {x.strip() for x in robots_txt.split("\n")} # Get sitemaps sitemaps = [x for x in endpoints if "sitemap" in x.lower()] @@ -75,6 +76,12 @@ if __name__ == "__main__": print("Endpoint -> Status Code") print("--------------------") + print() + print("Sitemaps") + print("-----") + for cs in clean_sitemaps: + print(cs) + print() print("Explicitly Allowed Somewhere") print("-----") -- 2.45.2