Jul-22-2020, 01:50 AM
How can I add an if/else (or case-style) branch so that the script writes the links only in the file format I choose, instead of always writing every format?
def _write_txt(links, path, label):
    """Write each link, stripped of surrounding whitespace, on its own line.

    `label` is unused; it is accepted so every writer shares one signature.
    """
    with open(path, "w", encoding="utf-8") as f:
        for link in links:
            print(link.strip(), file=f)


def _write_json(links, path, label):
    """Write the links as a JSON object of the form ``{label: [links...]}``."""
    # A context manager closes the file even if json.dump raises.
    with open(path, "w", encoding="utf-8") as f:
        json.dump({label: list(links)}, f, indent=6)


def _write_csv(links, path, label):
    """Write the links as a single-column CSV without header or index.

    `label` is unused; it is accepted so every writer shares one signature.
    """
    pd.DataFrame(list(links)).to_csv(path, index=False, header=False)


def _write_xml(links, path, label):
    """Write the links as ``<label><link>...</link>...</label>``."""
    root = xml.Element(label)
    for link in links:
        # A fresh <link> element is required for every URL: reusing a single
        # SubElement only overwrites its text and re-appends the same node,
        # so the file would contain the last link repeated over and over.
        xml.SubElement(root, "link").text = str(link)
    xml.ElementTree(root).write(path)


# Maps the value of --format to the function that produces that file type.
_LINK_WRITERS = {
    "txt": _write_txt,
    "json": _write_json,
    "csv": _write_csv,
    "xml": _write_xml,
}


def main():
    """Parse the command line, crawl the given URL and save the found links.

    Command-line arguments:
        url             The URL to extract links from.
        -m, --max-urls  Maximum number of URLs to crawl (default 30).
        -f, --format    Output format: txt, json, csv, xml or all
                        (default all, i.e. every format is written).

    For each selected format two files are written to the current directory:
    ``<domain>_internal_links.<ext>`` and ``<domain>_external_links.<ext>``,
    built from the module-level ``internal_links`` and ``external_links``
    collections (presumably filled by ``crawl()`` -- confirm against its
    definition).
    """
    parser = argparse.ArgumentParser(description="Link Extractor Tool with Python")
    parser.add_argument("url", help="The URL to extract links from.")
    parser.add_argument(
        "-m",
        "--max-urls",
        help="Number of max URLs to crawl, default is 30.",
        default=30,
        type=int,
    )
    parser.add_argument(
        "-f",
        "--format",
        help="Output file format, default is all.",
        choices=[*_LINK_WRITERS, "all"],
        default="all",
    )
    args = parser.parse_args()

    url = args.url
    max_urls = args.max_urls
    domain_name = urlparse(url).netloc

    res = requests.get(url)
    statuscode = res.status_code
    print("Status Code:", statuscode)
    if statuscode == 200:
        crawl(url, max_urls=max_urls)
    else:
        print("Failed to get a request response back.")

    print("Total Internal Links:", len(internal_links))
    print("Total External Links:", len(external_links))
    print("Total Links:", len(external_links) + len(internal_links))

    # "all" expands to every known writer; otherwise only the chosen one runs.
    formats = list(_LINK_WRITERS) if args.format == "all" else [args.format]
    link_groups = (
        ("internal_links", internal_links),
        ("external_links", external_links),
    )
    for fmt in formats:
        write = _LINK_WRITERS[fmt]
        for label, links in link_groups:
            write(links, f"{domain_name}_{label}.{fmt}", label)
# Run the link extractor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
