#pip install googletrans==4.0.0-rc1
#python -m pip install requests beautifulsoup4
import re
import sys

import requests
from bs4 import BeautifulSoup
from googletrans import Translator
# Fetch one blog post, translate the text of its <p> elements to Japanese
# with googletrans, and replace each paragraph's content in the parsed tree.
translator = Translator()
url = "https://blog.christophetd.fr/bypassing-cloudflare-using-internet-wide-scan-data/"
# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from translating an HTTP error page.
res = requests.get(url, timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")
ptag_list_0 = soup.find_all('p')
for index, lines in enumerate(ptag_list_0):
    # Only paragraphs whose text starts with a word character are translated;
    # empty paragraphs and ones starting with whitespace/punctuation are skipped.
    if re.match(r'\w', lines.text) is None:
        continue
    # Drops every "." that is followed by whitespace (together with that
    # whitespace) before sending the text to the translator.
    # NOTE(review): presumably a workaround for the translator's handling of
    # multi-sentence input -- confirm it is still wanted.
    texts = re.sub(r'\.\s', "", lines.text)
    try:
        translated = translator.translate(texts, dest='ja')
    except IndexError as exc:
        # Best effort: leave this paragraph untranslated, but say so instead
        # of silently ignoring the failure.
        print(f"{index}: translation skipped ({exc!r})", file=sys.stderr)
    else:
        print(index, translated.text)
        # Assigning .string replaces all children of the <p> (including any
        # inline tags) with the translated text.
        lines.string = translated.text
    # NOTE(review): indentation was lost in the original; the blank separator
    # line is assumed to be printed once per processed paragraph.
    print()
#--------------------------------------------------------
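# Optional check (disabled): re-fetch the page and print each original
# paragraph next to its translated counterpart.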
#res2 = requests.get(url)
#soup2 = BeautifulSoup(res2.text, "html.parser")
#ptag_list_2 = soup2.find_all('p')
#
#
#for index,lines in enumerate(ptag_list_2):
# print(lines.text)
# print(ptag_list_0[index].text)
#--------------------------------------------------------
# Declare the document encoding as UTF-8 so the translated (Japanese) text
# is decoded correctly when the saved file is opened in a browser.
head = soup.head
if head is None:
    # html.parser does not invent a <head>; create one so the declaration
    # has somewhere to live.
    head = soup.new_tag('head')
    container = soup.html if soup.html is not None else soup
    container.insert(0, head)
# Reuse an existing <meta charset=...> instead of adding a duplicate.
metatag = head.find('meta', charset=True)
if metatag is None:
    metatag = soup.new_tag('meta')
    # Insert first: the charset declaration should appear at the very start
    # of <head>, before any non-ASCII content.
    head.insert(0, metatag)
metatag.attrs['charset'] = "utf-8"
#--------------------------------------------------------
import os

# Last path component of the URL. The trailing "/" is stripped first because
# os.path.basename() returns "" for a path that ends with a slash.
# NOTE(review): this name is not used for the output path below, which
# stays 'sample.html'.
filename = os.path.basename(url.rstrip('/'))
# prettify("utf-8") returns UTF-8 encoded bytes, hence binary mode.
with open('sample.html', "wb") as f_output:
    f_output.write(soup.prettify("utf-8"))
# https://rentry.co/a4a8v