Oto, co mam do tej pory: Usuń wszystkie style, skrypty i tagi HTML ze strony HTML
from bs4 import BeautifulSoup
def cleanme(html):
    """Return the text content of *html* with scripts and styles removed.

    Args:
        html: HTML markup to clean, as accepted by ``BeautifulSoup``.

    Returns:
        The text produced by ``soup.get_text()`` after every ``<script>``
        and ``<style>`` element (including its contents) has been dropped
        from the parsed tree.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, "html.parser")
    # get_text() would otherwise return the JavaScript and CSS source as if
    # it were page text, so remove both kinds of element first.
    for tag in soup(["script", "style"]):
        tag.decompose()
    return soup.get_text()
# Sample document: a title, an inline stylesheet, an inline script and a body.
testhtml = (
    "<!DOCTYPE HTML>\n<head>\n"
    "<title>THIS IS AN EXAMPLE </title>"
    "<style>.call {font-family:Arial;}</style>"
    "<script>getit</script>"
    "<body>I need this text captured<h1>And this</h1></body>"
)
# Run the cleaner on the sample and show what is left.
cleaned = cleanme(testhtml)
print(cleaned)
Ten kod działa, jeśli chodzi o usunięcie skryptu.
Jaki jest Twój oczekiwany wynik?