rururu
Profilo di
Nome | rururu |
---|---|
Indirizzo email | n/a |
Messaggi | 1 |
-
- 2018-05-22 22:56:33
- IndexError: list index out of range
- Forum >> Programmazione Python >> Web e Reti
- ciao, sono nuovo!
spero di avere scelto la giusta sezione, in caso contrario mi scuso e spero di poter rimediare.
in ogni caso, non sono esperto, ma agli inizi con python!
sto scrivendo un codice per fare scraping da un sito web (morningstar)
ma il codice, che dovrebbe scaricare informazioni, da una lista di oltre 500 siti, si blocca: avvolte a 80 siti, avvolte a 120, avvolte a 200...senza una logica (almeno secondo me).
il codice è il seguente...spero sappiate aiutarmi
from bs4 import BeautifulSoup
import csv
import requests
import lxml
urls = ('http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000ZL89','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000ZL85','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000ZL84','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000ZL88','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000ZL87','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000ZL86','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000010BKQ','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000010BKR','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0000108YH','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0000109KJ','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0000108YF','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WLXZ','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000000K07','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WLY0','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000003ST3','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WLXY','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000000739','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000V3US','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000OXFQ','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000Y8W7','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000Y8W6','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000UDVR','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000UDVS','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04MRL','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000M7OK','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000PNT6','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000PNT7','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JUWT','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XC9','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XC2','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04MRJ','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000M7OL','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000PNT8','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000PNT9','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000OKX6','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=FOGBR05KDV','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JUWU','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WPX6','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000YGD0','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000PHLV','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000ZZZC','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000005MTC','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000LWN8','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000005MTG','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000LWN3','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000LWN7','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000005MTH','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04M0Q','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000000GE1','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR052UE','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000MDF8','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XBE','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000MDFA','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XBI','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000MDF7','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XBK','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=FOGBR05K8F','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JNB3','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WXK5','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JNB4','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=FOGBR05K8G','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=FOGBR05K8K','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=FOGBR05K8M','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JNI3','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=FOGBR05K8O','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JNI4','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WXK6','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04TKN','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04UF7','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04TKL','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04TJT','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WXKD','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000POKP','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XKM','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04TK1','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XJ0','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000POKQ','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=FOGBR05KST','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WXZM','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WQOM','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04MRU','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JOJY','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000WY0D','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F000000LLG','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F00000JOJZ','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR05XCE','http://www.morningstar.it/it/funds/snapshot/snapshot.aspx?id=F0GBR04MRX')
for url in urls:
r = requests.get(url)
soup = BeautifulSoup(r.content, "lxml")
nav =soup.find_all('td', class_= 'line text')0.get_text()
VarUltimaQ=soup.find_all('td', class_= 'line text')1.get_text()
CategoriaM =soup.find_all('td', class_='line value text')0.get_text()
CategoriaA =soup.find_all('td', class_= 'line text')2.get_text()
Isin =soup.find_all('td', class_= 'line text')3.get_text()
FundSize =soup.find_all('td', class_= 'line text')4.get_text()
ShareClassSize =soup.find_all('td', class_= 'line text')5.get_text()
Entrata =soup.find_all('td', class_= 'line text')6.get_text()
Spesecorrenti =soup.find_all('td', class_= 'line text')7.get_text()
print (nav.strip(),"|",VarUltimaQ.strip(),"|",CategoriaM.strip(),"|",CategoriaA.strip(),"|", Isin.strip(),"|",FundSize.strip(),"|",ShareClassSize.strip(),"|",Entrata.strip(), "|",Spesecorrenti.strip())
with open ('inserimentoV1.csv','a') as file:
writer=csv.writer(file)
writer.writerow([nav.strip(),"|",VarUltimaQ.strip(),"|",CategoriaM.strip(),"|",CategoriaA.strip(),"|", Isin.strip(),"|", FundSize.strip(),"|", ShareClassSize.strip(),"|", Entrata.strip(),"|", Spesecorrenti.strip(),"|"])