/ PROGRAMMING

웹크롤링 (1)
urllib사용을 통한 크롤링

웹크롤링 관련 글

urllib사용을 통한 크롤링

  • url을 입력하여 작동하는 라이브러리로 통신을 통해 데이터를 주고받는 기능을 한다.
  • 데이터를 받아오거나 다운로드할 수 있다

1.urlretrieve

url로 표시된 네트워크 정보를 파일로 저장할 수 있는 기능 (이미지 , html)

(filename, headers) 튜플로 반환

ex file, header = req.urlretrieve(url, path)

import urllib.request as req

url 에 접근할 url주소를 담고, path에 저장할 경로와 파일명을 적으면 된다. 파일명만 적을 경우 현재 위치로 저장이 된다.

url = "https://search.pstatic.net/common/?src=http%3A%2F%2Fblogfiles.naver.net%2FMjAyMTA4MjBfMTQx%2FMDAxNjI5NDIxNjQ5NzM5.D1F-l6COowiUicFVRlpfQeSJRtkR4f9lkbVZgwJm6r4g.lBjYtG_wiubtJdiCYg8reMDwyC3wkFhPy5Ou0VXWRIQg.JPEG.hyun_0930%2F1629420539963.jpg&type=sc960_832"

path = "test1.jpg"
url
'https://search.pstatic.net/common/?src=http%3A%2F%2Fblogfiles.naver.net%2FMjAyMTA4MjBfMTQx%2FMDAxNjI5NDIxNjQ5NzM5.D1F-l6COowiUicFVRlpfQeSJRtkR4f9lkbVZgwJm6r4g.lBjYtG_wiubtJdiCYg8reMDwyC3wkFhPy5Ou0VXWRIQg.JPEG.hyun_0930%2F1629420539963.jpg&type=sc960_832'
path
'test1.jpg'

현재위치 조회

import os
# Look up the current working directory; a save path given as a bare file name
# (like "test1.jpg") is written relative to this location.
os.getcwd()
# Download the resource at `url` into `path`. urlretrieve returns a
# (filename, headers) tuple: the saved file name and the response headers.
file, header = req.urlretrieve(url,path)
print(file)
test1.jpg
print(header)
accept-ranges: bytes
cache-control: max-age=2592000
content-length: 37532
content-type: image/jpeg
expires: Sun, 26 Sep 2021 23:53:24 GMT
last-modified: Fri, 27 Aug 2021 23:53:24 GMT
p3p: CP="ALL CURa ADMa DEVa TAIa OUR BUS IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC OTC"
date: Fri, 27 Aug 2021 23:53:24 GMT
age: 665797
server: Testa/5.1.1
strict-transport-security: max-age=31536000
connection: close

html저장

# Download the Naver front page and store it as an HTML file.
url2 = "https://www.naver.com/"
path2 = "naver.html"
file2, header2 = req.urlretrieve(url2, path2)

# Print the result of the earlier image download (file/header) followed by
# this HTML download (file2/header2), both in the same layout.
separator = "----------------------------------------------------"
for saved_name, saved_header in ((file, header), (file2, header2)):
    print(separator)
    print(f"file name: {saved_name}")
    print(separator)
    print("Header Info :")
    print(saved_header)
----------------------------------------------------
file name: test1.jpg
----------------------------------------------------
Header Info :
accept-ranges: bytes
cache-control: max-age=2592000
content-length: 37532
content-type: image/jpeg
expires: Sun, 26 Sep 2021 23:53:24 GMT
last-modified: Fri, 27 Aug 2021 23:53:24 GMT
p3p: CP="ALL CURa ADMa DEVa TAIa OUR BUS IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC OTC"
date: Fri, 27 Aug 2021 23:53:24 GMT
age: 665797
server: Testa/5.1.1
strict-transport-security: max-age=31536000
connection: close


----------------------------------------------------
file name: naver.html
----------------------------------------------------
Header Info :
Server: NWS
Date: Sat, 04 Sep 2021 16:50:00 GMT
Content-Type: text/html; charset=UTF-8
Transfer-Encoding: chunked
Connection: close
Set-Cookie: PM_CK_loc=4d397054570413bd11c4b9094901203f9fef0e3df1bd78a55cef2ac0fd1d9e5e; Expires=Sun, 05 Sep 2021 16:50:00 GMT; Path=/; HttpOnly
Cache-Control: no-cache, no-store, must-revalidate
Pragma: no-cache
P3P: CP="CAO DSP CURa ADMa TAIa PSAa OUR LAW STP PHY ONL UNI PUR FIN COM NAV INT DEM STA PRE"
X-Frame-Options: DENY
X-XSS-Protection: 1; mode=block
Strict-Transport-Security: max-age=63072000; includeSubdomains
Referrer-Policy: unsafe-url

2.urlerror

크롤링에서 발생할 수 있는 에러처리

에러처리를 통해서 어떤 에러가 발생하였는지 파악하고 코드를 수정

URLError: 요청한 곳의 서버가 없거나 네트워크 연결이 없는 상황

HTTPError: HTTP응답에 있는 status에 따라서 상태를 반환, status코드에 따라서 에러 유형이 다름

주의사항: HTTPError는 URLError의 하위 클래스이므로 except URLError가 HTTPError까지 잡는다. 따라서 HTTPError 처리를 URLError 처리보다 먼저 작성해야 한다.

1번 예제와 다르게 list에 넣고 for문을 통한 이미지 다운로드를 실시

from urllib.error import URLError, HTTPError
url_list = ['https://search.pstatic.net/common/?src=http%3A%2F%2Fblogfiles.naver.net%2FMjAyMTA4MjBfMTQx%2FMDAxNjI5NDIxNjQ5NzM5.D1F-l6COowiUicFVRlpfQeSJRtkR4f9lkbVZgwJm6r4g.lBjYtG_wiubtJdiCYg8reMDwyC3wkFhPy5Ou0VXWRIQg.JPEG.hyun_0930%2F1629420539963.jpg&type=sc960_832',
            'https://search.pstatic.net/common/?src=http%3A%2F%2Fblogfiles.naver.net%2FMjAyMDA5MTNfNjMg%2FMDAxNjAwMDAxNjM1NzQ2.TuGLdOsJ8vLFnN589WEiiA5j5XrsWRA7lJUJicpozJwg.694y_QRQKQwqd7QR41nweA3T4vYnAGT4OqVuxWvJdrYg.JPEG.ecoanimal%2F51d63faf6312a3bc4873ee24d98cdfed.jpg&type=a340']
name_list = ['nuguli1.jpg', 'nuguli2.jpg']
# Download every URL in url_list and save it under the matching name in name_list.
# Failures are reported per URL and do not stop the remaining downloads.
separator = '----------------------------------------------------------------------------------------------------------------'
for i, url in enumerate(url_list):
    try:
        # Open the URL. Using the response as a context manager guarantees the
        # underlying connection is closed, even if reading the body fails.
        with req.urlopen(url) as response:
            # Raw bytes of the response body
            contents = response.read()

            print(separator)

            # Intermediate status output: target file name, headers, status code
            print(f'file_name : {name_list[i]}')
            print('<Header Info>')
            print(f'{response.info()}')
            print(f'Status Code : {response.getcode()}')
            print()
            print(separator)

        # Write the downloaded bytes to disk
        with open(name_list[i], 'wb') as c:
            c.write(contents)

    # HTTPError must be handled before URLError: it is a subclass of URLError,
    # so the URLError clause would otherwise catch it first.
    except HTTPError as e:  # server answered with an error status
        print("다운로드 실패.")
        print('HTTPError Code : ', e.code)

    except URLError as e:  # server unreachable or no network connection
        print("Download failed.")
        print('URL Error Reason : ', e.reason)

    # Success: runs only when no exception was raised
    else:
        print()
        print(f'{name_list[i]}이미지 다운 완료.')
----------------------------------------------------------------------------------------------------------------
file_name : nuguli1.jpg
<Header Info>
accept-ranges: bytes
cache-control: max-age=2592000
content-length: 37532
content-type: image/jpeg
expires: Sun, 26 Sep 2021 23:53:24 GMT
last-modified: Fri, 27 Aug 2021 23:53:24 GMT
p3p: CP="ALL CURa ADMa DEVa TAIa OUR BUS IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC OTC"
date: Fri, 27 Aug 2021 23:53:24 GMT
age: 665797
server: Testa/5.1.1
strict-transport-security: max-age=31536000
connection: close


Status Code : 200

----------------------------------------------------------------------------------------------------------------

nuguli1.jpg이미지 다운 완료.
----------------------------------------------------------------------------------------------------------------
file_name : nuguli2.jpg
<Header Info>
accept-ranges: bytes
cache-control: max-age=2592000
content-length: 50098
content-type: image/jpeg
expires: Fri, 24 Sep 2021 14:11:17 GMT
last-modified: Wed, 25 Aug 2021 14:11:17 GMT
p3p: CP="ALL CURa ADMa DEVa TAIa OUR BUS IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC OTC"
date: Wed, 25 Aug 2021 14:11:17 GMT
age: 873524
server: Testa/5.1.1
strict-transport-security: max-age=31536000
connection: close


Status Code : 200

----------------------------------------------------------------------------------------------------------------

nuguli2.jpg이미지 다운 완료.

3.urlopen/ urlparse

import urllib.request as req
from urllib.parse import urlparse
# Open the page and inspect the response object returned by urlopen.
url = "https://www.seoultech.ac.kr/index.jsp"
ele = req.urlopen(url)

# Each line is followed by one blank line (the trailing "\n" in the string).
print(f'type : {type(ele)}\n')
print(f"geturl : {ele.geturl()}\n")
print(f"status : {ele.status}\n")
print(f"headers : {ele.getheaders()}\n")
print()

# urlparse splits a URL into components; .query is the part after the "?".
parsed = urlparse('https://www.smu.ac.kr/ko/index.do?param=test')
print(f'parse : {parsed.query}\n')
type : <class 'http.client.HTTPResponse'>

geturl : https://www.seoultech.ac.kr/index.jsp

status : 200

headers : [('Date', 'Sat, 04 Sep 2021 16:50:01 GMT'), ('Content-Type', 'text/html; charset=UTF-8'), ('Set-Cookie', 'JSESSIONID=5BabR1bj8eGHwllZbNNO9LXiYD2V1HlqI1KZJiR7EG01ZnEBpYTlBwaFVwCi61YT.web1_servlet_www;Path=/;HttpOnly'), ('X-Cache', 'MISS from cf4.seoultech.ac.kr'), ('X-Cache-Lookup', 'HIT from cf4.seoultech.ac.kr:3128'), ('Transfer-Encoding', 'chunked'), ('Via', ''), ('Connection', 'close')]


parse : param=test
print(ele.info())
Date: Sat, 04 Sep 2021 16:50:01 GMT
Content-Type: text/html; charset=UTF-8
Set-Cookie: JSESSIONID=5BabR1bj8eGHwllZbNNO9LXiYD2V1HlqI1KZJiR7EG01ZnEBpYTlBwaFVwCi61YT.web1_servlet_www;Path=/;HttpOnly
X-Cache: MISS from cf4.seoultech.ac.kr
X-Cache-Lookup: HIT from cf4.seoultech.ac.kr:3128
Transfer-Encoding: chunked
Via: 
Connection: close

headers에 데이터 추가하기

# pip install fake_useragent
from fake_useragent import UserAgent
# fake_useragent demo: build the provider once and reuse it for every lookup.
ua = UserAgent()

# A randomly chosen User-Agent string
print(ua.random)

# Internet Explorer (attribute aliases and item-style lookup)
print(ua.ie)
print(ua.msie)
print(ua['Internet Explorer'])

# Opera
print(ua.opera)

# Chrome (attribute aliases and item-style lookup)
print(ua.chrome)
print(ua.google)
print(ua['google chrome'])

# Firefox
print(ua.firefox)
print(ua.ff)

# Safari
print(ua.safari)
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.1; SV1; .NET CLR 2.8.52393; WOW64; en-US)
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.2; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; FunWebProducts)
Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11
Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36
Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0
Mozilla/5.0 (Windows; U; Windows NT 6.0; de-DE) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4

야후 파이낸스 데이터 받아오기

import json
import urllib.request as req
from fake_useragent import UserAgent

요청 URL을 찾는 데 시간이 조금 걸린다. 이미지는 이미지 주소를 그대로 사용하면 되지만, 실시간으로 변하는 데이터는 페이지 주소만으로는 받아올 수 없다. 이런 경우 크롬 개발자도구의 Network 항목에서 Request URL을 찾아 사용한다.

import json
import urllib.request as req
from fake_useragent import UserAgent


# Fake header information: fake_useragent generates a browser-like User-Agent
# so the request does not go out with urllib's default one.
ua = UserAgent()

# Request headers sent along with the quote request
headers = {
    'User-Agent': ua.ie,
    'referer': 'https://finance.yahoo.com/'
}

# Yahoo Finance quote request URL; the query string lists the symbols to fetch
# and the fields to return for each of them.
# NOTE(review): the `crumb` parameter looks session-specific — confirm it is
# still valid before reusing this URL.
url = "https://query1.finance.yahoo.com/v7/finance/quote?formatted=true&crumb=5b5ru0zoR.q&lang=en-US&region=US&symbols=ADA-USD%2CBTC-USD%2CDOGE-USD%2CETH-USD%2CZM&fields=symbol%2CshortName%2ClongName%2CregularMarketPrice%2CregularMarketChange%2CregularMarketChangePercent&corsDomain=finance.yahoo.com"


# Send the request with the custom headers and read the body as UTF-8 text.
# The context manager closes the HTTP response once the body has been read,
# instead of leaving the connection open.
with req.urlopen(req.Request(url, headers=headers)) as response:
    res = response.read().decode('utf-8')

# Convert the response text (str) into Python objects and keep the result
_json_data = json.loads(res)


print(_json_data, '\n')

{'quoteResponse': {'result': [{'fullExchangeName': 'CCC', 'exchangeTimezoneName': 'Europe/London', 'symbol': 'ADA-USD', 'regularMarketChange': {'raw': -0.12517643, 'fmt': '-0.13'}, 'gmtOffSetMilliseconds': 3600000, 'firstTradeDateMilliseconds': 1506812400000, 'exchangeDataDelayedBy': 0, 'language': 'en-US', 'regularMarketTime': {'raw': 1630774030, 'fmt': '5:47PM BST'}, 'regularMarketChangePercent': {'raw': -4.1827593, 'fmt': '-4.18%'}, 'exchangeTimezoneShortName': 'BST', 'quoteType': 'CRYPTOCURRENCY', 'marketState': 'REGULAR', 'regularMarketPrice': {'raw': 2.8674967, 'fmt': '2.87'}, 'market': 'ccc_market', 'quoteSourceName': 'CoinMarketCap', 'tradeable': False, 'exchange': 'CCC', 'sourceInterval': 15, 'shortName': 'Cardano USD', 'region': 'US', 'regularMarketPreviousClose': {'raw': 2.9606647, 'fmt': '2.96'}, 'triggerable': True}, {'fullExchangeName': 'CCC', 'exchangeTimezoneName': 'Europe/London', 'symbol': 'BTC-USD', 'regularMarketChange': {'raw': -611.96094, 'fmt': '-611.96'}, 'gmtOffSetMilliseconds': 3600000, 'firstTradeDateMilliseconds': 1410908400000, 'exchangeDataDelayedBy': 0, 'language': 'en-US', 'regularMarketTime': {'raw': 1630774082, 'fmt': '5:48PM BST'}, 'regularMarketChangePercent': {'raw': -1.2106228, 'fmt': '-1.21%'}, 'exchangeTimezoneShortName': 'BST', 'quoteType': 'CRYPTOCURRENCY', 'marketState': 'REGULAR', 'regularMarketPrice': {'raw': 49937.133, 'fmt': '49,937.13'}, 'market': 'ccc_market', 'quoteSourceName': 'CoinMarketCap', 'tradeable': False, 'exchange': 'CCC', 'sourceInterval': 15, 'shortName': 'Bitcoin USD', 'region': 'US', 'regularMarketPreviousClose': {'raw': 49922.355, 'fmt': '49,922.36'}, 'triggerable': True}, {'fullExchangeName': 'CCC', 'exchangeTimezoneName': 'Europe/London', 'symbol': 'DOGE-USD', 'regularMarketChange': {'raw': 0.0015876293, 'fmt': '0.00'}, 'gmtOffSetMilliseconds': 3600000, 'firstTradeDateMilliseconds': 1410908400000, 'exchangeDataDelayedBy': 0, 'language': 'en-US', 'regularMarketTime': {'raw': 1630774083, 'fmt': 
'5:48PM BST'}, 'regularMarketChangePercent': {'raw': 0.52755743, 'fmt': '0.53%'}, 'exchangeTimezoneShortName': 'BST', 'quoteType': 'CRYPTOCURRENCY', 'marketState': 'REGULAR', 'regularMarketPrice': {'raw': 0.30252412, 'fmt': '0.30'}, 'market': 'ccc_market', 'quoteSourceName': 'CoinMarketCap', 'tradeable': False, 'exchange': 'CCC', 'sourceInterval': 15, 'shortName': 'Dogecoin USD', 'region': 'US', 'regularMarketPreviousClose': {'raw': 0.29575953, 'fmt': '0.30'}, 'triggerable': True}, {'fullExchangeName': 'CCC', 'exchangeTimezoneName': 'Europe/London', 'symbol': 'ETH-USD', 'regularMarketChange': {'raw': -63.86255, 'fmt': '-63.86'}, 'gmtOffSetMilliseconds': 3600000, 'firstTradeDateMilliseconds': 1438902000000, 'exchangeDataDelayedBy': 0, 'language': 'en-US', 'regularMarketTime': {'raw': 1630774082, 'fmt': '5:48PM BST'}, 'regularMarketChangePercent': {'raw': -1.608492, 'fmt': '-1.61%'}, 'exchangeTimezoneShortName': 'BST', 'quoteType': 'CRYPTOCURRENCY', 'marketState': 'REGULAR', 'regularMarketPrice': {'raw': 3906.476, 'fmt': '3,906.48'}, 'market': 'ccc_market', 'quoteSourceName': 'CoinMarketCap', 'tradeable': False, 'exchange': 'CCC', 'sourceInterval': 15, 'shortName': 'Ethereum USD', 'region': 'US', 'regularMarketPreviousClose': {'raw': 3933.8274, 'fmt': '3,933.83'}, 'triggerable': True}, {'fullExchangeName': 'NasdaqGS', 'symbol': 'ZM', 'gmtOffSetMilliseconds': -14400000, 'language': 'en-US', 'regularMarketTime': {'raw': 1630699203, 'fmt': '4:00PM EDT'}, 'regularMarketChangePercent': {'raw': 1.084419, 'fmt': '1.08%'}, 'quoteType': 'EQUITY', 'tradeable': False, 'regularMarketPreviousClose': {'raw': 295.09, 'fmt': '295.09'}, 'exchangeTimezoneName': 'America/New_York', 'regularMarketChange': {'raw': 3.2000122, 'fmt': '3.20'}, 'firstTradeDateMilliseconds': 1555594200000, 'exchangeDataDelayedBy': 0, 'exchangeTimezoneShortName': 'EDT', 'marketState': 'CLOSED', 'regularMarketPrice': {'raw': 298.29, 'fmt': '298.29'}, 'market': 'us_market', 'quoteSourceName': 'Delayed Quote', 
'priceHint': 2, 'exchange': 'NMS', 'sourceInterval': 15, 'shortName': 'Zoom Video Communications, Inc.', 'region': 'US', 'triggerable': True, 'longName': 'Zoom Video Communications, Inc.'}], 'error': None}} 
from pprint import pprint

# Drill down to the list of per-symbol quote records and pretty-print it.
quote_response = _json_data['quoteResponse']
data_list = quote_response['result']
pprint(data_list)
[{'exchange': 'CCC',
  'exchangeDataDelayedBy': 0,
  'exchangeTimezoneName': 'Europe/London',
  'exchangeTimezoneShortName': 'BST',
  'firstTradeDateMilliseconds': 1506812400000,
  'fullExchangeName': 'CCC',
  'gmtOffSetMilliseconds': 3600000,
  'language': 'en-US',
  'market': 'ccc_market',
  'marketState': 'REGULAR',
  'quoteSourceName': 'CoinMarketCap',
  'quoteType': 'CRYPTOCURRENCY',
  'region': 'US',
  'regularMarketChange': {'fmt': '-0.13', 'raw': -0.12517643},
  'regularMarketChangePercent': {'fmt': '-4.18%', 'raw': -4.1827593},
  'regularMarketPreviousClose': {'fmt': '2.96', 'raw': 2.9606647},
  'regularMarketPrice': {'fmt': '2.87', 'raw': 2.8674967},
  'regularMarketTime': {'fmt': '5:47PM BST', 'raw': 1630774030},
  'shortName': 'Cardano USD',
  'sourceInterval': 15,
  'symbol': 'ADA-USD',
  'tradeable': False,
  'triggerable': True},
 {'exchange': 'CCC',
  'exchangeDataDelayedBy': 0,
  'exchangeTimezoneName': 'Europe/London',
  'exchangeTimezoneShortName': 'BST',
  'firstTradeDateMilliseconds': 1410908400000,
  'fullExchangeName': 'CCC',
  'gmtOffSetMilliseconds': 3600000,
  'language': 'en-US',
  'market': 'ccc_market',
  'marketState': 'REGULAR',
  'quoteSourceName': 'CoinMarketCap',
  'quoteType': 'CRYPTOCURRENCY',
  'region': 'US',
  'regularMarketChange': {'fmt': '-611.96', 'raw': -611.96094},
  'regularMarketChangePercent': {'fmt': '-1.21%', 'raw': -1.2106228},
  'regularMarketPreviousClose': {'fmt': '49,922.36', 'raw': 49922.355},
  'regularMarketPrice': {'fmt': '49,937.13', 'raw': 49937.133},
  'regularMarketTime': {'fmt': '5:48PM BST', 'raw': 1630774082},
  'shortName': 'Bitcoin USD',
  'sourceInterval': 15,
  'symbol': 'BTC-USD',
  'tradeable': False,
  'triggerable': True},
 {'exchange': 'CCC',
  'exchangeDataDelayedBy': 0,
  'exchangeTimezoneName': 'Europe/London',
  'exchangeTimezoneShortName': 'BST',
  'firstTradeDateMilliseconds': 1410908400000,
  'fullExchangeName': 'CCC',
  'gmtOffSetMilliseconds': 3600000,
  'language': 'en-US',
  'market': 'ccc_market',
  'marketState': 'REGULAR',
  'quoteSourceName': 'CoinMarketCap',
  'quoteType': 'CRYPTOCURRENCY',
  'region': 'US',
  'regularMarketChange': {'fmt': '0.00', 'raw': 0.0015876293},
  'regularMarketChangePercent': {'fmt': '0.53%', 'raw': 0.52755743},
  'regularMarketPreviousClose': {'fmt': '0.30', 'raw': 0.29575953},
  'regularMarketPrice': {'fmt': '0.30', 'raw': 0.30252412},
  'regularMarketTime': {'fmt': '5:48PM BST', 'raw': 1630774083},
  'shortName': 'Dogecoin USD',
  'sourceInterval': 15,
  'symbol': 'DOGE-USD',
  'tradeable': False,
  'triggerable': True},
 {'exchange': 'CCC',
  'exchangeDataDelayedBy': 0,
  'exchangeTimezoneName': 'Europe/London',
  'exchangeTimezoneShortName': 'BST',
  'firstTradeDateMilliseconds': 1438902000000,
  'fullExchangeName': 'CCC',
  'gmtOffSetMilliseconds': 3600000,
  'language': 'en-US',
  'market': 'ccc_market',
  'marketState': 'REGULAR',
  'quoteSourceName': 'CoinMarketCap',
  'quoteType': 'CRYPTOCURRENCY',
  'region': 'US',
  'regularMarketChange': {'fmt': '-63.86', 'raw': -63.86255},
  'regularMarketChangePercent': {'fmt': '-1.61%', 'raw': -1.608492},
  'regularMarketPreviousClose': {'fmt': '3,933.83', 'raw': 3933.8274},
  'regularMarketPrice': {'fmt': '3,906.48', 'raw': 3906.476},
  'regularMarketTime': {'fmt': '5:48PM BST', 'raw': 1630774082},
  'shortName': 'Ethereum USD',
  'sourceInterval': 15,
  'symbol': 'ETH-USD',
  'tradeable': False,
  'triggerable': True},
 {'exchange': 'NMS',
  'exchangeDataDelayedBy': 0,
  'exchangeTimezoneName': 'America/New_York',
  'exchangeTimezoneShortName': 'EDT',
  'firstTradeDateMilliseconds': 1555594200000,
  'fullExchangeName': 'NasdaqGS',
  'gmtOffSetMilliseconds': -14400000,
  'language': 'en-US',
  'longName': 'Zoom Video Communications, Inc.',
  'market': 'us_market',
  'marketState': 'CLOSED',
  'priceHint': 2,
  'quoteSourceName': 'Delayed Quote',
  'quoteType': 'EQUITY',
  'region': 'US',
  'regularMarketChange': {'fmt': '3.20', 'raw': 3.2000122},
  'regularMarketChangePercent': {'fmt': '1.08%', 'raw': 1.084419},
  'regularMarketPreviousClose': {'fmt': '295.09', 'raw': 295.09},
  'regularMarketPrice': {'fmt': '298.29', 'raw': 298.29},
  'regularMarketTime': {'fmt': '4:00PM EDT', 'raw': 1630699203},
  'shortName': 'Zoom Video Communications, Inc.',
  'sourceInterval': 15,
  'symbol': 'ZM',
  'tradeable': False,
  'triggerable': True}]
import pandas as pd

# Keep only the symbol and the display-formatted ('fmt') price/change values
# of each quote record.
result_list = [
    {
        'symbol': quote['symbol'],
        'Last_price': quote['regularMarketPrice']['fmt'],
        'Change': quote['regularMarketChange']['fmt'],
        '%Change': quote['regularMarketChangePercent']['fmt'],
    }
    for quote in data_list
]

# One row per quote; the columns follow the dict key order above.
df = pd.DataFrame(result_list)
df
symbol Last_price Change %Change
0 ADA-USD 2.87 -0.13 -4.18%
1 BTC-USD 49,937.13 -611.96 -1.21%
2 DOGE-USD 0.30 0.00 0.53%
3 ETH-USD 3,906.48 -63.86 -1.61%
4 ZM 298.29 3.20 1.08%