Python requests 라이브러리와 영구 세션(persistent session)
저는 requests 모듈(Python 2.5, 버전 0.10.0)을 사용하고 있습니다. 웹 사이트의 로그인 양식에 데이터를 제출하고 세션 키를 받아오는 방법은 알아냈지만, 이 세션 키를 후속 요청에 사용하는 분명한 방법이 보이지 않습니다. 아래 코드의 줄임표 부분을 채워 주시거나 다른 접근 방식을 제안해 주실 수 있나요?
>>> import requests
>>> login_data = {'formPosted':'1', 'login_email':'me@example.com', 'password':'pw'}
>>> r = requests.post('https://localhost/login.py', login_data)
>>>
>>> r.text
u'You are being redirected <a href="profilePage?_ck=1349394964">here</a>'
>>> r.cookies
{'session_id_myapp': '127-0-0-1-825ff22a-6ed1-453b-aebc-5d3cf2987065'}
>>>
>>> r2 = requests.get('https://localhost/profile_data.json', ...)
다음을 사용하여 영구 세션을 쉽게 만들 수 있습니다.
s = requests.session()
그 후 다음과 같이 요청을 계속하십시오.
s.post('https://localhost/login.py', login_data)
#logged in! cookies saved for future requests.
r2 = s.get('https://localhost/profile_data.json', ...)
#cookies sent automatically!
#do whatever, s will keep your cookies intact :)
세션에 대한 자세한 내용 : http://docs.python-requests.org/en/latest/user/advanced/#session-objects
이 비슷한 질문에서 내 대답을 확인하십시오.
python : urllib2 urlopen 요청으로 쿠키를 보내는 방법
import urllib2
import urllib
from cookielib import CookieJar
cj = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# input-type values from the html form
formdata = { "username" : username, "password": password, "form-id" : "1234" }
data_encoded = urllib.urlencode(formdata)
response = opener.open("https://page.com/login.php", data_encoded)
content = response.read()
편집하다:
내 답변에 대해 몇 개의 반대표를 받았지만 이유에 대한 설명은 없었습니다. 아마도 requests 대신 urllib 라이브러리를 참조하고 있기 때문이라고 생각합니다. 질문자가 requests에 대한 도움 또는 다른 접근 방식의 제안을 요청했고, 내 답변이 후자에 해당하기 때문에 이렇게 답했습니다.
다른 답변은 그러한 세션을 유지하는 방법을 이해하는 데 도움이됩니다. 또한 다른 스크립트 실행 (캐시 파일 사용)에서 세션을 유지하는 클래스를 제공하고 싶습니다. 즉, 필요한 경우에만 적절한 "로그인"이 수행됩니다 (시간 초과 또는 캐시에 세션이 없음). 또한 'get'또는 'post'에 대한 후속 호출에 대한 프록시 설정을 지원합니다.
Python3으로 테스트되었습니다.
자신의 코드를위한 기초로 사용하십시오. 다음 스 니펫은 GPL v3와 함께 출시되었습니다.
import pickle
import datetime
import os
from urllib.parse import urlparse
import requests
class MyLoginSession:
"""
a class which handles and saves login sessions. It also keeps track of proxy settings.
It does also maintine a cache-file for restoring session data from earlier
script executions.
"""
def __init__(self,
loginUrl,
loginData,
loginTestUrl,
loginTestString,
sessionFileAppendix = '_session.dat',
maxSessionTimeSeconds = 30 * 60,
proxies = None,
userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
debug = True,
forceLogin = False,
**kwargs):
"""
save some information needed to login the session
you'll have to provide 'loginTestString' which will be looked for in the
responses html to make sure, you've properly been logged in
'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ...
'loginData' will be sent as post data (dictionary of id : value).
'maxSessionTimeSeconds' will be used to determine when to re-login.
"""
urlData = urlparse(loginUrl)
self.proxies = proxies
self.loginData = loginData
self.loginUrl = loginUrl
self.loginTestUrl = loginTestUrl
self.maxSessionTime = maxSessionTimeSeconds
self.sessionFile = urlData.netloc + sessionFileAppendix
self.userAgent = userAgent
self.loginTestString = loginTestString
self.debug = debug
self.login(forceLogin, **kwargs)
def modification_date(self, filename):
"""
return last file modification date as datetime object
"""
t = os.path.getmtime(filename)
return datetime.datetime.fromtimestamp(t)
def login(self, forceLogin = False, **kwargs):
"""
login to a session. Try to read last saved session from cache file. If this fails
do proper login. If the last cache access was too old, also perform a proper login.
Always updates session cache file.
"""
wasReadFromCache = False
if self.debug:
print('loading or generating session...')
if os.path.exists(self.sessionFile) and not forceLogin:
time = self.modification_date(self.sessionFile)
# only load if file less than 30 minutes old
lastModification = (datetime.datetime.now() - time).seconds
if lastModification < self.maxSessionTime:
with open(self.sessionFile, "rb") as f:
self.session = pickle.load(f)
wasReadFromCache = True
if self.debug:
print("loaded session from cache (last access %ds ago) "
% lastModification)
if not wasReadFromCache:
self.session = requests.Session()
self.session.headers.update({'user-agent' : self.userAgent})
res = self.session.post(self.loginUrl, data = self.loginData,
proxies = self.proxies, **kwargs)
if self.debug:
print('created new session with login' )
self.saveSessionToCache()
# test login
res = self.session.get(self.loginTestUrl)
if res.text.lower().find(self.loginTestString.lower()) < 0:
raise Exception("could not log into provided site '%s'"
" (did not find successful login string)"
% self.loginUrl)
def saveSessionToCache(self):
"""
save session to a cache file
"""
# always save (to update timeout)
with open(self.sessionFile, "wb") as f:
pickle.dump(self.session, f)
if self.debug:
print('updated session cache-file %s' % self.sessionFile)
def retrieveContent(self, url, method = "get", postData = None, **kwargs):
"""
return the content of the url with respect to the session.
If 'method' is not 'get', the url will be called with 'postData'
as a post request.
"""
if method == 'get':
res = self.session.get(url , proxies = self.proxies, **kwargs)
else:
res = self.session.post(url , data = postData, proxies = self.proxies, **kwargs)
# the session has been updated on the server, so also update in cache
self.saveSessionToCache()
return res
A code snippet for using the above class may look like this:
if __name__ == "__main__":
# proxies = {'https' : 'https://user:pass@server:port',
# 'http' : 'http://user:pass@server:port'}
loginData = {'user' : 'usr',
'password' : 'pwd'}
loginUrl = 'https://...'
loginTestUrl = 'https://...'
successStr = 'Hello Tom'
s = MyLoginSession(loginUrl, loginData, loginTestUrl, successStr,
#proxies = proxies
)
res = s.retrieveContent('https://....')
print(res.text)
# if, for instance, login via JSON values required try this:
s = MyLoginSession(loginUrl, None, loginTestUrl, successStr,
#proxies = proxies,
json = loginData)
The documentation says that get
takes in an optional cookies
argument allowing you to specify cookies to use:
from the docs:
>>> url = 'http://httpbin.org/cookies'
>>> cookies = dict(cookies_are='working')
>>> r = requests.get(url, cookies=cookies)
>>> r.text
'{"cookies": {"cookies_are": "working"}}'
http://docs.python-requests.org/en/latest/user/quickstart/#cookies
Upon trying all the answers above, I found that using RequestsCookieJar instead of the regular CookieJar for subsequent requests fixed my problem.
import requests
import json
authUrl = 'https://whatever.com/login'
#The subsequent url
testUrl = 'https://whatever.com/someEndpoint'
#Whatever you are posting
login_data = {'formPosted':'1', 'login_email':'me@example.com', 'password':'pw'}
#The auth token or any other data that we will recieve from the authRequest.
token = ''
# Post the loginRequest
loginRequest = requests.post(authUrl,login_data)
print loginRequest.text
# Save the request content to your variable. In this case I needed a field called token.
token = str(json.loads(loginRequest.content)['token'])
print token
# Verify successfull login
print loginRequest.status_code
#Create your RequestsCookieJar for your subsequent requests and add the cookie
jar = requests.cookies.RequestsCookieJar()
jar.set('LWSSO_COOKIE_KEY', token)
#Execute your next request(s) with the RequestCookieJar set
r = requests.get(testUrl, cookies=jar)
print(r.text)
print(r.status_code)
snippet to retrieve json data, password protected
import requests
username = "my_user_name"
password = "my_super_secret"
url = "https://www.my_base_url.com"
the_page_i_want = "/my_json_data_page"
session = requests.Session()
# retrieve cookie value
resp = session.get(url+'/login')
csrf_token = resp.cookies['csrftoken']
# login, add referer
resp = session.post(url+"/login",
data={
'username': username,
'password': password,
'csrfmiddlewaretoken': csrf_token,
'next': the_page_i_want,
},
headers=dict(Referer=url+"/login"))
print(resp.json())
This will work for you in Python;
# Call JIRA API with HTTPBasicAuth
import json
import requests
from requests.auth import HTTPBasicAuth
JIRA_EMAIL = "****"
JIRA_TOKEN = "****"
BASE_URL = "https://****.atlassian.net"
API_URL = "/rest/api/3/serverInfo"
API_URL = BASE_URL+API_URL
BASIC_AUTH = HTTPBasicAuth(JIRA_EMAIL, JIRA_TOKEN)
HEADERS = {'Content-Type' : 'application/json;charset=iso-8859-1'}
response = requests.get(
API_URL,
headers=HEADERS,
auth=BASIC_AUTH
)
print(json.dumps(json.loads(response.text), sort_keys=True, indent=4, separators=(",", ": ")))
참고URL : https://stackoverflow.com/questions/12737740/python-requests-and-persistent-sessions
'programing tip' 카테고리의 다른 글
Clojure 네임 스페이스를 여러 파일로 분할 (0) | 2020.09.08 |
---|---|
복사 된 웹 텍스트에 추가 정보를 추가하는 방법 (0) | 2020.09.08 |
Python 추출 패턴 일치 (0) | 2020.09.07 |
오류 : 디스플레이를 열 수 없음 : (null) Xclip을 사용하여 SSH 공개 키를 복사 할 때 (0) | 2020.09.07 |
제거가 발생하기 전에 jQuery slideUp (). remove ()가 slideUp 애니메이션을 표시하지 않는 것 같습니다. (0) | 2020.09.07 |