Created
April 28, 2016 06:57
-
-
Save swiatczak/8c7d136cce961eea9a629d9b457ff4ca to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ''' basic CAS authenticated page scraping using Requests and BeautifulSoup | |
| ''' | |
| __author__ = "swiatczak" | |
| import requests | |
| from bs4 import BeautifulSoup as soup | |
| import urllib | |
def casSoup(user, password, serviceURL, casURL, targets = None):
    ''' Log in to a CAS-protected service and pass the resulting page to handlers.

    A deliberately simple approach: a single requests.Session is used so that
    cookies persist between the initial request and the login POST. The login
    form is parsed with BeautifulSoup to pick up the one-time tokens, the
    credentials are posted, and the parsed response is sent to every target.

    user       -- user name submitted in the 'username' form field
    password   -- password submitted in the 'password' form field
    serviceURL -- URL fetched first; its response is parsed for the login form
    casURL     -- str.format template with one positional placeholder that
                  receives the URL-quoted serviceURL; the result is the URL
                  the credentials are posted to
    targets    -- optional iterable of objects exposing send(); each receives
                  one tuple (parsed response soup, session cookies).
                  NOTE(review): presumably generator-based coroutines, which
                  must already be primed before being passed in - confirm
                  against callers.

    Returns None; results are delivered only through the targets.

    Raises AttributeError if the fetched page contains no <form>, and
    TypeError if the form has no 'lt' input (both come from the unguarded
    lookups below). Network errors from requests propagate unchanged.
    '''
    # urllib.quote exists only on Python 2; Python 3 moved it to urllib.parse.
    # Importing locally keeps the function working on both.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    with requests.Session() as sess:
        authResp = sess.get(serviceURL, verify=True)
        broth = soup(authResp.content, 'html.parser')
        loginForm = broth.form

        # One-time login ticket embedded in the form as a hidden input.
        ltParam = loginForm.find(name='input', attrs={'name': 'lt'})['value']

        # Use the flow execution token the server actually issued when the
        # form carries one; fall back to the previously hard-coded 'e1s1'.
        execInput = loginForm.find(name='input', attrs={'name': 'execution'})
        if execInput is not None:
            execution = execInput.get('value', 'e1s1')
        else:
            execution = 'e1s1'

        payload = {
            '_eventId': 'submit',
            'execution': execution,
            'submit': 'Login',
            'username': user,
            'password': password,
            'lt': ltParam,
        }
        casResp = sess.post(casURL.format(quote(serviceURL)), data=payload, verify=True)
        broth2 = soup(casResp.content, 'html.parser')

        if targets is not None:
            for target in targets:
                # Cookies are passed along so that a handler can issue further
                # authenticated requests of its own.
                target.send((broth2, sess.cookies))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment