Skip to content

Instantly share code, notes, and snippets.

@swiatczak
Created April 28, 2016 06:57
Show Gist options
  • Select an option

  • Save swiatczak/8c7d136cce961eea9a629d9b457ff4ca to your computer and use it in GitHub Desktop.

Select an option

Save swiatczak/8c7d136cce961eea9a629d9b457ff4ca to your computer and use it in GitHub Desktop.
''' basic CAS authenticated page scraping using Requests and BeautifulSoup
'''
__author__ = "swiatczak"
import requests
from bs4 import BeautifulSoup as soup
import urllib
def casSoup(user, password, serviceURL, casURL, targets = None):
    """Authenticate against a CAS server and dispatch the resulting page soup
    to coroutine-based handlers.

    A ``requests.Session`` is used so cookies (the CAS ticket-granting
    cookie in particular) persist between the initial GET of the protected
    service and the credential-submitting POST to the CAS login endpoint.

    Parameters
    ----------
    user, password : str
        CAS credentials.
    serviceURL : str
        URL of the CAS-protected page to scrape. The first GET of this URL
        is expected to land on (or contain) the CAS login form.
    casURL : str
        CAS login URL template containing one ``{}`` placeholder, filled
        with the URL-encoded ``serviceURL``.
    targets : iterable of coroutines, optional
        Each coroutine receives ``(soup, cookies)`` via ``.send()``.
        ``None`` (default) skips dispatch entirely.

    Returns
    -------
    tuple
        ``(authenticated_page_soup, session_cookies)`` so callers that do
        not use coroutine targets can still consume the result.
    """
    with requests.Session() as sess:
        # Initial GET lands on the CAS login form; scrape the one-time
        # login ticket ('lt') hidden input that CAS requires on submit.
        authResp = sess.get(serviceURL, verify=True)
        broth = soup(authResp.content, 'html.parser')
        ltParam = broth.form.find(name='input', attrs={'name': 'lt'})['value']
        payload = {
            '_eventId': 'submit',
            # NOTE(review): hard-coded webflow execution id — works for simple
            # CAS deployments; scrape it from the form if it varies per server.
            'execution': 'e1s1',
            'submit': 'Login',
            'username': user,
            'password': password,
            'lt': ltParam,
        }
        # Submit credentials. urllib.parse.quote (not the Python-2-only
        # urllib.quote) URL-encodes the service URL for the CAS redirect.
        casResp = sess.post(casURL.format(urllib.parse.quote(serviceURL)),
                            data=payload, verify=True)
        broth2 = soup(casResp.content, 'html.parser')
        if targets is not None:
            for target in targets:
                target.send((broth2, sess.cookies))
        return broth2, sess.cookies
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment