"""fc2rss
Screen scrape an OU FirstClass conference or two into RSS files.
Avoid the appalling FC interface!

Stuart Langridge, http://www.kryogenix.org/

v1.0 2003-10-11
"""

import urllib,re,urlparse,os,cgi

# Required variables: edit these before running the script.
# Credentials handed back by myOpener.prompt_user_passwd when a server
# asks for authentication.
OU_USERNAME = 'YOUR_OU_USERNAME'
OU_PASSWORD = 'YOUR_OU_PASSWORD'

# Conference pages to scrape; each URL is passed to parseConf().
CONFERENCES = [
  'http://oufcnt2.open.ac.uk/Login/0007584D-80000001/', # m874
  'http://oufcnt2.open.ac.uk/Login/00075E75-80000001/'  # m879
]

# Directory that receives one <conference title>.xml file per conference.
RSS_PATH = '/home/httpd/test/ou/rss'

# Set to a true value to print each conference and message title as it is
# scraped.
debugmode = 0

# A line consisting only of <title>...</title> (surrounding whitespace allowed).
reTitle = re.compile(r'^\s*<title>(?P<title>[^<]+)</title>\s*$')
# A line of the form  leaf[N]=new Array(n,n,n,n,"<id>", "<author>");
# The id is later joined onto the conference URL to locate the message page.
reItem = re.compile(r'\s*leaf\[\d+\]=new Array\(\d+,\d+,\d+,\d+,"(?P<id>[^"]+)", "(?P<author>[^"]+)"\);\s*$')
# A line that begins (after optional whitespace) with an opening <div tag;
# parseConf starts capturing message text at such a line.
reDiv = re.compile(r'\s*<div ')
# A closing </div>; parseConf uses search(), so it may appear anywhere in the
# line, and capturing stops after that line.
reEndDiv = re.compile(r'\s*</div>')
# Any markup tag, used to strip HTML from the captured message text.
reHTML = re.compile('<[^>]+>')
# Line-break characters (VT, FF, LF, CR) used to split a fetched page into lines.
ws = re.compile(r'[\x0b\x0c\n\r]')


# Answer HTTP authentication challenges with the configured OU credentials
class myOpener(urllib.FancyURLopener):
  """URL opener that never prompts interactively for a username/password."""

  def prompt_user_passwd(self, host, realm):
    """Return the configured (username, password) pair.

    The same pair is returned whatever host and realm are asked about.
    """
    credentials = (OU_USERNAME, OU_PASSWORD)
    return credentials

# Install an instance as urllib's module-wide opener so that the
# urllib.urlopen() calls in this script go through it.
urllib._urlopener = myOpener()

def parseConf(conf):
  """Scrape one conference page and every message it lists.

  conf is the URL of the conference page.

  Returns a (title, items) pair. title is the text of the page's <title>
  line, or '' if no such line was found. items is a list with one
  (id, author, uri, title, text) tuple per message entry found on the
  page, in page order; uri is the id resolved against conf, and title and
  text come from fetching that uri.

  Errors raised while fetching or reading a page propagate to the caller;
  the network handles are closed either way.
  """
  title = ''
  items = []
  fp = urllib.urlopen(conf)
  try:
    data = ws.split(fp.read())
  finally:
    # Python 2 urlopen() results are not context managers, so close by hand.
    # Closing here also avoids holding the conference connection open while
    # every message page is fetched.
    fp.close()
  for line in data:
    t = reTitle.match(line)
    if t:
      title = t.group('title')
      if debugmode: print('---%s---' % title)
    i = reItem.match(line)
    if i:
      iid = i.group('id')
      iauth = i.group('author')
      itemuri = urlparse.urljoin(conf, iid)
      title2, text2 = _parseItem(itemuri)
      items.append((iid, iauth, itemuri, title2, text2))
  return title, items


def _parseItem(itemuri):
  """Fetch one message page and return its (title, text).

  title is the text of the page's <title> line ('' if none). text is the
  content of every <div ...> ... </div> run on the page with markup
  removed, <br> line breaks preserved, and NUL characters dropped.
  """
  title2 = ''
  captured = []
  capture = 0
  fp2 = urllib.urlopen(itemuri)
  try:
    for line2 in ws.split(fp2.read()):
      t2 = reTitle.match(line2)
      if t2:
        title2 = t2.group('title')
        if debugmode: print('  %s' % title2)
      # Capture runs from a line opening a <div through the line that
      # contains the closing </div>, both inclusive.
      if reDiv.match(line2): capture = 1
      if capture: captured.append(line2)
      if reEndDiv.search(line2): capture = 0
  finally:
    fp2.close()
  # Lines are joined without a separator, as the page was split on its
  # line breaks and only explicit <br> tags should survive as breaks.
  text2 = ''.join(captured)
  # Park <br> as a newline so it survives tag stripping, then restore it.
  text2 = text2.replace('<br>', '\n')
  text2 = reHTML.sub('', text2)
  text2 = text2.replace('\n', '<br>')
  text2 = text2.replace('\x00', '')
  return title2, text2


# Walk the conferences, keeping only those that yielded both a title and at
# least one item. Results are keyed by conference title.
confdata = {}
for conf in CONFERENCES:
  title,items = parseConf(conf)
  if title and items: confdata[title] = (conf,items)

# Write one RSS 2.0 file per conference into RSS_PATH, named after the
# conference title with spaces turned into underscores.
for conf in confdata:
  url,items = confdata[conf]
  # The title is scraped from a remote page: neutralise '/' so it cannot
  # point the output file at a different directory.
  filename = conf.replace(' ','_').replace('/','_') + '.xml'
  fp = open(os.path.join(RSS_PATH,filename),'w')
  try:
    # NOTE(review): titles are written exactly as scraped from the HTML
    # <title> line; presumably they are already entity-encoded -- confirm.
    fp.write('''<rss version="2.0">
  <channel>
    <title>%s</title>
    <link>%s</link>
    <description>%s</description>
''' % (conf,cgi.escape(url),conf))
    for item in items:
      iid,auth,uri,title,text = item
      # Author and link are raw strings, so escape &, < and > to keep the
      # file well-formed XML. The trailing newline keeps each item on its
      # own lines instead of running into the next element.
      fp.write('''    <item>
      <author>%s</author>
      <description>%s</description>
      <link>%s</link>
      <title>%s</title>
    </item>
''' % (cgi.escape(auth),cgi.escape(text),cgi.escape(uri),title))
    fp.write('  </channel>\n</rss>\n')
  finally:
    # Always flush and release the file, even if a write fails part-way.
    fp.close()