45fan.com - 路饭网

搜索: 您的位置:主页 > 网络频道 > 阅读资讯:Python批量把word转成html并发布到网站的实例

Python批量把word转成html并发布到网站的实例

2015-07-31 13:28:40 来源:www.45fan.com

Python批量把word转成html并发布到网站的实例

本文实例讲述了Python实现批量将word转html并将html内容发布至网站的方法。分享给大家供大家参考。具体实现方法如下:

#coding=utf-8
__author__ = 'zhm'
from win32com import client as wc
import os
import time
import random
import MySQLdb
import re
def _decode_gbk(name):
    """Return ``name`` as text, decoding it from GBK when it is a byte string.

    ``os.walk`` yields byte strings or text depending on the type of the
    root it was given (and on the Python version), so both are accepted.
    """
    if isinstance(name, bytes):
        return name.decode("gbk")
    return name


def wordsToHtml(dir):
    """Convert every Word document under ``dir`` into an HTML file beside it.

    The tree rooted at ``dir`` is scanned first; files whose suffix is
    ``.doc`` or ``.docx`` (compared case-insensitively) are then opened
    through the ``KWPS.Application`` COM object and saved as
    ``<name>.html`` in the same folder.

    Args:
        dir: Root folder to scan recursively.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        Whatever the COM layer raises when a document cannot be opened or
        saved. The document and the application are still closed first.
    """
    word_suffixes = (".doc", ".docx")
    # 8 is wdFormatHTML in Word's WdSaveFormat enumeration; the WPS
    # automation object is assumed to use the same value - TODO confirm.
    html_format = 8

    # Collect the work first so that only real documents are opened and the
    # office application is not started when there is nothing to convert.
    pending = []
    for path, subdirs, files in os.walk(dir):
        # COM servers resolve relative paths against their own working
        # directory, so hand them absolute paths.
        folder = os.path.abspath(_decode_gbk(path))
        for wordFile in files:
            file_name = _decode_gbk(wordFile)
            base_name, suffix = os.path.splitext(file_name)
            if not suffix:
                print('********************ERROR: 未取得后缀名!')
                continue
            if suffix.lower() not in word_suffixes:
                continue
            if base_name.startswith("~$"):
                # "~$name.docx" is the owner/lock file Word leaves next to
                # an open document; it is not a document itself.
                continue
            pending.append((
                os.path.join(folder, file_name),
                os.path.join(folder, base_name + ".html"),
            ))

    if pending:
        # Kingsoft WPS automation: the preview edition registers as "KWPS",
        # the official release as "WPS".
        word = wc.Dispatch('KWPS.Application')
        try:
            for word_path, html_path in pending:
                doc = word.Documents.Open(word_path)
                try:
                    doc.SaveAs(html_path, html_format)
                    print(u'生成了html文件:' + html_path)
                finally:
                    doc.Close()
        finally:
            # Always shut the application down, otherwise a failed
            # conversion leaves a hidden process running.
            word.Quit()

    print("")
    print("Finished!")
# Matches an <img ...> tag and captures the raw value of its src attribute.
# Compiled once as a bytes pattern because the HTML is processed as bytes.
_IMG_SRC_PATTERN = re.compile(
    br"""<img\s.*?\s?src\s*=\s*['|"]?([^\s'"]+).*?>""", re.I)


def _gbk_to_text(value):
    """Return ``value`` as text, decoding it from GBK when it is bytes."""
    if isinstance(value, bytes):
        return value.decode("gbk")
    return value


def _text_to_bytes(value):
    """Return ``value`` as bytes, encoding it as GBK when it is text."""
    if isinstance(value, bytes):
        return value
    return value.encode("gbk")


def _unused_path(folder, extension):
    """Return a path in ``folder`` that does not exist yet.

    The file name is ``<epoch milliseconds><random 100..10000><extension>``;
    a new name is drawn until no file of that name exists, so two uploads
    made within the same millisecond cannot overwrite each other.
    """
    while True:
        name = (str(int(time.time() * 1000))
                + str(random.randint(100, 10000)) + extension)
        candidate = os.path.join(folder, name)
        if not os.path.exists(candidate):
            return candidate


def _web_path(local_path):
    """Strip a leading drive prefix ("D:") from ``local_path``.

    What remains is used as the URL path; a path without a colon is
    returned unchanged.
    """
    return local_path.split(":", 1)[-1]


def _relocate_images(html, source_dir, imgTarget):
    """Copy the images used by ``html`` into ``imgTarget`` and repoint them.

    Args:
        html: Raw bytes of the HTML document.
        source_dir: Folder (text) that relative image sources resolve against.
        imgTarget: Folder that receives the copies.

    Returns:
        The HTML bytes with the ``src`` of every copied image replaced by
        the web path of its copy. Sources that are not existing local files
        are reported and left untouched.
    """
    relocated = {}
    for raw_src in _IMG_SRC_PATTERN.findall(html):
        if raw_src in relocated:
            continue  # the same picture referenced again: copy it once
        source = os.path.join(source_dir, _gbk_to_text(raw_src))
        if not os.path.isfile(source):
            print(u'Skipped image that is not a local file: ' + source)
            continue
        if not os.path.exists(imgTarget):
            os.makedirs(imgTarget)
        # Keep the real file type; fall back to .png when there is none.
        extension = os.path.splitext(source)[1] or '.png'
        destination = _unused_path(imgTarget, extension)
        with open(source, 'rb') as reader:
            payload = reader.read()
        with open(destination, 'wb') as writer:
            writer.write(payload)
        relocated[raw_src] = _text_to_bytes(_web_path(destination))

    def repoint(match):
        # Replace only the captured src value inside this particular tag.
        replacement = relocated.get(match.group(1))
        tag = match.group(0)
        if replacement is None:
            return tag
        offset = match.start(0)
        return (tag[:match.start(1) - offset] + replacement
                + tag[match.end(1) - offset:])

    return _IMG_SRC_PATTERN.sub(repoint, html)


def _iframe_markup(web_path):
    """Return the script + iframe snippet that embeds the page at ``web_path``."""
    return '''
   <script type="text/javascript" language="javascript">
   function iFrameHeight() {
    var ifm= document.getElementById("iframepage");
    var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;
    if(ifm != null && subWeb != null) {
    ifm.height = subWeb.body.scrollHeight;
    }
   }
   </script>
   <iframe src=''' + web_path + '''
   marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()" ></iframe>
   '''


def html_add_to_db(dir, targetDir='D:/files/htmls/',
                   imgTarget='D:/files/images/whzx/'):
    """Publish every HTML file under ``dir`` and register it in the database.

    For each ``.htm``/``.html`` file (suffix compared case-insensitively):
    its images are copied into ``imgTarget`` and their ``src`` attributes
    rewritten, the resulting page is written into ``targetDir`` under a
    fresh name, and a row holding the title (file name without suffix) and
    an iframe snippet pointing at the published page is inserted into
    ``common_article``. All inserts are committed once at the end.

    Args:
        dir: Root folder to scan recursively.
        targetDir: Folder that receives the published HTML pages. Per the
            original script's comment, ``D:/files`` is the static
            directory configured on the web server.
        imgTarget: Folder that receives the copied images.

    Returns:
        None.

    Raises:
        MySQLdb.Error: If the connection cannot be established.
        IOError / OSError: If a page cannot be read or written.
    """
    # NOTE(review): connection settings (including root/root credentials)
    # are hard-coded exactly as in the original script.
    conn = MySQLdb.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='root',
        db='test',
        charset='utf8'
    )
    try:
        cur = conn.cursor()
        try:
            for path, subdirs, files in os.walk(dir):
                folder = _gbk_to_text(path)
                for htmlFile in files:
                    file_name = _gbk_to_text(htmlFile)
                    title, suffix = os.path.splitext(file_name)
                    if not suffix:
                        print('********************ERROR: 未取得后缀名!')
                        continue
                    if suffix.lower() not in ('.htm', '.html'):
                        continue

                    # Read the page as bytes; it is rewritten and stored
                    # without being re-encoded.
                    with open(os.path.join(folder, file_name), 'rb') as htFile:
                        htmStrCotent = htFile.read()
                    htmStrCotent = _relocate_images(
                        htmStrCotent, folder, imgTarget)

                    if not os.path.exists(targetDir):
                        os.makedirs(targetDir)
                    targetFile = _unused_path(targetDir, '.html')
                    with open(targetFile, 'wb') as tmpTargetFile:
                        tmpTargetFile.write(htmStrCotent)

                    # The database row stores an iframe wrapper around the
                    # published page, not the page itself.
                    sql = "insert into common_article(title,content) values(%s,%s)"
                    param = (title, _iframe_markup(_web_path(targetFile)))
                    try:
                        cur.execute(sql, param)
                    except MySQLdb.Error as err:
                        # Best effort, as before: report and carry on with
                        # the next file, but show what actually failed.
                        print("Error: unable to insert data")
                        print(err)
        finally:
            cur.close()
        conn.commit()
    finally:
        # Close the database connection even when a file operation failed.
        conn.close()
if __name__ == '__main__':
    # Convert the Word documents first, then publish the HTML files that
    # the conversion left in the same folder.
    word_root = 'd:/word'
    wordsToHtml(word_root)
    html_add_to_db(word_root)

希望本文所述对大家的Python程序设计有所帮助。


本文地址:http://www.45fan.com/a/question/16308.html
Tags: 实现 python 批量
编辑:路饭网
关于我们 | 联系我们 | 友情链接 | 网站地图 | Sitemap | App | 返回顶部