# -*- coding: utf-8 -*-
"""Generate sitemap XML files from URL paths stored in MySQL.

The script collects class, topic and article paths from the database,
writes one absolute URL per line to ``urls.txt`` and then splits that list
into ``sitemap_<n>.xml`` files of at most ``URLS_PER_SITEMAP`` entries each,
all referenced from ``sitemapindex.xml``.

The code runs on both Python 2 and Python 3: files are opened through
``io.open`` with an explicit UTF-8 encoding and every written literal is a
unicode string, so no ``sys.setdefaultencoding`` hack is required.
"""

import datetime
import io
import os
import sys
from xml.sax.saxutils import escape

# Maximum number of <url> entries written to a single sitemap file.
URLS_PER_SITEMAP = 10000
# Namespace required by the sitemap protocol on <urlset> and <sitemapindex>.
SITEMAP_XMLNS = u'http://www.sitemaps.org/schemas/sitemap/0.9'
# Intermediate list of absolute URLs (one per line), relative to the CWD.
URL_LIST_FILE = 'urls.txt'

# Site root (placeholder value), always ending with a slash.
hosts = u'域名/'
# Directory on disk that receives the generated XML files.
path = '/data/wwwroot/forwei/www/sitemaps/'
# Location of that directory relative to the site root.
paths = u'sitemaps/'
# Date stamped into every <lastmod> element.
lastmod = datetime.date.today()

sql = ('SELECT classpath FROM phome_enewsclass '
       'union select ztpath from phome_enewszt '
       'union SELECT titleurl FROM phome_ecms_news')

_XML_DECLARATION = u'<?xml version="1.0" encoding="utf-8"?>\n'
_URL_ENTRY = (u'\n<url>\n<loc>%s</loc>\n<lastmod>%s</lastmod>'
              u'\n<priority>0.8</priority>\n</url>')
# The sitemap index schema only allows <loc> and <lastmod> inside <sitemap>,
# so no <priority> element is emitted here.
_INDEX_ENTRY = u'\n<sitemap>\n<loc>%s</loc>\n<lastmod>%s</lastmod>\n</sitemap>'


def _to_text(value):
    """Return *value* as unicode text, decoding UTF-8 byte strings."""
    if isinstance(value, bytes):
        # Undecodable bytes are replaced instead of aborting the whole run.
        return value.decode('utf-8', 'replace')
    return value


def _open_utf8(file_path):
    """Open *file_path* for writing UTF-8 text on Python 2 and 3 alike."""
    return io.open(file_path, 'w', encoding='utf-8')


def build_url(item, host=None):
    """Turn one database path into an absolute URL.

    NOTE(review): the original script lost its indentation; this follows the
    reading in which values of two characters or fewer are skipped and a
    single leading slash is dropped before the host is prepended.

    Args:
        item: path value from the database (text, UTF-8 bytes or ``None``).
        host: site root ending in ``/``; defaults to the module-level
            ``hosts``.

    Returns:
        The absolute URL, or ``None`` when the value must be skipped.
    """
    if item is None:
        # A NULL column has no length; skip it instead of raising.
        return None
    text = _to_text(item)
    if len(text) <= 2:
        return None
    if text[0] == u'/':
        text = text[1:]
    root = hosts if host is None else host
    return u'%s%s' % (_to_text(root), text)


def fetch_paths():
    """Run ``sql`` against the database and return all result rows."""
    # Imported lazily so the pure helpers stay usable without the DB driver.
    import pymysql.cursors

    connection = pymysql.connect(host="127.0.0.1", user="用户名",
                                 password="密码", db="表名")
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        connection.close()


def write_url_list(rows, list_path=URL_LIST_FILE, host=None):
    """Write one absolute URL per line for every usable value in *rows*.

    Args:
        rows: iterable of row tuples as returned by ``fetch_paths``.
        list_path: file that receives the URL list.
        host: site root; defaults to the module-level ``hosts``.

    Returns:
        The number of URLs written.
    """
    written = 0
    with _open_utf8(list_path) as listing:
        for row in rows:
            for item in row:
                url = build_url(item, host)
                if url is not None:
                    listing.write(url + u'\n')
                    written += 1
    return written


def iter_urls(list_path=URL_LIST_FILE):
    """Yield every non-blank, stripped line of the URL list file."""
    with io.open(list_path, encoding='utf-8') as listing:
        for line in listing:
            url = line.strip()
            if url:
                yield url


def add_file(j, f1, hosts, paths, out_dir=None, day=None):
    """Register sitemap number *j* in the index and open it for writing.

    Args:
        j: sequence number used in the file name ``sitemap_<j>.xml``.
        f1: open, writable sitemap index file.
        hosts: site root ending in ``/``.
        paths: sitemap directory relative to the site root, ending in ``/``.
        out_dir: directory for the new file; defaults to the module-level
            ``path``.
        day: value for ``<lastmod>``; defaults to the module-level
            ``lastmod``.

    Returns:
        The newly opened sitemap file with the XML declaration and the
        opening ``<urlset>`` tag already written. The caller must write the
        closing tag and close the file.
    """
    out_dir = path if out_dir is None else out_dir
    day = lastmod if day is None else day
    file_name = 'sitemap_%s.xml' % (j)
    location = u'%s%s%s' % (_to_text(hosts), _to_text(paths), file_name)
    f1.write(_INDEX_ENTRY % (escape(location), day))
    f = _open_utf8(os.path.join(out_dir, file_name))
    f.write(_XML_DECLARATION + u'<urlset xmlns="%s">' % SITEMAP_XMLNS)
    return f


def write_sitemaps(urls, out_dir=None, host=None, rel_dir=None, day=None,
                   per_file=URLS_PER_SITEMAP):
    """Split *urls* into numbered sitemap files plus one sitemap index.

    ``sitemap_1.xml`` is always created, even when *urls* is empty. A new
    file is started only when the current one already holds *per_file*
    entries and another URL is pending, so no file exceeds the limit and no
    trailing empty file is produced.

    Args:
        urls: iterable of absolute URLs; blank entries are ignored.
        out_dir: output directory; defaults to the module-level ``path``.
        host: site root; defaults to the module-level ``hosts``.
        rel_dir: sitemap directory relative to the site root; defaults to
            the module-level ``paths``.
        day: value for ``<lastmod>``; defaults to the module-level
            ``lastmod``.
        per_file: maximum number of URLs per sitemap file.

    Returns:
        The number of sitemap files written (not counting the index).

    Raises:
        ValueError: if *per_file* is smaller than 1.
    """
    if per_file < 1:
        raise ValueError('per_file must be at least 1')
    out_dir = path if out_dir is None else out_dir
    host = hosts if host is None else host
    rel_dir = paths if rel_dir is None else rel_dir
    day = lastmod if day is None else day

    index = _open_utf8(os.path.join(out_dir, 'sitemapindex.xml'))
    try:
        index.write(_XML_DECLARATION
                    + u'<sitemapindex xmlns="%s">' % SITEMAP_XMLNS)
        file_no = 1
        in_current = 0
        current = add_file(file_no, index, host, rel_dir, out_dir, day)
        try:
            for raw in urls:
                url = _to_text(raw).strip()
                if not url:
                    continue
                if in_current == per_file:
                    # Current file is full: finish it and start the next one.
                    current.write(u'\n</urlset>')
                    current.close()
                    file_no += 1
                    current = add_file(file_no, index, host, rel_dir,
                                       out_dir, day)
                    in_current = 0
                # URLs must be entity-escaped to keep the XML well-formed.
                current.write(_URL_ENTRY % (escape(url), day))
                in_current += 1
            current.write(u'\n</urlset>')
        finally:
            current.close()
        index.write(u'\n</sitemapindex>')
    finally:
        index.close()
    return file_no


def main():
    """Export the URL list from the database and build the sitemaps."""
    if not os.path.isdir(path):
        os.makedirs(path)
    write_url_list(fetch_paths())

    # Count the URLs in the list (blank lines are not counted).
    total = sum(1 for _ in iter_urls())
    print(total)

    # Write at most URLS_PER_SITEMAP URLs per file, starting at sitemap_1.xml.
    file_num = write_sitemaps(iter_urls())
    print('总共有%s条URL,生成%s个sitemap文件' % (total, file_num))


if __name__ == '__main__':
    sys.exit(main())