# frozen_string_literal: true

# Look up bibliographic information starting from a title.
#
# Source article: https://crieit.net/posts/d31bd6bca80198716bb2f002a715f976
# (Crieit, tag "ISBN"; feed author field: tomato)
#
# From the article:
#   This program takes book titles (and the author name, when there is one)
#   out of a database, searches the National Diet Library's "NDL Search" for
#   each title, and stores the record that looks like the right one in
#   another database.
#
#   The purpose is to obtain details such as the publisher and the ISBN for
#   manga comics for which only the title is known. There are services that
#   lead from an ISBN to title information, but the author does not know of
#   one that goes the other way.
#
#   Further explanation is omitted; tracing back from these links should
#   make things clear:
#     https://crieit.net/boards/manga-B/fc4a0259928ca001b58935ddd7cbb322
#     https://note.com/haywhnk/n/n5195d2660f53

require "faraday"
require "faraday/net_http"
require "net/http"
require 'nokogiri'
require 'sqlite3'
require 'time'
require 'date'

# Thin client for the NDL Search OpenSearch endpoint.
class NdlSearch
  ENDPOINT = 'https://iss.ndl.go.jp'
  SEARCH_PATH = '/api/opensearch'
  # Namespace prefixes stripped from an element's "type" attribute value.
  TYPE_PREFIX = /^dcndl:|^dcterms:/

  # Searches NDL for +title+ (optionally narrowed by +creator+).
  #
  # @param title [String] title to search for
  # @param creator [String, nil] creator filter; omitted from the query when nil
  # @return [Array<Hash{String=>String}>] one hash per <item>; each key is a
  #   child element name (or its stripped "type" label) and each value is the
  #   distinct contents joined with ','. When the response contains no <item>,
  #   the single sentinel [{"totalResults"=>"0"}] is returned.
  def get_book_info(title, creator = nil)
    query = { :mediatype => 1, :cnt => 100 }
    query[:title] = title
    query[:creator] = creator if creator
    if creator == ''
      puts "::::::::::::::::::::::::::::::::::::"
      puts 'author :??'
    end
    puts
    puts "query :#{query}"

    response = ndl_get(SEARCH_PATH, query)
    xml = Nokogiri::XML(response.body)
    xml.remove_namespaces!
    items = xml.xpath('//item')
    if items.empty?
      puts
      puts 'ndl has no item'
      return [{ "totalResults" => "0" }]
    end

    items.map { |item| parse_item(item) }
  end

  private

  # Converts one <item> node into a flat Hash.
  #
  # Children are walked sequentially in document order. The previous version
  # spawned one thread per child writing to a shared Hash, which made the key
  # order depend on scheduling; the work is CPU-only, so threads gained nothing.
  def parse_item(item)
    fields = {}
    item.children.each do |child|
      key = child.name
      next if key == 'text'

      value = child.content.to_s
      type = child.attribute("type")
      if type
        # A typed element is stored twice: under its type label with the bare
        # value, and under its element name as "label:value".
        label = type.to_s.gsub(TYPE_PREFIX, '')
        add_unique(fields, label, value)
        value = "#{label}:#{value}"
      end
      add_unique(fields, key, value)
    end
    fields.transform_values { |values| values.join(',') }
  end

  # Appends +value+ to the list stored under +key+ unless already present.
  def add_unique(fields, key, value)
    list = (fields[key] ||= [])
    list << value unless list.include?(value)
  end

  # Performs a GET against the NDL host and returns the Faraday response.
  def ndl_get(path, pram)
    @connection ||= Faraday.new(:url => ENDPOINT) do |f|
      f.request :url_encoded
      #f.response :logger
      f.adapter :net_http
    end
    @connection.get path, pram
  end
end

# Schema of the output table. Not executed by default; run it once against a
# fresh output database (see the commented line at the bottom of the file).
SQL = <<~EOS
  create table tbl_bookdata (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    book_title text,
    url text,
    author text,
    creatortranscription text,
    volume text,
    seriestitle text,
    publisher text,
    isbn text,
    date text,
    W3CDTF integer,
    mangathank_title text,
    ex_id integer,
    tags text
  );
EOS

SOURCE_DB_PATH = "../gotest/fbay_python3.db"
TARGET_DB_PATH = "bookdata_fbay_py_3.db"

# Copies rows from tbl_manga (source DB) into tbl_bookdata (target DB),
# enriching each one with the best-looking NDL Search record.
class BookdataImporter
  # NDL result key => tbl_bookdata column.
  NDL_FIELD_COLUMNS = {
    'title' => 'book_title',
    'author' => 'author',
    'creatorTranscription' => 'creatortranscription',
    'volume' => 'volume',
    'link' => 'url',
    'publisher' => 'publisher',
    'ISBN' => 'isbn',
    'seriesTitle' => 'seriestitle',
    'date' => 'date',
    'W3CDTF' => 'W3CDTF'
  }.freeze

  # An "extent" matching this marks a video/disc rather than a book.
  NON_BOOK_EXTENT = /ビデオ|DVD|dvd|ディスク/
  # First "[...]" group of the raw title; used as the author string.
  AUTHOR_IN_BRACKETS = /((?<=\[).*?(?=\]))/
  # Digits between 第 and 巻 (or a trailing 卷).
  VOLUME_NUMBER = /((?<=第)\d+(?=巻|卷$))/
  # Fragments removed from the raw title, applied in this order.
  TITLE_NOISE = [
    /((?=第).*(巻|卷))/,
    /((?=第).*話)/,
    /(.(?<=\()文庫版(?=\)).)/,
    /(.(?<=\[)文庫版(?=\]).)/,
    /文庫版/,
    /フルカラー版/,
    /カラー版/,
    /(.(?<=\()完(?=\)).)/,
    /(.(?<=【).*(?=】).)/,
    /(.(?<=\[).+?(?=\]).)/
  ].freeze
  BANNER = "☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆"
  MAX_CANDIDATES = 100

  # @param source_db [SQLite3::Database] database containing tbl_manga
  # @param target_db [SQLite3::Database] database containing tbl_bookdata
  # @param ndl [#get_book_info] search client
  def initialize(source_db, target_db, ndl = NdlSearch.new)
    @source_db = source_db
    @target_db = target_db
    @ndl = ndl
    # Title/author of the previously processed row, so consecutive volumes of
    # one series reuse the author name that was last tried or matched.
    @last_title = ''
    @last_author = ''
  end

  # Processes every source id after the highest id already in tbl_bookdata.
  def run
    last_id = max_id(@source_db, 'tbl_manga')
    puts "last id : '#{last_id}'"
    first_id = max_id(@target_db, 'tbl_bookdata') + 1

    # NOTE(review): the original guard (`count >= looptimes - 1`) stopped one
    # id short of the last source id, then crashed with LocalJumpError because
    # it used `break` inside a Fiber block. The exclusive range keeps the same
    # set of processed ids without the crash. Confirm whether the last id
    # should be processed as well.
    (first_id...last_id).each { |id| import(id) }
  end

  private

  # Highest id in +table+, or 0 when the table is empty. +table+ is always one
  # of the literals passed by #run, never external input.
  def max_id(db, table)
    row = db.execute("select id from #{table} order by id desc limit 1 ;").first
    row ? row[0].to_i : 0
  end

  # Imports the single source row with the given id.
  def import(id)
    puts
    puts "::::::::::::::::::::::::::::::::::::::::"
    print 'id:', id, ' '

    row = @source_db.execute(
      'select book_title,author,title,id,tags from tbl_manga where id = ? ;', [id]
    ).last
    if row.nil?
      # Gap in the source ids: nothing to look up. The previous version went
      # on with an empty title and stored whatever NDL returned for it.
      puts 'empty'
      return
    end

    _book_title, _author, raw_title, source_id, tags = row
    puts raw_title
    base = {
      'id' => id,
      'mangathank_title' => raw_title.to_s,
      'ex_id' => source_id,
      'tags' => tags.to_s
    }

    if raw_title == "null"
      p id
      pp row
      insert_row(base.merge('book_title' => 'book_title:nothing', 'author' => 'author:nothing'))
      return
    end

    lookup_and_store(base, raw_title.to_s)
  end

  # Derives search terms from +raw_title+, queries NDL and writes one row.
  def lookup_and_store(base, raw_title)
    author_data = extract_author(raw_title)
    other_names = nil
    if author_data&.include?(' ')
      # Several names: try the last one first, keep the rest for retries.
      other_names = author_data.split
      person = other_names.pop
    else
      person = author_data.to_s
    end
    puts "author_data: #{author_data} person: #{person}"

    title = clean_title(raw_title)
    if title == @last_title
      person = @last_author if other_names
    else
      @last_title = title
      @last_author = person
    end

    volume = raw_title.slice(VOLUME_NUMBER)
    title = "#{title} #{volume.to_i}" if volume
    puts
    puts title
    puts

    hits, person = search_with_fallback_authors(title, person, other_names)
    if hits.empty?
      insert_row(base.merge('book_title' => title, 'author' => author_data.to_s))
      return
    end

    book = hits.first(MAX_CANDIDATES).find { |candidate| !non_book?(candidate) && candidate.key?('title') }
    # As before, nothing is written when every hit is a video/disc.
    return unless book

    @last_author = person
    store_book(base, book)
  end

  # Queries NDL, retrying with each remaining author name while there are no
  # hits. Bounded by the size of +other_names+.
  #
  # @return [Array(Array<Hash>, String)] the hits (empty when nothing matched)
  #   and the author name used for the final query
  def search_with_fallback_authors(title, person, other_names)
    loop do
      results = @ndl.get_book_info(title, person)
      hits = results.reject { |book| book.key?('totalResults') }
      return [hits, person] unless hits.empty?
      return [hits, person] if other_names.nil? || other_names.empty?

      person = other_names.shift
      puts
      puts "#{person} ?"
      puts
      puts
    end
  end

  # True when the record's "extent" marks it as a video/disc.
  def non_book?(book)
    extent = book['extent']
    return false unless extent

    puts "extent:#{extent}"
    return false unless NON_BOOK_EXTENT.match?(extent)

    puts ''
    puts 'SKIP'
    true
  end

  # Author string from the first "[...]" of the title with the separators
  # " x ", "x" between Japanese characters, parentheses, "×" and " &"
  # normalised; nil when the title has no bracket group.
  def extract_author(raw_title)
    author = raw_title.slice(AUTHOR_IN_BRACKETS)
    return nil unless author

    author
      .gsub(/\ x\ /, ' ')
      .sub(/((?<=[\p{Hiragana}\p{Han}\p{Katakana}])x(?=[\p{Hiragana}\p{Han}\p{Katakana}]))/, ' ')
      .gsub(/\(|\)/, "(" => ' ', ")" => '')
      .gsub(/×/, ' ')
      .gsub(/\ &/, ' ')
  end

  # Raw title with volume/chapter markers, edition labels and bracketed
  # groups removed, then stripped.
  def clean_title(raw_title)
    TITLE_NOISE.reduce(raw_title) { |text, pattern| text.gsub(pattern, '') }.strip
  end

  # Writes the matched NDL record in a single insert.
  def store_book(base, book)
    puts BANNER
    puts " #{base['id']}"
    puts ""
    row = base.dup
    NDL_FIELD_COLUMNS.each do |field, column|
      next unless book.key?(field)

      puts "#{field}:#{book[field]}"
      row[column] = book[field]
    end
    puts ""
    puts BANNER
    insert_row(row)
  end

  # Inserts +row+ (column name => value) into tbl_bookdata. Column names come
  # only from constants in this class; every value is passed as a bound
  # parameter, so quotes in titles or author names need no manual escaping.
  def insert_row(row)
    columns = row.keys.join(', ')
    placeholders = Array.new(row.size, '?').join(', ')
    @target_db.execute("insert into tbl_bookdata (#{columns}) values (#{placeholders});", row.values)
  end
end

if $PROGRAM_NAME == __FILE__
  source_db = SQLite3::Database.open(SOURCE_DB_PATH)
  target_db = SQLite3::Database.open(TARGET_DB_PATH)
  begin
    # target_db.execute(SQL) # run once to create tbl_bookdata
    BookdataImporter.new(source_db, target_db).run
  ensure
    source_db.close
    target_db.close
  end
end