source: code/trunk/cli/convert.ml@ 7

Last change on this file since 7 was 3, checked in by fox, 3 years ago
  • Removed 'txt init'

Format

  • New B32 ID

Index

  • New option: txt index --print
  • Move scheme to peers
  • Replace peer.*.conf files with index packed locations. Instead of adding a URL to peers.*.conf, run txt pull <url>.

Conversion

  • Rewritten converters
  • txt-convert looks for a .convert.conf containing key: value lines.
  • Specifiable topic-roots from .convert.conf.
  • Added Topics: key, with comma separated topics.

If set only those topics will appear in the main index and used as topic roots.
Other topics will have sub-indices generated, but won't be listed in the main index.

  • HTML converter header & footer options
  • HTML-index renamed to HTM-index

Internal

  • Change types: uuid:Uuid -> id:string
  • File_store merges identical texts
  • Use peer ID for store path, store peers' texts in .local/share/texts
  • Simple URN resolution for converter

Continue to next feed if parsing one fails

  • Phasing-out Archive, replaced by improved packs
  • Eliminate Bos, Cohttp, lwt, uri, tls, Re, Ptime, dependencies
  • Lock version for Cmdliner, fix dune-project
  • Optional recursive store
  • Improve header_pack
  • Fix recursive mkdir
File size: 3.0 KB
Line 
1open Logarion
2
(** [is_older source dest] tells whether [dest] has to be regenerated from
    [source].

    It is [true] when the modification time of [dest] is strictly earlier
    than that of [source], and also when either file cannot be [stat]ed
    (for instance because [dest] has not been written yet). *)
let is_older source dest =
  match Unix.((stat dest).st_mtime < (stat source).st_mtime) with
  | stale -> stale
  (* Only a failed [stat] means "rebuild". Anything else (e.g.
     [Out_of_memory], [Sys.Break]) is not ours to swallow and propagates. *)
  | exception Unix.Unix_error _ -> true
5
(** [convert cs r (text, files)] renders [text] with every converter in [cs].

    Only texts whose ["Content-Type"] header is empty or ["text/plain"] are
    handled. For each converter the destination path is
    [r.Conversion.dir / Text.short_id text ^ ext]; the page is (re)written
    through [File_store.file] only when [is_older] reports the destination as
    out of date with respect to the first file of [files].

    Returns [true] when at least one page was written, [false] otherwise.
    Unsupported content types and texts without any source file are reported
    on stderr and yield [false]. *)
let convert cs r (text, files) =
  match Text.str "Content-Type" text with
  | "" | "text/plain" ->
    (match files with
     | [] ->
       (* Previously [List.hd] raised [Failure "hd"] here and aborted the
          whole run; report the text and carry on with the next one. *)
       Printf.eprintf "No source file for text: %s\n" text.Text.title;
       false
     | source :: _ ->
       let base = Filename.concat r.Conversion.dir (Text.short_id text) in
       List.fold_left
         (fun converted f ->
            let dest = base ^ f.Conversion.ext in
            if is_older source dest then begin
              File_store.file dest (f.Conversion.page r text);
              true
            end
            else converted)
         false cs)
  | x ->
    (* Newline-terminated so consecutive diagnostics do not run together. *)
    Printf.eprintf "Can't convert Content-Type: %s file: %s\n" x text.Text.title;
    false
17
(** [converters types kv] builds the list of converters selected by [types].

    ["htm"] selects the HTML converter (initialised from [kv] via
    [Html.init]), ["gmi"] the Gemini converter, and ["all"] both. Any other
    value yields the empty list. When both are selected the Gemini converter
    comes first in the result. *)
let converters types kv =
  let wanted kind = kind = types || "all" = types in
  (* The HTML entry is built first so [Html.init] runs before anything else,
     and only when HTML output was requested. *)
  let htm =
    if wanted "htm" then
      let state = Html.init kv in
      [ Conversion.{ ext = Html.ext; page = Html.page state; indices = Html.indices state } ]
    else []
  in
  let gmi =
    if wanted "gmi" then
      [ Conversion.{ ext = Gemini.ext; page = Gemini.page; indices = Gemini.indices } ]
    else []
  in
  gmi @ htm
27
(** [convert_all converters noindex dir id kv] converts every text found by
    [File_store.fold] under [dir] (newest first) with [converters], then,
    unless [noindex] is set, asks each converter to write its indices.

    Topic roots are taken from the comma-separated ["Topics"] entry of [kv]
    when present, and from [Topic_set.roots] of the collected topics
    otherwise. A summary line with the number of converted and collected
    texts is printed on stdout. *)
let convert_all converters noindex dir id kv =
  let empty = Topic_set.Map.empty in
  let repo = Conversion.{ id; dir; kv; topic_roots = []; topics = empty; texts = [] } in
  (* Fold step: convert the entry, remember its text, merge its topics. *)
  let fn (topic_map, seen, converted) ((txt, _) as entry) =
    let converted =
      if convert converters repo entry then succ converted else converted in
    let seen = txt :: seen in
    let topic_map = Topic_set.to_map topic_map (Text.set "topics" txt) in
    (topic_map, seen, converted)
  in
  let topics, texts, count = File_store.(fold ~dir ~order:newest fn (empty,[],0)) in
  let topic_roots =
    match List.rev (String_set.list_of_csv (Store.KV.find "Topics" kv)) with
    | roots -> roots
    | exception Not_found -> Topic_set.roots topics
  in
  let repo = Conversion.{ repo with topic_roots; topics; texts } in
  if not noindex then
    converters |> List.iter (fun conv -> conv.Conversion.indices repo);
  Printf.printf "Converted: %d Indexed: %d\n" count (List.length texts)
40
(** [convert_dir types noindex dir] is the entry point of the [convert]
    command.

    It reads [dir/index.pck], decodes it with [Header_pack.of_string], loads
    the optional [dir/.convert.conf] key/value file, and runs [convert_all]
    with the converters selected by [types]. The pack's title is used as
    ["Title"] unless the configuration already provides one, and the pack's
    locations are always stored under ["Locations"].

    An empty [dir] or an undecodable pack is reported on stderr. *)
let convert_dir types noindex dir =
  if dir = "" then prerr_endline "unspecified dir"
  else
    let pack_path = Filename.concat dir "index.pck" in
    (match Header_pack.of_string (File_store.to_string pack_path) with
     | Error msg -> prerr_endline msg
     | Ok { info; _ } ->
       (* TODO: better place to store convert conf? *)
       let conf_path = Filename.concat dir ".convert.conf" in
       let from_conf =
         if Sys.file_exists conf_path then File_store.of_kv_file conf_path
         else Store.KV.empty
       in
       (* A title given in the configuration wins over the pack's own. *)
       let titled =
         if Store.KV.mem "Title" from_conf then from_conf
         else Store.KV.add "Title" info.Header_pack.title from_conf
       in
       let locations = String.concat ";\n" info.Header_pack.locations in
       let kv = Store.KV.add "Locations" locations titled in
       let cs = converters types kv in
       convert_all cs noindex dir info.Header_pack.id kv)
55
56open Cmdliner
(** Cmdliner term and info for the [convert] subcommand.

    Positional argument 0 is the target directory (defaults to [""]),
    [-t]/[--type] selects the output type (defaults to ["all"]) and
    [--noindex] suppresses index generation. The arguments are applied to
    [convert_dir]. *)
let term =
  let directory =
    let doc = "Directory to convert" in
    Arg.(value & pos 0 string "" & info [] ~docv:"target directory" ~doc)
  in
  let types =
    let doc = "Convert to type" in
    Arg.(value & opt string "all" & info ["t"; "type"] ~docv:"TYPES" ~doc)
  in
  let noindex =
    let doc = "don't create indices in target format" in
    Arg.(value & flag & info ["noindex"] ~doc)
  in
  let man =
    [ `S "DESCRIPTION"; `P "Convert texts within a directory to another format.
 Directory must contain an index.pck. Run `txt index` first." ]
  in
  let run = Term.(const convert_dir $ types $ noindex $ directory) in
  (run, Term.info "convert" ~doc:"convert txts" ~man)
Note: See TracBrowser for help on using the repository browser.