Changeset 3 in code for trunk/cli


Ignore:
Timestamp:
Apr 15, 2022, 1:17:01 PM (3 years ago)
Author:
fox
Message:
  • Removed 'txt init'

Format

  • New B32 ID

Index

  • New option: txt index --print
  • Move scheme to peers
  • Replace peer.*.conf files with index packed locations Instead of adding a URL to peers.*.conf, run txt pull <url>

Conversion

  • Rewritten converters
  • txt-convert looks for a .convert.conf containing key: value lines.
  • Specifiable topic-roots from .convert.conf.
  • Added Topics: key, with comma seperated topics.

If set only those topics will appear in the main index and used as topic roots.
Other topics will have sub-indices generated, but won't be listed in the main index.

  • HTML converter header & footer options
  • HTML-index renamed to HTM-index

Internal

  • Change types: uuid:Uuid -> id:string
  • File_store merges identical texts
  • Use peer ID for store path, store peers' texts in .local/share/texts
  • Simple URN resolution for converter

Continue to next feed if parsing one fails

  • Phasing-out Archive, replaced by improved packs
  • Eliminate Bos, Cohttp, lwt, uri, tls, Re, Ptime, dependencies
  • Lock version for Cmdliner, fix dune-project
  • Optional resursive store
  • Improve header_pack
  • Fix recursive mkdir
Location:
trunk/cli
Files:
10 added
1 deleted
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/cli/atom.ml

    r2 r3  
    1010module P = Parsers.Plain_text.Make (Converter.Html)
    1111
    12 let id txt = "<id>urn:uuid:" ^ Logarion.(Id.to_string txt.Text.uuid) ^ "</id>"
     12let id txt = "<id>urn:uuid:" ^ Logarion.(txt.Text.id) ^ "</id>"
    1313let title text = "<title>" ^ esc text.Logarion.Text.title ^ "</title>"
    1414
    1515let authors text =
    16         let u acc addr = acc ^ element "uri" (Uri.to_string addr) in
     16        let u acc addr = acc ^ element "uri" addr in
    1717  let open Logarion in
    1818  let fn txt a =
     
    5252  ^ title ^ {|</title><link rel="alternate" type="|} ^ alternate_type ^ {|" href="|}
    5353  ^ base_url ^ {|/" /><link rel="self" type="application/atom+xml" href="|}
    54   ^ self ^ {|" /><id>urn:uuid:|} ^ Logarion.Id.to_string archive_id ^ "</id><updated>"
    55   ^ Ptime.to_rfc3339 (Ptime_clock.now ()) ^ "</updated>\n"
     54  ^ self ^ {|" /><id>urn:uuid:|} ^ archive_id ^ "</id><updated>"
     55  ^ Logarion.Date.now () ^ "</updated>\n"
    5656  ^ List.fold_left (fun acc t -> acc ^ entry base_url t) "" texts
    5757  ^ "</feed>"
  • trunk/cli/convert.ml

    r2 r3  
    11open Logarion
    2 module A = Archive.Make (Logarion.File_store)
    32
    4 let convert_modified source dest fn title text =
    5         if (try Unix.((stat dest).st_mtime < (stat source).st_mtime) with _-> true)
    6         then (File_store.file dest (fn title text); true) else false
     3let is_older source dest = try
     4        Unix.((stat dest).st_mtime < (stat source).st_mtime) with _-> true
    75
    8 let word_fname dir text = dir ^ "/" ^ Text.alias text
    9 let id_fname dir text = dir ^ "/" ^ Text.short_id text
     6let convert cs r (text, files) = match Text.str "Content-Type" text with
     7        | "" | "text/plain" ->
     8                let source = List.hd files in
     9                let dest = Filename.concat r.Conversion.dir (Text.short_id text) in
     10                List.fold_left
     11                        (fun a f ->
     12                                let dest = dest ^ f.Conversion.ext in
     13                                if is_older source dest then (File_store.file dest (f.Conversion.page r text); true) else false
     14                                || a)
     15                        false cs
     16        | x -> Printf.eprintf "Can't convert Content-Type: %s file: %s" x text.Text.title; false
    1017
    11 let writer types dir name (text,store_item) = (* todo: single_parser -> [files] *)
    12 (*      convert_modified store_item idfilename (fun _title -> Text.to_string) text.title text;*)
    13         let h = if "htm" = types || "all" = types then
    14                 convert_modified store_item (id_fname dir text ^ ".htm") Html.page name text
    15                 else false in
    16         let g = if "gmi" = types || "all" = types then
    17                 convert_modified store_item (id_fname dir text ^ ".gmi") Gemini.page name text
    18                 else false in
    19         h || g
     18let converters types kv =
     19        let t = [] in
     20        let t = if ("htm" = types || "all" = types) then
     21                (let htm = Html.init kv in
     22                 Conversion.{ ext = Html.ext; page = Html.page htm; indices = Html.indices htm })::t
     23                else t in
     24        let t = if ("gmi" = types || "all" = types) then
     25                Conversion.{ ext = Gemini.ext; page = Gemini.page; indices = Gemini.indices}::t else t in
     26        t
    2027
    21 let index_writer types noindex dir archive topic_roots topic_map texts =
    22         let name = archive.A.name in
    23         let file path = File_store.file (dir ^ path) in
    24         file "/index.pck"       (Header_pack.pack archive texts);
    25         if not noindex && ("htm" = types || "all" = types) then (
    26                 let index_name = try Store.KV.find "HTML-index" archive.File_store.kv
    27                         with Not_found -> "index.html" in
    28                 if index_name <> "" then
    29                         file ("/"^index_name) (Html.topic_main_index name topic_roots texts);
    30                 file "/index.date.htm" (Html.date_index name texts);
    31                 List.iter
    32                         (fun topic -> file ("/index." ^ topic ^ ".htm")
    33                          (Html.topic_sub_index name topic_map topic texts))
    34                         topic_roots;
    35                 let base_url = try Store.KV.find "HTTP-URL" archive.File_store.kv
    36                         with Not_found -> prerr_endline "Missing `HTTP-URL:` in config"; "" in
    37                 file "/feed.atom" (Atom.feed archive.A.name archive.A.id base_url "text/html" texts)
    38         );
    39         if not noindex && ("gmi" = types || "all" = types) then (
    40                 let index_name = try Store.KV.find "Gemini-index" archive.File_store.kv
    41                         with Not_found -> "index.gmi" in
    42                 if index_name <> "" then
    43                         file ("/"^index_name) (Gemini.topic_main_index name topic_roots texts);
    44                 file "/index.date.gmi" (Gemini.date_index name texts);
    45                 List.iter
    46                         (fun topic -> file ("/index." ^ topic ^ ".gmi")
    47                          (Gemini.topic_sub_index name topic_map topic texts))
    48                         topic_roots;
    49                 let base_url = try Store.KV.find "GEMINI-URL" archive.File_store.kv
    50                         with Not_found -> prerr_endline "Missing `GEMINI-URL:` in config"; "" in
    51                 file "/gmi.atom" (Atom.feed archive.A.name archive.A.id base_url "text/gemini" texts)
    52         )
     28let convert_all converters noindex dir id kv =
     29        let empty = Topic_set.Map.empty in
     30        let repo = Conversion.{ id; dir; kv; topic_roots = []; topics = empty; texts = [] } in
     31        let fn (ts,ls,acc) ((elt,_) as r) =
     32                (Topic_set.to_map ts (Text.set "topics" elt)), elt::ls,
     33                if convert converters repo r then acc+1 else acc in
     34        let topics, texts, count = File_store.(fold ~dir ~order:newest fn (empty,[],0)) in
     35        let topic_roots = try List.rev @@ String_set.list_of_csv (Store.KV.find "Topics" kv)
     36                with Not_found -> Topic_set.roots topics in
     37        let repo = Conversion.{ repo with topic_roots; topics; texts } in
     38        if not noindex then List.iter (fun c -> c.Conversion.indices repo) converters;
     39        Printf.printf "Converted: %d Indexed: %d\n" count (List.length texts)
    5340
    54 let txt_writer types dir name ((text, _store_item) as r) =
    55         match Text.str "Content-Type" text with
    56         | "" | "text/plain" -> writer types dir name r
    57         | x -> prerr_endline ("Can't convert Content-Type: "^x^" file: " ^text.Text.title); false
    58 
    59 let convert_all types noindex dir archive =
    60         let name = archive.A.name in
    61         let fn (ts,ls,acc) ((elt,_) as r) =
    62                 (Topic_set.to_map ts (Text.set "topics" elt)),
    63                 elt::ls, if txt_writer types dir name r then acc+1 else acc in
    64         let empty = Topic_set.Map.empty in
    65         let topic_map, texts, count = A.(fold ~order:newest fn (empty,[],0) archive) in
    66         let topic_roots = Topic_set.roots topic_map in
    67         index_writer types noindex dir archive topic_roots topic_map texts;
    68         print_endline @@ "Converted: " ^ string_of_int (count)
    69                 ^ "\nIndexed: " ^ string_of_int (List.length texts);
    70         Ok ()
    71 
    72 let convert_dir types noindex cmd_dir =
    73         let (>>=) = Result.bind in
    74         let with_dir dir =
    75                 Result.map_error (function `Msg m -> m)
    76                 Logarion.File_store.Directory.(directory dir |> print ~descr:"export" dir) in
    77         (A.of_path "."
    78                 >>= fun archive -> (match cmd_dir with "" -> Error "unspecified export dir" | x -> Ok x)
    79                 >>= fun dir -> with_dir dir
    80                 >>= fun _ -> convert_all types noindex dir { archive with store = dir })
    81          |> function Ok () -> () | Error x -> prerr_endline x
     41let convert_dir types noindex dir =
     42        match dir with "" -> prerr_endline "unspecified dir"
     43        | dir ->
     44                let fname = Filename.concat dir "index.pck" in
     45                match Header_pack.of_string @@ File_store.to_string fname with
     46                | Error s -> prerr_endline s
     47                | Ok { info; _ } ->
     48                        let kv = let f = Filename.concat dir ".convert.conf" in (* TODO: better place to store convert conf? *)
     49                                if Sys.file_exists f then File_store.of_kv_file f else Store.KV.empty in
     50                        let kv = if Store.KV.mem "Title" kv then kv
     51                                else Store.KV.add "Title" info.Header_pack.title kv in
     52                        let kv = Store.KV.add "Locations" (String.concat ";\n" info.Header_pack.locations) kv in
     53                        let cs = converters types kv in
     54                        convert_all cs noindex dir info.Header_pack.id kv
    8255
    8356open Cmdliner
    84 
    8557let term =
    86         let directory = Arg.(value & pos 0 string "" & info [] ~docv:"target directory" ~doc:"Directory to convert into") in
    87         let types = Arg.(value & opt string "all" & info ["t"; "type"] ~docv:"TYPES" ~doc:"Convert to type") in
    88         let noindex = Arg.(value & flag & info ["noindex"] ~doc:"don't write an index when converting") in
     58        let directory = Arg.(value & pos 0 string "" & info [] ~docv:"target directory"
     59                ~doc:"Directory to convert") in
     60        let types = Arg.(value & opt string "all" & info ["t"; "type"] ~docv:"TYPES"
     61         ~doc:"Convert to type") in
     62        let noindex = Arg.(value & flag & info ["noindex"]
     63                ~doc:"don't create indices in target format") in
    8964        Term.(const convert_dir $ types $ noindex $ directory),
    90         Term.info "convert" ~doc:"convert archive" ~man:[ `S "DESCRIPTION"; `P "Convert Logarion archive" ]
     65        Term.info "convert" ~doc:"convert txts"
     66                ~man:[ `S "DESCRIPTION"; `P "Convert texts within a directory to another format.
     67                Directory must contain an index.pck. Run `txt index` first." ]
  • trunk/cli/dune

    r2 r3  
    11(executable
    2  (name cli)
     2 (name txt)
    33 (public_name txt)
    4  (modules cli convert html atom gemini)
    5  (libraries logarion logarion.http re.str cmdliner bos ptime ptime.clock.os text_parse.converter text_parse.parsers msgpck))
     4 (modules txt authors convert conversion file index last listing new topics html atom gemini pull)
     5 (libraries text_parse.converter text_parse.parsers logarion msgpck curl str cmdliner))
  • trunk/cli/gemini.ml

    r2 r3  
    1 let page _archive_title text =
     1let ext = ".gmi"
     2
     3module GeminiConverter = struct
     4        include Converter.Gemini
     5        let angled_uri u a = if String.sub u 0 10 <> "urn:txtid:" then
     6                angled_uri u a else angled_uri (String.(sub u 10 (length u - 10)) ^ ext) a
     7end
     8
     9let page _conversion text =
    210  let open Logarion.Text in
    311  "# " ^ text.title
    412  ^ "\nAuthors: " ^ Logarion.Person.Set.to_string text.authors
    513  ^ "\nDate: " ^ Logarion.Date.(pretty_date @@ listing text.date)
    6   ^ let module T = Parsers.Plain_text.Make (Converter.Gemini) in
     14  ^ let module T = Parsers.Plain_text.Make (GeminiConverter) in
    715  "\n" ^ T.of_string text.body ""
    816
     
    3139    "" meta_list
    3240
    33 let topic_link root topic =
    34  "=> index." ^ root ^ ".gmi " ^ String.capitalize_ascii topic ^ "\n"
     41let topic_link root topic =
     42  let replaced_space = String.map (function ' '->'+' | x->x) in
     43         "=> index." ^ replaced_space  root ^ ".gmi " ^ String.capitalize_ascii topic ^ "\n"
    3544
    3645let text_item path meta =
     
    7281  "# " ^ title ^ "\n\n"
    7382  ^ listing_index topic_map [topic_root] "" metas
     83
     84let indices r =
     85        let open Logarion in
     86        let file name = File_store.file (Filename.concat r.Conversion.dir name) in
     87        let index_name = try Store.KV.find "Gemini-index" r.kv with Not_found -> "index.gmi" in
     88        let title = try Store.KV.find "Title" r.Conversion.kv with Not_found -> "" in
     89
     90        if index_name <> "" then
     91                file index_name (topic_main_index title r.topic_roots r.texts);
     92
     93        file "index.date.gmi" (date_index title r.texts);
     94
     95        List.iter
     96                (fun topic -> file ("index." ^ topic ^ ".gmi")
     97                 (topic_sub_index title r.topics topic r.texts))
     98                r.topic_roots;
     99
     100        let base_url = try
     101                let _i = Str.(search_forward (regexp "gemini?://[^;]*") (Store.KV.find "Locations" r.kv) 0) in
     102                Str.(matched_string (Store.KV.find "Locations" r.kv))
     103                with Not_found -> prerr_endline "Missing location for Gemini"; "" in
     104        file "gmi.atom" (Atom.feed title r.id base_url "text/gemini" r.texts)
  • trunk/cli/html.ml

    r2 r3  
    1 let wrap (title:string) (subtitle:string) body =
    2   {|<!DOCTYPE HTML>|}
    3   ^ {|<html><head><title>|}
    4   ^ subtitle ^ " | " ^ title
    5   ^ {|</title><link rel="stylesheet" href="main.css">|}
    6   ^ {|<link rel="alternate" href="feed.atom" type="application/atom+xml">|}
    7   ^ {|<meta charset="utf-8"/>|}
    8   ^ {|<meta name="viewport" content="width=device-width, initial-scale=1.0">|}
    9   ^ {|</head><body><header><a href=".">|} ^ title
    10   ^ {|</a> <nav><a href="feed.atom" id="feed">feed</a></nav></header>|} ^ body
    11   ^ "</body></html>"
     1type templates_t = { header: string option; footer: string option }
     2type t = { templates : templates_t }
     3
     4let ext = ".htm"
     5let empty_templates = { header = None; footer = None }
     6let default_opts = { templates = empty_templates }
     7
     8let init kv =
     9        let open Logarion in
     10        let header = match  Store.KV.find "HTM-header" kv with
     11                | fname -> Some (File_store.to_string fname)
     12                | exception Not_found -> None in
     13        let footer = match  Store.KV.find "HTM-footer" kv with
     14                | fname -> Some (File_store.to_string fname)
     15                | exception Not_found -> None in
     16        { templates = { header; footer} }
     17
     18let wrap c htm text_title body =
     19        let site_title = try Logarion.Store.KV.find "Title" c.Conversion.kv
     20                with Not_found -> "" in
     21        let replace x = let open Str in
     22                global_replace (regexp "{{archive-title}}") site_title x
     23                |> global_replace (regexp "{{text-title}}") text_title
     24        in
     25        let header = match htm.templates.header with
     26        | Some x -> replace x
     27        | None -> "<header><a href='.'>" ^ site_title ^
     28                "</a><nav><a href='feed.atom' id='feed'>feed</a></nav></header>"
     29        in
     30        let footer = match htm.templates.footer with  None -> "" | Some x -> replace x in
     31  "<!DOCTYPE HTML><html><head><title>" ^ text_title ^ " • " ^ site_title ^ "</title>\n\
     32  <link rel='stylesheet' href='main.css'>\
     33  <link rel='alternate' href='feed.atom' type='application/atom+xml'>\
     34  <meta charset='utf-8'/><meta name='viewport' content='width=device-width, initial-scale=1.0'>\
     35  </head><body>\n" ^ header ^ body ^ footer ^ "</body></html>"
    1236
    1337let topic_link root topic =
    1438  let replaced_space = String.map (function ' '->'+' | x->x) in
    15   {|<a href="index.|} ^ root ^ {|.htm#|} ^ replaced_space topic ^ {|">|}
     39  "<a href='index." ^ replaced_space root ^ ".htm#" ^ replaced_space topic ^ "'>"
    1640  ^ String.capitalize_ascii topic ^ "</a>"
    1741
    18 let page archive_title text =
     42module HtmlConverter = struct
     43        include Converter.Html
     44        let angled_uri u a = if String.sub u 0 10 <> "urn:txtid:" then
     45                angled_uri u a else angled_uri (String.(sub u 10 (length u - 10)) ^ ext) a
     46end
     47
     48let page htm conversion text =
    1949  let open Logarion in
    2050  let open Text in
    21   let module T = Parsers.Plain_text.Make (Converter.Html) in
     51  let module T = Parsers.Plain_text.Make (HtmlConverter) in
    2252  let sep_append ?(sep=", ") a x = match a,x with "",_ -> x | _, "" -> a | _ -> a ^ sep ^ x in
    23   let opt_kv key value = if String.length value > 0 then "<dt>" ^ key ^ "<dd>" ^ value else "" in
     53  let opt_kv key value = if String.length value > 0
     54        then "<dt>" ^ key ^ "<dd>" ^ value else "" in
    2455(*  let author acc auth = sep_append acc Person.(auth.name ^ " ") in*)
    2556  let authors = (Person.Set.to_string text.authors ^ " ") in
     
    3970    ^ opt_kv "Topics: " (topic_links (set "topics" text))
    4071    ^ opt_kv "Keywords: " keywords
    41     ^ opt_kv "Id: " (Id.to_string text.uuid)
     72    ^ opt_kv "Id: " text.id
    4273    ^ {|</dl></header><pre style="white-space:pre-wrap">|} in
    43   wrap archive_title text.title ((T.of_string text.body header) ^ "</pre></article>")
     74  wrap conversion htm text.title ((T.of_string text.body header) ^ "</pre></article>")
    4475
    4576let to_dated_links ?(limit) meta_list =
     
    5889    "" meta_list
    5990
    60 let date_index ?(limit) title meta_list =
     91let date_index ?(limit) conv htm meta_list =
    6192  match limit with
    62   | Some limit -> wrap title "Index" (to_dated_links ~limit meta_list)
    63   | None -> wrap title "Index" (to_dated_links meta_list)
     93  | Some limit -> wrap conv htm "Index" (to_dated_links ~limit meta_list)
     94  | None -> wrap conv htm "Index" (to_dated_links meta_list)
    6495
    6596let fold_topic_roots topic_roots =
     
    113144  "<nav><h1>Texts</h1>" ^ item_group topic_roots ^ "</nav>"
    114145
    115 let topic_main_index title topic_roots metas =
    116   wrap title "Topics"
     146let topic_main_index conv htm topic_roots metas =
     147  wrap conv htm "Topics"
    117148    (fold_topic_roots topic_roots
    118149     ^ "<nav><h1>Latest</h1>" ^ to_dated_links ~limit:10 metas
    119150     ^ {|<a href="index.date.htm">More by date</a></nav>|} )
    120151
    121 let topic_sub_index title topic_map topic_root metas =
    122   wrap title topic_root
     152let topic_sub_index conv htm topic_map topic_root metas =
     153  wrap conv htm topic_root
    123154    (fold_topics topic_map [topic_root] metas
    124155(*     ^ {|<a href=".atom" id="feed">|}^ String.capitalize_ascii topic_root ^{| feed </a>|}*)
    125156     ^ listing_index topic_map [topic_root] "" metas)
     157
     158open Logarion
     159let indices htm c =
     160        let file name = Logarion.File_store.file (Filename.concat c.Conversion.dir name) in
     161        let index_name = try Store.KV.find "HTM-index" c.Conversion.kv with Not_found -> "index.html" in
     162        let title = try Store.KV.find "Title" c.Conversion.kv with Not_found -> "" in
     163
     164        if index_name <> "" then
     165                file index_name (topic_main_index c htm c.topic_roots c.texts);
     166
     167        file "index.date.htm" (date_index c htm c.texts);
     168
     169        List.iter
     170                (fun root -> file ("index." ^ root ^ ".htm") (topic_sub_index c htm c.topics root c.texts))
     171                c.topic_roots;
     172
     173        let base_url = try
     174                let _i = Str.(search_forward (regexp "https?://[^;]*") (Store.KV.find "Locations" c.kv) 0) in
     175                Str.(matched_string (Store.KV.find "Locations" c.kv))
     176                with Not_found -> prerr_endline "Missing location for HTTP(S)"; "" in
     177        file "feed.atom" (Atom.feed title c.id base_url "text/html" c.texts)
Note: See TracChangeset for help on using the changeset viewer.